/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
   Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "insn-flags.h"
35 #include "insn-attr.h"
42 #include "basic-block.h"
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT -1
#endif
49 /* Processor costs (relative to an add) */
50 struct processor_costs i386_cost = { /* 386 specific costs */
51 1, /* cost of an add instruction */
52 1, /* cost of a lea instruction */
53 3, /* variable shift costs */
54 2, /* constant shift costs */
55 6, /* cost of starting a multiply */
56 1, /* cost of multiply per each bit set */
57 23, /* cost of a divide/mod */
58 15, /* "large" insn */
60 4, /* cost for loading QImode using movzbl */
61 {2, 4, 2}, /* cost of loading integer registers
62 in QImode, HImode and SImode.
63 Relative to reg-reg move (2). */
64 {2, 4, 2}, /* cost of storing integer registers */
65 2, /* cost of reg,reg fld/fst */
66 {8, 8, 8}, /* cost of loading fp registers
67 in SFmode, DFmode and XFmode */
68 {8, 8, 8}, /* cost of loading integer registers */
69 2, /* cost of moving MMX register */
70 {4, 8}, /* cost of loading MMX registers
71 in SImode and DImode */
72 {4, 8}, /* cost of storing MMX registers
73 in SImode and DImode */
74 2, /* cost of moving SSE register */
75 {4, 8, 16}, /* cost of loading SSE registers
76 in SImode, DImode and TImode */
77 {4, 8, 16}, /* cost of storing SSE registers
78 in SImode, DImode and TImode */
79 3, /* MMX or SSE register to integer */
82 struct processor_costs i486_cost = { /* 486 specific costs */
83 1, /* cost of an add instruction */
84 1, /* cost of a lea instruction */
85 3, /* variable shift costs */
86 2, /* constant shift costs */
87 12, /* cost of starting a multiply */
88 1, /* cost of multiply per each bit set */
89 40, /* cost of a divide/mod */
90 15, /* "large" insn */
92 4, /* cost for loading QImode using movzbl */
93 {2, 4, 2}, /* cost of loading integer registers
94 in QImode, HImode and SImode.
95 Relative to reg-reg move (2). */
96 {2, 4, 2}, /* cost of storing integer registers */
97 2, /* cost of reg,reg fld/fst */
98 {8, 8, 8}, /* cost of loading fp registers
99 in SFmode, DFmode and XFmode */
100 {8, 8, 8}, /* cost of loading integer registers */
101 2, /* cost of moving MMX register */
102 {4, 8}, /* cost of loading MMX registers
103 in SImode and DImode */
104 {4, 8}, /* cost of storing MMX registers
105 in SImode and DImode */
106 2, /* cost of moving SSE register */
107 {4, 8, 16}, /* cost of loading SSE registers
108 in SImode, DImode and TImode */
109 {4, 8, 16}, /* cost of storing SSE registers
110 in SImode, DImode and TImode */
111 3 /* MMX or SSE register to integer */
114 struct processor_costs pentium_cost = {
115 1, /* cost of an add instruction */
116 1, /* cost of a lea instruction */
117 4, /* variable shift costs */
118 1, /* constant shift costs */
119 11, /* cost of starting a multiply */
120 0, /* cost of multiply per each bit set */
121 25, /* cost of a divide/mod */
122 8, /* "large" insn */
124 6, /* cost for loading QImode using movzbl */
125 {2, 4, 2}, /* cost of loading integer registers
126 in QImode, HImode and SImode.
127 Relative to reg-reg move (2). */
128 {2, 4, 2}, /* cost of storing integer registers */
129 2, /* cost of reg,reg fld/fst */
130 {2, 2, 6}, /* cost of loading fp registers
131 in SFmode, DFmode and XFmode */
132 {4, 4, 6}, /* cost of loading integer registers */
133 8, /* cost of moving MMX register */
134 {8, 8}, /* cost of loading MMX registers
135 in SImode and DImode */
136 {8, 8}, /* cost of storing MMX registers
137 in SImode and DImode */
138 2, /* cost of moving SSE register */
139 {4, 8, 16}, /* cost of loading SSE registers
140 in SImode, DImode and TImode */
141 {4, 8, 16}, /* cost of storing SSE registers
142 in SImode, DImode and TImode */
143 3 /* MMX or SSE register to integer */
146 struct processor_costs pentiumpro_cost = {
147 1, /* cost of an add instruction */
148 1, /* cost of a lea instruction */
149 1, /* variable shift costs */
150 1, /* constant shift costs */
151 4, /* cost of starting a multiply */
152 0, /* cost of multiply per each bit set */
153 17, /* cost of a divide/mod */
154 8, /* "large" insn */
156 2, /* cost for loading QImode using movzbl */
157 {4, 4, 4}, /* cost of loading integer registers
158 in QImode, HImode and SImode.
159 Relative to reg-reg move (2). */
160 {2, 2, 2}, /* cost of storing integer registers */
161 2, /* cost of reg,reg fld/fst */
162 {2, 2, 6}, /* cost of loading fp registers
163 in SFmode, DFmode and XFmode */
164 {4, 4, 6}, /* cost of loading integer registers */
165 2, /* cost of moving MMX register */
166 {2, 2}, /* cost of loading MMX registers
167 in SImode and DImode */
168 {2, 2}, /* cost of storing MMX registers
169 in SImode and DImode */
170 2, /* cost of moving SSE register */
171 {2, 2, 8}, /* cost of loading SSE registers
172 in SImode, DImode and TImode */
173 {2, 2, 8}, /* cost of storing SSE registers
174 in SImode, DImode and TImode */
175 3 /* MMX or SSE register to integer */
178 struct processor_costs k6_cost = {
179 1, /* cost of an add instruction */
180 2, /* cost of a lea instruction */
181 1, /* variable shift costs */
182 1, /* constant shift costs */
183 3, /* cost of starting a multiply */
184 0, /* cost of multiply per each bit set */
185 18, /* cost of a divide/mod */
186 8, /* "large" insn */
188 3, /* cost for loading QImode using movzbl */
189 {4, 5, 4}, /* cost of loading integer registers
190 in QImode, HImode and SImode.
191 Relative to reg-reg move (2). */
192 {2, 3, 2}, /* cost of storing integer registers */
193 4, /* cost of reg,reg fld/fst */
194 {6, 6, 6}, /* cost of loading fp registers
195 in SFmode, DFmode and XFmode */
196 {4, 4, 4}, /* cost of loading integer registers */
197 2, /* cost of moving MMX register */
198 {2, 2}, /* cost of loading MMX registers
199 in SImode and DImode */
200 {2, 2}, /* cost of storing MMX registers
201 in SImode and DImode */
202 2, /* cost of moving SSE register */
203 {2, 2, 8}, /* cost of loading SSE registers
204 in SImode, DImode and TImode */
205 {2, 2, 8}, /* cost of storing SSE registers
206 in SImode, DImode and TImode */
207 6 /* MMX or SSE register to integer */
210 struct processor_costs athlon_cost = {
211 1, /* cost of an add instruction */
212 2, /* cost of a lea instruction */
213 1, /* variable shift costs */
214 1, /* constant shift costs */
215 5, /* cost of starting a multiply */
216 0, /* cost of multiply per each bit set */
217 42, /* cost of a divide/mod */
218 8, /* "large" insn */
220 4, /* cost for loading QImode using movzbl */
221 {4, 5, 4}, /* cost of loading integer registers
222 in QImode, HImode and SImode.
223 Relative to reg-reg move (2). */
224 {2, 3, 2}, /* cost of storing integer registers */
225 4, /* cost of reg,reg fld/fst */
226 {6, 6, 20}, /* cost of loading fp registers
227 in SFmode, DFmode and XFmode */
228 {4, 4, 16}, /* cost of loading integer registers */
229 2, /* cost of moving MMX register */
230 {2, 2}, /* cost of loading MMX registers
231 in SImode and DImode */
232 {2, 2}, /* cost of storing MMX registers
233 in SImode and DImode */
234 2, /* cost of moving SSE register */
235 {2, 2, 8}, /* cost of loading SSE registers
236 in SImode, DImode and TImode */
237 {2, 2, 8}, /* cost of storing SSE registers
238 in SImode, DImode and TImode */
239 6 /* MMX or SSE register to integer */
242 struct processor_costs *ix86_cost = &pentium_cost;
244 /* Processor feature/optimization bitmasks. */
245 #define m_386 (1<<PROCESSOR_I386)
246 #define m_486 (1<<PROCESSOR_I486)
247 #define m_PENT (1<<PROCESSOR_PENTIUM)
248 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
249 #define m_K6 (1<<PROCESSOR_K6)
250 #define m_ATHLON (1<<PROCESSOR_ATHLON)
252 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
253 const int x86_push_memory = m_386 | m_K6 | m_ATHLON;
254 const int x86_zero_extend_with_and = m_486 | m_PENT;
255 const int x86_movx = m_ATHLON | m_PPRO /* m_386 | m_K6 */;
256 const int x86_double_with_add = ~m_386;
257 const int x86_use_bit_test = m_386;
258 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
259 const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
260 const int x86_use_any_reg = m_486;
261 const int x86_cmove = m_PPRO | m_ATHLON;
262 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON;
263 const int x86_use_sahf = m_PPRO | m_K6;
264 const int x86_partial_reg_stall = m_PPRO;
265 const int x86_use_loop = m_K6;
266 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
267 const int x86_use_mov0 = m_K6;
268 const int x86_use_cltd = ~(m_PENT | m_K6);
269 const int x86_read_modify_write = ~m_PENT;
270 const int x86_read_modify = ~(m_PENT | m_PPRO);
271 const int x86_split_long_moves = m_PPRO;
272 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
273 const int x86_single_stringop = m_386;
274 const int x86_qimode_math = ~(0);
275 const int x86_promote_qi_regs = 0;
276 const int x86_himode_math = ~(m_PPRO);
277 const int x86_promote_hi_regs = m_PPRO;
278 const int x86_sub_esp_4 = m_ATHLON | m_PPRO;
279 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486;
280 const int x86_add_esp_4 = m_ATHLON | m_K6;
281 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486;
282 const int x86_integer_DFmode_moves = ~m_ATHLON;
283 const int x86_partial_reg_dependency = m_ATHLON;
284 const int x86_memory_mismatch_stall = m_ATHLON;
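
/* A sketch of how these masks are consumed (the real macros live in
   i386.h, so the exact form shown here is an assumption): each tuning
   flag is tested against the CPU being scheduled for, roughly

     #define TARGET_USE_LEAVE (x86_use_leave & (1 << ix86_cpu))

   so a given optimization is enabled only for the processors whose
   m_* bits are set above.  */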
286 #define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
288 const char * const hi_reg_name[] = HI_REGISTER_NAMES;
289 const char * const qi_reg_name[] = QI_REGISTER_NAMES;
290 const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
292 /* Array of the smallest class containing reg number REGNO, indexed by
293 REGNO. Used by REGNO_REG_CLASS in i386.h. */
295 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
298 AREG, DREG, CREG, BREG,
300 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
302 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
303 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
306 /* flags, fpsr, dirflag, frame */
307 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
308 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
310 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
314 /* The "default" register map. */
316 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
318 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
319 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
320 -1, -1, -1, -1, /* arg, flags, fpsr, dir */
321 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
322 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
325 /* Define the register numbers to be used in Dwarf debugging information.
326 The SVR4 reference port C compiler uses the following register numbers
327 in its Dwarf output code:
328 0 for %eax (gcc regno = 0)
329 1 for %ecx (gcc regno = 2)
330 2 for %edx (gcc regno = 1)
331 3 for %ebx (gcc regno = 3)
332 4 for %esp (gcc regno = 7)
333 5 for %ebp (gcc regno = 6)
334 6 for %esi (gcc regno = 4)
335 7 for %edi (gcc regno = 5)
336 The following three DWARF register numbers are never generated by
337 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
338 believes these numbers have these meanings.
339 8 for %eip (no gcc equivalent)
340 9 for %eflags (gcc regno = 17)
341 10 for %trapno (no gcc equivalent)
342 It is not at all clear how we should number the FP stack registers
343 for the x86 architecture. If the version of SDB on x86/svr4 were
344 a bit less brain dead with respect to floating-point then we would
345 have a precedent to follow with respect to DWARF register numbers
346 for x86 FP registers, but the SDB on x86/svr4 is so completely
347 broken with respect to FP registers that it is hardly worth thinking
348 of it as something to strive for compatibility with.
349 The version of x86/svr4 SDB I have at the moment does (partially)
350 seem to believe that DWARF register number 11 is associated with
351 the x86 register %st(0), but that's about all. Higher DWARF
352 register numbers don't seem to be associated with anything in
353 particular, and even for DWARF regno 11, SDB only seems to under-
354 stand that it should say that a variable lives in %st(0) (when
355 asked via an `=' command) if we said it was in DWARF regno 11,
356 but SDB still prints garbage when asked for the value of the
357 variable in question (via a `/' command).
358 (Also note that the labels SDB prints for various FP stack regs
359 when doing an `x' command are all wrong.)
360 Note that these problems generally don't affect the native SVR4
361 C compiler because it doesn't allow the use of -O with -g and
362 because when it is *not* optimizing, it allocates a memory
363 location for each floating-point variable, and the memory
364 location is what gets described in the DWARF AT_location
365 attribute for the variable in question.
366 Regardless of the severe mental illness of the x86/svr4 SDB, we
367 do something sensible here and we use the following DWARF
368 register numbers. Note that these are all stack-top-relative
370 11 for %st(0) (gcc regno = 8)
371 12 for %st(1) (gcc regno = 9)
372 13 for %st(2) (gcc regno = 10)
373 14 for %st(3) (gcc regno = 11)
374 15 for %st(4) (gcc regno = 12)
375 16 for %st(5) (gcc regno = 13)
376 17 for %st(6) (gcc regno = 14)
377 18 for %st(7) (gcc regno = 15)
379 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
381 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
382 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
383 -1, 9, -1, -1, /* arg, flags, fpsr, dir */
384 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
385 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
388 /* Test and compare insns in i386.md store the information needed to
389 generate branch and scc insns here. */
391 struct rtx_def *ix86_compare_op0 = NULL_RTX;
392 struct rtx_def *ix86_compare_op1 = NULL_RTX;
394 #define MAX_386_STACK_LOCALS 2
396 /* Define the structure for the machine field in struct function. */
397 struct machine_function
399 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
400 int accesses_prev_frame;
403 #define ix86_stack_locals (cfun->machine->stack_locals)
405 /* Structure describing stack frame layout.
406 Stack grows downward:
412 saved frame pointer if frame_pointer_needed
413 <- HARD_FRAME_POINTER
419 > to_allocate <- FRAME_POINTER
430 int outgoing_arguments_size;
432 HOST_WIDE_INT to_allocate;
433 /* The offsets relative to ARG_POINTER. */
434 HOST_WIDE_INT frame_pointer_offset;
435 HOST_WIDE_INT hard_frame_pointer_offset;
436 HOST_WIDE_INT stack_pointer_offset;
/* Which CPU are we scheduling for.  */
440 enum processor_type ix86_cpu;
442 /* which instruction set architecture to use. */
445 /* Strings to hold which cpu and instruction set architecture to use. */
446 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
447 const char *ix86_arch_string; /* for -march=<xxx> */
449 /* # of registers to use to pass arguments. */
450 const char *ix86_regparm_string;
452 /* ix86_regparm_string as a number */
455 /* Alignment to use for loops and jumps: */
457 /* Power of two alignment for loops. */
458 const char *ix86_align_loops_string;
460 /* Power of two alignment for non-loop jumps. */
461 const char *ix86_align_jumps_string;
463 /* Power of two alignment for stack boundary in bytes. */
464 const char *ix86_preferred_stack_boundary_string;
466 /* Preferred alignment for stack boundary in bits. */
467 int ix86_preferred_stack_boundary;
469 /* Values 1-5: see jump.c */
470 int ix86_branch_cost;
471 const char *ix86_branch_cost_string;
473 /* Power of two alignment for functions. */
474 int ix86_align_funcs;
475 const char *ix86_align_funcs_string;
477 /* Power of two alignment for loops. */
478 int ix86_align_loops;
480 /* Power of two alignment for non-loop jumps. */
481 int ix86_align_jumps;
483 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
484 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
486 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
487 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
489 static rtx gen_push PARAMS ((rtx));
490 static int memory_address_length PARAMS ((rtx addr));
491 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
492 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
493 static int ix86_safe_length PARAMS ((rtx));
494 static enum attr_memory ix86_safe_memory PARAMS ((rtx));
495 static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
496 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
497 static void ix86_dump_ppro_packet PARAMS ((FILE *));
498 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
499 static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
501 static void ix86_init_machine_status PARAMS ((struct function *));
502 static void ix86_mark_machine_status PARAMS ((struct function *));
503 static void ix86_free_machine_status PARAMS ((struct function *));
504 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
505 static int ix86_safe_length_prefix PARAMS ((rtx));
506 static int ix86_nsaved_regs PARAMS((void));
507 static void ix86_emit_save_regs PARAMS((void));
508 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int));
509 static void ix86_emit_epilogue_esp_adjustment PARAMS((int));
510 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
511 static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
512 static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
513 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
517 rtx base, index, disp;
521 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
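
/* As an illustration (not taken from this file): an address such as
   12(%ebx,%ecx,4), i.e. the RTL
     (plus (reg %ebx) (plus (mult (reg %ecx) (const_int 4)) (const_int 12)))
   would decompose into base = %ebx, index = %ecx, a scale factor of 4
   and disp = 12.  */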
523 struct builtin_description;
524 static rtx ix86_expand_sse_comi PARAMS ((struct builtin_description *, tree,
526 static rtx ix86_expand_sse_compare PARAMS ((struct builtin_description *, tree,
528 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
529 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
530 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
531 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
532 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
533 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
534 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
538 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
540 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
541 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
542 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
543 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
544 static int ix86_save_reg PARAMS ((int));
545 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */
560 /* Comes from final.c -- no real reason to change it. */
561 #define MAX_CODE_ALIGN 16
565 struct processor_costs *cost; /* Processor costs */
566 int target_enable; /* Target flags to enable. */
567 int target_disable; /* Target flags to disable. */
568 int align_loop; /* Default alignments. */
573 const processor_target_table[PROCESSOR_max] =
575 {&i386_cost, 0, 0, 2, 2, 2, 1},
576 {&i486_cost, 0, 0, 4, 4, 4, 1},
577 {&pentium_cost, 0, 0, -4, -4, -4, 1},
578 {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
579 {&k6_cost, 0, 0, -5, -5, 4, 1},
580 {&athlon_cost, 0, 0, 4, -4, 4, 1}
585 const char *name; /* processor name or nickname. */
586 enum processor_type processor;
588 const processor_alias_table[] =
590 {"i386", PROCESSOR_I386},
591 {"i486", PROCESSOR_I486},
592 {"i586", PROCESSOR_PENTIUM},
593 {"pentium", PROCESSOR_PENTIUM},
594 {"i686", PROCESSOR_PENTIUMPRO},
595 {"pentiumpro", PROCESSOR_PENTIUMPRO},
596 {"k6", PROCESSOR_K6},
597 {"athlon", PROCESSOR_ATHLON},
600 int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
602 #ifdef SUBTARGET_OVERRIDE_OPTIONS
603 SUBTARGET_OVERRIDE_OPTIONS;
606 ix86_arch = PROCESSOR_I386;
607 ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
609 if (ix86_arch_string != 0)
611 for (i = 0; i < pta_size; i++)
612 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
614 ix86_arch = processor_alias_table[i].processor;
615 /* Default cpu tuning to the architecture. */
616 ix86_cpu = ix86_arch;
621 error ("bad value (%s) for -march= switch", ix86_arch_string);
624 if (ix86_cpu_string != 0)
626 for (i = 0; i < pta_size; i++)
627 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
629 ix86_cpu = processor_alias_table[i].processor;
633 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
636 ix86_cost = processor_target_table[ix86_cpu].cost;
637 target_flags |= processor_target_table[ix86_cpu].target_enable;
638 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
640 /* Arrange to set up i386_stack_locals for all functions. */
641 init_machine_status = ix86_init_machine_status;
642 mark_machine_status = ix86_mark_machine_status;
643 free_machine_status = ix86_free_machine_status;
645 /* Validate -mregparm= value. */
646 if (ix86_regparm_string)
648 i = atoi (ix86_regparm_string);
649 if (i < 0 || i > REGPARM_MAX)
650 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
655 /* Validate -malign-loops= value, or provide default. */
656 ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
657 if (ix86_align_loops_string)
659 i = atoi (ix86_align_loops_string);
660 if (i < 0 || i > MAX_CODE_ALIGN)
661 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
663 ix86_align_loops = i;
666 /* Validate -malign-jumps= value, or provide default. */
667 ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
668 if (ix86_align_jumps_string)
670 i = atoi (ix86_align_jumps_string);
671 if (i < 0 || i > MAX_CODE_ALIGN)
672 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
674 ix86_align_jumps = i;
677 /* Validate -malign-functions= value, or provide default. */
678 ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
679 if (ix86_align_funcs_string)
681 i = atoi (ix86_align_funcs_string);
682 if (i < 0 || i > MAX_CODE_ALIGN)
683 error ("-malign-functions=%d is not between 0 and %d",
686 ix86_align_funcs = i;
689 /* Validate -mpreferred-stack-boundary= value, or provide default.
690 The default of 128 bits is for Pentium III's SSE __m128. */
691 ix86_preferred_stack_boundary = 128;
692 if (ix86_preferred_stack_boundary_string)
694 i = atoi (ix86_preferred_stack_boundary_string);
696 error ("-mpreferred-stack-boundary=%d is not between 2 and 31", i);
698 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
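      /* Worked example of the conversion above: -mpreferred-stack-boundary=4
	 gives (1 << 4) * 8 = 128 bits, i.e. the 16-byte alignment that the
	 SSE __m128 default mentioned above corresponds to.  */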
701 /* Validate -mbranch-cost= value, or provide default. */
702 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
703 if (ix86_branch_cost_string)
705 i = atoi (ix86_branch_cost_string);
707 error ("-mbranch-cost=%d is not between 0 and 5", i);
709 ix86_branch_cost = i;
712 /* Keep nonleaf frame pointers. */
713 if (TARGET_OMIT_LEAF_FRAME_POINTER)
714 flag_omit_frame_pointer = 1;
716 /* If we're doing fast math, we don't care about comparison order
717 wrt NaNs. This lets us use a shorter comparison sequence. */
719 target_flags &= ~MASK_IEEE_FP;
  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
     on.  */
724 target_flags |= MASK_MMX;
728 optimization_options (level, size)
730 int size ATTRIBUTE_UNUSED;
732 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
733 make the problem with not enough registers even worse. */
734 #ifdef INSN_SCHEDULING
736 flag_schedule_insns = 0;
/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
   attribute for DECL.  The attributes in ATTRIBUTES have previously been
   assigned to DECL.  */
745 ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
746 tree decl ATTRIBUTE_UNUSED;
747 tree attributes ATTRIBUTE_UNUSED;
748 tree identifier ATTRIBUTE_UNUSED;
749 tree args ATTRIBUTE_UNUSED;
/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
   attribute for TYPE.  The attributes in ATTRIBUTES have previously been
   assigned to TYPE.  */
759 ix86_valid_type_attribute_p (type, attributes, identifier, args)
761 tree attributes ATTRIBUTE_UNUSED;
765 if (TREE_CODE (type) != FUNCTION_TYPE
766 && TREE_CODE (type) != METHOD_TYPE
767 && TREE_CODE (type) != FIELD_DECL
768 && TREE_CODE (type) != TYPE_DECL)
771 /* Stdcall attribute says callee is responsible for popping arguments
772 if they are not variable. */
773 if (is_attribute_p ("stdcall", identifier))
774 return (args == NULL_TREE);
776 /* Cdecl attribute says the callee is a normal C declaration. */
777 if (is_attribute_p ("cdecl", identifier))
778 return (args == NULL_TREE);
780 /* Regparm attribute specifies how many integer arguments are to be
781 passed in registers. */
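  /* For example (user code, not part of this file):
       void f (int, int, int) __attribute__ ((regparm (3)));
     requests that the first three integer arguments be passed in
     registers (%eax, %edx, %ecx) rather than on the stack; the value
     is checked against REGPARM_MAX below.  */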
782 if (is_attribute_p ("regparm", identifier))
786 if (! args || TREE_CODE (args) != TREE_LIST
787 || TREE_CHAIN (args) != NULL_TREE
788 || TREE_VALUE (args) == NULL_TREE)
791 cst = TREE_VALUE (args);
792 if (TREE_CODE (cst) != INTEGER_CST)
795 if (compare_tree_int (cst, REGPARM_MAX) > 0)
804 /* Return 0 if the attributes for two types are incompatible, 1 if they
805 are compatible, and 2 if they are nearly compatible (which causes a
806 warning to be generated). */
809 ix86_comp_type_attributes (type1, type2)
813 /* Check for mismatch of non-default calling convention. */
814 const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
816 if (TREE_CODE (type1) != FUNCTION_TYPE)
819 /* Check for mismatched return types (cdecl vs stdcall). */
820 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
821 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
826 /* Value is the number of bytes of arguments automatically
827 popped when returning from a subroutine call.
828 FUNDECL is the declaration node of the function (as a tree),
829 FUNTYPE is the data type of the function (as a tree),
830 or for a library call it is an identifier node for the subroutine name.
831 SIZE is the number of bytes of arguments passed on the stack.
833 On the 80386, the RTD insn may be used to pop them if the number
834 of args is fixed, but if the number is variable then the caller
835 must pop them all. RTD can't be used for library calls now
836 because the library is compiled with the Unix compiler.
837 Use of RTD is a selectable option, since it is incompatible with
838 standard Unix calling sequences. If the option is not selected,
839 the caller must always pop the args.
841 The attribute stdcall is equivalent to RTD on a per module basis. */
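
/* A minimal illustration (hypothetical user code): for
     int f (int a, int b) __attribute__ ((stdcall));
   a call site pushes 8 bytes of arguments; this function then returns 8,
   so the callee pops them with `ret $8' instead of the caller adjusting
   %esp after the call.  */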
844 ix86_return_pops_args (fundecl, funtype, size)
849 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
851 /* Cdecl functions override -mrtd, and never pop the stack. */
852 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
854 /* Stdcall functions will pop the stack if not variable args. */
855 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
859 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
860 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
865 /* Lose any fake structure return argument. */
866 if (aggregate_value_p (TREE_TYPE (funtype)))
867 return GET_MODE_SIZE (Pmode);
872 /* Argument support functions. */
874 /* Initialize a variable CUM of type CUMULATIVE_ARGS
875 for a call to a function whose data type is FNTYPE.
876 For a library call, FNTYPE is 0. */
879 init_cumulative_args (cum, fntype, libname)
880 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
881 tree fntype; /* tree ptr for function decl */
882 rtx libname; /* SYMBOL_REF of library name or 0 */
884 static CUMULATIVE_ARGS zero_cum;
885 tree param, next_param;
887 if (TARGET_DEBUG_ARG)
889 fprintf (stderr, "\ninit_cumulative_args (");
891 fprintf (stderr, "fntype code = %s, ret code = %s",
892 tree_code_name[(int) TREE_CODE (fntype)],
893 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
895 fprintf (stderr, "no fntype");
898 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
903 /* Set up the number of registers to use for passing arguments. */
904 cum->nregs = ix86_regparm;
907 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
910 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_node' if there
     are no variable arguments.  If there are variable arguments,
     then we won't pass anything in registers.  */
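  /* For instance, `int f (int, char *)' ends its TYPE_ARG_TYPES list
     with void_type_node, so register passing stays enabled, while
     `int f (int, ...)' does not, which the loop below detects so that
     such arguments go on the stack.  */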
920 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
921 param != 0; param = next_param)
923 next_param = TREE_CHAIN (param);
924 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
929 if (TARGET_DEBUG_ARG)
930 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
935 /* Update the data in CUM to advance over an argument
936 of mode MODE and data type TYPE.
937 (TYPE is null for libcalls where that information may not be available.) */
940 function_arg_advance (cum, mode, type, named)
941 CUMULATIVE_ARGS *cum; /* current arg information */
942 enum machine_mode mode; /* current arg mode */
943 tree type; /* type of the argument or 0 if lib support */
944 int named; /* whether or not the argument was named */
947 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
948 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
950 if (TARGET_DEBUG_ARG)
952 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
953 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
954 if (TARGET_SSE && mode == TImode)
956 cum->sse_words += words;
959 if (cum->sse_nregs <= 0)
980 /* Define where to put the arguments to a function.
981 Value is zero to push the argument on the stack,
982 or a hard register in which to store the argument.
984 MODE is the argument's machine mode.
985 TYPE is the data type of the argument (as a tree).
986 This is null for libcalls where that information may
988 CUM is a variable of type CUMULATIVE_ARGS which gives info about
989 the preceding args and about the function being called.
990 NAMED is nonzero if this argument is a named parameter
991 (otherwise it is an extra parameter matching an ellipsis). */
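
/* For example, with -mregparm=2 the first two word-sized named
   arguments come back as (reg:SI 0) and (reg:SI 1), i.e. %eax and %edx,
   and later arguments return 0 so they are pushed on the stack
   (an illustration of the contract, not an exhaustive description).  */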
994 function_arg (cum, mode, type, named)
995 CUMULATIVE_ARGS *cum; /* current arg information */
996 enum machine_mode mode; /* current arg mode */
997 tree type; /* type of the argument or 0 if lib support */
998 int named; /* != 0 for normal args, == 0 for ... args */
1002 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1003 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1007 /* For now, pass fp/complex values on the stack. */
1016 if (words <= cum->nregs)
1017 ret = gen_rtx_REG (mode, cum->regno);
1021 ret = gen_rtx_REG (mode, cum->sse_regno);
1025 if (TARGET_DEBUG_ARG)
1028 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
1029 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
1032 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
1034 fprintf (stderr, ", stack");
1036 fprintf (stderr, " )\n");
1043 /* Return nonzero if OP is (const_int 1), else return zero. */
1046 const_int_1_operand (op, mode)
1048 enum machine_mode mode ATTRIBUTE_UNUSED;
1050 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
1053 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
1054 reference and a constant. */
1057 symbolic_operand (op, mode)
1059 enum machine_mode mode ATTRIBUTE_UNUSED;
1061 switch (GET_CODE (op))
1069 if (GET_CODE (op) == SYMBOL_REF
1070 || GET_CODE (op) == LABEL_REF
1071 || (GET_CODE (op) == UNSPEC
1072 && XINT (op, 1) >= 6
1073 && XINT (op, 1) <= 7))
1075 if (GET_CODE (op) != PLUS
1076 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1080 if (GET_CODE (op) == SYMBOL_REF
1081 || GET_CODE (op) == LABEL_REF)
1083 /* Only @GOTOFF gets offsets. */
1084 if (GET_CODE (op) != UNSPEC
1085 || XINT (op, 1) != 7)
1088 op = XVECEXP (op, 0, 0);
1089 if (GET_CODE (op) == SYMBOL_REF
1090 || GET_CODE (op) == LABEL_REF)
1099 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
1102 pic_symbolic_operand (op, mode)
1104 enum machine_mode mode ATTRIBUTE_UNUSED;
1106 if (GET_CODE (op) == CONST)
1109 if (GET_CODE (op) == UNSPEC)
1111 if (GET_CODE (op) != PLUS
1112 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1115 if (GET_CODE (op) == UNSPEC)
1121 /* Test for a valid operand for a call instruction. Don't allow the
1122 arg pointer register or virtual regs since they may decay into
1123 reg + const, which the patterns can't handle. */
1126 call_insn_operand (op, mode)
1128 enum machine_mode mode ATTRIBUTE_UNUSED;
1130 /* Disallow indirect through a virtual register. This leads to
1131 compiler aborts when trying to eliminate them. */
1132 if (GET_CODE (op) == REG
1133 && (op == arg_pointer_rtx
1134 || op == frame_pointer_rtx
1135 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
1136 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
1139 /* Disallow `call 1234'. Due to varying assembler lameness this
1140 gets either rejected or translated to `call .+1234'. */
1141 if (GET_CODE (op) == CONST_INT)
1144 /* Explicitly allow SYMBOL_REF even if pic. */
1145 if (GET_CODE (op) == SYMBOL_REF)
  /* Half-pic doesn't allow anything but registers and constants.
     We've just taken care of the latter.  */
1151 return register_operand (op, Pmode);
1153 /* Otherwise we can allow any general_operand in the address. */
1154 return general_operand (op, Pmode);
1158 constant_call_address_operand (op, mode)
1160 enum machine_mode mode ATTRIBUTE_UNUSED;
1162 if (GET_CODE (op) == CONST
1163 && GET_CODE (XEXP (op, 0)) == PLUS
1164 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1165 op = XEXP (XEXP (op, 0), 0);
1166 return GET_CODE (op) == SYMBOL_REF;
1169 /* Match exactly zero and one. */
1172 const0_operand (op, mode)
1174 enum machine_mode mode;
1176 return op == CONST0_RTX (mode);
1180 const1_operand (op, mode)
1182 enum machine_mode mode ATTRIBUTE_UNUSED;
1184 return op == const1_rtx;
1187 /* Match 2, 4, or 8. Used for leal multiplicands. */
1190 const248_operand (op, mode)
1192 enum machine_mode mode ATTRIBUTE_UNUSED;
1194 return (GET_CODE (op) == CONST_INT
1195 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
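
/* These are the index scale factors the i386 addressing mode (the SIB
   byte) can encode besides 1; e.g. `leal (%ebx,%ecx,4), %eax' computes
   %ebx + %ecx*4 in a single instruction.  */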
/* True if this is a constant appropriate for an increment or decrement.  */
1201 incdec_operand (op, mode)
1203 enum machine_mode mode;
1205 if (op == const1_rtx || op == constm1_rtx)
1207 if (GET_CODE (op) != CONST_INT)
1209 if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
1211 if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
1213 if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
/* Return false if this is the stack pointer, or any other fake
   register eliminable to the stack pointer.  Otherwise, this is
   a register operand.

   This is used to prevent esp from being used as an index reg,
   which would only happen in pathological cases.  */
1226 reg_no_sp_operand (op, mode)
1228 enum machine_mode mode;
1231 if (GET_CODE (t) == SUBREG)
1233 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
1236 return register_operand (op, mode);
1240 mmx_reg_operand (op, mode)
1242 enum machine_mode mode ATTRIBUTE_UNUSED;
1244 return MMX_REG_P (op);
/* Return false if this is any eliminable register.  Otherwise
   general_operand.  */
1251 general_no_elim_operand (op, mode)
1253 enum machine_mode mode;
1256 if (GET_CODE (t) == SUBREG)
1258 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1259 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1260 || t == virtual_stack_dynamic_rtx)
1263 return general_operand (op, mode);
1266 /* Return false if this is any eliminable register. Otherwise
1267 register_operand or const_int. */
1270 nonmemory_no_elim_operand (op, mode)
1272 enum machine_mode mode;
1275 if (GET_CODE (t) == SUBREG)
1277 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1278 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1279 || t == virtual_stack_dynamic_rtx)
1282 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
1285 /* Return true if op is a Q_REGS class register. */
1288 q_regs_operand (op, mode)
1290 enum machine_mode mode;
1292 if (mode != VOIDmode && GET_MODE (op) != mode)
1294 if (GET_CODE (op) == SUBREG)
1295 op = SUBREG_REG (op);
1296 return QI_REG_P (op);
1299 /* Return true if op is a NON_Q_REGS class register. */
1302 non_q_regs_operand (op, mode)
1304 enum machine_mode mode;
1306 if (mode != VOIDmode && GET_MODE (op) != mode)
1308 if (GET_CODE (op) == SUBREG)
1309 op = SUBREG_REG (op);
1310 return NON_QI_REG_P (op);
/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
   insns.  */
1316 sse_comparison_operator (op, mode)
1318 enum machine_mode mode ATTRIBUTE_UNUSED;
1320 enum rtx_code code = GET_CODE (op);
1321 return code == EQ || code == LT || code == LE || code == UNORDERED;
1323 /* Return 1 if OP is a valid comparison operator in valid mode. */
1325 ix86_comparison_operator (op, mode)
1327 enum machine_mode mode;
1329 enum machine_mode inmode;
1330 enum rtx_code code = GET_CODE (op);
1331 if (mode != VOIDmode && GET_MODE (op) != mode)
1333 if (GET_RTX_CLASS (code) != '<')
1335 inmode = GET_MODE (XEXP (op, 0));
1337 if (inmode == CCFPmode || inmode == CCFPUmode)
1339 enum rtx_code second_code, bypass_code;
1340 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
1341 return (bypass_code == NIL && second_code == NIL);
1348 if (inmode == CCmode || inmode == CCGCmode
1349 || inmode == CCGOCmode || inmode == CCNOmode)
1352 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
1353 if (inmode == CCmode)
1357 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
1365 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
1368 fcmov_comparison_operator (op, mode)
1370 enum machine_mode mode;
1372 enum machine_mode inmode;
1373 enum rtx_code code = GET_CODE (op);
1374 if (mode != VOIDmode && GET_MODE (op) != mode)
1376 if (GET_RTX_CLASS (code) != '<')
1378 inmode = GET_MODE (XEXP (op, 0));
1379 if (inmode == CCFPmode || inmode == CCFPUmode)
1381 enum rtx_code second_code, bypass_code;
1382 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
1383 if (bypass_code != NIL || second_code != NIL)
1385 code = ix86_fp_compare_code_to_integer (code);
  /* The i387 supports only a limited set of condition codes.  */
1390 case LTU: case GTU: case LEU: case GEU:
1391 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
1394 case ORDERED: case UNORDERED:
1402 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
1405 promotable_binary_operator (op, mode)
1407 enum machine_mode mode ATTRIBUTE_UNUSED;
1409 switch (GET_CODE (op))
      /* Modern CPUs have the same latency for HImode and SImode multiply,
	 but the 386 and 486 do HImode multiply faster.  */
1414 return ix86_cpu > PROCESSOR_I486;
/* Nearly general operand, but accept any const_double, since we wish
   to be able to drop them into memory rather than have them get pulled
   into registers.  */
1431 cmp_fp_expander_operand (op, mode)
1433 enum machine_mode mode;
1435 if (mode != VOIDmode && mode != GET_MODE (op))
1437 if (GET_CODE (op) == CONST_DOUBLE)
1439 return general_operand (op, mode);
1442 /* Match an SI or HImode register for a zero_extract. */
1445 ext_register_operand (op, mode)
1447 enum machine_mode mode ATTRIBUTE_UNUSED;
1449 if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
1451 return register_operand (op, VOIDmode);
1454 /* Return 1 if this is a valid binary floating-point operation.
1455 OP is the expression matched, and MODE is its mode. */
1458 binary_fp_operator (op, mode)
1460 enum machine_mode mode;
1462 if (mode != VOIDmode && mode != GET_MODE (op))
1465 switch (GET_CODE (op))
1471 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
1479 mult_operator(op, mode)
1481 enum machine_mode mode ATTRIBUTE_UNUSED;
1483 return GET_CODE (op) == MULT;
1487 div_operator(op, mode)
1489 enum machine_mode mode ATTRIBUTE_UNUSED;
1491 return GET_CODE (op) == DIV;
1495 arith_or_logical_operator (op, mode)
1497 enum machine_mode mode;
1499 return ((mode == VOIDmode || GET_MODE (op) == mode)
1500 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
1501 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
1504 /* Returns 1 if OP is memory operand with a displacement. */
1507 memory_displacement_operand (op, mode)
1509 enum machine_mode mode;
1511 struct ix86_address parts;
1513 if (! memory_operand (op, mode))
1516 if (! ix86_decompose_address (XEXP (op, 0), &parts))
1519 return parts.disp != NULL_RTX;
1522 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
1523 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
1525 ??? It seems likely that this will only work because cmpsi is an
1526 expander, and no actual insns use this. */
1529 cmpsi_operand (op, mode)
1531 enum machine_mode mode;
1533 if (general_operand (op, mode))
1536 if (GET_CODE (op) == AND
1537 && GET_MODE (op) == SImode
1538 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
1539 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
1540 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
1541 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
1542 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
1543 && GET_CODE (XEXP (op, 1)) == CONST_INT)
/* Returns 1 if OP is a memory operand that cannot be represented by the
   modRM array.  */
1553 long_memory_operand (op, mode)
1555 enum machine_mode mode;
1557 if (! memory_operand (op, mode))
1560 return memory_address_length (op) != 0;
1563 /* Return nonzero if the rtx is known aligned. */
1566 aligned_operand (op, mode)
1568 enum machine_mode mode;
1570 struct ix86_address parts;
1572 if (!general_operand (op, mode))
1575 /* Registers and immediate operands are always "aligned". */
1576 if (GET_CODE (op) != MEM)
1579 /* Don't even try to do any aligned optimizations with volatiles. */
1580 if (MEM_VOLATILE_P (op))
1585 /* Pushes and pops are only valid on the stack pointer. */
1586 if (GET_CODE (op) == PRE_DEC
1587 || GET_CODE (op) == POST_INC)
1590 /* Decode the address. */
1591 if (! ix86_decompose_address (op, &parts))
1594 /* Look for some component that isn't known to be aligned. */
1598 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
1603 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
1608 if (GET_CODE (parts.disp) != CONST_INT
1609 || (INTVAL (parts.disp) & 3) != 0)
1613 /* Didn't find one -- this must be an aligned address. */
/* Return true if the constant is something that can be loaded with
   a special instruction.  Only handle 0.0 and 1.0; others are less
   worthwhile.  */
1622 standard_80387_constant_p (x)
1625 if (GET_CODE (x) != CONST_DOUBLE)
1628 #if ! defined (REAL_IS_NOT_DOUBLE) || defined (REAL_ARITHMETIC)
1634 if (setjmp (handler))
1637 set_float_handler (handler);
1638 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
1639 is0 = REAL_VALUES_EQUAL (d, dconst0) && !REAL_VALUE_MINUS_ZERO (d);
1640 is1 = REAL_VALUES_EQUAL (d, dconst1);
1641 set_float_handler (NULL_PTR);
1649 /* Note that on the 80387, other constants, such as pi,
1650 are much slower to load as standard constants
1651 than to load from doubles in memory! */
1652 /* ??? Not true on K6: all constants are equal cost. */
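  /* The special instructions in question are fldz and fld1, which push
     0.0 and 1.0 onto the FP stack without touching memory.  */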
/* Returns 1 if OP contains a symbol reference.  */
1662 symbolic_reference_mentioned_p (op)
1665 register const char *fmt;
1668 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
1671 fmt = GET_RTX_FORMAT (GET_CODE (op));
1672 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
1678 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
1679 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
1683 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
1690 /* Return 1 if it is appropriate to emit `ret' instructions in the
1691 body of a function. Do this only if the epilogue is simple, needing a
1692 couple of insns. Prior to reloading, we can't tell how many registers
1693 must be saved, so return 0 then. Return 0 if there is no frame
1694 marker to de-allocate.
   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */
1703 ix86_can_use_return_insn_p ()
1705 struct ix86_frame frame;
1707 #ifdef NON_SAVING_SETJMP
1708 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
1711 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
1712 if (profile_block_flag == 2)
1716 if (! reload_completed || frame_pointer_needed)
  /* Don't allow more than 32K bytes of args to be popped, since that's
     all we can do with one instruction.  */
1721 if (current_function_pops_args
1722 && current_function_args_size >= 32768)
1725 ix86_compute_frame_layout (&frame);
1726 return frame.to_allocate == 0 && frame.nregs == 0;
1729 /* Value should be nonzero if functions must have frame pointers.
1730 Zero means the frame pointer need not be set up (and parms may
1731 be accessed via the stack pointer) in functions that seem suitable. */
1734 ix86_frame_pointer_required ()
1736 /* If we accessed previous frames, then the generated code expects
1737 to be able to access the saved ebp value in our frame. */
1738 if (cfun->machine->accesses_prev_frame)
1741 /* Several x86 os'es need a frame pointer for other reasons,
1742 usually pertaining to setjmp. */
1743 if (SUBTARGET_FRAME_POINTER_REQUIRED)
1746 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
1747 the frame pointer by default. Turn it back on now if we've not
1748 got a leaf function. */
1749 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
1755 /* Record that the current function accesses previous call frames. */
1758 ix86_setup_frame_addresses ()
1760 cfun->machine->accesses_prev_frame = 1;
1763 static char pic_label_name[32];
1765 /* This function generates code for -fpic that loads %ebx with
1766 the return address of the caller and then returns. */
1769 ix86_asm_file_end (file)
1774 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
1777 /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related
1778 to updating relocations to a section being discarded such that this
1779 doesn't work. Ought to detect this at configure time. */
1780 #if 0 && defined (ASM_OUTPUT_SECTION_NAME)
1781 /* The trick here is to create a linkonce section containing the
1782 pic label thunk, but to refer to it with an internal label.
1783 Because the label is internal, we don't have inter-dso name
1784 binding issues on hosts that don't support ".hidden".
1786 In order to use these macros, however, we must create a fake
1789 tree decl = build_decl (FUNCTION_DECL,
1790 get_identifier ("i686.get_pc_thunk"),
1792 DECL_ONE_ONLY (decl) = 1;
1793 UNIQUE_SECTION (decl, 0);
1794 named_section (decl, NULL, 0);
  /* This used to call ASM_DECLARE_FUNCTION_NAME(), but since it's an
     internal (non-global) label that's being emitted, it didn't make
     sense to have .type information for local labels.  This caused
     the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
     me debug info for a label that you're declaring non-global?), so
     it was changed to call ASM_OUTPUT_LABEL() instead.  */
1807 ASM_OUTPUT_LABEL (file, pic_label_name);
1809 xops[0] = pic_offset_table_rtx;
1810 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
1811 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
1812 output_asm_insn ("ret", xops);
1816 load_pic_register ()
1820 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
1822 if (TARGET_DEEP_BRANCH_PREDICTION)
1824 if (! pic_label_name[0])
1825 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
1826 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
1830 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
1833 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
1835 if (! TARGET_DEEP_BRANCH_PREDICTION)
1836 emit_insn (gen_popsi1 (pic_offset_table_rtx));
1838 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
1841 /* Generate an SImode "push" pattern for input ARG. */
1847 return gen_rtx_SET (VOIDmode,
1848 gen_rtx_MEM (SImode,
1849 gen_rtx_PRE_DEC (SImode,
1850 stack_pointer_rtx)),
1854 /* Return 1 if we need to save REGNO. */
1856 ix86_save_reg (regno)
1859 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1860 || current_function_uses_const_pool);
1861 return ((regs_ever_live[regno] && !call_used_regs[regno]
1862 && !fixed_regs[regno]
1863 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed))
1864 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used));
1868 /* Return number of registers to be saved on the stack. */
1876 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1877 if (ix86_save_reg (regno))
1882 /* Return the offset between two registers, one to be eliminated, and the other
1883 its replacement, at the start of a routine. */
1886 ix86_initial_elimination_offset (from, to)
1890 struct ix86_frame frame;
1891 ix86_compute_frame_layout (&frame);
1893 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
1894 return frame.hard_frame_pointer_offset;
1895 else if (from == FRAME_POINTER_REGNUM
1896 && to == HARD_FRAME_POINTER_REGNUM)
1897 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
1900 if (to != STACK_POINTER_REGNUM)
1902 else if (from == ARG_POINTER_REGNUM)
1903 return frame.stack_pointer_offset;
1904 else if (from != FRAME_POINTER_REGNUM)
1907 return frame.stack_pointer_offset - frame.frame_pointer_offset;
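
/* For example, with frame_pointer_needed and no saved registers,
   eliminating ARG_POINTER to HARD_FRAME_POINTER yields 8 on ia32:
   4 bytes for the return address plus 4 for the saved %ebp, matching
   hard_frame_pointer_offset as computed below (illustrative only).  */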
1911 /* Fill structure ix86_frame about frame of currently computed function. */
1914 ix86_compute_frame_layout (frame)
1915 struct ix86_frame *frame;
1917 HOST_WIDE_INT total_size;
1918 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
1920 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
1921 HOST_WIDE_INT size = get_frame_size ();
1923 frame->nregs = ix86_nsaved_regs ();
1926 /* Skip return value and save base pointer. */
1927 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
1929 frame->hard_frame_pointer_offset = offset;
  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since the i386 port is the only one using these
     features, and they may break easily.  */
1935 if (size && !stack_alignment_needed)
1937 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
1939 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
1941 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
1944 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
1945 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
1947 /* Register save area */
1948 offset += frame->nregs * UNITS_PER_WORD;
1950 /* Align start of frame for local function. */
1951 frame->padding1 = ((offset + stack_alignment_needed - 1)
1952 & -stack_alignment_needed) - offset;
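  /* Worked example of the round-up idiom above: with offset == 20 and
     stack_alignment_needed == 16, (20 + 15) & -16 == 32, so padding1
     becomes 12 bytes.  */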
1954 offset += frame->padding1;
1956 /* Frame pointer points here. */
1957 frame->frame_pointer_offset = offset;
1961 /* Add outgoing arguments area. */
1962 if (ACCUMULATE_OUTGOING_ARGS)
1964 offset += current_function_outgoing_args_size;
1965 frame->outgoing_arguments_size = current_function_outgoing_args_size;
1968 frame->outgoing_arguments_size = 0;
1970 /* Align stack boundary. */
1971 frame->padding2 = ((offset + preferred_alignment - 1)
1972 & -preferred_alignment) - offset;
1974 offset += frame->padding2;
1976 /* We've reached end of stack frame. */
1977 frame->stack_pointer_offset = offset;
1979 /* Size prologue needs to allocate. */
1980 frame->to_allocate =
1981 (size + frame->padding1 + frame->padding2
1982 + frame->outgoing_arguments_size);
1985 fprintf (stderr, "nregs: %i\n", frame->nregs);
1986 fprintf (stderr, "size: %i\n", size);
1987 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
1988 fprintf (stderr, "padding1: %i\n", frame->padding1);
1989 fprintf (stderr, "padding2: %i\n", frame->padding2);
1990 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
1991 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
1992 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
1993 frame->hard_frame_pointer_offset);
1994 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
1998 /* Emit code to save registers in the prologue. */
2001 ix86_emit_save_regs ()
2006 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
2007 if (ix86_save_reg (regno))
2009 insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
2010 RTX_FRAME_RELATED_P (insn) = 1;
2014 /* Expand the prologue into a bunch of separate insns. */
2017 ix86_expand_prologue ()
2020 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
2021 || current_function_uses_const_pool);
2022 struct ix86_frame frame;
2024 ix86_compute_frame_layout (&frame);
2026 /* Note: AT&T enter does NOT have reversed args. Enter is probably
2027 slower on all targets. Also sdb doesn't like it. */
2029 if (frame_pointer_needed)
2031 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
2032 RTX_FRAME_RELATED_P (insn) = 1;
2034 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2035 RTX_FRAME_RELATED_P (insn) = 1;
2038 ix86_emit_save_regs ();
2040 if (frame.to_allocate == 0)
2042 else if (! TARGET_STACK_PROBE || frame.to_allocate < CHECK_STACK_LIMIT)
2044 if (frame_pointer_needed)
2045 insn = emit_insn (gen_pro_epilogue_adjust_stack
2046 (stack_pointer_rtx, stack_pointer_rtx,
2047 GEN_INT (-frame.to_allocate), hard_frame_pointer_rtx));
2049 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
2050 GEN_INT (-frame.to_allocate)));
2051 RTX_FRAME_RELATED_P (insn) = 1;
2055 /* ??? Is this only valid for Win32? */
2059 arg0 = gen_rtx_REG (SImode, 0);
2060 emit_move_insn (arg0, GEN_INT (frame.to_allocate));
2062 sym = gen_rtx_MEM (FUNCTION_MODE,
2063 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
2064 insn = emit_call_insn (gen_call (sym, const0_rtx));
2066 CALL_INSN_FUNCTION_USAGE (insn)
2067 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
2068 CALL_INSN_FUNCTION_USAGE (insn));
2071 #ifdef SUBTARGET_PROLOGUE
2076 load_pic_register ();
2078 /* If we are profiling, make sure no instructions are scheduled before
2079 the call to mcount. However, if -fpic, the above call will have
2081 if ((profile_flag || profile_block_flag) && ! pic_reg_used)
2082 emit_insn (gen_blockage ());
/* Emit code to add TSIZE to the esp value.  Use a POP instruction when
   profitable.  */
2089 ix86_emit_epilogue_esp_adjustment (tsize)
2092 /* If a frame pointer is present, we must be sure to tie the sp
2093 to the fp so that we don't mis-schedule. */
2094 if (frame_pointer_needed)
2095 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2098 hard_frame_pointer_rtx));
2100 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
2104 /* Emit code to restore saved registers using MOV insns. First register
2105 is restored from POINTER + OFFSET. */
2107 ix86_emit_restore_regs_using_mov (pointer, offset)
2113 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2114 if (ix86_save_reg (regno))
2116 emit_move_insn (gen_rtx_REG (Pmode, regno),
2117 adj_offsettable_operand (gen_rtx_MEM (Pmode,
2120 offset += UNITS_PER_WORD;
2124 /* Restore function stack, frame, and registers. */
2127 ix86_expand_epilogue (emit_return)
2131 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
2132 struct ix86_frame frame;
2133 HOST_WIDE_INT offset;
2135 ix86_compute_frame_layout (&frame);
2137 /* Calculate start of saved registers relative to ebp. */
2138 offset = -frame.nregs * UNITS_PER_WORD;
2140 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
2141 if (profile_block_flag == 2)
2143 FUNCTION_BLOCK_PROFILER_EXIT;
2147 /* If we're only restoring one register and sp is not valid then
2148 use a move instruction to restore the register, since it's
2149 less work than reloading sp and popping the register.
2151 The default code results in a stack adjustment using an add/lea instruction,
2152 while this code results in a LEAVE instruction (or its discrete equivalent),
2153 so it is profitable in some other cases as well, especially when there
2154 are no registers to restore. We also use this code when TARGET_USE_LEAVE
2155 and there is exactly one register to pop. This heuristic may need some
2156 tuning in the future. */
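  /* Illustrative example: with a frame pointer, TARGET_USE_LEAVE and a
     single saved register, the branch below typically produces

	mov{l}	-4(%ebp), <saved reg>
	leave

     instead of an add/pop sequence.  */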
2157 if ((!sp_valid && frame.nregs <= 1)
2158 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
2159 || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
2160 && frame.nregs == 1))
2162 /* Restore registers. We can use ebp or esp to address the memory
2163 locations. If both are available, default to ebp, since offsets
2164 are known to be small. The only exception is esp pointing directly to the
2165 end of the block of saved registers, where we may simplify addressing
2168 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
2169 ix86_emit_restore_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
2171 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, offset);
2173 if (!frame_pointer_needed)
2174 ix86_emit_epilogue_esp_adjustment (frame.to_allocate
2175 + frame.nregs * UNITS_PER_WORD);
2176 /* If not an i386, mov & pop is faster than "leave". */
2177 else if (TARGET_USE_LEAVE || optimize_size)
2178 emit_insn (gen_leave ());
2181 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2182 hard_frame_pointer_rtx,
2184 hard_frame_pointer_rtx));
2185 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
2190 /* First step is to deallocate the stack frame so that we can
2191 pop the registers. */
2194 if (!frame_pointer_needed)
2196 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2197 hard_frame_pointer_rtx,
2199 hard_frame_pointer_rtx));
2201 else if (frame.to_allocate)
2202 ix86_emit_epilogue_esp_adjustment (frame.to_allocate);
2204 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2205 if (ix86_save_reg (regno))
2206 emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
2207 if (frame_pointer_needed)
2208 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
2211 /* Sibcall epilogues don't want a return instruction. */
2215 if (current_function_pops_args && current_function_args_size)
2217 rtx popc = GEN_INT (current_function_pops_args);
2219 /* i386 can only pop 64K bytes. If asked to pop more, pop
2220 return address, do explicit add, and jump indirectly to the
2223 if (current_function_pops_args >= 65536)
2225 rtx ecx = gen_rtx_REG (SImode, 2);
2227 emit_insn (gen_popsi1 (ecx));
2228 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
2229 emit_jump_insn (gen_return_indirect_internal (ecx));
2232 emit_jump_insn (gen_return_pop_internal (popc));
2235 emit_jump_insn (gen_return_internal ());
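/* Illustrative sketch of the large-pop path above (more than 64K bytes of
   arguments to pop):

	pop{l}	%ecx		fetch the return address
	add{l}	$N, %esp	pop the arguments
	jmp	*%ecx		return to the caller

   The common case instead uses a "ret $N" via gen_return_pop_internal.  */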
2238 /* Extract the parts of an RTL expression that is a valid memory address
2239 for an instruction. Return false if the structure of the address is
2243 ix86_decompose_address (addr, out)
2245 struct ix86_address *out;
2247 rtx base = NULL_RTX;
2248 rtx index = NULL_RTX;
2249 rtx disp = NULL_RTX;
2250 HOST_WIDE_INT scale = 1;
2251 rtx scale_rtx = NULL_RTX;
2253 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
2255 else if (GET_CODE (addr) == PLUS)
2257 rtx op0 = XEXP (addr, 0);
2258 rtx op1 = XEXP (addr, 1);
2259 enum rtx_code code0 = GET_CODE (op0);
2260 enum rtx_code code1 = GET_CODE (op1);
2262 if (code0 == REG || code0 == SUBREG)
2264 if (code1 == REG || code1 == SUBREG)
2265 index = op0, base = op1; /* index + base */
2267 base = op0, disp = op1; /* base + displacement */
2269 else if (code0 == MULT)
2271 index = XEXP (op0, 0);
2272 scale_rtx = XEXP (op0, 1);
2273 if (code1 == REG || code1 == SUBREG)
2274 base = op1; /* index*scale + base */
2276 disp = op1; /* index*scale + disp */
2278 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
2280 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
2281 scale_rtx = XEXP (XEXP (op0, 0), 1);
2282 base = XEXP (op0, 1);
2285 else if (code0 == PLUS)
2287 index = XEXP (op0, 0); /* index + base + disp */
2288 base = XEXP (op0, 1);
2294 else if (GET_CODE (addr) == MULT)
2296 index = XEXP (addr, 0); /* index*scale */
2297 scale_rtx = XEXP (addr, 1);
2299 else if (GET_CODE (addr) == ASHIFT)
2303 /* We're called for lea too, which implements ashift on occasion. */
2304 index = XEXP (addr, 0);
2305 tmp = XEXP (addr, 1);
2306 if (GET_CODE (tmp) != CONST_INT)
2308 scale = INTVAL (tmp);
2309 if ((unsigned HOST_WIDE_INT) scale > 3)
2314 disp = addr; /* displacement */
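  /* Illustrative example: the canonical address

	(plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 8))

     is parsed above into index = A, scale_rtx = 4, base = B and disp = 8,
     i.e. the operand 8(%B,%A,4) in AT&T syntax.  */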
2316 /* Extract the integral value of scale. */
2319 if (GET_CODE (scale_rtx) != CONST_INT)
2321 scale = INTVAL (scale_rtx);
2324 /* Allow the arg pointer and stack pointer as index if there is no scaling */
2325 if (base && index && scale == 1
2326 && (index == arg_pointer_rtx || index == frame_pointer_rtx
2327 || index == stack_pointer_rtx))
2334 /* Special case: %ebp cannot be encoded as a base without a displacement. */
2335 if ((base == hard_frame_pointer_rtx
2336 || base == frame_pointer_rtx
2337 || base == arg_pointer_rtx) && !disp)
2340 /* Special case: on the K6, [%esi] causes the instruction to be vector decoded.
2341 Avoid this by transforming to [%esi+0]. */
2342 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
2343 && base && !index && !disp
2345 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
2348 /* Special case: encode reg+reg instead of reg*2. */
2349 if (!base && index && scale && scale == 2)
2350 base = index, scale = 1;
2352 /* Special case: scaling cannot be encoded without base or displacement. */
2353 if (!base && !disp && index && scale != 1)
2364 /* Return cost of the memory address x.
2365 For i386, it is better to use a complex address than let gcc copy
2366 the address into a reg and make a new pseudo. But not if the address
2367 requires two regs - that would mean more pseudos with longer
2370 ix86_address_cost (x)
2373 struct ix86_address parts;
2376 if (!ix86_decompose_address (x, &parts))
2379 /* More complex memory references are better. */
2380 if (parts.disp && parts.disp != const0_rtx)
2383 /* Attempt to minimize the number of registers in the address. */
2385 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
2387 && (!REG_P (parts.index)
2388 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
2392 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
2394 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
2395 && parts.base != parts.index)
2398 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
2399 since its predecode logic can't detect the length of such instructions
2400 and they degenerate to vector decoding. Increase the cost of such
2401 addresses here. The penalty is at least 2 cycles. It may be worthwhile
2402 to split such addresses or even refuse them altogether.
2404 The following addressing modes are affected:
2409 The first and last case may be avoidable by explicitly coding the zero into
2410 the memory address, but I don't have an AMD-K6 machine handy to check this
2414 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
2415 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
2416 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
2422 /* If X is a machine specific address (i.e. a symbol or label being
2423 referenced as a displacement from the GOT implemented using an
2424 UNSPEC), then return the base term. Otherwise return X. */
2427 ix86_find_base_term (x)
2432 if (GET_CODE (x) != PLUS
2433 || XEXP (x, 0) != pic_offset_table_rtx
2434 || GET_CODE (XEXP (x, 1)) != CONST)
2437 term = XEXP (XEXP (x, 1), 0);
2439 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
2440 term = XEXP (term, 0);
2442 if (GET_CODE (term) != UNSPEC
2443 || XVECLEN (term, 0) != 1
2444 || XINT (term, 1) != 7)
2447 term = XVECEXP (term, 0, 0);
2449 if (GET_CODE (term) != SYMBOL_REF
2450 && GET_CODE (term) != LABEL_REF)
2456 /* Determine if a given CONST RTX is a valid memory displacement
2460 legitimate_pic_address_disp_p (disp)
2463 if (GET_CODE (disp) != CONST)
2465 disp = XEXP (disp, 0);
2467 if (GET_CODE (disp) == PLUS)
2469 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
2471 disp = XEXP (disp, 0);
2474 if (GET_CODE (disp) != UNSPEC
2475 || XVECLEN (disp, 0) != 1)
2478 /* Must be @GOT or @GOTOFF. */
2479 if (XINT (disp, 1) != 6
2480 && XINT (disp, 1) != 7)
2483 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2484 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
2490 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
2491 memory address for an instruction. The MODE argument is the machine mode
2492 for the MEM expression that wants to use this address.
2494 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
2495 convert common non-canonical forms to canonical form so that they will
2499 legitimate_address_p (mode, addr, strict)
2500 enum machine_mode mode;
2504 struct ix86_address parts;
2505 rtx base, index, disp;
2506 HOST_WIDE_INT scale;
2507 const char *reason = NULL;
2508 rtx reason_rtx = NULL_RTX;
2510 if (TARGET_DEBUG_ADDR)
2513 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
2514 GET_MODE_NAME (mode), strict);
2518 if (! ix86_decompose_address (addr, &parts))
2520 reason = "decomposition failed";
2525 index = parts.index;
2527 scale = parts.scale;
2529 /* Validate base register.
2531 Don't allow SUBREGs here; they can lead to spill failures when the base
2532 is one word out of a two word structure, which is represented internally
2539 if (GET_CODE (base) != REG)
2541 reason = "base is not a register";
2545 if (GET_MODE (base) != Pmode)
2547 reason = "base is not in Pmode";
2551 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
2552 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
2554 reason = "base is not valid";
2559 /* Validate index register.
2561 Don't allow SUBREGs here; they can lead to spill failures when the index
2562 is one word out of a two word structure, which is represented internally
2569 if (GET_CODE (index) != REG)
2571 reason = "index is not a register";
2575 if (GET_MODE (index) != Pmode)
2577 reason = "index is not in Pmode";
2581 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
2582 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
2584 reason = "index is not valid";
2589 /* Validate scale factor. */
2592 reason_rtx = GEN_INT (scale);
2595 reason = "scale without index";
2599 if (scale != 2 && scale != 4 && scale != 8)
2601 reason = "scale is not a valid multiplier";
2606 /* Validate displacement. */
2611 if (!CONSTANT_ADDRESS_P (disp))
2613 reason = "displacement is not constant";
2617 if (GET_CODE (disp) == CONST_DOUBLE)
2619 reason = "displacement is a const_double";
2623 if (flag_pic && SYMBOLIC_CONST (disp))
2625 if (! legitimate_pic_address_disp_p (disp))
2627 reason = "displacement is an invalid pic construct";
2631 /* This code used to verify that a symbolic pic displacement
2632 includes the pic_offset_table_rtx register.
2634 While this is a good idea, unfortunately these constructs may
2635 be created by "adds using lea" optimization for incorrect
2644 This code is nonsensical, but results in addressing
2645 the GOT table with the pic_offset_table_rtx base. We can't
2646 just refuse it easily, since it gets matched by the
2647 "addsi3" pattern, which later gets split to lea when the
2648 output register differs from the input. While this
2649 could be handled by a separate addsi pattern for this case
2650 that never results in lea, disabling this test seems to be
2651 the easier and correct fix for the crash. */
2653 else if (HALF_PIC_P ())
2655 if (! HALF_PIC_ADDRESS_P (disp)
2656 || (base != NULL_RTX || index != NULL_RTX))
2658 reason = "displacement is an invalid half-pic reference";
2664 /* Everything looks valid. */
2665 if (TARGET_DEBUG_ADDR)
2666 fprintf (stderr, "Success.\n");
2670 if (TARGET_DEBUG_ADDR)
2672 fprintf (stderr, "Error: %s\n", reason);
2673 debug_rtx (reason_rtx);
2678 /* Return a unique alias set for the GOT. */
2680 static HOST_WIDE_INT
2681 ix86_GOT_alias_set ()
2683 static HOST_WIDE_INT set = -1;
2685 set = new_alias_set ();
2689 /* Return a legitimate reference for ORIG (an address) using the
2690 register REG. If REG is 0, a new pseudo is generated.
2692 There are two types of references that must be handled:
2694 1. Global data references must load the address from the GOT, via
2695 the PIC reg. An insn is emitted to do this load, and the reg is
2698 2. Static data references, constant pool addresses, and code labels
2699 compute the address as an offset from the GOT, whose base is in
2700 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
2701 differentiate them from global data objects. The returned
2702 address is the PIC reg + an unspec constant.
2704 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
2705 reg also appears in the address. */
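/* Illustrative example (assuming the UNSPEC numbering used below): a global
   `foo' becomes roughly

	(mem (plus pic_offset_table_rtx (const (unspec [foo] 6))))	foo@GOT

   while static data or a label becomes

	(plus pic_offset_table_rtx (const (unspec [foo] 7)))		foo@GOTOFF  */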
2708 legitimize_pic_address (orig, reg)
2716 if (GET_CODE (addr) == LABEL_REF
2717 || (GET_CODE (addr) == SYMBOL_REF
2718 && (CONSTANT_POOL_ADDRESS_P (addr)
2719 || SYMBOL_REF_FLAG (addr))))
2721 /* This symbol may be referenced via a displacement from the PIC
2722 base address (@GOTOFF). */
2724 current_function_uses_pic_offset_table = 1;
2725 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
2726 new = gen_rtx_CONST (Pmode, new);
2727 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2731 emit_move_insn (reg, new);
2735 else if (GET_CODE (addr) == SYMBOL_REF)
2737 /* This symbol must be referenced via a load from the
2738 Global Offset Table (@GOT). */
2740 current_function_uses_pic_offset_table = 1;
2741 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
2742 new = gen_rtx_CONST (Pmode, new);
2743 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2744 new = gen_rtx_MEM (Pmode, new);
2745 RTX_UNCHANGING_P (new) = 1;
2746 MEM_ALIAS_SET (new) = ix86_GOT_alias_set ();
2749 reg = gen_reg_rtx (Pmode);
2750 emit_move_insn (reg, new);
2755 if (GET_CODE (addr) == CONST)
2757 addr = XEXP (addr, 0);
2758 if (GET_CODE (addr) == UNSPEC)
2760 /* Check that the unspec is one of the ones we generate? */
2762 else if (GET_CODE (addr) != PLUS)
2765 if (GET_CODE (addr) == PLUS)
2767 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
2769 /* Check first to see if this is a constant offset from a @GOTOFF
2770 symbol reference. */
2771 if ((GET_CODE (op0) == LABEL_REF
2772 || (GET_CODE (op0) == SYMBOL_REF
2773 && (CONSTANT_POOL_ADDRESS_P (op0)
2774 || SYMBOL_REF_FLAG (op0))))
2775 && GET_CODE (op1) == CONST_INT)
2777 current_function_uses_pic_offset_table = 1;
2778 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
2779 new = gen_rtx_PLUS (Pmode, new, op1);
2780 new = gen_rtx_CONST (Pmode, new);
2781 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2785 emit_move_insn (reg, new);
2791 base = legitimize_pic_address (XEXP (addr, 0), reg);
2792 new = legitimize_pic_address (XEXP (addr, 1),
2793 base == reg ? NULL_RTX : reg);
2795 if (GET_CODE (new) == CONST_INT)
2796 new = plus_constant (base, INTVAL (new));
2799 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
2801 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
2802 new = XEXP (new, 1);
2804 new = gen_rtx_PLUS (Pmode, base, new);
2812 /* Try machine-dependent ways of modifying an illegitimate address
2813 to be legitimate. If we find one, return the new, valid address.
2814 This macro is used in only one place: `memory_address' in explow.c.
2816 OLDX is the address as it was before break_out_memory_refs was called.
2817 In some cases it is useful to look at this to decide what needs to be done.
2819 MODE and WIN are passed so that this macro can use
2820 GO_IF_LEGITIMATE_ADDRESS.
2822 It is always safe for this macro to do nothing. It exists to recognize
2823 opportunities to optimize the output.
2825 For the 80386, we handle X+REG by loading X into a register R and
2826 using R+REG. R will go in a general reg and indexing will be used.
2827 However, if REG is a broken-out memory address or multiplication,
2828 nothing needs to be done because REG can certainly go in a general reg.
2830 When -fpic is used, special handling is needed for symbolic references.
2831 See comments by legitimize_pic_address in i386.c for details. */
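/* Illustrative example: the address (plus (ashift (reg X) (const_int 2)) (reg Y))
   is first canonicalized below into (plus (mult (reg X) (const_int 4)) (reg Y)),
   the scaled-index form that GO_IF_LEGITIMATE_ADDRESS accepts.  */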
2834 legitimize_address (x, oldx, mode)
2836 register rtx oldx ATTRIBUTE_UNUSED;
2837 enum machine_mode mode;
2842 if (TARGET_DEBUG_ADDR)
2844 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
2845 GET_MODE_NAME (mode));
2849 if (flag_pic && SYMBOLIC_CONST (x))
2850 return legitimize_pic_address (x, 0);
2852 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
2853 if (GET_CODE (x) == ASHIFT
2854 && GET_CODE (XEXP (x, 1)) == CONST_INT
2855 && (log = (unsigned) INTVAL (XEXP (x, 1))) < 4)
2858 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
2859 GEN_INT (1 << log));
2862 if (GET_CODE (x) == PLUS)
2864 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
2866 if (GET_CODE (XEXP (x, 0)) == ASHIFT
2867 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2868 && (log = (unsigned) INTVAL (XEXP (XEXP (x, 0), 1))) < 4)
2871 XEXP (x, 0) = gen_rtx_MULT (Pmode,
2872 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
2873 GEN_INT (1 << log));
2876 if (GET_CODE (XEXP (x, 1)) == ASHIFT
2877 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
2878 && (log = (unsigned) INTVAL (XEXP (XEXP (x, 1), 1))) < 4)
2881 XEXP (x, 1) = gen_rtx_MULT (Pmode,
2882 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
2883 GEN_INT (1 << log));
2886 /* Put multiply first if it isn't already. */
2887 if (GET_CODE (XEXP (x, 1)) == MULT)
2889 rtx tmp = XEXP (x, 0);
2890 XEXP (x, 0) = XEXP (x, 1);
2895 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
2896 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
2897 created by virtual register instantiation, register elimination, and
2898 similar optimizations. */
2899 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
2902 x = gen_rtx_PLUS (Pmode,
2903 gen_rtx_PLUS (Pmode, XEXP (x, 0),
2904 XEXP (XEXP (x, 1), 0)),
2905 XEXP (XEXP (x, 1), 1));
2909 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
2910 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
2911 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
2912 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
2913 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
2914 && CONSTANT_P (XEXP (x, 1)))
2917 rtx other = NULL_RTX;
2919 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2921 constant = XEXP (x, 1);
2922 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
2924 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
2926 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
2927 other = XEXP (x, 1);
2935 x = gen_rtx_PLUS (Pmode,
2936 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
2937 XEXP (XEXP (XEXP (x, 0), 1), 0)),
2938 plus_constant (other, INTVAL (constant)));
2942 if (changed && legitimate_address_p (mode, x, FALSE))
2945 if (GET_CODE (XEXP (x, 0)) == MULT)
2948 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
2951 if (GET_CODE (XEXP (x, 1)) == MULT)
2954 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
2958 && GET_CODE (XEXP (x, 1)) == REG
2959 && GET_CODE (XEXP (x, 0)) == REG)
2962 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
2965 x = legitimize_pic_address (x, 0);
2968 if (changed && legitimate_address_p (mode, x, FALSE))
2971 if (GET_CODE (XEXP (x, 0)) == REG)
2973 register rtx temp = gen_reg_rtx (Pmode);
2974 register rtx val = force_operand (XEXP (x, 1), temp);
2976 emit_move_insn (temp, val);
2982 else if (GET_CODE (XEXP (x, 1)) == REG)
2984 register rtx temp = gen_reg_rtx (Pmode);
2985 register rtx val = force_operand (XEXP (x, 0), temp);
2987 emit_move_insn (temp, val);
2997 /* Print an integer constant expression in assembler syntax. Addition
2998 and subtraction are the only arithmetic that may appear in these
2999 expressions. FILE is the stdio stream to write to, X is the rtx, and
3000 CODE is the operand print code from the output string. */
3003 output_pic_addr_const (file, x, code)
3010 switch (GET_CODE (x))
3020 assemble_name (file, XSTR (x, 0));
3021 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
3022 fputs ("@PLT", file);
3029 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
3030 assemble_name (asm_out_file, buf);
3034 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3038 /* This used to output parentheses around the expression,
3039 but that does not work on the 386 (either ATT or BSD assembler). */
3040 output_pic_addr_const (file, XEXP (x, 0), code);
3044 if (GET_MODE (x) == VOIDmode)
3046 /* We can use %d if the number is <32 bits and positive. */
3047 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
3048 fprintf (file, "0x%lx%08lx",
3049 (unsigned long) CONST_DOUBLE_HIGH (x),
3050 (unsigned long) CONST_DOUBLE_LOW (x));
3052 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
3055 /* We can't handle floating point constants;
3056 PRINT_OPERAND must handle them. */
3057 output_operand_lossage ("floating constant misused");
3061 /* Some assemblers need integer constants to appear first. */
3062 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
3064 output_pic_addr_const (file, XEXP (x, 0), code);
3066 output_pic_addr_const (file, XEXP (x, 1), code);
3068 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
3070 output_pic_addr_const (file, XEXP (x, 1), code);
3072 output_pic_addr_const (file, XEXP (x, 0), code);
3079 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
3080 output_pic_addr_const (file, XEXP (x, 0), code);
3082 output_pic_addr_const (file, XEXP (x, 1), code);
3083 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
3087 if (XVECLEN (x, 0) != 1)
3089 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
3090 switch (XINT (x, 1))
3093 fputs ("@GOT", file);
3096 fputs ("@GOTOFF", file);
3099 fputs ("@PLT", file);
3102 output_operand_lossage ("invalid UNSPEC as operand");
3108 output_operand_lossage ("invalid expression as operand");
3112 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
3113 We need to handle our special PIC relocations. */
3116 i386_dwarf_output_addr_const (file, x)
3120 fprintf (file, "%s", INT_ASM_OP);
3122 output_pic_addr_const (file, x, '\0');
3124 output_addr_const (file, x);
3128 /* In the name of slightly smaller debug output, and to cater to
3129 general assembler lossage, recognize PIC+GOTOFF and turn it back
3130 into a direct symbol reference. */
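/* Illustrative example: (plus (reg %ebx) (const (unspec [foo] 7))), i.e.
   a foo@GOTOFF reference from the PIC register, is simplified back to a
   plain `foo' by the code below.  */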
3133 i386_simplify_dwarf_addr (orig_x)
3138 if (GET_CODE (x) != PLUS
3139 || GET_CODE (XEXP (x, 0)) != REG
3140 || GET_CODE (XEXP (x, 1)) != CONST)
3143 x = XEXP (XEXP (x, 1), 0);
3144 if (GET_CODE (x) == UNSPEC
3145 && (XINT (x, 1) == 6
3146 || XINT (x, 1) == 7))
3147 return XVECEXP (x, 0, 0);
3149 if (GET_CODE (x) == PLUS
3150 && GET_CODE (XEXP (x, 0)) == UNSPEC
3151 && GET_CODE (XEXP (x, 1)) == CONST_INT
3152 && (XINT (XEXP (x, 0), 1) == 6
3153 || XINT (XEXP (x, 0), 1) == 7))
3154 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
3160 put_condition_code (code, mode, reverse, fp, file)
3162 enum machine_mode mode;
3168 if (mode == CCFPmode || mode == CCFPUmode)
3170 enum rtx_code second_code, bypass_code;
3171 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3172 if (bypass_code != NIL || second_code != NIL)
3174 code = ix86_fp_compare_code_to_integer (code);
3178 code = reverse_condition (code);
3189 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
3194 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
3195 Those same assemblers have the same but opposite lossage on cmov. */
3198 suffix = fp ? "nbe" : "a";
3201 if (mode == CCNOmode || mode == CCGOCmode)
3203 else if (mode == CCmode || mode == CCGCmode)
3214 if (mode == CCNOmode || mode == CCGOCmode)
3216 else if (mode == CCmode || mode == CCGCmode)
3225 suffix = fp ? "nb" : "ae";
3228 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
3238 suffix = fp ? "u" : "p";
3241 suffix = fp ? "nu" : "np";
3246 fputs (suffix, file);
3250 print_reg (x, code, file)
3255 if (REGNO (x) == ARG_POINTER_REGNUM
3256 || REGNO (x) == FRAME_POINTER_REGNUM
3257 || REGNO (x) == FLAGS_REG
3258 || REGNO (x) == FPSR_REG)
3261 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
3266 else if (code == 'b')
3268 else if (code == 'k')
3270 else if (code == 'y')
3272 else if (code == 'h')
3274 else if (code == 'm' || MMX_REG_P (x))
3277 code = GET_MODE_SIZE (GET_MODE (x));
3282 fputs (hi_reg_name[REGNO (x)], file);
3285 if (STACK_TOP_P (x))
3287 fputs ("st(0)", file);
3294 if (! ANY_FP_REG_P (x))
3299 fputs (hi_reg_name[REGNO (x)], file);
3302 fputs (qi_reg_name[REGNO (x)], file);
3305 fputs (qi_high_reg_name[REGNO (x)], file);
3313 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
3314 C -- print opcode suffix for set/cmov insn.
3315 c -- like C, but print reversed condition
3316 R -- print the prefix for register names.
3317 z -- print the opcode suffix for the size of the current operand.
3318 * -- print a star (in certain assembler syntax)
3319 A -- print an absolute memory reference.
3320 w -- print the operand as if it's a "word" (HImode) even if it isn't.
3321 s -- print a shift double count, followed by the assembler's argument
3323 b -- print the QImode name of the register for the indicated operand.
3324 %b0 would print %al if operands[0] is reg 0.
3325 w -- likewise, print the HImode name of the register.
3326 k -- likewise, print the SImode name of the register.
3327 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
3328 y -- print "st(0)" instead of "st" as a register.
3329 m -- print "st(n)" as an mmx register. */
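/* Illustrative example: with operands[0] = (reg:SI 0), "%b0" prints %al
   (al in Intel syntax), "%w0" prints %ax, "%k0" prints %eax and "%h0"
   prints %ah.  */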
3332 print_operand (file, x, code)
3342 if (ASSEMBLER_DIALECT == 0)
3347 if (ASSEMBLER_DIALECT == 0)
3349 else if (ASSEMBLER_DIALECT == 1)
3351 /* Intel syntax. For absolute addresses, registers should not
3352 be surrounded by braces. */
3353 if (GET_CODE (x) != REG)
3356 PRINT_OPERAND (file, x, 0);
3362 PRINT_OPERAND (file, x, 0);
3367 if (ASSEMBLER_DIALECT == 0)
3372 if (ASSEMBLER_DIALECT == 0)
3377 if (ASSEMBLER_DIALECT == 0)
3382 if (ASSEMBLER_DIALECT == 0)
3387 if (ASSEMBLER_DIALECT == 0)
3392 if (ASSEMBLER_DIALECT == 0)
3397 /* 387 opcodes don't get size suffixes if the operands are
3400 if (STACK_REG_P (x))
3403 /* The opcode suffix is derived from the size of the operand. */
3404 switch (GET_MODE_SIZE (GET_MODE (x)))
3407 #ifdef HAVE_GAS_FILDS_FISTS
3413 if (GET_MODE (x) == SFmode)
3428 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
3430 #ifdef GAS_MNEMONICS
3456 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
3458 PRINT_OPERAND (file, x, 0);
3464 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
3467 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
3470 /* Like above, but reverse condition */
3472 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
3475 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
3481 sprintf (str, "invalid operand code `%c'", code);
3482 output_operand_lossage (str);
3487 if (GET_CODE (x) == REG)
3489 PRINT_REG (x, code, file);
3492 else if (GET_CODE (x) == MEM)
3494 /* No `byte ptr' prefix for call instructions. */
3495 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
3498 switch (GET_MODE_SIZE (GET_MODE (x)))
3500 case 1: size = "BYTE"; break;
3501 case 2: size = "WORD"; break;
3502 case 4: size = "DWORD"; break;
3503 case 8: size = "QWORD"; break;
3504 case 12: size = "XWORD"; break;
3505 case 16: size = "XMMWORD"; break;
3510 /* Check for explicit size override (codes 'b', 'w' and 'k') */
3513 else if (code == 'w')
3515 else if (code == 'k')
3519 fputs (" PTR ", file);
3523 if (flag_pic && CONSTANT_ADDRESS_P (x))
3524 output_pic_addr_const (file, x, code);
3529 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
3534 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3535 REAL_VALUE_TO_TARGET_SINGLE (r, l);
3537 if (ASSEMBLER_DIALECT == 0)
3539 fprintf (file, "0x%lx", l);
3542 /* These float cases don't actually occur as immediate operands. */
3543 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
3548 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3549 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3550 fprintf (file, "%s", dstr);
3553 else if (GET_CODE (x) == CONST_DOUBLE
3554 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
3559 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3560 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3561 fprintf (file, "%s", dstr);
3567 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
3569 if (ASSEMBLER_DIALECT == 0)
3572 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
3573 || GET_CODE (x) == LABEL_REF)
3575 if (ASSEMBLER_DIALECT == 0)
3578 fputs ("OFFSET FLAT:", file);
3581 if (GET_CODE (x) == CONST_INT)
3582 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3584 output_pic_addr_const (file, x, code);
3586 output_addr_const (file, x);
3590 /* Print a memory operand whose address is ADDR. */
3593 print_operand_address (file, addr)
3597 struct ix86_address parts;
3598 rtx base, index, disp;
3601 if (! ix86_decompose_address (addr, &parts))
3605 index = parts.index;
3607 scale = parts.scale;
3609 if (!base && !index)
3611 /* A displacement-only address requires special attention. */
3613 if (GET_CODE (disp) == CONST_INT)
3615 if (ASSEMBLER_DIALECT != 0)
3617 if (USER_LABEL_PREFIX[0] == 0)
3619 fputs ("ds:", file);
3621 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
3624 output_pic_addr_const (file, addr, 0);
3626 output_addr_const (file, addr);
3630 if (ASSEMBLER_DIALECT == 0)
3635 output_pic_addr_const (file, disp, 0);
3636 else if (GET_CODE (disp) == LABEL_REF)
3637 output_asm_label (disp);
3639 output_addr_const (file, disp);
3644 PRINT_REG (base, 0, file);
3648 PRINT_REG (index, 0, file);
3650 fprintf (file, ",%d", scale);
3656 rtx offset = NULL_RTX;
3660 /* Pull out the offset of a symbol; print any symbol itself. */
3661 if (GET_CODE (disp) == CONST
3662 && GET_CODE (XEXP (disp, 0)) == PLUS
3663 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
3665 offset = XEXP (XEXP (disp, 0), 1);
3666 disp = gen_rtx_CONST (VOIDmode,
3667 XEXP (XEXP (disp, 0), 0));
3671 output_pic_addr_const (file, disp, 0);
3672 else if (GET_CODE (disp) == LABEL_REF)
3673 output_asm_label (disp);
3674 else if (GET_CODE (disp) == CONST_INT)
3677 output_addr_const (file, disp);
3683 PRINT_REG (base, 0, file);
3686 if (INTVAL (offset) >= 0)
3688 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3692 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3699 PRINT_REG (index, 0, file);
3701 fprintf (file, "*%d", scale);
3708 /* Split one or more DImode RTL references into pairs of SImode
3709 references. The RTL can be REG, offsettable MEM, integer constant, or
3710 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
3711 split and "num" is its length. lo_half and hi_half are output arrays
3712 that parallel "operands". */
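/* Illustrative example: after reload, (reg:DI 0) splits into (reg:SI 0)
   and (reg:SI 1), while the constant (const_double 0x100000002) splits
   into lo_half = (const_int 2) and hi_half = (const_int 1).  */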
3715 split_di (operands, num, lo_half, hi_half)
3718 rtx lo_half[], hi_half[];
3722 rtx op = operands[num];
3723 if (CONSTANT_P (op))
3724 split_double (op, &lo_half[num], &hi_half[num]);
3725 else if (! reload_completed)
3727 lo_half[num] = gen_lowpart (SImode, op);
3728 hi_half[num] = gen_highpart (SImode, op);
3730 else if (GET_CODE (op) == REG)
3732 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
3733 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
3735 else if (offsettable_memref_p (op))
3737 rtx lo_addr = XEXP (op, 0);
3738 rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
3739 lo_half[num] = change_address (op, SImode, lo_addr);
3740 hi_half[num] = change_address (op, SImode, hi_addr);
3747 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
3748 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
3749 is the expression of the binary operation. The output may either be
3750 emitted here, or returned to the caller, like all output_* functions.
3752 There is no guarantee that the operands are the same mode, as they
3753 might be within FLOAT or FLOAT_EXTEND expressions. */
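/* Illustrative example (assuming the opcode prefix built in the switch
   below): an SSE PLUS with SFmode operands yields "addss\t{%2, %0|%0, %2}",
   while the 387 paths produce "fadd"-family strings whose exact form
   depends on which operand is %st(0) and whether it dies.  */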
3755 #ifndef SYSV386_COMPAT
3756 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
3757 wants to fix the assemblers because that causes incompatibility
3758 with gcc. No-one wants to fix gcc because that causes
3759 incompatibility with assemblers... You can use the option of
3760 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
3761 #define SYSV386_COMPAT 1
3765 output_387_binary_op (insn, operands)
3769 static char buf[30];
3772 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
3774 #ifdef ENABLE_CHECKING
3775 /* Even if we do not want to check the inputs, this documents the input
3776 constraints, which helps in understanding the following code. */
3777 if (STACK_REG_P (operands[0])
3778 && ((REG_P (operands[1])
3779 && REGNO (operands[0]) == REGNO (operands[1])
3780 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
3781 || (REG_P (operands[2])
3782 && REGNO (operands[0]) == REGNO (operands[2])
3783 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
3784 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
3790 switch (GET_CODE (operands[3]))
3793 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3794 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3802 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3803 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3811 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3812 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3820 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3821 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3835 if (GET_MODE (operands[0]) == SFmode)
3836 strcat (buf, "ss\t{%2, %0|%0, %2}");
3838 strcat (buf, "sd\t{%2, %0|%0, %2}");
3843 switch (GET_CODE (operands[3]))
3847 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
3849 rtx temp = operands[2];
3850 operands[2] = operands[1];
3854 /* We know operands[0] == operands[1]. */
3856 if (GET_CODE (operands[2]) == MEM)
3862 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
3864 if (STACK_TOP_P (operands[0]))
3865 /* How is it that we are storing to a dead operand[2]?
3866 Well, presumably operands[1] is dead too. We can't
3867 store the result to st(0) as st(0) gets popped on this
3868 instruction. Instead store to operands[2] (which I
3869 think has to be st(1)). st(1) will be popped later.
3870 gcc <= 2.8.1 didn't have this check and generated
3871 assembly code that the Unixware assembler rejected. */
3872 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
3874 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
3878 if (STACK_TOP_P (operands[0]))
3879 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
3881 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
3886 if (GET_CODE (operands[1]) == MEM)
3892 if (GET_CODE (operands[2]) == MEM)
3898 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
3901 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
3902 derived assemblers, confusingly reverse the direction of
3903 the operation for fsub{r} and fdiv{r} when the
3904 destination register is not st(0). The Intel assembler
3905 doesn't have this brain damage. Read !SYSV386_COMPAT to
3906 figure out what the hardware really does. */
3907 if (STACK_TOP_P (operands[0]))
3908 p = "{p\t%0, %2|rp\t%2, %0}";
3910 p = "{rp\t%2, %0|p\t%0, %2}";
3912 if (STACK_TOP_P (operands[0]))
3913 /* As above for fmul/fadd, we can't store to st(0). */
3914 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
3916 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
3921 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
3924 if (STACK_TOP_P (operands[0]))
3925 p = "{rp\t%0, %1|p\t%1, %0}";
3927 p = "{p\t%1, %0|rp\t%0, %1}";
3929 if (STACK_TOP_P (operands[0]))
3930 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
3932 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
3937 if (STACK_TOP_P (operands[0]))
3939 if (STACK_TOP_P (operands[1]))
3940 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
3942 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
3945 else if (STACK_TOP_P (operands[1]))
3948 p = "{\t%1, %0|r\t%0, %1}";
3950 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
3956 p = "{r\t%2, %0|\t%0, %2}";
3958 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
3971 /* Output code for INSN to convert a float to a signed int. OPERANDS
3972 are the insn operands. The output may be [HSD]Imode and the input
3973 operand may be [SDX]Fmode. */
3976 output_fix_trunc (insn, operands)
3980 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
3981 int dimode_p = GET_MODE (operands[0]) == DImode;
3984 /* Jump through a hoop or two for DImode, since the hardware has no
3985 non-popping instruction. We used to do this a different way, but
3986 that was somewhat fragile and broke with post-reload splitters. */
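  /* The body below emits roughly the following sequence (illustrative):

	fnstcw	<cw>		store the current control word
	mov{l}	<cw>, <scratch>
	mov{b}	$12, 1+<cw>	set rounding control to "truncate"
	fldcw	<cw>
	mov{l}	<scratch>, <cw>	put the original value back in memory
	fist{p}%z2 <dest>
	fldcw	<cw>		restore the original control word  */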
3987 if (dimode_p && !stack_top_dies)
3988 output_asm_insn ("fld\t%y1", operands);
3990 if (! STACK_TOP_P (operands[1]))
3993 xops[0] = GEN_INT (12);
3994 xops[1] = adj_offsettable_operand (operands[2], 1);
3995 xops[1] = change_address (xops[1], QImode, NULL_RTX);
3997 xops[2] = operands[0];
3998 if (GET_CODE (operands[0]) != MEM)
3999 xops[2] = operands[3];
4001 output_asm_insn ("fnstcw\t%2", operands);
4002 output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
4003 output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
4004 output_asm_insn ("fldcw\t%2", operands);
4005 output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);
4007 if (stack_top_dies || dimode_p)
4008 output_asm_insn ("fistp%z2\t%2", xops);
4010 output_asm_insn ("fist%z2\t%2", xops);
4012 output_asm_insn ("fldcw\t%2", operands);
4014 if (GET_CODE (operands[0]) != MEM)
4018 split_di (operands+0, 1, xops+0, xops+1);
4019 split_di (operands+3, 1, xops+2, xops+3);
4020 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4021 output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
4023 else if (GET_MODE (operands[0]) == SImode)
4024 output_asm_insn ("mov{l}\t{%3, %0|%0, %3}", operands);
4026 output_asm_insn ("mov{w}\t{%3, %0|%0, %3}", operands);
4032 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
4033 should be used and 2 when fnstsw should be used. UNORDERED_P is true
4034 when fucom should be used. */
4037 output_fp_compare (insn, operands, eflags_p, unordered_p)
4040 int eflags_p, unordered_p;
4043 rtx cmp_op0 = operands[0];
4044 rtx cmp_op1 = operands[1];
4045 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
4050 cmp_op1 = operands[2];
4054 if (GET_MODE (operands[0]) == SFmode)
4056 return "ucomiss\t{%1, %0|%0, %1}";
4058 return "comiss\t{%1, %0|%0, %y}";
4061 return "ucomisd\t{%1, %0|%0, %1}";
4063 return "comisd\t{%1, %0|%0, %y}";
4066 if (! STACK_TOP_P (cmp_op0))
4069 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
4071 if (STACK_REG_P (cmp_op1)
4073 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
4074 && REGNO (cmp_op1) != FIRST_STACK_REG)
4076 /* If the top of the 387 stack dies, and the other operand
4077 is also a stack register that dies, then this must be a
4078 `fcompp' float compare */
4082 /* There is no double popping fcomi variant. Fortunately,
4083 eflags is immune from the fstp's cc clobbering. */
4085 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
4087 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
4095 return "fucompp\n\tfnstsw\t%0";
4097 return "fcompp\n\tfnstsw\t%0";
4110 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
4112 static const char * const alt[24] =
4124 "fcomi\t{%y1, %0|%0, %y1}",
4125 "fcomip\t{%y1, %0|%0, %y1}",
4126 "fucomi\t{%y1, %0|%0, %y1}",
4127 "fucomip\t{%y1, %0|%0, %y1}",
4134 "fcom%z2\t%y2\n\tfnstsw\t%0",
4135 "fcomp%z2\t%y2\n\tfnstsw\t%0",
4136 "fucom%z2\t%y2\n\tfnstsw\t%0",
4137 "fucomp%z2\t%y2\n\tfnstsw\t%0",
4139 "ficom%z2\t%y2\n\tfnstsw\t%0",
4140 "ficomp%z2\t%y2\n\tfnstsw\t%0",
4148 mask = eflags_p << 3;
4149 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
4150 mask |= unordered_p << 1;
4151 mask |= stack_top_dies;
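  /* Illustrative example (assuming the table layout above): eflags_p == 1
     with FP operands, an ordered compare and a dying %st(0) gives
     mask == 9, which selects "fcomip\t{%y1, %0|%0, %y1}".  */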
4163 /* Output assembler code to FILE to initialize basic-block profiling.
4165 If profile_block_flag == 2
4167 Output code to call the subroutine `__bb_init_trace_func'
4168 and pass two parameters to it. The first parameter is
4169 the address of a block allocated in the object module.
4170 The second parameter is the number of the first basic block
4173 The name of the block is a local symbol made with this statement:
4175 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4177 Of course, since you are writing the definition of
4178 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4179 can take a short cut in the definition of this macro and use the
4180 name that you know will result.
4182 The number of the first basic block of the function is
4183 passed to the macro in BLOCK_OR_LABEL.
4185 If described in a virtual assembler language the code to be
4189 parameter2 <- BLOCK_OR_LABEL
4190 call __bb_init_trace_func
4192 else if profile_block_flag != 0
4194 Output code to call the subroutine `__bb_init_func'
4195 and pass one single parameter to it, which is the same
4196 as the first parameter to `__bb_init_trace_func'.
4198 The first word of this parameter is a flag which will be nonzero if
4199 the object module has already been initialized. So test this word
4200 first, and do not call `__bb_init_func' if the flag is nonzero.
4201 Note: When profile_block_flag == 2 the test need not be done
4202 but `__bb_init_trace_func' *must* be called.
4204 BLOCK_OR_LABEL may be used to generate a label number as a
4205 branch destination in case `__bb_init_func' will not be called.
4207 If described in a virtual assembler language the code to be
4218 ix86_output_function_block_profiler (file, block_or_label)
4222 static int num_func = 0;
4224 char block_table[80], false_label[80];
4226 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
4228 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4229 xops[5] = stack_pointer_rtx;
4230 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
4232 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
4234 switch (profile_block_flag)
4237 xops[2] = GEN_INT (block_or_label);
4238 xops[3] = gen_rtx_MEM (Pmode,
4239 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
4240 xops[6] = GEN_INT (8);
4242 output_asm_insn ("push{l}\t%2", xops);
4244 output_asm_insn ("push{l}\t%1", xops);
4247 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4248 output_asm_insn ("push{l}\t%7", xops);
4250 output_asm_insn ("call\t%P3", xops);
4251 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4255 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
4257 xops[0] = const0_rtx;
4258 xops[2] = gen_rtx_MEM (Pmode,
4259 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
4260 xops[3] = gen_rtx_MEM (Pmode,
4261 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
4262 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
4263 xops[6] = GEN_INT (4);
4265 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
4267 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
4268 output_asm_insn ("jne\t%2", xops);
4271 output_asm_insn ("push{l}\t%1", xops);
4274 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops);
4275 output_asm_insn ("push{l}\t%7", xops);
4277 output_asm_insn ("call\t%P3", xops);
4278 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4279 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
4285 /* Output assembler code to FILE to increment a counter associated
4286 with basic block number BLOCKNO.
4288 If profile_block_flag == 2
4290 Output code to initialize the global structure `__bb' and
4291 call the function `__bb_trace_func' which will increment the
4294 `__bb' consists of two words. In the first word the number
4295 of the basic block has to be stored. In the second word
4296 the address of a block allocated in the object module
4299 The basic block number is given by BLOCKNO.
4301 The address of the block is given by the label created with
4303 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4305 by FUNCTION_BLOCK_PROFILER.
4307 Of course, since you are writing the definition of
4308 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4309 can take a short cut in the definition of this macro and use the
4310 name that you know will result.
4312 If described in a virtual assembler language the code to be
4315 move BLOCKNO -> (__bb)
4316 move LPBX0 -> (__bb+4)
4317 call __bb_trace_func
4319 Note that function `__bb_trace_func' must not change the
4320 machine state, especially the flag register. To guarantee
4321 this, you must output code to save and restore registers
4322 either in this macro or in the macros MACHINE_STATE_SAVE
4323 and MACHINE_STATE_RESTORE. The last two macros will be
4324 used in the function `__bb_trace_func', so you must make
4325 sure that the function prologue does not change any
4326 register prior to saving it with MACHINE_STATE_SAVE.
4328 else if profile_block_flag != 0
4330 Output code to increment the counter directly.
4331 Basic blocks are numbered separately from zero within each
4332 compiled object module. The count associated with block number
4333 BLOCKNO is at index BLOCKNO in an array of words; the name of
4334 this array is a local symbol made with this statement:
4336 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
4338 Of course, since you are writing the definition of
4339 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4340 can take a short cut in the definition of this macro and use the
4341 name that you know will result.
4343 If described in a virtual assembler language the code to be
4346 inc (LPBX2+4*BLOCKNO)
4350 ix86_output_block_profiler (file, blockno)
4351 FILE *file ATTRIBUTE_UNUSED;
4354 rtx xops[8], cnt_rtx;
4356 char *block_table = counts;
4358 switch (profile_block_flag)
4361 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
4363 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4364 xops[2] = GEN_INT (blockno);
4365 xops[3] = gen_rtx_MEM (Pmode,
4366 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
4367 xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
4368 xops[5] = plus_constant (xops[4], 4);
4369 xops[0] = gen_rtx_MEM (SImode, xops[4]);
4370 xops[6] = gen_rtx_MEM (SImode, xops[5]);
4372 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
4374 output_asm_insn ("pushf", xops);
4375 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4378 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
4379 output_asm_insn ("push{l}\t%7", xops);
4380 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4381 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
4382 output_asm_insn ("pop{l}\t%7", xops);
4385 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
4386 output_asm_insn ("call\t%P3", xops);
4387 output_asm_insn ("popf", xops);
4392 ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
4393 cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
4394 SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
4397 cnt_rtx = plus_constant (cnt_rtx, blockno*4);
4400 cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
4402 xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
4403 output_asm_insn ("inc{l}\t%0", xops);
4410 ix86_expand_move (mode, operands)
4411 enum machine_mode mode;
4414 int strict = (reload_in_progress || reload_completed);
4417 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
4419 /* Emit insns to move operands[1] into operands[0]. */
4421 if (GET_CODE (operands[0]) == MEM)
4422 operands[1] = force_reg (Pmode, operands[1]);
4425 rtx temp = operands[0];
4426 if (GET_CODE (temp) != REG)
4427 temp = gen_reg_rtx (Pmode);
4428 temp = legitimize_pic_address (operands[1], temp);
4429 if (temp == operands[0])
4436 if (GET_CODE (operands[0]) == MEM
4437 && (GET_MODE (operands[0]) == QImode
4438 || !push_operand (operands[0], mode))
4439 && GET_CODE (operands[1]) == MEM)
4440 operands[1] = force_reg (mode, operands[1]);
4442 if (push_operand (operands[0], mode)
4443 && ! general_no_elim_operand (operands[1], mode))
4444 operands[1] = copy_to_mode_reg (mode, operands[1]);
4446 if (FLOAT_MODE_P (mode))
4448 /* If we are loading a floating point constant to a register,
4449 force the value to memory now, since we'll get better code
4450 out of the back end. */
4454 else if (GET_CODE (operands[1]) == CONST_DOUBLE
4455 && register_operand (operands[0], mode))
4456 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
4460 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
4465 /* Attempt to expand a binary operator. Make the expansion closer to the
4466 actual machine, than just general_operand, which will allow 3 separate
4467 memory references (one output, two input) in a single insn. */
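/* Illustrative example: (set (mem A) (plus (mem A) (reg B))) already has a
   matching memory destination and needs no copying, whereas for
   (set (mem C) (plus (mem A) (mem B))) the code below forces one source
   into a register first, since two memory inputs cannot be encoded.  */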
4470 ix86_expand_binary_operator (code, mode, operands)
4472 enum machine_mode mode;
4475 int matching_memory;
4476 rtx src1, src2, dst, op, clob;
4482 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
4483 if (GET_RTX_CLASS (code) == 'c'
4484 && (rtx_equal_p (dst, src2)
4485 || immediate_operand (src1, mode)))
4492 /* If the destination is memory, and we do not have matching source
4493 operands, do things in registers. */
4494 matching_memory = 0;
4495 if (GET_CODE (dst) == MEM)
4497 if (rtx_equal_p (dst, src1))
4498 matching_memory = 1;
4499 else if (GET_RTX_CLASS (code) == 'c'
4500 && rtx_equal_p (dst, src2))
4501 matching_memory = 2;
4503 dst = gen_reg_rtx (mode);
4506 /* The two source operands cannot both be in memory. */
4507 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
4509 if (matching_memory != 2)
4510 src2 = force_reg (mode, src2);
4512 src1 = force_reg (mode, src1);
4515 /* If the operation is not commutative, source 1 cannot be a constant
4516 or non-matching memory. */
4517 if ((CONSTANT_P (src1)
4518 || (!matching_memory && GET_CODE (src1) == MEM))
4519 && GET_RTX_CLASS (code) != 'c')
4520 src1 = force_reg (mode, src1);
4522 /* If optimizing, copy to regs to improve CSE */
4523 if (optimize && ! no_new_pseudos)
4525 if (GET_CODE (dst) == MEM)
4526 dst = gen_reg_rtx (mode);
4527 if (GET_CODE (src1) == MEM)
4528 src1 = force_reg (mode, src1);
4529 if (GET_CODE (src2) == MEM)
4530 src2 = force_reg (mode, src2);
4533 /* Emit the instruction. */
4535 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
4536 if (reload_in_progress)
4538 /* Reload doesn't know about the flags register, and doesn't know that
4539 it doesn't want to clobber it. We can only do this with PLUS. */
4546 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4547 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4550 /* Fix up the destination if needed. */
4551 if (dst != operands[0])
4552 emit_move_insn (operands[0], dst);
4555 /* Return TRUE or FALSE depending on whether the binary operator meets the
4556 appropriate constraints. */
4559 ix86_binary_operator_ok (code, mode, operands)
4561 enum machine_mode mode ATTRIBUTE_UNUSED;
4564 /* The two source operands cannot both be in memory. */
4565 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
4567 /* If the operation is not commutative, source 1 cannot be a constant. */
4568 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
4570 /* If the destination is memory, we must have a matching source operand. */
4571 if (GET_CODE (operands[0]) == MEM
4572 && ! (rtx_equal_p (operands[0], operands[1])
4573 || (GET_RTX_CLASS (code) == 'c'
4574 && rtx_equal_p (operands[0], operands[2]))))
4576 /* If the operation is not commutative and source 1 is memory, we must
4577 have a matching destination. */
4578 if (GET_CODE (operands[1]) == MEM
4579 && GET_RTX_CLASS (code) != 'c'
4580 && ! rtx_equal_p (operands[0], operands[1]))
4585 /* Attempt to expand a unary operator. Make the expansion closer to the
4586 actual machine, than just general_operand, which will allow 2 separate
4587 memory references (one output, one input) in a single insn. */
4590 ix86_expand_unary_operator (code, mode, operands)
4592 enum machine_mode mode;
4595 int matching_memory;
4596 rtx src, dst, op, clob;
4601 /* If the destination is memory, and we do not have matching source
4602 operands, do things in registers. */
4603 matching_memory = 0;
4604 if (GET_CODE (dst) == MEM)
4606 if (rtx_equal_p (dst, src))
4607 matching_memory = 1;
4609 dst = gen_reg_rtx (mode);
4612 /* When source operand is memory, destination must match. */
4613 if (!matching_memory && GET_CODE (src) == MEM)
4614 src = force_reg (mode, src);
4616 /* If optimizing, copy to regs to improve CSE */
4617 if (optimize && ! no_new_pseudos)
4619 if (GET_CODE (dst) == MEM)
4620 dst = gen_reg_rtx (mode);
4621 if (GET_CODE (src) == MEM)
4622 src = force_reg (mode, src);
4625 /* Emit the instruction. */
4627 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
4628 if (reload_in_progress || code == NOT)
4630 /* Reload doesn't know about the flags register, and doesn't know that
4631 it doesn't want to clobber it. */
4638 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4639 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4642 /* Fix up the destination if needed. */
4643 if (dst != operands[0])
4644 emit_move_insn (operands[0], dst);
4647 /* Return TRUE or FALSE depending on whether the unary operator meets the
4648 appropriate constraints. */
4651 ix86_unary_operator_ok (code, mode, operands)
4652 enum rtx_code code ATTRIBUTE_UNUSED;
4653 enum machine_mode mode ATTRIBUTE_UNUSED;
4654 rtx operands[2] ATTRIBUTE_UNUSED;
4656 /* If one of the operands is memory, source and destination must match. */
4657 if ((GET_CODE (operands[0]) == MEM
4658 || GET_CODE (operands[1]) == MEM)
4659 && ! rtx_equal_p (operands[0], operands[1]))
4664 /* Return TRUE or FALSE depending on whether the first SET in INSN
4665 has source and destination with matching CC modes, and that the
4666 CC mode is at least as constrained as REQ_MODE. */
4669 ix86_match_ccmode (insn, req_mode)
4671 enum machine_mode req_mode;
4674 enum machine_mode set_mode;
4676 set = PATTERN (insn);
4677 if (GET_CODE (set) == PARALLEL)
4678 set = XVECEXP (set, 0, 0);
4679 if (GET_CODE (set) != SET)
4681 if (GET_CODE (SET_SRC (set)) != COMPARE)
4684 set_mode = GET_MODE (SET_DEST (set));
4688 if (req_mode != CCNOmode
4689 && (req_mode != CCmode
4690 || XEXP (SET_SRC (set), 1) != const0_rtx))
4694 if (req_mode == CCGCmode)
4698 if (req_mode == CCGOCmode || req_mode == CCNOmode)
4702 if (req_mode == CCZmode)
4712 return (GET_MODE (SET_SRC (set)) == set_mode);
4715 /* Generate insn patterns to do an integer compare of OPERANDS. */
4718 ix86_expand_int_compare (code, op0, op1)
4722 enum machine_mode cmpmode;
4725 cmpmode = SELECT_CC_MODE (code, op0, op1);
4726 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
4728 /* This is very simple, but making the interface the same as in the
4729 FP case makes the rest of the code easier. */
4730 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
4731 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
4733 /* Return the test that should be put into the flags user, i.e.
4734 the bcc, scc, or cmov instruction. */
4735 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
4738 /* Figure out whether to use ordered or unordered fp comparisons.
4739 Return the appropriate mode to use. */
4742 ix86_fp_compare_mode (code)
4743 enum rtx_code code ATTRIBUTE_UNUSED;
4745 /* ??? In order to make all comparisons reversible, we do all comparisons
4746 non-trapping when compiling for IEEE. Once gcc is able to distinguish
4747 all forms of trapping and nontrapping comparisons, we can make inequality
4748 comparisons trapping again, since it results in better code when using
4749 FCOM based compares. */
4750 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
4754 ix86_cc_mode (code, op0, op1)
4758 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
4759 return ix86_fp_compare_mode (code);
4762 /* Only zero flag is needed. */
4764 case NE: /* ZF!=0 */
4766 /* Codes needing carry flag. */
4767 case GEU: /* CF=0 */
4768 case GTU: /* CF=0 & ZF=0 */
4769 case LTU: /* CF=1 */
4770 case LEU: /* CF=1 | ZF=1 */
4772 /* Codes possibly doable only with sign flag when
4773 comparing against zero. */
4774 case GE: /* SF=OF or SF=0 */
4775 case LT: /* SF<>OF or SF=1 */
4776 if (op1 == const0_rtx)
4779 /* For other cases Carry flag is not required. */
4781 /* Codes doable only with sign flag when comparing
4782 against zero, but we lack a jump instruction for them,
4783 so we need to use relational tests against overflow,
4784 which thus needs to be zero.
4785 case GT: /* ZF=0 & SF=OF */
4786 case LE: /* ZF=1 | SF<>OF */
4787 if (op1 == const0_rtx)
4796 /* Return true if we should use an FCOMI instruction for this fp comparison. */
4799 ix86_use_fcomi_compare (code)
4800 enum rtx_code code ATTRIBUTE_UNUSED;
4802 enum rtx_code swapped_code = swap_condition (code);
4803 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
4804 || (ix86_fp_comparison_cost (swapped_code)
4805 == ix86_fp_comparison_fcomi_cost (swapped_code)));
4808 /* Swap, force into registers, or otherwise massage the two operands
4809 to a fp comparison. The operands are updated in place; the new
4810 comparison code is returned. */
4812 static enum rtx_code
4813 ix86_prepare_fp_compare_args (code, pop0, pop1)
4817 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
4818 rtx op0 = *pop0, op1 = *pop1;
4819 enum machine_mode op_mode = GET_MODE (op0);
4820 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
4822 /* All of the unordered compare instructions only work on registers.
4823 The same is true of the XFmode compare instructions. The same is
4824 true of the fcomi compare instructions. */
4827 && (fpcmp_mode == CCFPUmode
4828 || op_mode == XFmode
4829 || op_mode == TFmode
4830 || ix86_use_fcomi_compare (code)))
4832 op0 = force_reg (op_mode, op0);
4833 op1 = force_reg (op_mode, op1);
4837 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
4838 things around if they appear profitable, otherwise force op0
4841 if (standard_80387_constant_p (op0) == 0
4842 || (GET_CODE (op0) == MEM
4843 && ! (standard_80387_constant_p (op1) == 0
4844 || GET_CODE (op1) == MEM)))
4847 tmp = op0, op0 = op1, op1 = tmp;
4848 code = swap_condition (code);
4851 if (GET_CODE (op0) != REG)
4852 op0 = force_reg (op_mode, op0);
4854 if (CONSTANT_P (op1))
4856 if (standard_80387_constant_p (op1))
4857 op1 = force_reg (op_mode, op1);
4859 op1 = validize_mem (force_const_mem (op_mode, op1));
4863 /* Try to rearrange the comparison to make it cheaper. */
4864 if (ix86_fp_comparison_cost (code)
4865 > ix86_fp_comparison_cost (swap_condition (code))
4866 && (GET_CODE (op0) == REG || !reload_completed))
4869 tmp = op0, op0 = op1, op1 = tmp;
4870 code = swap_condition (code);
4871 if (GET_CODE (op0) != REG)
4872 op0 = force_reg (op_mode, op0);
4880 /* Convert comparison codes we use to represent FP comparison to integer
4881 code that will result in proper branch. Return UNKNOWN if no such code
4883 static enum rtx_code
4884 ix86_fp_compare_code_to_integer (code)
4914 /* Split comparison code CODE into comparisons we can do using branch
4915 instructions. BYPASS_CODE is the comparison code for the branch that will
4916 branch around FIRST_CODE and SECOND_CODE. If one of the branches
4917 is not required, its code is set to NIL.
4918 We never require more than two branches. */
4920 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
4921 enum rtx_code code, *bypass_code, *first_code, *second_code;
4927 /* The fcomi comparison sets flags as follows:
4937 case GT: /* GTU - CF=0 & ZF=0 */
4938 case GE: /* GEU - CF=0 */
4939 case ORDERED: /* PF=0 */
4940 case UNORDERED: /* PF=1 */
4941 case UNEQ: /* EQ - ZF=1 */
4942 case UNLT: /* LTU - CF=1 */
4943 case UNLE: /* LEU - CF=1 | ZF=1 */
4944 case LTGT: /* EQ - ZF=0 */
4946 case LT: /* LTU - CF=1 - fails on unordered */
4948 *bypass_code = UNORDERED;
4950 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
4952 *bypass_code = UNORDERED;
4954 case EQ: /* EQ - ZF=1 - fails on unordered */
4956 *bypass_code = UNORDERED;
4958 case NE: /* NE - ZF=0 - fails on unordered */
4960 *second_code = UNORDERED;
4962 case UNGE: /* GEU - CF=0 - fails on unordered */
4964 *second_code = UNORDERED;
4966 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
4968 *second_code = UNORDERED;
4973 if (!TARGET_IEEE_FP)
4980 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
4981 All following functions use the number of instructions as the cost metric.
4982 In the future this should be tweaked to compute bytes for optimize_size and
4983 to take into account the performance of various instructions on various CPUs. */
4985 ix86_fp_comparison_arithmetics_cost (code)
4988 if (!TARGET_IEEE_FP)
4990 /* The cost of code output by ix86_expand_fp_compare. */
5018 /* Return cost of comparison done using fcomi operation.
5019 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5021 ix86_fp_comparison_fcomi_cost (code)
5024 enum rtx_code bypass_code, first_code, second_code;
5025 /* Return an arbitrarily high cost when the instruction is not supported - this
5026 prevents gcc from using it. */
5029 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5030 return (bypass_code != NIL || second_code != NIL) + 2;
5033 /* Return cost of comparison done using sahf operation.
5034 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5036 ix86_fp_comparison_sahf_cost (code)
5039 enum rtx_code bypass_code, first_code, second_code;
5040 /* Return an arbitrarily high cost when the instruction is not preferred - this
5041 keeps gcc from using it. */
5042 if (!TARGET_USE_SAHF && !optimize_size)
5044 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5045 return (bypass_code != NIL || second_code != NIL) + 3;
5048 /* Compute cost of the comparison done using any method.
5049 See ix86_fp_comparison_arithmetics_cost for the metrics. */
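/* Put roughly, and as an added illustrative model only (not a restatement
   of the code below): this function returns
     min (ix86_fp_comparison_arithmetics_cost (code),
          ix86_fp_comparison_sahf_cost (code),
          ix86_fp_comparison_fcomi_cost (code))
   so the cheapest of the three expansion strategies wins.  */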
5051 ix86_fp_comparison_cost (code)
5054 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
5057 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
5058 sahf_cost = ix86_fp_comparison_sahf_cost (code);
5060 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
5061 if (min > sahf_cost)
5063 if (min > fcomi_cost)
5068 /* Generate insn patterns to do a floating point compare of OPERANDS. */
5071 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
5073 rtx op0, op1, scratch;
5077 enum machine_mode fpcmp_mode, intcmp_mode;
5079 int cost = ix86_fp_comparison_cost (code);
5080 enum rtx_code bypass_code, first_code, second_code;
5082 fpcmp_mode = ix86_fp_compare_mode (code);
5083 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
5086 *second_test = NULL_RTX;
5088 *bypass_test = NULL_RTX;
5090 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5092 /* Do fcomi/sahf based test when profitable. */
5093 if ((bypass_code == NIL || bypass_test)
5094 && (second_code == NIL || second_test)
5095 && ix86_fp_comparison_arithmetics_cost (code) > cost)
5099 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5100 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
5106 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5107 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
5108 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
5109 emit_insn (gen_x86_sahf_1 (scratch));
5112 /* The FP codes work out to act like unsigned. */
5113 intcmp_mode = fpcmp_mode;
5115 if (bypass_code != NIL)
5116 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
5117 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5119 if (second_code != NIL)
5120 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
5121 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5126 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
5127 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5128 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
5129 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
5131 /* In the unordered case, we have to check C2 for NaN's, which
5132 doesn't happen to work out to anything nice combination-wise.
5133 So do some bit twiddling on the value we've got in AH to come
5134 up with an appropriate set of condition codes. */
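/* For reference (an added note, based on the usual fnstsw layout rather
   than anything stated here): after fnstsw the FPU condition codes land
   in AH as C0 = 0x01, C2 = 0x04 and C3 = 0x40, so the 0x45, 0x44, 0x40,
   0x04 and 0x01 masks used below select combinations of those bits.  */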
5136 intcmp_mode = CCNOmode;
5141 if (code == GT || !TARGET_IEEE_FP)
5143 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
5148 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5149 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
5150 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
5151 intcmp_mode = CCmode;
5157 if (code == LT && TARGET_IEEE_FP)
5159 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5160 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
5161 intcmp_mode = CCmode;
5166 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
5172 if (code == GE || !TARGET_IEEE_FP)
5174 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
5179 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5180 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
5187 if (code == LE && TARGET_IEEE_FP)
5189 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5190 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
5191 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
5192 intcmp_mode = CCmode;
5197 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
5203 if (code == EQ && TARGET_IEEE_FP)
5205 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5206 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
5207 intcmp_mode = CCmode;
5212 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
5219 if (code == NE && TARGET_IEEE_FP)
5221 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5222 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
5228 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
5234 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
5238 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
5247 /* Return the test that should be put into the flags user, i.e.
5248 the bcc, scc, or cmov instruction. */
5249 return gen_rtx_fmt_ee (code, VOIDmode,
5250 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5255 ix86_expand_compare (code, second_test, bypass_test)
5257 rtx *second_test, *bypass_test;
5260 op0 = ix86_compare_op0;
5261 op1 = ix86_compare_op1;
5264 *second_test = NULL_RTX;
5266 *bypass_test = NULL_RTX;
5268 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5269 ret = ix86_expand_fp_compare (code, op0, op1, gen_reg_rtx (HImode),
5270 second_test, bypass_test);
5272 ret = ix86_expand_int_compare (code, op0, op1);
5278 ix86_expand_branch (code, label)
5284 switch (GET_MODE (ix86_compare_op0))
5289 tmp = ix86_expand_compare (code, NULL, NULL);
5290 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5291 gen_rtx_LABEL_REF (VOIDmode, label),
5293 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5300 /* Don't expand the comparison early, so that we get better code
5301 when jump or whoever decides to reverse the comparison. */
5306 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
5309 tmp = gen_rtx_fmt_ee (code, VOIDmode,
5310 ix86_compare_op0, ix86_compare_op1);
5311 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5312 gen_rtx_LABEL_REF (VOIDmode, label),
5314 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
5316 use_fcomi = ix86_use_fcomi_compare (code);
5317 vec = rtvec_alloc (3 + !use_fcomi);
5318 RTVEC_ELT (vec, 0) = tmp;
5320 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
5322 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
5325 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
5327 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
5332 /* Expand DImode branch into multiple compare+branch. */
5334 rtx lo[2], hi[2], label2;
5335 enum rtx_code code1, code2, code3;
5337 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
5339 tmp = ix86_compare_op0;
5340 ix86_compare_op0 = ix86_compare_op1;
5341 ix86_compare_op1 = tmp;
5342 code = swap_condition (code);
5344 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
5345 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
5347 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
5348 avoid two branches. This costs one extra insn, so disable when
5349 optimizing for size. */
5351 if ((code == EQ || code == NE)
5353 || hi[1] == const0_rtx || lo[1] == const0_rtx))
5358 if (hi[1] != const0_rtx)
5359 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
5360 NULL_RTX, 0, OPTAB_WIDEN);
5363 if (lo[1] != const0_rtx)
5364 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
5365 NULL_RTX, 0, OPTAB_WIDEN);
5367 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
5368 NULL_RTX, 0, OPTAB_WIDEN);
5370 ix86_compare_op0 = tmp;
5371 ix86_compare_op1 = const0_rtx;
5372 ix86_expand_branch (code, label);
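/* An illustrative C model of the transformation above (an added sketch,
   not compiler code): the 64-bit test  a == b  becomes
     ((hi(a) ^ hi(b)) | (lo(a) ^ lo(b))) == 0
   so one OR and a single compare-and-branch replace two separate
   compare-and-branch pairs.  */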
5376 /* Otherwise, if we are doing less-than or greater-or-equal-than,
5377 op1 is a constant and the low word is zero, then we can just
5378 examine the high word. */
5380 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
5383 case LT: case LTU: case GE: case GEU:
5384 ix86_compare_op0 = hi[0];
5385 ix86_compare_op1 = hi[1];
5386 ix86_expand_branch (code, label);
5392 /* Otherwise, we need two or three jumps. */
5394 label2 = gen_label_rtx ();
5397 code2 = swap_condition (code);
5398 code3 = unsigned_condition (code);
5402 case LT: case GT: case LTU: case GTU:
5405 case LE: code1 = LT; code2 = GT; break;
5406 case GE: code1 = GT; code2 = LT; break;
5407 case LEU: code1 = LTU; code2 = GTU; break;
5408 case GEU: code1 = GTU; code2 = LTU; break;
5410 case EQ: code1 = NIL; code2 = NE; break;
5411 case NE: code2 = NIL; break;
5419 * if (hi(a) < hi(b)) goto true;
5420 * if (hi(a) > hi(b)) goto false;
5421 * if (lo(a) < lo(b)) goto true;
5425 ix86_compare_op0 = hi[0];
5426 ix86_compare_op1 = hi[1];
5429 ix86_expand_branch (code1, label);
5431 ix86_expand_branch (code2, label2);
5433 ix86_compare_op0 = lo[0];
5434 ix86_compare_op1 = lo[1];
5435 ix86_expand_branch (code3, label);
5438 emit_label (label2);
5447 /* Split branch based on floating point condition. */
5449 ix86_split_fp_branch (condition, op1, op2, target1, target2, tmp)
5450 rtx condition, op1, op2, target1, target2, tmp;
5453 rtx label = NULL_RTX;
5454 enum rtx_code code = GET_CODE (condition);
5456 if (target2 != pc_rtx)
5459 code = reverse_condition_maybe_unordered (code);
5464 condition = ix86_expand_fp_compare (code, op1, op2,
5465 tmp, &second, &bypass);
5466 if (bypass != NULL_RTX)
5468 label = gen_label_rtx ();
5469 emit_jump_insn (gen_rtx_SET
5471 gen_rtx_IF_THEN_ELSE (VOIDmode,
5473 gen_rtx_LABEL_REF (VOIDmode,
5477 /* AMD Athlon, and probably other CPUs too, have a fast bypass path between the
5478 comparison and the first branch. The second branch takes longer to execute,
5479 so place the worse-predictable branch first if possible. */
5480 if (second != NULL_RTX
5481 && (GET_CODE (second) == UNORDERED || GET_CODE (second) == ORDERED))
5483 rtx tmp = condition;
5487 emit_jump_insn (gen_rtx_SET
5489 gen_rtx_IF_THEN_ELSE (VOIDmode,
5490 condition, target1, target2)));
5491 if (second != NULL_RTX)
5492 emit_jump_insn (gen_rtx_SET
5494 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1, target2)));
5495 if (label != NULL_RTX)
5500 ix86_expand_setcc (code, dest)
5504 rtx ret, tmp, tmpreg;
5505 rtx second_test, bypass_test;
5508 if (GET_MODE (ix86_compare_op0) == DImode)
5509 return 0; /* FAIL */
5511 /* Three modes of generation:
5512 0 -- destination does not overlap compare sources:
5513 clear dest first, emit strict_low_part setcc.
5514 1 -- destination does overlap compare sources:
5515 emit subreg setcc, zero extend.
5516 2 -- destination is in QImode:
5522 if (GET_MODE (dest) == QImode)
5524 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
5525 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
5529 emit_move_insn (dest, const0_rtx);
5531 ret = ix86_expand_compare (code, &second_test, &bypass_test);
5532 PUT_MODE (ret, QImode);
5538 tmp = gen_lowpart (QImode, dest);
5540 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
5544 if (!cse_not_expected)
5545 tmp = gen_reg_rtx (QImode);
5547 tmp = gen_lowpart (QImode, dest);
5551 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
5552 if (bypass_test || second_test)
5554 rtx test = second_test;
5556 rtx tmp2 = gen_reg_rtx (QImode);
5563 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
5565 PUT_MODE (test, QImode);
5566 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
5569 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
5571 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
5578 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
5579 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
5580 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
5581 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5585 return 1; /* DONE */
5589 ix86_expand_int_movcc (operands)
5592 enum rtx_code code = GET_CODE (operands[1]), compare_code;
5593 rtx compare_seq, compare_op;
5594 rtx second_test, bypass_test;
5596 /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
5597 If the comparison is done with an immediate, we can convert it to LTU or
5598 GEU by altering the integer. */
5600 if ((code == LEU || code == GTU)
5601 && GET_CODE (ix86_compare_op1) == CONST_INT
5602 && GET_MODE (operands[0]) != HImode
5603 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
5604 && GET_CODE (operands[2]) == CONST_INT
5605 && GET_CODE (operands[3]) == CONST_INT)
5611 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
5615 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
5616 compare_seq = gen_sequence ();
5619 compare_code = GET_CODE (compare_op);
5621 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
5622 HImode insns, we'd be swallowed in word prefix ops. */
5624 if (GET_MODE (operands[0]) != HImode
5625 && GET_CODE (operands[2]) == CONST_INT
5626 && GET_CODE (operands[3]) == CONST_INT)
5628 rtx out = operands[0];
5629 HOST_WIDE_INT ct = INTVAL (operands[2]);
5630 HOST_WIDE_INT cf = INTVAL (operands[3]);
5633 if ((compare_code == LTU || compare_code == GEU)
5634 && !second_test && !bypass_test)
5637 /* Detect overlap between destination and compare sources. */
5640 /* To simplify rest of code, restrict to the GEU case. */
5641 if (compare_code == LTU)
5646 compare_code = reverse_condition (compare_code);
5647 code = reverse_condition (code);
5651 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
5652 || reg_overlap_mentioned_p (out, ix86_compare_op1))
5653 tmp = gen_reg_rtx (SImode);
5655 emit_insn (compare_seq);
5656 emit_insn (gen_x86_movsicc_0_m1 (tmp));
5668 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5679 emit_insn (gen_iorsi3 (out, out, GEN_INT (ct)));
5681 else if (diff == -1 && ct)
5691 emit_insn (gen_one_cmplsi2 (tmp, tmp));
5693 emit_insn (gen_addsi3 (out, out, GEN_INT (cf)));
5700 * andl cf - ct, dest
5705 emit_insn (gen_andsi3 (out, out, GEN_INT (cf - ct)));
5707 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5711 emit_move_insn (out, tmp);
5713 return 1; /* DONE */
5720 tmp = ct, ct = cf, cf = tmp;
5722 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
5724 /* We may be reversing an unordered compare to a normal compare, which
5725 is not valid in general (we may convert a non-trapping condition
5726 to a trapping one); however, on i386 we currently emit all
5727 comparisons unordered. */
5728 compare_code = reverse_condition_maybe_unordered (compare_code);
5729 code = reverse_condition_maybe_unordered (code);
5733 compare_code = reverse_condition (compare_code);
5734 code = reverse_condition (code);
5737 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
5738 || diff == 3 || diff == 5 || diff == 9)
5744 * lea cf(dest*(ct-cf)),dest
5748 * This also catches the degenerate setcc-only case.
5754 out = emit_store_flag (out, code, ix86_compare_op0,
5755 ix86_compare_op1, VOIDmode, 0, 1);
5762 tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
5766 tmp = gen_rtx_PLUS (SImode, tmp, out);
5772 tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));
5778 emit_move_insn (out, tmp);
5783 clob = gen_rtx_REG (CCmode, FLAGS_REG);
5784 clob = gen_rtx_CLOBBER (VOIDmode, clob);
5786 tmp = gen_rtx_SET (VOIDmode, out, tmp);
5787 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5791 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
5793 if (out != operands[0])
5794 emit_move_insn (operands[0], out);
5796 return 1; /* DONE */
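/* Worked example (an added illustration; register choices are assumed):
   for  x = (a < b) ? 5 : 2  we get ct = 5, cf = 2, diff = 3, so the
   expansion is roughly
     setl   %al
     movzbl %al, %eax
     leal   2(%eax,%eax,2), %eax      out = out * 3 + 2, i.e. 5 or 2  */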
5800 * General case: Jumpful:
5801 * xorl dest,dest cmpl op1, op2
5802 * cmpl op1, op2 movl ct, dest
5804 * decl dest movl cf, dest
5805 * andl (cf-ct),dest 1:
5810 * This is reasonably steep, but branch mispredict costs are
5811 * high on modern CPUs, so consider failing only if optimizing
5814 * %%% Parameterize branch_cost on the tuning architecture, then
5815 * use that. The 80386 couldn't care less about mispredicts.
5818 if (!optimize_size && !TARGET_CMOVE)
5824 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
5826 /* We may be reversing an unordered compare to a normal compare,
5827 which is not valid in general (we may convert a non-trapping
5828 condition to a trapping one); however, on i386 we currently
5829 emit all comparisons unordered. */
5830 compare_code = reverse_condition_maybe_unordered (compare_code);
5831 code = reverse_condition_maybe_unordered (code);
5835 compare_code = reverse_condition (compare_code);
5836 code = reverse_condition (code);
5840 out = emit_store_flag (out, code, ix86_compare_op0,
5841 ix86_compare_op1, VOIDmode, 0, 1);
5843 emit_insn (gen_addsi3 (out, out, constm1_rtx));
5844 emit_insn (gen_andsi3 (out, out, GEN_INT (cf-ct)));
5846 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5847 if (out != operands[0])
5848 emit_move_insn (operands[0], out);
5850 return 1; /* DONE */
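/* Illustrative C model of the branch-free sequence emitted above (an
   added sketch, not compiler code):
     t  = cond;        setcc:  0 or 1
     t -= 1;           0 when cond holds, -1 otherwise
     t &= cf - ct;     0 or cf - ct
     t += ct;          ct when cond holds, cf otherwise  */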
5856 /* Try a few things more with specific constants and a variable. */
5859 rtx var, orig_out, out, tmp;
5862 return 0; /* FAIL */
5864 /* If one of the two operands is an interesting constant, load a
5865 constant with the above and mask it in with a logical operation. */
5867 if (GET_CODE (operands[2]) == CONST_INT)
5870 if (INTVAL (operands[2]) == 0)
5871 operands[3] = constm1_rtx, op = and_optab;
5872 else if (INTVAL (operands[2]) == -1)
5873 operands[3] = const0_rtx, op = ior_optab;
5875 return 0; /* FAIL */
5877 else if (GET_CODE (operands[3]) == CONST_INT)
5880 if (INTVAL (operands[3]) == 0)
5881 operands[2] = constm1_rtx, op = and_optab;
5882 else if (INTVAL (operands[3]) == -1)
5883 operands[2] = const0_rtx, op = ior_optab;
5885 return 0; /* FAIL */
5888 return 0; /* FAIL */
5890 orig_out = operands[0];
5891 tmp = gen_reg_rtx (GET_MODE (orig_out));
5894 /* Recurse to get the constant loaded. */
5895 if (ix86_expand_int_movcc (operands) == 0)
5896 return 0; /* FAIL */
5898 /* Mask in the interesting variable. */
5899 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
5901 if (out != orig_out)
5902 emit_move_insn (orig_out, out);
5904 return 1; /* DONE */
5908 * For comparison with above,
5918 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
5919 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
5920 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
5921 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
5923 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
5925 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
5926 emit_move_insn (tmp, operands[3]);
5929 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
5931 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
5932 emit_move_insn (tmp, operands[2]);
5936 emit_insn (compare_seq);
5937 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5938 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5939 compare_op, operands[2],
5942 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5943 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5948 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5949 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5954 return 1; /* DONE */
5958 ix86_expand_fp_movcc (operands)
5963 rtx compare_op, second_test, bypass_test;
5965 /* The floating point conditional move instructions don't directly
5966 support conditions resulting from a signed integer comparison. */
5968 code = GET_CODE (operands[1]);
5969 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
5971 /* The floating point conditional move instructions don't directly
5972 support signed integer comparisons. */
5974 if (!fcmov_comparison_operator (compare_op, VOIDmode))
5976 if (second_test != NULL || bypass_test != NULL)
5978 tmp = gen_reg_rtx (QImode);
5979 ix86_expand_setcc (code, tmp);
5981 ix86_compare_op0 = tmp;
5982 ix86_compare_op1 = const0_rtx;
5983 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
5985 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
5987 tmp = gen_reg_rtx (GET_MODE (operands[0]));
5988 emit_move_insn (tmp, operands[3]);
5991 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
5993 tmp = gen_reg_rtx (GET_MODE (operands[0]));
5994 emit_move_insn (tmp, operands[2]);
5998 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5999 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6004 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6005 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6010 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6011 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6019 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
6020 works for floating point parameters and nonoffsettable memories.
6021 For pushes, it returns just stack offsets; the values will be saved
6022 in the right order. At most three parts are generated. */
6025 ix86_split_to_parts (operand, parts, mode)
6028 enum machine_mode mode;
6030 int size = mode == TFmode ? 3 : GET_MODE_SIZE (mode) / 4;
6032 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
6034 if (size < 2 || size > 3)
6037 /* Optimize constant pool references to immediates. This is used by fp moves,
6038 which force all constants to memory to allow combining. */
6040 if (GET_CODE (operand) == MEM
6041 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
6042 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
6043 operand = get_pool_constant (XEXP (operand, 0));
6045 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
6047 /* The only non-offsettable memories we handle are pushes. */
6048 if (! push_operand (operand, VOIDmode))
6051 PUT_MODE (operand, SImode);
6052 parts[0] = parts[1] = parts[2] = operand;
6057 split_di (&operand, 1, &parts[0], &parts[1]);
6060 if (REG_P (operand))
6062 if (!reload_completed)
6064 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
6065 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
6067 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
6069 else if (offsettable_memref_p (operand))
6071 PUT_MODE (operand, SImode);
6073 parts[1] = adj_offsettable_operand (operand, 4);
6075 parts[2] = adj_offsettable_operand (operand, 8);
6077 else if (GET_CODE (operand) == CONST_DOUBLE)
6082 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
6087 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
6088 parts[2] = GEN_INT (l[2]);
6091 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
6096 parts[1] = GEN_INT (l[1]);
6097 parts[0] = GEN_INT (l[0]);
6107 /* Emit insns to perform a move or push of DI, DF, and XF values.
6108 Return false when normal moves are needed; true when all required
6109 insns have been emitted. Operands 2-4 contain the input values
6110 in the correct order; operands 5-7 contain the output values. */
6113 ix86_split_long_move (operands1)
6122 /* Make our own copy to avoid clobbering the operands. */
6123 operands[0] = copy_rtx (operands1[0]);
6124 operands[1] = copy_rtx (operands1[1]);
6126 /* The only non-offsettable memory we handle is push. */
6127 if (push_operand (operands[0], VOIDmode))
6129 else if (GET_CODE (operands[0]) == MEM
6130 && ! offsettable_memref_p (operands[0]))
6133 size = ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
6134 ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));
6136 /* When emitting a push, take care with source operands on the stack. */
6137 if (push && GET_CODE (operands[1]) == MEM
6138 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
6141 part[1][1] = part[1][2];
6142 part[1][0] = part[1][1];
6145 /* We need to do the copy in the right order in case an address register
6146 of the source overlaps the destination. */
6147 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
6149 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
6151 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
6154 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
6157 /* Collision in the middle part can be handled by reordering. */
6158 if (collisions == 1 && size == 3
6159 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
6162 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
6163 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
6166 /* If there are more collisions, we can't handle it by reordering.
6167 Do an lea to the last part and use only one colliding move. */
6168 else if (collisions > 1)
6171 emit_insn (gen_rtx_SET (VOIDmode, part[0][size - 1],
6172 XEXP (part[1][0], 0)));
6173 part[1][0] = change_address (part[1][0], SImode, part[0][size - 1]);
6174 part[1][1] = adj_offsettable_operand (part[1][0], 4);
6176 part[1][2] = adj_offsettable_operand (part[1][0], 8);
6184 /* We use only the first 12 bytes of a TFmode value, but for pushing we
6185 are required to adjust the stack as if we were pushing a real 16-byte
6187 if (GET_MODE (operands1[0]) == TFmode)
6188 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
6190 emit_insn (gen_push (part[1][2]));
6192 emit_insn (gen_push (part[1][1]));
6193 emit_insn (gen_push (part[1][0]));
6197 /* Choose the correct order so we do not overwrite the source before it is copied. */
6198 if ((REG_P (part[0][0])
6199 && REG_P (part[1][1])
6200 && (REGNO (part[0][0]) == REGNO (part[1][1])
6202 && REGNO (part[0][0]) == REGNO (part[1][2]))))
6204 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
6208 operands1[2] = part[0][2];
6209 operands1[3] = part[0][1];
6210 operands1[4] = part[0][0];
6211 operands1[5] = part[1][2];
6212 operands1[6] = part[1][1];
6213 operands1[7] = part[1][0];
6217 operands1[2] = part[0][1];
6218 operands1[3] = part[0][0];
6219 operands1[5] = part[1][1];
6220 operands1[6] = part[1][0];
6227 operands1[2] = part[0][0];
6228 operands1[3] = part[0][1];
6229 operands1[4] = part[0][2];
6230 operands1[5] = part[1][0];
6231 operands1[6] = part[1][1];
6232 operands1[7] = part[1][2];
6236 operands1[2] = part[0][0];
6237 operands1[3] = part[0][1];
6238 operands1[5] = part[1][0];
6239 operands1[6] = part[1][1];
6247 ix86_split_ashldi (operands, scratch)
6248 rtx *operands, scratch;
6250 rtx low[2], high[2];
6253 if (GET_CODE (operands[2]) == CONST_INT)
6255 split_di (operands, 2, low, high);
6256 count = INTVAL (operands[2]) & 63;
6260 emit_move_insn (high[0], low[1]);
6261 emit_move_insn (low[0], const0_rtx);
6264 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
6268 if (!rtx_equal_p (operands[0], operands[1]))
6269 emit_move_insn (operands[0], operands[1]);
6270 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
6271 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
6276 if (!rtx_equal_p (operands[0], operands[1]))
6277 emit_move_insn (operands[0], operands[1]);
6279 split_di (operands, 1, low, high);
6281 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
6282 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
6284 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
6286 if (! no_new_pseudos)
6287 scratch = force_reg (SImode, const0_rtx);
6289 emit_move_insn (scratch, const0_rtx);
6291 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
6295 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
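/* For example (an added note; the exact insns depend on the patterns in
   i386.md): a constant 64-bit left shift by COUNT < 32 becomes
     shldl $COUNT, %lo, %hi
     sall  $COUNT, %lo
   while COUNT >= 32 degenerates into moving LO into HI, clearing LO and
   shifting HI left by COUNT - 32.  */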
6300 ix86_split_ashrdi (operands, scratch)
6301 rtx *operands, scratch;
6303 rtx low[2], high[2];
6306 if (GET_CODE (operands[2]) == CONST_INT)
6308 split_di (operands, 2, low, high);
6309 count = INTVAL (operands[2]) & 63;
6313 emit_move_insn (low[0], high[1]);
6315 if (! reload_completed)
6316 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
6319 emit_move_insn (high[0], low[0]);
6320 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
6324 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
6328 if (!rtx_equal_p (operands[0], operands[1]))
6329 emit_move_insn (operands[0], operands[1]);
6330 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
6331 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
6336 if (!rtx_equal_p (operands[0], operands[1]))
6337 emit_move_insn (operands[0], operands[1]);
6339 split_di (operands, 1, low, high);
6341 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
6342 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
6344 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
6346 if (! no_new_pseudos)
6347 scratch = gen_reg_rtx (SImode);
6348 emit_move_insn (scratch, high[0]);
6349 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
6350 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
6354 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
6359 ix86_split_lshrdi (operands, scratch)
6360 rtx *operands, scratch;
6362 rtx low[2], high[2];
6365 if (GET_CODE (operands[2]) == CONST_INT)
6367 split_di (operands, 2, low, high);
6368 count = INTVAL (operands[2]) & 63;
6372 emit_move_insn (low[0], high[1]);
6373 emit_move_insn (high[0], const0_rtx);
6376 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
6380 if (!rtx_equal_p (operands[0], operands[1]))
6381 emit_move_insn (operands[0], operands[1]);
6382 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
6383 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
6388 if (!rtx_equal_p (operands[0], operands[1]))
6389 emit_move_insn (operands[0], operands[1]);
6391 split_di (operands, 1, low, high);
6393 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
6394 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
6396 /* Heh. By reversing the arguments, we can reuse this pattern. */
6397 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
6399 if (! no_new_pseudos)
6400 scratch = force_reg (SImode, const0_rtx);
6402 emit_move_insn (scratch, const0_rtx);
6404 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
6408 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
6412 /* Expand the appropriate insns for doing strlen if not just doing
6415 out = result, initialized with the start address
6416 align_rtx = alignment of the address.
6417 scratch = scratch register, initialized with the start address when
6418 not aligned, otherwise undefined
6420 This is just the body. It needs the initialisations mentioned above and
6421 some address computing at the end. These things are done in i386.md. */
6424 ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
6425 rtx out, align_rtx, scratch;
6429 rtx align_2_label = NULL_RTX;
6430 rtx align_3_label = NULL_RTX;
6431 rtx align_4_label = gen_label_rtx ();
6432 rtx end_0_label = gen_label_rtx ();
6434 rtx tmpreg = gen_reg_rtx (SImode);
6437 if (GET_CODE (align_rtx) == CONST_INT)
6438 align = INTVAL (align_rtx);
6440 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
6442 /* Is there a known alignment and is it less than 4? */
6445 /* Is there a known alignment and is it not 2? */
6448 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
6449 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
6451 /* Leave just the 3 lower bits. */
6452 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
6453 NULL_RTX, 0, OPTAB_WIDEN);
6455 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
6456 SImode, 1, 0, align_4_label);
6457 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
6458 SImode, 1, 0, align_2_label);
6459 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
6460 SImode, 1, 0, align_3_label);
6464 /* Since the alignment is 2, we have to check 2 or 0 bytes;
6465 check whether it is aligned to a 4-byte boundary. */
6467 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
6468 NULL_RTX, 0, OPTAB_WIDEN);
6470 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
6471 SImode, 1, 0, align_4_label);
6474 mem = gen_rtx_MEM (QImode, out);
6476 /* Now compare the bytes. */
6478 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
6479 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6480 QImode, 1, 0, end_0_label);
6482 /* Increment the address. */
6483 emit_insn (gen_addsi3 (out, out, const1_rtx));
6485 /* Not needed with an alignment of 2 */
6488 emit_label (align_2_label);
6490 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6491 QImode, 1, 0, end_0_label);
6493 emit_insn (gen_addsi3 (out, out, const1_rtx));
6495 emit_label (align_3_label);
6498 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6499 QImode, 1, 0, end_0_label);
6501 emit_insn (gen_addsi3 (out, out, const1_rtx));
6504 /* Generate loop to check 4 bytes at a time. It is not a good idea to
6505 align this loop. It only makes programs huge, but does not help to
6507 emit_label (align_4_label);
6509 mem = gen_rtx_MEM (SImode, out);
6510 emit_move_insn (scratch, mem);
6511 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
6513 /* This formula yields a nonzero result iff one of the bytes is zero.
6514 This saves three branches inside the loop and many cycles. */
6516 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
6517 emit_insn (gen_one_cmplsi2 (scratch, scratch));
6518 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
6519 emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
6520 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0,
6521 SImode, 1, 0, align_4_label);
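/* The test emitted above corresponds to the classic C expression (an
   added illustration, not compiler code):
     ((word - 0x01010101) & ~word & 0x80808080) != 0
   which is nonzero exactly when some byte of WORD is zero.  E.g. for
   word = 0x61620063 the result is 0x00008000: the zero byte in bits
   8..15 trips its 0x80 mask bit.  */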
6525 rtx reg = gen_reg_rtx (SImode);
6526 emit_move_insn (reg, tmpreg);
6527 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
6529 /* If zero is not in the first two bytes, move two bytes forward. */
6530 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
6531 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6532 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
6533 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
6534 gen_rtx_IF_THEN_ELSE (SImode, tmp,
6537 /* Emit lea manually to avoid clobbering of flags. */
6538 emit_insn (gen_rtx_SET (SImode, reg,
6539 gen_rtx_PLUS (SImode, out, GEN_INT (2))));
6541 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6542 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
6543 emit_insn (gen_rtx_SET (VOIDmode, out,
6544 gen_rtx_IF_THEN_ELSE (SImode, tmp,
6551 rtx end_2_label = gen_label_rtx ();
6552 /* Is zero in the first two bytes? */
6554 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
6555 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6556 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
6557 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
6558 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
6560 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
6561 JUMP_LABEL (tmp) = end_2_label;
6563 /* Not in the first two. Move two bytes forward. */
6564 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
6565 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
6567 emit_label (end_2_label);
6571 /* Avoid branch in fixing the byte. */
6572 tmpreg = gen_lowpart (QImode, tmpreg);
6573 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
6574 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
6576 emit_label (end_0_label);
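/* Rough shape of the code generated above, as an added C sketch
   (assumptions: p stands for the OUT pointer, w for the scratch word):
     while ((long) p & 3)                  byte loop up to alignment
       if (*p == 0) goto done; else p++;
     do {                                  then 4 bytes per iteration
       w = *(unsigned int *) p; p += 4;
     } while (!((w - 0x01010101) & ~w & 0x80808080));
     ... back p up to the first zero byte ...
   done:  */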
6579 /* Clear stack slot assignments remembered from previous functions.
6580 This is called from INIT_EXPANDERS once before RTL is emitted for each
6584 ix86_init_machine_status (p)
6587 p->machine = (struct machine_function *)
6588 xcalloc (1, sizeof (struct machine_function));
6591 /* Mark machine specific bits of P for GC. */
6593 ix86_mark_machine_status (p)
6596 struct machine_function *machine = p->machine;
6597 enum machine_mode mode;
6603 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
6604 mode = (enum machine_mode) ((int) mode + 1))
6605 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
6606 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
6610 ix86_free_machine_status (p)
6617 /* Return a MEM corresponding to a stack slot with mode MODE.
6618 Allocate a new slot if necessary.
6620 The RTL for a function can have several slots available: N is
6621 which slot to use. */
6624 assign_386_stack_local (mode, n)
6625 enum machine_mode mode;
6628 if (n < 0 || n >= MAX_386_STACK_LOCALS)
6631 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
6632 ix86_stack_locals[(int) mode][n]
6633 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
6635 return ix86_stack_locals[(int) mode][n];
6638 /* Calculate the length of the memory address in the instruction
6639 encoding. Does not include the one-byte modrm, opcode, or prefix. */
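/* For example (an added note about typical encodings): a plain "(%ebx)"
   address contributes 0 extra bytes, "4(%ebx)" contributes 1 (disp8),
   a symbol or large displacement contributes 4 (disp32), and any form
   with an index register, e.g. "(%ebx,%esi,2)", needs an extra SIB byte
   on top of that.  */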
6642 memory_address_length (addr)
6645 struct ix86_address parts;
6646 rtx base, index, disp;
6649 if (GET_CODE (addr) == PRE_DEC
6650 || GET_CODE (addr) == POST_INC)
6653 if (! ix86_decompose_address (addr, &parts))
6657 index = parts.index;
6661 /* Register Indirect. */
6662 if (base && !index && !disp)
6664 /* Special cases: ebp and esp need the two-byte modrm form. */
6665 if (addr == stack_pointer_rtx
6666 || addr == arg_pointer_rtx
6667 || addr == frame_pointer_rtx
6668 || addr == hard_frame_pointer_rtx)
6672 /* Direct Addressing. */
6673 else if (disp && !base && !index)
6678 /* Find the length of the displacement constant. */
6681 if (GET_CODE (disp) == CONST_INT
6682 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
6688 /* An index requires the two-byte modrm form. */
6696 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
6697 is set, expect that the insn has an 8-bit immediate alternative. */
6699 ix86_attr_length_immediate_default (insn, shortform)
6705 extract_insn_cached (insn);
6706 for (i = recog_data.n_operands - 1; i >= 0; --i)
6707 if (CONSTANT_P (recog_data.operand[i]))
6712 && GET_CODE (recog_data.operand[i]) == CONST_INT
6713 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
6717 switch (get_attr_mode (insn))
6729 fatal_insn ("Unknown insn mode", insn);
6735 /* Compute the default value for the "length_address" attribute. */
6737 ix86_attr_length_address_default (insn)
6741 extract_insn_cached (insn);
6742 for (i = recog_data.n_operands - 1; i >= 0; --i)
6743 if (GET_CODE (recog_data.operand[i]) == MEM)
6745 return memory_address_length (XEXP (recog_data.operand[i], 0));
6751 /* Return the maximum number of instructions a cpu can issue. */
6758 case PROCESSOR_PENTIUM:
6762 case PROCESSOR_PENTIUMPRO:
6770 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
6771 by DEP_INSN and nothing else set by DEP_INSN. */
6774 ix86_flags_dependant (insn, dep_insn, insn_type)
6776 enum attr_type insn_type;
6780 /* Simplify the test for uninteresting insns. */
6781 if (insn_type != TYPE_SETCC
6782 && insn_type != TYPE_ICMOV
6783 && insn_type != TYPE_FCMOV
6784 && insn_type != TYPE_IBR)
6787 if ((set = single_set (dep_insn)) != 0)
6789 set = SET_DEST (set);
6792 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
6793 && XVECLEN (PATTERN (dep_insn), 0) == 2
6794 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
6795 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
6797 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
6798 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
6803 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
6806 /* This test is true if the dependent insn reads the flags but
6807 not any other potentially set register. */
6808 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
6811 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
6817 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
6818 address with operands set by DEP_INSN. */
6821 ix86_agi_dependant (insn, dep_insn, insn_type)
6823 enum attr_type insn_type;
6827 if (insn_type == TYPE_LEA)
6829 addr = PATTERN (insn);
6830 if (GET_CODE (addr) == SET)
6832 else if (GET_CODE (addr) == PARALLEL
6833 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
6834 addr = XVECEXP (addr, 0, 0);
6837 addr = SET_SRC (addr);
6842 extract_insn_cached (insn);
6843 for (i = recog_data.n_operands - 1; i >= 0; --i)
6844 if (GET_CODE (recog_data.operand[i]) == MEM)
6846 addr = XEXP (recog_data.operand[i], 0);
6853 return modified_in_p (addr, dep_insn);
6857 ix86_adjust_cost (insn, link, dep_insn, cost)
6858 rtx insn, link, dep_insn;
6861 enum attr_type insn_type, dep_insn_type;
6862 enum attr_memory memory;
6864 int dep_insn_code_number;
6866 /* Anti and output dependencies have zero cost on all CPUs. */
6867 if (REG_NOTE_KIND (link) != 0)
6870 dep_insn_code_number = recog_memoized (dep_insn);
6872 /* If we can't recognize the insns, we can't really do anything. */
6873 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
6876 insn_type = get_attr_type (insn);
6877 dep_insn_type = get_attr_type (dep_insn);
6879 /* Prologue and epilogue allocators can have a false dependency on ebp.
6880 This results in one extra cycle of stall on Pentium prologue scheduling,
6881 so handle this important case manually. */
6882 if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
6883 && dep_insn_type == TYPE_ALU
6884 && !reg_mentioned_p (stack_pointer_rtx, insn))
6889 case PROCESSOR_PENTIUM:
6890 /* Address Generation Interlock adds a cycle of latency. */
6891 if (ix86_agi_dependant (insn, dep_insn, insn_type))
6894 /* ??? Compares pair with jump/setcc. */
6895 if (ix86_flags_dependant (insn, dep_insn, insn_type))
6898 /* Floating point stores require the value to be ready one cycle earlier. */
6899 if (insn_type == TYPE_FMOV
6900 && get_attr_memory (insn) == MEMORY_STORE
6901 && !ix86_agi_dependant (insn, dep_insn, insn_type))
6905 case PROCESSOR_PENTIUMPRO:
6906 /* Since we can't represent delayed latencies of load+operation,
6907 increase the cost here for non-imov insns. */
6908 if (dep_insn_type != TYPE_IMOV
6909 && dep_insn_type != TYPE_FMOV
6910 && ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
6911 || memory == MEMORY_BOTH))
6914 /* INT->FP conversion is expensive. */
6915 if (get_attr_fp_int_src (dep_insn))
6918 /* There is one cycle extra latency between an FP op and a store. */
6919 if (insn_type == TYPE_FMOV
6920 && (set = single_set (dep_insn)) != NULL_RTX
6921 && (set2 = single_set (insn)) != NULL_RTX
6922 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
6923 && GET_CODE (SET_DEST (set2)) == MEM)
6928 /* The esp dependency is resolved before the instruction is really
6930 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
6931 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
6934 /* Since we can't represent delayed latencies of load+operation,
6935 increase the cost here for non-imov insns. */
6936 if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
6937 || memory == MEMORY_BOTH)
6938 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
6940 /* INT->FP conversion is expensive. */
6941 if (get_attr_fp_int_src (dep_insn))
6945 case PROCESSOR_ATHLON:
6946 if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
6947 || memory == MEMORY_BOTH)
6949 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
6964 struct ppro_sched_data
6967 int issued_this_cycle;
6972 ix86_safe_length (insn)
6975 if (recog_memoized (insn) >= 0)
6976 return get_attr_length(insn);
6982 ix86_safe_length_prefix (insn)
6985 if (recog_memoized (insn) >= 0)
6986 return get_attr_length(insn);
6991 static enum attr_memory
6992 ix86_safe_memory (insn)
6995 if (recog_memoized (insn) >= 0)
6996 return get_attr_memory(insn);
6998 return MEMORY_UNKNOWN;
7001 static enum attr_pent_pair
7002 ix86_safe_pent_pair (insn)
7005 if (recog_memoized (insn) >= 0)
7006 return get_attr_pent_pair(insn);
7008 return PENT_PAIR_NP;
7011 static enum attr_ppro_uops
7012 ix86_safe_ppro_uops (insn)
7015 if (recog_memoized (insn) >= 0)
7016 return get_attr_ppro_uops (insn);
7018 return PPRO_UOPS_MANY;
7022 ix86_dump_ppro_packet (dump)
7025 if (ix86_sched_data.ppro.decode[0])
7027 fprintf (dump, "PPRO packet: %d",
7028 INSN_UID (ix86_sched_data.ppro.decode[0]));
7029 if (ix86_sched_data.ppro.decode[1])
7030 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
7031 if (ix86_sched_data.ppro.decode[2])
7032 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
7037 /* We're beginning a new block. Initialize data structures as necessary. */
7040 ix86_sched_init (dump, sched_verbose)
7041 FILE *dump ATTRIBUTE_UNUSED;
7042 int sched_verbose ATTRIBUTE_UNUSED;
7044 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
7047 /* Shift INSN to SLOT, and shift everything else down. */
7050 ix86_reorder_insn (insnp, slot)
7057 insnp[0] = insnp[1];
7058 while (++insnp != slot);
7063 /* Find an instruction with the given pairability and the minimal number of cycles
7064 lost due to the fact that the CPU waits for both pipelines to finish before
7065 reading the next instructions. Also take care that the two instructions together
7066 do not exceed 7 bytes. */
7069 ix86_pent_find_pair (e_ready, ready, type, first)
7072 enum attr_pent_pair type;
7075 int mincycles, cycles;
7076 enum attr_pent_pair tmp;
7077 enum attr_memory memory;
7078 rtx *insnp, *bestinsnp = NULL;
7080 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
7083 memory = ix86_safe_memory (first);
7084 cycles = result_ready_cost (first);
7085 mincycles = INT_MAX;
7087 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
7088 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
7089 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
7091 enum attr_memory second_memory;
7092 int secondcycles, currentcycles;
7094 second_memory = ix86_safe_memory (*insnp);
7095 secondcycles = result_ready_cost (*insnp);
7096 currentcycles = abs (cycles - secondcycles);
7098 if (secondcycles >= 1 && cycles >= 1)
7100 /* Two read/modify/write instructions together take two
7102 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
7105 /* A read/modify/write instruction followed by a read/modify
7106 takes one cycle longer. */
7107 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
7108 && tmp != PENT_PAIR_UV
7109 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
7112 if (currentcycles < mincycles)
7113 bestinsnp = insnp, mincycles = currentcycles;
7119 /* Subroutines of ix86_sched_reorder. */
7122 ix86_sched_reorder_pentium (ready, e_ready)
7126 enum attr_pent_pair pair1, pair2;
7129 /* This wouldn't be necessary if Haifa knew that static insn ordering
7130 is important to which pipe an insn is issued to. So we have to make
7131 some minor rearrangements. */
7133 pair1 = ix86_safe_pent_pair (*e_ready);
7135 /* If the first insn is non-pairable, let it be. */
7136 if (pair1 == PENT_PAIR_NP)
7139 pair2 = PENT_PAIR_NP;
7142 /* If the first insn is UV or PV pairable, search for a PU
7144 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
7146 insnp = ix86_pent_find_pair (e_ready-1, ready,
7147 PENT_PAIR_PU, *e_ready);
7149 pair2 = PENT_PAIR_PU;
7152 /* If the first insn is PU or UV pairable, search for a PV
7154 if (pair2 == PENT_PAIR_NP
7155 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
7157 insnp = ix86_pent_find_pair (e_ready-1, ready,
7158 PENT_PAIR_PV, *e_ready);
7160 pair2 = PENT_PAIR_PV;
7163 /* If the first insn is pairable, search for a UV
7165 if (pair2 == PENT_PAIR_NP)
7167 insnp = ix86_pent_find_pair (e_ready-1, ready,
7168 PENT_PAIR_UV, *e_ready);
7170 pair2 = PENT_PAIR_UV;
7173 if (pair2 == PENT_PAIR_NP)
7176 /* Found something! Decide if we need to swap the order. */
7177 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
7178 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
7179 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
7180 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
7181 ix86_reorder_insn (insnp, e_ready);
7183 ix86_reorder_insn (insnp, e_ready - 1);
7187 ix86_sched_reorder_ppro (ready, e_ready)
7192 enum attr_ppro_uops cur_uops;
7193 int issued_this_cycle;
7197 /* At this point .ppro.decode contains the state of the three
7198 decoders from last "cycle". That is, those insns that were
7199 actually independent. But here we're scheduling for the
7200 decoder, and we may find things that are decodable in the
7203 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
7204 issued_this_cycle = 0;
7207 cur_uops = ix86_safe_ppro_uops (*insnp);
7209 /* If the decoders are empty, and we have a complex insn at the
7210 head of the priority queue, let it issue without complaint. */
7211 if (decode[0] == NULL)
7213 if (cur_uops == PPRO_UOPS_MANY)
7219 /* Otherwise, search for a 2-4 uop insn to issue. */
7220 while (cur_uops != PPRO_UOPS_FEW)
7224 cur_uops = ix86_safe_ppro_uops (*--insnp);
7227 /* If so, move it to the head of the line. */
7228 if (cur_uops == PPRO_UOPS_FEW)
7229 ix86_reorder_insn (insnp, e_ready);
7231 /* Issue the head of the queue. */
7232 issued_this_cycle = 1;
7233 decode[0] = *e_ready--;
7236 /* Look for simple insns to fill in the other two slots. */
7237 for (i = 1; i < 3; ++i)
7238 if (decode[i] == NULL)
7240 if (ready >= e_ready)
7244 cur_uops = ix86_safe_ppro_uops (*insnp);
7245 while (cur_uops != PPRO_UOPS_ONE)
7249 cur_uops = ix86_safe_ppro_uops (*--insnp);
7252 /* Found one. Move it to the head of the queue and issue it. */
7253 if (cur_uops == PPRO_UOPS_ONE)
7255 ix86_reorder_insn (insnp, e_ready);
7256 decode[i] = *e_ready--;
7257 issued_this_cycle++;
7261 /* ??? Didn't find one. Ideally, here we would do a lazy split
7262 of 2-uop insns, issue one and queue the other. */
7266 if (issued_this_cycle == 0)
7267 issued_this_cycle = 1;
7268 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
7271 /* We are about to begin issuing insns for this clock cycle.
7272 Override the default sort algorithm to better slot instructions. */
7274 ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
7275 FILE *dump ATTRIBUTE_UNUSED;
7276 int sched_verbose ATTRIBUTE_UNUSED;
7279 int clock_var ATTRIBUTE_UNUSED;
7281 rtx *e_ready = ready + n_ready - 1;
7291 case PROCESSOR_PENTIUM:
7292 ix86_sched_reorder_pentium (ready, e_ready);
7295 case PROCESSOR_PENTIUMPRO:
7296 ix86_sched_reorder_ppro (ready, e_ready);
7301 return ix86_issue_rate ();
7304 /* We are about to issue INSN. Return the number of insns left on the
7305 ready queue that can be issued this cycle. */
7308 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
7318 return can_issue_more - 1;
7320 case PROCESSOR_PENTIUMPRO:
7322 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
7324 if (uops == PPRO_UOPS_MANY)
7327 ix86_dump_ppro_packet (dump);
7328 ix86_sched_data.ppro.decode[0] = insn;
7329 ix86_sched_data.ppro.decode[1] = NULL;
7330 ix86_sched_data.ppro.decode[2] = NULL;
7332 ix86_dump_ppro_packet (dump);
7333 ix86_sched_data.ppro.decode[0] = NULL;
7335 else if (uops == PPRO_UOPS_FEW)
7338 ix86_dump_ppro_packet (dump);
7339 ix86_sched_data.ppro.decode[0] = insn;
7340 ix86_sched_data.ppro.decode[1] = NULL;
7341 ix86_sched_data.ppro.decode[2] = NULL;
7345 for (i = 0; i < 3; ++i)
7346 if (ix86_sched_data.ppro.decode[i] == NULL)
7348 ix86_sched_data.ppro.decode[i] = insn;
7356 ix86_dump_ppro_packet (dump);
7357 ix86_sched_data.ppro.decode[0] = NULL;
7358 ix86_sched_data.ppro.decode[1] = NULL;
7359 ix86_sched_data.ppro.decode[2] = NULL;
7363 return --ix86_sched_data.ppro.issued_this_cycle;
7367 /* Walk through INSNS and look for MEM references whose address is DSTREG or
7368 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
7372 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
7374 rtx dstref, srcref, dstreg, srcreg;
7378 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
7380 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
7384 /* Subroutine of above to actually do the updating by recursively walking
7388 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
7390 rtx dstref, srcref, dstreg, srcreg;
7392 enum rtx_code code = GET_CODE (x);
7393 const char *format_ptr = GET_RTX_FORMAT (code);
7396 if (code == MEM && XEXP (x, 0) == dstreg)
7397 MEM_COPY_ATTRIBUTES (x, dstref);
7398 else if (code == MEM && XEXP (x, 0) == srcreg)
7399 MEM_COPY_ATTRIBUTES (x, srcref);
7401 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
7403 if (*format_ptr == 'e')
7404 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
7406 else if (*format_ptr == 'E')
7407 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7408 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
7413 /* Compute the alignment given to a constant that is being placed in memory.
7414 EXP is the constant and ALIGN is the alignment that the object would
7416 The value of this function is used instead of that alignment to align
7420 ix86_constant_alignment (exp, align)
7424 if (TREE_CODE (exp) == REAL_CST)
7426 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
7428 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
7431 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
7438 /* Compute the alignment for a static variable.
7439 TYPE is the data type, and ALIGN is the alignment that
7440 the object would ordinarily have. The value of this function is used
7441 instead of that alignment to align the object. */
7444 ix86_data_alignment (type, align)
7448 if (AGGREGATE_TYPE_P (type)
7450 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
7451 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
7452 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
7455 if (TREE_CODE (type) == ARRAY_TYPE)
7457 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
7459 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
7462 else if (TREE_CODE (type) == COMPLEX_TYPE)
7465 if (TYPE_MODE (type) == DCmode && align < 64)
7467 if (TYPE_MODE (type) == XCmode && align < 128)
7470 else if ((TREE_CODE (type) == RECORD_TYPE
7471 || TREE_CODE (type) == UNION_TYPE
7472 || TREE_CODE (type) == QUAL_UNION_TYPE)
7473 && TYPE_FIELDS (type))
7475 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
7477 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
7480 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
7481 || TREE_CODE (type) == INTEGER_TYPE)
7483 if (TYPE_MODE (type) == DFmode && align < 64)
7485 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
7492 /* Compute the alignment for a local variable.
7493 TYPE is the data type, and ALIGN is the alignment that
7494 the object would ordinarily have. The value of this macro is used
7495 instead of that alignment to align the object. */
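/* Illustrative sketch for the function below (the starting alignment of 32
   bits is an assumption chosen for concreteness, not taken from a particular
   target): an automatic array of doubles

       double buf[4];     (TYPE_MODE of the element is DFmode)

   that would ordinarily get only 32-bit alignment is raised to 64 bits by
   the ARRAY_TYPE/DFmode test, so each 8-byte element can be accessed with a
   single naturally aligned load or store.  */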
7498 ix86_local_alignment (type, align)
7502 if (TREE_CODE (type) == ARRAY_TYPE)
7504 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
7506 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
7509 else if (TREE_CODE (type) == COMPLEX_TYPE)
7511 if (TYPE_MODE (type) == DCmode && align < 64)
7513 if (TYPE_MODE (type) == XCmode && align < 128)
7516 else if ((TREE_CODE (type) == RECORD_TYPE
7517 || TREE_CODE (type) == UNION_TYPE
7518 || TREE_CODE (type) == QUAL_UNION_TYPE)
7519 && TYPE_FIELDS (type))
7521 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
7523 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
7526 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
7527 || TREE_CODE (type) == INTEGER_TYPE)
7530 if (TYPE_MODE (type) == DFmode && align < 64)
7532 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
7538 #define def_builtin(NAME, TYPE, CODE) \
7539 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL_PTR)
7540 struct builtin_description
7542 enum insn_code icode;
7544 enum ix86_builtins code;
7545 enum rtx_code comparison;
7549 static struct builtin_description bdesc_comi[] =
7551 { CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
7552 { CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
7553 { CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
7554 { CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
7555 { CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
7556 { CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
7557 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
7558 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
7559 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
7560 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
7561 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
7562 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
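/* Illustrative reading of the table above (the meaning of the last field is
   inferred from ix86_expand_sse_comi below, not from a documented API): the
   trailing 1 marks predicates that have no direct comi encoding and are
   obtained by swapping the operands, e.g.

       __builtin_ia32_comigt (a, b)   is evaluated as   b < a   (LT, swapped)

   which is why the GT and GE entries reuse the LT and LE codes.  */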
7565 static struct builtin_description bdesc_2arg[] =
7568 { CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
7569 { CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
7570 { CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
7571 { CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
7572 { CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
7573 { CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
7574 { CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
7575 { CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
7577 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
7578 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
7579 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
7580 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
7581 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
7582 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
7583 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
7584 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
7585 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
7586 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
7587 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
7588 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
7589 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
7590 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
7591 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
7592 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
7593 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
7594 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
7595 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
7596 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
7597 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
7598 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
7599 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
7600 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
7602 { CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
7603 { CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
7604 { CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
7605 { CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
7607 { CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
7608 { CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
7609 { CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
7610 { CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
7612 { CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
7613 { CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
7614 { CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
7615 { CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
7616 { CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
7619 { CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
7620 { CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
7621 { CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
7622 { CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
7623 { CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
7624 { CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
7626 { CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
7627 { CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
7628 { CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
7629 { CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
7630 { CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
7631 { CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
7632 { CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
7633 { CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
7635 { CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
7636 { CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
7637 { CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
7639 { CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
7640 { CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
7641 { CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
7642 { CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
7644 { CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
7645 { CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
7647 { CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
7648 { CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
7649 { CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
7650 { CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
7651 { CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
7652 { CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
7654 { CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
7655 { CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
7656 { CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
7657 { CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
7659 { CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
7660 { CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
7661 { CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
7662 { CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
7663 { CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
7664 { CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
7667 { CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
7668 { CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
7669 { CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
7671 { CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
7672 { CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
7674 { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
7675 { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
7676 { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
7677 { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
7678 { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
7679 { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
7681 { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
7682 { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
7683 { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
7684 { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
7685 { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
7686 { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
7688 { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
7689 { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
7690 { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
7691 { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
7693 { CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
7694 { CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
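/* Illustrative user-level sketch of how the two-operand builtins in the
   table above are intended to be called.  The vector typedef spelling is an
   assumption for the example; only the builtin name and its two-operand
   shape come from the table:

       typedef int __v4sf __attribute__ ((mode (V4SF)));

       __v4sf
       add4 (__v4sf a, __v4sf b)
       {
         return __builtin_ia32_addps (a, b);
       }
*/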
7698 static struct builtin_description bdesc_1arg[] =
7700 { CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
7701 { CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
7703 { CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
7704 { CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
7705 { CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
7707 { CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
7708 { CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
7709 { CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
7710 { CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
7714 /* Set up all the target-specific builtins.  This is not called if TARGET_MMX
7715 is zero.  Otherwise, if TARGET_SSE is not set, only the MMX builtins are defined.  */
7718 ix86_init_builtins ()
7720 struct builtin_description * d;
7722 tree endlink = void_list_node;
7724 tree pchar_type_node = build_pointer_type (char_type_node);
7725 tree pfloat_type_node = build_pointer_type (float_type_node);
7726 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
7727 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
7730 tree int_ftype_v4sf_v4sf
7731 = build_function_type (integer_type_node,
7732 tree_cons (NULL_TREE, V4SF_type_node,
7733 tree_cons (NULL_TREE,
7736 tree v4si_ftype_v4sf_v4sf
7737 = build_function_type (V4SI_type_node,
7738 tree_cons (NULL_TREE, V4SF_type_node,
7739 tree_cons (NULL_TREE,
7742 /* MMX/SSE/integer conversions. */
7743 tree int_ftype_v4sf_int
7744 = build_function_type (integer_type_node,
7745 tree_cons (NULL_TREE, V4SF_type_node,
7746 tree_cons (NULL_TREE,
7750 = build_function_type (integer_type_node,
7751 tree_cons (NULL_TREE, V4SF_type_node,
7754 = build_function_type (integer_type_node,
7755 tree_cons (NULL_TREE, V8QI_type_node,
7758 = build_function_type (integer_type_node,
7759 tree_cons (NULL_TREE, V2SI_type_node,
7762 = build_function_type (V2SI_type_node,
7763 tree_cons (NULL_TREE, integer_type_node,
7765 tree v4sf_ftype_v4sf_int
7766 = build_function_type (V4SF_type_node,
7767 tree_cons (NULL_TREE, V4SF_type_node,
7768 tree_cons (NULL_TREE, integer_type_node,
7770 tree v4sf_ftype_v4sf_v2si
7771 = build_function_type (V4SF_type_node,
7772 tree_cons (NULL_TREE, V4SF_type_node,
7773 tree_cons (NULL_TREE, V2SI_type_node,
7775 tree int_ftype_v4hi_int
7776 = build_function_type (integer_type_node,
7777 tree_cons (NULL_TREE, V4HI_type_node,
7778 tree_cons (NULL_TREE, integer_type_node,
7780 tree v4hi_ftype_v4hi_int_int
7781 = build_function_type (V4HI_type_node,
7782 tree_cons (NULL_TREE, V4HI_type_node,
7783 tree_cons (NULL_TREE, integer_type_node,
7784 tree_cons (NULL_TREE,
7787 /* Miscellaneous. */
7788 tree v8qi_ftype_v4hi_v4hi
7789 = build_function_type (V8QI_type_node,
7790 tree_cons (NULL_TREE, V4HI_type_node,
7791 tree_cons (NULL_TREE, V4HI_type_node,
7793 tree v4hi_ftype_v2si_v2si
7794 = build_function_type (V4HI_type_node,
7795 tree_cons (NULL_TREE, V2SI_type_node,
7796 tree_cons (NULL_TREE, V2SI_type_node,
7798 tree v4sf_ftype_v4sf_v4sf_int
7799 = build_function_type (V4SF_type_node,
7800 tree_cons (NULL_TREE, V4SF_type_node,
7801 tree_cons (NULL_TREE, V4SF_type_node,
7802 tree_cons (NULL_TREE,
7805 tree v4hi_ftype_v8qi_v8qi
7806 = build_function_type (V4HI_type_node,
7807 tree_cons (NULL_TREE, V8QI_type_node,
7808 tree_cons (NULL_TREE, V8QI_type_node,
7810 tree v2si_ftype_v4hi_v4hi
7811 = build_function_type (V2SI_type_node,
7812 tree_cons (NULL_TREE, V4HI_type_node,
7813 tree_cons (NULL_TREE, V4HI_type_node,
7815 tree v4hi_ftype_v4hi_int
7816 = build_function_type (V4HI_type_node,
7817 tree_cons (NULL_TREE, V4HI_type_node,
7818 tree_cons (NULL_TREE, integer_type_node,
7820 tree di_ftype_di_int
7821 = build_function_type (long_long_unsigned_type_node,
7822 tree_cons (NULL_TREE, long_long_unsigned_type_node,
7823 tree_cons (NULL_TREE, integer_type_node,
7825 tree v8qi_ftype_v8qi_di
7826 = build_function_type (V8QI_type_node,
7827 tree_cons (NULL_TREE, V8QI_type_node,
7828 tree_cons (NULL_TREE,
7829 long_long_integer_type_node,
7831 tree v4hi_ftype_v4hi_di
7832 = build_function_type (V4HI_type_node,
7833 tree_cons (NULL_TREE, V4HI_type_node,
7834 tree_cons (NULL_TREE,
7835 long_long_integer_type_node,
7837 tree v2si_ftype_v2si_di
7838 = build_function_type (V2SI_type_node,
7839 tree_cons (NULL_TREE, V2SI_type_node,
7840 tree_cons (NULL_TREE,
7841 long_long_integer_type_node,
7843 tree void_ftype_void
7844 = build_function_type (void_type_node, endlink);
7845 tree void_ftype_pchar_int
7846 = build_function_type (void_type_node,
7847 tree_cons (NULL_TREE, pchar_type_node,
7848 tree_cons (NULL_TREE, integer_type_node,
7850 tree void_ftype_unsigned
7851 = build_function_type (void_type_node,
7852 tree_cons (NULL_TREE, unsigned_type_node,
7854 tree unsigned_ftype_void
7855 = build_function_type (unsigned_type_node, endlink);
7857 = build_function_type (long_long_unsigned_type_node, endlink);
7859 = build_function_type (intTI_type_node, endlink);
7860 tree v2si_ftype_v4sf
7861 = build_function_type (V2SI_type_node,
7862 tree_cons (NULL_TREE, V4SF_type_node,
7865 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
7866 tree_cons (NULL_TREE, V8QI_type_node,
7867 tree_cons (NULL_TREE,
7870 tree void_ftype_v8qi_v8qi_pchar
7871 = build_function_type (void_type_node, maskmovq_args);
7872 tree v4sf_ftype_pfloat
7873 = build_function_type (V4SF_type_node,
7874 tree_cons (NULL_TREE, pfloat_type_node,
7876 tree v4sf_ftype_float
7877 = build_function_type (V4SF_type_node,
7878 tree_cons (NULL_TREE, float_type_node,
7880 tree v4sf_ftype_float_float_float_float
7881 = build_function_type (V4SF_type_node,
7882 tree_cons (NULL_TREE, float_type_node,
7883 tree_cons (NULL_TREE, float_type_node,
7884 tree_cons (NULL_TREE,
7886 tree_cons (NULL_TREE,
7889 /* @@@ the type is bogus */
7890 tree v4sf_ftype_v4sf_pv2si
7891 = build_function_type (V4SF_type_node,
7892 tree_cons (NULL_TREE, V4SF_type_node,
7893 tree_cons (NULL_TREE, pv2si_type_node,
7895 tree v4sf_ftype_pv2si_v4sf
7896 = build_function_type (V4SF_type_node,
7897 tree_cons (NULL_TREE, V4SF_type_node,
7898 tree_cons (NULL_TREE, pv2si_type_node,
7900 tree void_ftype_pfloat_v4sf
7901 = build_function_type (void_type_node,
7902 tree_cons (NULL_TREE, pfloat_type_node,
7903 tree_cons (NULL_TREE, V4SF_type_node,
7905 tree void_ftype_pdi_di
7906 = build_function_type (void_type_node,
7907 tree_cons (NULL_TREE, pdi_type_node,
7908 tree_cons (NULL_TREE,
7909 long_long_unsigned_type_node,
7911 /* Normal vector unops. */
7912 tree v4sf_ftype_v4sf
7913 = build_function_type (V4SF_type_node,
7914 tree_cons (NULL_TREE, V4SF_type_node,
7917 /* Normal vector binops. */
7918 tree v4sf_ftype_v4sf_v4sf
7919 = build_function_type (V4SF_type_node,
7920 tree_cons (NULL_TREE, V4SF_type_node,
7921 tree_cons (NULL_TREE, V4SF_type_node,
7923 tree v8qi_ftype_v8qi_v8qi
7924 = build_function_type (V8QI_type_node,
7925 tree_cons (NULL_TREE, V8QI_type_node,
7926 tree_cons (NULL_TREE, V8QI_type_node,
7928 tree v4hi_ftype_v4hi_v4hi
7929 = build_function_type (V4HI_type_node,
7930 tree_cons (NULL_TREE, V4HI_type_node,
7931 tree_cons (NULL_TREE, V4HI_type_node,
7933 tree v2si_ftype_v2si_v2si
7934 = build_function_type (V2SI_type_node,
7935 tree_cons (NULL_TREE, V2SI_type_node,
7936 tree_cons (NULL_TREE, V2SI_type_node,
7939 = build_function_type (intTI_type_node,
7940 tree_cons (NULL_TREE, intTI_type_node,
7941 tree_cons (NULL_TREE, intTI_type_node,
7944 = build_function_type (long_long_unsigned_type_node,
7945 tree_cons (NULL_TREE, long_long_unsigned_type_node,
7946 tree_cons (NULL_TREE,
7947 long_long_unsigned_type_node,
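/* Illustrative note: each *_ftype_* tree above is simply a prototype spelled
   as nested tree_cons lists terminated by void_list_node (endlink), so
   v4sf_ftype_v4sf_v4sf stands for the C prototype

       V4SF f (V4SF, V4SF);

   and int_ftype_v4sf_v4sf for

       int f (V4SF, V4SF);

   The C spellings are shorthand only; V4SF and friends denote the vector
   type nodes used elsewhere in this file.  */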
7950 /* Add all builtins that are more or less simple operations on two operands.  */
7952 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
7954 /* Use one of the operands; the target can have a different mode for
7955 mask-generating compares. */
7956 enum machine_mode mode;
7961 mode = insn_data[d->icode].operand[1].mode;
7963 if (! TARGET_SSE && ! VALID_MMX_REG_MODE (mode))
7969 type = v4sf_ftype_v4sf_v4sf;
7972 type = v8qi_ftype_v8qi_v8qi;
7975 type = v4hi_ftype_v4hi_v4hi;
7978 type = v2si_ftype_v2si_v2si;
7981 type = ti_ftype_ti_ti;
7984 type = di_ftype_di_di;
7991 /* Override for comparisons. */
7992 if (d->icode == CODE_FOR_maskcmpv4sf3
7993 || d->icode == CODE_FOR_maskncmpv4sf3
7994 || d->icode == CODE_FOR_vmmaskcmpv4sf3
7995 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
7996 type = v4si_ftype_v4sf_v4sf;
7998 def_builtin (d->name, type, d->code);
8001 /* Add the remaining MMX insns with somewhat more complicated types. */
8002 def_builtin ("__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
8003 def_builtin ("__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
8004 def_builtin ("__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
8005 def_builtin ("__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
8006 def_builtin ("__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
8007 def_builtin ("__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
8008 def_builtin ("__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
8009 def_builtin ("__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
8010 def_builtin ("__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
8012 def_builtin ("__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
8013 def_builtin ("__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
8014 def_builtin ("__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
8016 def_builtin ("__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
8017 def_builtin ("__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
8019 def_builtin ("__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
8020 def_builtin ("__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
8022 /* Everything beyond this point is SSE only. */
8026 /* comi/ucomi insns. */
8027 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
8028 def_builtin (d->name, int_ftype_v4sf_v4sf, d->code);
8030 def_builtin ("__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
8031 def_builtin ("__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
8032 def_builtin ("__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
8034 def_builtin ("__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
8035 def_builtin ("__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
8036 def_builtin ("__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
8037 def_builtin ("__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
8038 def_builtin ("__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
8039 def_builtin ("__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
8041 def_builtin ("__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
8042 def_builtin ("__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
8044 def_builtin ("__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
8046 def_builtin ("__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
8047 def_builtin ("__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
8048 def_builtin ("__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
8049 def_builtin ("__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
8050 def_builtin ("__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
8051 def_builtin ("__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
8053 def_builtin ("__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
8054 def_builtin ("__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
8055 def_builtin ("__builtin_ia32_storehps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
8056 def_builtin ("__builtin_ia32_storelps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
8058 def_builtin ("__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
8059 def_builtin ("__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
8060 def_builtin ("__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
8061 def_builtin ("__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
8063 def_builtin ("__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
8064 def_builtin ("__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
8066 def_builtin ("__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
8068 def_builtin ("__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
8069 def_builtin ("__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
8070 def_builtin ("__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
8071 def_builtin ("__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
8072 def_builtin ("__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
8073 def_builtin ("__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
8075 def_builtin ("__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
8077 /* Composite intrinsics. */
8078 def_builtin ("__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
8079 def_builtin ("__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
8080 def_builtin ("__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
8081 def_builtin ("__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
8082 def_builtin ("__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
8083 def_builtin ("__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
8084 def_builtin ("__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
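/* Illustrative sketch of what "composite" means here: these builtins have no
   single machine pattern and are synthesized by ix86_expand_builtin below;
   for instance IX86_BUILTIN_SETPS1 is expanded roughly as

       store the scalar argument to a stack slot
       loadss  a fresh V4SF register from the slot   (fills element 0)
       shufps  reg, reg, 0                           (broadcasts element 0)

   The assembly-like spelling is only a sketch of the RTL patterns actually
   emitted by the SETPS1 case.  */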
8087 /* Errors in the source file can cause expand_expr to return const0_rtx
8088 where we expect a vector. To avoid crashing, use one of the vector
8089 clear instructions. */
8091 safe_vector_operand (x, mode)
8093 enum machine_mode mode;
8095 if (x != const0_rtx)
8097 x = gen_reg_rtx (mode);
8099 if (VALID_MMX_REG_MODE (mode))
8100 emit_insn (gen_mmx_clrdi (mode == DImode ? x
8101 : gen_rtx_SUBREG (DImode, x, 0)));
8103 emit_insn (gen_sse_clrti (mode == TImode ? x
8104 : gen_rtx_SUBREG (TImode, x, 0)));
8108 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
8111 ix86_expand_binop_builtin (icode, arglist, target)
8112 enum insn_code icode;
8117 tree arg0 = TREE_VALUE (arglist);
8118 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8119 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8120 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8121 enum machine_mode tmode = insn_data[icode].operand[0].mode;
8122 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
8123 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
8125 if (VECTOR_MODE_P (mode0))
8126 op0 = safe_vector_operand (op0, mode0);
8127 if (VECTOR_MODE_P (mode1))
8128 op1 = safe_vector_operand (op1, mode1);
8131 || GET_MODE (target) != tmode
8132 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8133 target = gen_reg_rtx (tmode);
8135 /* In case the insn wants input operands in modes different from
8136 the result, abort. */
8137 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
8140 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8141 op0 = copy_to_mode_reg (mode0, op0);
8142 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8143 op1 = copy_to_mode_reg (mode1, op1);
8145 pat = GEN_FCN (icode) (target, op0, op1);
8152 /* Subroutine of ix86_expand_builtin to take care of stores. */
8155 ix86_expand_store_builtin (icode, arglist, shuffle)
8156 enum insn_code icode;
8161 tree arg0 = TREE_VALUE (arglist);
8162 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8163 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8164 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8165 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
8166 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
8168 if (VECTOR_MODE_P (mode1))
8169 op1 = safe_vector_operand (op1, mode1);
8171 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8172 if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
8173 op1 = copy_to_mode_reg (mode1, op1);
8175 emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
8176 pat = GEN_FCN (icode) (op0, op1);
8182 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
8185 ix86_expand_unop_builtin (icode, arglist, target, do_load)
8186 enum insn_code icode;
8192 tree arg0 = TREE_VALUE (arglist);
8193 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8194 enum machine_mode tmode = insn_data[icode].operand[0].mode;
8195 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
8198 || GET_MODE (target) != tmode
8199 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8200 target = gen_reg_rtx (tmode);
8202 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8205 if (VECTOR_MODE_P (mode0))
8206 op0 = safe_vector_operand (op0, mode0);
8208 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8209 op0 = copy_to_mode_reg (mode0, op0);
8212 pat = GEN_FCN (icode) (target, op0);
8219 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
8220 sqrtss, rsqrtss, rcpss. */
8223 ix86_expand_unop1_builtin (icode, arglist, target)
8224 enum insn_code icode;
8229 tree arg0 = TREE_VALUE (arglist);
8230 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8231 enum machine_mode tmode = insn_data[icode].operand[0].mode;
8232 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
8235 || GET_MODE (target) != tmode
8236 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8237 target = gen_reg_rtx (tmode);
8239 if (VECTOR_MODE_P (mode0))
8240 op0 = safe_vector_operand (op0, mode0);
8242 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8243 op0 = copy_to_mode_reg (mode0, op0);
8245 pat = GEN_FCN (icode) (target, op0, op0);
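/* Illustrative reading of the GEN_FCN call above: the scalar SSE patterns
   behind sqrtss/rsqrtss/rcpss take a second input because the instruction
   only replaces element 0 and copies the remaining elements from a source
   operand, i.e. roughly

       target = { op (op0[0]), op0[1], op0[2], op0[3] }

   Passing op0 for both inputs keeps the upper elements of the argument
   unchanged; this is inferred from the call, not a documented contract.  */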
8252 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
8255 ix86_expand_sse_compare (d, arglist, target)
8256 struct builtin_description *d;
8261 tree arg0 = TREE_VALUE (arglist);
8262 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8263 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8264 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8266 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
8267 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
8268 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
8269 enum rtx_code comparison = d->comparison;
8271 if (VECTOR_MODE_P (mode0))
8272 op0 = safe_vector_operand (op0, mode0);
8273 if (VECTOR_MODE_P (mode1))
8274 op1 = safe_vector_operand (op1, mode1);
8276 /* Swap operands if we have a comparison that isn't available in hardware.  */
8280 target = gen_reg_rtx (tmode);
8281 emit_move_insn (target, op1);
8284 comparison = swap_condition (comparison);
8287 || GET_MODE (target) != tmode
8288 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
8289 target = gen_reg_rtx (tmode);
8291 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
8292 op0 = copy_to_mode_reg (mode0, op0);
8293 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
8294 op1 = copy_to_mode_reg (mode1, op1);
8296 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
8297 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
8304 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
8307 ix86_expand_sse_comi (d, arglist, target)
8308 struct builtin_description *d;
8313 tree arg0 = TREE_VALUE (arglist);
8314 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8315 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8316 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8318 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
8319 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
8320 enum rtx_code comparison = d->comparison;
8322 if (VECTOR_MODE_P (mode0))
8323 op0 = safe_vector_operand (op0, mode0);
8324 if (VECTOR_MODE_P (mode1))
8325 op1 = safe_vector_operand (op1, mode1);
8327 /* Swap operands if we have a comparison that isn't available in hardware.  */
8334 comparison = swap_condition (comparison);
8337 target = gen_reg_rtx (SImode);
8338 emit_move_insn (target, const0_rtx);
8339 target = gen_rtx_SUBREG (QImode, target, 0);
8341 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
8342 op0 = copy_to_mode_reg (mode0, op0);
8343 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
8344 op1 = copy_to_mode_reg (mode1, op1);
8346 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
8347 pat = GEN_FCN (d->icode) (op0, op1, op2);
8351 emit_insn (gen_setcc_2 (target, op2));
8356 /* Expand an expression EXP that calls a built-in function,
8357 with result going to TARGET if that's convenient
8358 (and in mode MODE if that's convenient).
8359 SUBTARGET may be used as the target for computing one of EXP's operands.
8360 IGNORE is nonzero if the value is to be ignored. */
8363 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
8366 rtx subtarget ATTRIBUTE_UNUSED;
8367 enum machine_mode mode ATTRIBUTE_UNUSED;
8368 int ignore ATTRIBUTE_UNUSED;
8370 struct builtin_description *d;
8372 enum insn_code icode;
8373 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8374 tree arglist = TREE_OPERAND (exp, 1);
8375 tree arg0, arg1, arg2, arg3;
8376 rtx op0, op1, op2, pat;
8377 enum machine_mode tmode, mode0, mode1, mode2;
8378 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8382 case IX86_BUILTIN_EMMS:
8383 emit_insn (gen_emms ());
8386 case IX86_BUILTIN_SFENCE:
8387 emit_insn (gen_sfence ());
8390 case IX86_BUILTIN_M_FROM_INT:
8391 target = gen_reg_rtx (DImode);
8392 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8393 emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
8396 case IX86_BUILTIN_M_TO_INT:
8397 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8398 op0 = copy_to_mode_reg (DImode, op0);
8399 target = gen_reg_rtx (SImode);
8400 emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
8403 case IX86_BUILTIN_PEXTRW:
8404 icode = CODE_FOR_mmx_pextrw;
8405 arg0 = TREE_VALUE (arglist);
8406 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8407 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8408 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8409 tmode = insn_data[icode].operand[0].mode;
8410 mode0 = insn_data[icode].operand[1].mode;
8411 mode1 = insn_data[icode].operand[2].mode;
8413 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8414 op0 = copy_to_mode_reg (mode0, op0);
8415 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8417 /* @@@ better error message */
8418 error ("selector must be an immediate");
8422 || GET_MODE (target) != tmode
8423 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8424 target = gen_reg_rtx (tmode);
8425 pat = GEN_FCN (icode) (target, op0, op1);
8431 case IX86_BUILTIN_PINSRW:
8432 icode = CODE_FOR_mmx_pinsrw;
8433 arg0 = TREE_VALUE (arglist);
8434 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8435 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8436 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8437 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8438 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8439 tmode = insn_data[icode].operand[0].mode;
8440 mode0 = insn_data[icode].operand[1].mode;
8441 mode1 = insn_data[icode].operand[2].mode;
8442 mode2 = insn_data[icode].operand[3].mode;
8444 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8445 op0 = copy_to_mode_reg (mode0, op0);
8446 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8447 op1 = copy_to_mode_reg (mode1, op1);
8448 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
8450 /* @@@ better error message */
8451 error ("selector must be an immediate");
8455 || GET_MODE (target) != tmode
8456 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8457 target = gen_reg_rtx (tmode);
8458 pat = GEN_FCN (icode) (target, op0, op1, op2);
8464 case IX86_BUILTIN_MASKMOVQ:
8465 icode = CODE_FOR_mmx_maskmovq;
8466 /* Note the arg order is different from the operand order. */
8467 arg1 = TREE_VALUE (arglist);
8468 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
8469 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8470 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8471 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8472 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8473 mode0 = insn_data[icode].operand[0].mode;
8474 mode1 = insn_data[icode].operand[1].mode;
8475 mode2 = insn_data[icode].operand[2].mode;
8477 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8478 op0 = copy_to_mode_reg (mode0, op0);
8479 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
8480 op1 = copy_to_mode_reg (mode1, op1);
8481 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
8482 op2 = copy_to_mode_reg (mode2, op2);
8483 pat = GEN_FCN (icode) (op0, op1, op2);
8489 case IX86_BUILTIN_SQRTSS:
8490 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
8491 case IX86_BUILTIN_RSQRTSS:
8492 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
8493 case IX86_BUILTIN_RCPSS:
8494 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
8496 case IX86_BUILTIN_LOADAPS:
8497 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
8499 case IX86_BUILTIN_LOADUPS:
8500 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
8502 case IX86_BUILTIN_STOREAPS:
8503 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
8504 case IX86_BUILTIN_STOREUPS:
8505 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);
8507 case IX86_BUILTIN_LOADSS:
8508 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
8510 case IX86_BUILTIN_STORESS:
8511 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
8513 case IX86_BUILTIN_LOADHPS:
8514 case IX86_BUILTIN_LOADLPS:
8515 icode = (fcode == IX86_BUILTIN_LOADHPS
8516 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
8517 arg0 = TREE_VALUE (arglist);
8518 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8519 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8520 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8521 tmode = insn_data[icode].operand[0].mode;
8522 mode0 = insn_data[icode].operand[1].mode;
8523 mode1 = insn_data[icode].operand[2].mode;
8525 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8526 op0 = copy_to_mode_reg (mode0, op0);
8527 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
8529 || GET_MODE (target) != tmode
8530 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8531 target = gen_reg_rtx (tmode);
8532 pat = GEN_FCN (icode) (target, op0, op1);
8538 case IX86_BUILTIN_STOREHPS:
8539 case IX86_BUILTIN_STORELPS:
8540 icode = (fcode == IX86_BUILTIN_STOREHPS
8541 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
8542 arg0 = TREE_VALUE (arglist);
8543 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8544 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8545 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8546 mode0 = insn_data[icode].operand[1].mode;
8547 mode1 = insn_data[icode].operand[2].mode;
8549 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8550 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8551 op1 = copy_to_mode_reg (mode1, op1);
8553 pat = GEN_FCN (icode) (op0, op0, op1);
8559 case IX86_BUILTIN_MOVNTPS:
8560 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
8561 case IX86_BUILTIN_MOVNTQ:
8562 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);
8564 case IX86_BUILTIN_LDMXCSR:
8565 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8566 target = assign_386_stack_local (SImode, 0);
8567 emit_move_insn (target, op0);
8568 emit_insn (gen_ldmxcsr (target));
8571 case IX86_BUILTIN_STMXCSR:
8572 target = assign_386_stack_local (SImode, 0);
8573 emit_insn (gen_stmxcsr (target));
8574 return copy_to_mode_reg (SImode, target);
8576 case IX86_BUILTIN_PREFETCH:
8577 icode = CODE_FOR_prefetch;
8578 arg0 = TREE_VALUE (arglist);
8579 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8580 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8581 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8582 mode0 = insn_data[icode].operand[0].mode;
8583 mode1 = insn_data[icode].operand[1].mode;
8585 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
8587 /* @@@ better error message */
8588 error ("selector must be an immediate");
8592 op0 = copy_to_mode_reg (Pmode, op0);
8593 pat = GEN_FCN (icode) (op0, op1);
8599 case IX86_BUILTIN_SHUFPS:
8600 icode = CODE_FOR_sse_shufps;
8601 arg0 = TREE_VALUE (arglist);
8602 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8603 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8604 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8605 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8606 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8607 tmode = insn_data[icode].operand[0].mode;
8608 mode0 = insn_data[icode].operand[1].mode;
8609 mode1 = insn_data[icode].operand[2].mode;
8610 mode2 = insn_data[icode].operand[3].mode;
8612 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8613 op0 = copy_to_mode_reg (mode0, op0);
8614 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8615 op1 = copy_to_mode_reg (mode1, op1);
8616 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
8618 /* @@@ better error message */
8619 error ("mask must be an immediate");
8623 || GET_MODE (target) != tmode
8624 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8625 target = gen_reg_rtx (tmode);
8626 pat = GEN_FCN (icode) (target, op0, op1, op2);
8632 case IX86_BUILTIN_PSHUFW:
8633 icode = CODE_FOR_mmx_pshufw;
8634 arg0 = TREE_VALUE (arglist);
8635 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8636 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8637 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8638 tmode = insn_data[icode].operand[0].mode;
8639 mode0 = insn_data[icode].operand[2].mode;
8640 mode1 = insn_data[icode].operand[3].mode;
8642 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8643 op0 = copy_to_mode_reg (mode0, op0);
8644 if (! (*insn_data[icode].operand[3].predicate) (op1, mode1))
8646 /* @@@ better error message */
8647 error ("mask must be an immediate");
8651 || GET_MODE (target) != tmode
8652 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8653 target = gen_reg_rtx (tmode);
8654 pat = GEN_FCN (icode) (target, target, op0, op1);
8660 /* Composite intrinsics. */
8661 case IX86_BUILTIN_SETPS1:
8662 target = assign_386_stack_local (SFmode, 0);
8663 arg0 = TREE_VALUE (arglist);
8664 emit_move_insn (change_address (target, SFmode, XEXP (target, 0)),
8665 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
8666 op0 = gen_reg_rtx (V4SFmode);
8667 emit_insn (gen_sse_loadss (op0, change_address (target, V4SFmode,
8668 XEXP (target, 0))));
8669 emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
8672 case IX86_BUILTIN_SETPS:
8673 target = assign_386_stack_local (V4SFmode, 0);
8674 op0 = change_address (target, SFmode, XEXP (target, 0));
8675 arg0 = TREE_VALUE (arglist);
8676 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8677 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8678 arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
8679 emit_move_insn (op0,
8680 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
8681 emit_move_insn (adj_offsettable_operand (op0, 4),
8682 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
8683 emit_move_insn (adj_offsettable_operand (op0, 8),
8684 expand_expr (arg2, NULL_RTX, VOIDmode, 0));
8685 emit_move_insn (adj_offsettable_operand (op0, 12),
8686 expand_expr (arg3, NULL_RTX, VOIDmode, 0));
8687 op0 = gen_reg_rtx (V4SFmode);
8688 emit_insn (gen_sse_movaps (op0, target));
8691 case IX86_BUILTIN_CLRPS:
8692 target = gen_reg_rtx (TImode);
8693 emit_insn (gen_sse_clrti (target));
8696 case IX86_BUILTIN_LOADRPS:
8697 target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
8698 gen_reg_rtx (V4SFmode), 1);
8699 emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
8702 case IX86_BUILTIN_LOADPS1:
8703 target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
8704 gen_reg_rtx (V4SFmode), 1);
8705 emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
8708 case IX86_BUILTIN_STOREPS1:
8709 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
8710 case IX86_BUILTIN_STORERPS:
8711 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);
8713 case IX86_BUILTIN_MMX_ZERO:
8714 target = gen_reg_rtx (DImode);
8715 emit_insn (gen_mmx_clrdi (target));
8722 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
8723 if (d->code == fcode)
8725 /* Compares are treated specially. */
8726 if (d->icode == CODE_FOR_maskcmpv4sf3
8727 || d->icode == CODE_FOR_vmmaskcmpv4sf3
8728 || d->icode == CODE_FOR_maskncmpv4sf3
8729 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
8730 return ix86_expand_sse_compare (d, arglist, target);
8732 return ix86_expand_binop_builtin (d->icode, arglist, target);
8735 for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
8736 if (d->code == fcode)
8737 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
8739 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
8740 if (d->code == fcode)
8741 return ix86_expand_sse_comi (d, arglist, target);
8743 /* @@@ Should really do something sensible here. */
8747 /* Store OPERAND to the memory after reload is completed.  This means
8748 that we can't easily use assign_stack_local.  */
8750 ix86_force_to_memory (mode, operand)
8751 enum machine_mode mode;
8754 if (!reload_completed)
8761 split_di (&operand, 1, operands, operands+1);
8763 gen_rtx_SET (VOIDmode,
8764 gen_rtx_MEM (SImode,
8765 gen_rtx_PRE_DEC (Pmode,
8766 stack_pointer_rtx)),
8769 gen_rtx_SET (VOIDmode,
8770 gen_rtx_MEM (SImode,
8771 gen_rtx_PRE_DEC (Pmode,
8772 stack_pointer_rtx)),
8777 /* It is better to store HImodes as SImodes. */
8778 if (!TARGET_PARTIAL_REG_STALL)
8779 operand = gen_lowpart (SImode, operand);
8783 gen_rtx_SET (VOIDmode,
8784 gen_rtx_MEM (GET_MODE (operand),
8785 gen_rtx_PRE_DEC (SImode,
8786 stack_pointer_rtx)),
8792 return gen_rtx_MEM (mode, stack_pointer_rtx);
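/* Illustrative usage sketch (an assumed call sequence, not copied from a
   particular caller): post-reload code that needs a scratch memory operand
   can bracket its use as

       rtx mem = ix86_force_to_memory (SImode, operand);
       ...emit insns that use mem as a memory operand...
       ix86_free_from_memory (SImode);

   ix86_force_to_memory pushes the value with a PRE_DEC of the stack pointer
   and returns a MEM at the new stack pointer; ix86_free_from_memory, below,
   releases the slot by adding the size back.  */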
8795 /* Free operand from the memory. */
8797 ix86_free_from_memory (mode)
8798 enum machine_mode mode;
8800 /* Use LEA to deallocate stack space.  In peephole2 it will be converted
8801 to a pop or add instruction if registers are available.  */
8802 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8803 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8804 GEN_INT (mode == DImode
8806 : mode == HImode && TARGET_PARTIAL_REG_STALL
8811 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
8812 QImode must go into class Q_REGS.
8813 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
8814 movdf to do mem-to-mem moves through integer regs. */
8816 ix86_preferred_reload_class (x, class)
8818 enum reg_class class;
8820 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
8822 /* SSE can't load any constant directly yet. */
8823 if (SSE_CLASS_P (class))
8825 /* Floats can load 0 and 1. */
8826 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
8828 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
8829 if (MAYBE_SSE_CLASS_P (class))
8830 return (reg_class_subset_p (class, GENERAL_REGS)
8831 ? GENERAL_REGS : FLOAT_REGS);
8835 /* General regs can load everything. */
8836 if (reg_class_subset_p (class, GENERAL_REGS))
8837 return GENERAL_REGS;
8838 /* In case we haven't resolved FLOAT or SSE yet, give up. */
8839 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
8842 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
8844 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
8849 /* If we are copying between general and FP registers, we need a memory
8850 location. The same is true for SSE and MMX registers.
8852 The macro can't work reliably when one of the CLASSES is a class containing
8853 registers from multiple units (SSE, MMX, integer).  We avoid this by never
8854 combining those units in a single alternative in the machine description.
8855 Ensure that this constraint holds to avoid unexpected surprises.
8857 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
8858 enforce these sanity checks. */
8860 ix86_secondary_memory_needed (class1, class2, mode, strict)
8861 enum reg_class class1, class2;
8862 enum machine_mode mode;
8865 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
8866 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
8867 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
8868 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
8869 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
8870 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
8877 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
8878 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
8879 && (mode) != SImode)
8880 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
8881 && (mode) != SImode));
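/* Illustrative note on the SImode exception above: a 32-bit value can be
   moved directly between a general register and an MMX or SSE register
   (movd), so only wider modes have to be bounced through memory when the
   two classes live in different units.  The specific instruction chosen is
   up to the move patterns; this comment only restates the condition.  */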
8883 /* Return the cost of moving data from a register in class CLASS1 to
8884 one in class CLASS2.
8886 It is not required that the cost always equal 2 when FROM is the same as TO;
8887 on some machines it is expensive to move between registers if they are not
8888 general registers. */
8890 ix86_register_move_cost (mode, class1, class2)
8891 enum machine_mode mode;
8892 enum reg_class class1, class2;
8894 /* In case we require secondary memory, compute the cost of the store followed
8895 by a load.  In case of copying from a general purpose register we may emit
8896 multiple stores followed by a single load, causing a memory size mismatch
8897 stall.  Count this as an arbitrarily high cost of 20.  */
8898 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
8900 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
8902 return (MEMORY_MOVE_COST (mode, class1, 0)
8903 + MEMORY_MOVE_COST (mode, class2, 1));
8905 /* Moves between SSE/MMX and integer unit are expensive.
8906 ??? We should make this cost CPU specific. */
8907 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
8908 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
8909 return ix86_cost->mmxsse_to_integer;
8910 if (MAYBE_FLOAT_CLASS_P (class1))
8911 return ix86_cost->fp_move;
8912 if (MAYBE_SSE_CLASS_P (class1))
8913 return ix86_cost->sse_move;
8914 if (MAYBE_MMX_CLASS_P (class1))
8915 return ix86_cost->mmx_move;
8919 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
8921 ix86_hard_regno_mode_ok (regno, mode)
8923 enum machine_mode mode;
8925 /* Only the flags register can hold CCmode values, and it can hold nothing else.  */
8926 if (CC_REGNO_P (regno))
8927 return GET_MODE_CLASS (mode) == MODE_CC;
8928 if (GET_MODE_CLASS (mode) == MODE_CC
8929 || GET_MODE_CLASS (mode) == MODE_RANDOM
8930 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
8932 if (FP_REGNO_P (regno))
8933 return VALID_FP_MODE_P (mode);
8934 if (SSE_REGNO_P (regno))
8935 return VALID_SSE_REG_MODE (mode);
8936 if (MMX_REGNO_P (regno))
8937 return VALID_MMX_REG_MODE (mode);
8938 /* We handle both integer and float values in the general purpose registers.
8939 In the future we should be able to handle vector modes as well.  */
8940 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
8942 /* Take care with QImode values - they can be in non-QI regs, but then
8943 they do cause partial register stalls.  */
8944 if (regno < 4 || mode != QImode)
8946 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
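/* Illustrative note on the QImode test above: hard registers 0-3 (%eax,
   %edx, %ecx and %ebx in this port's numbering) have byte subregisters, so
   QImode is always allowed there.  Registers such as %esi and %edi accept
   QImode only when partial register stalls are not being avoided, or during
   reload when there may be no other choice.  The register names are given
   only as an aid to reading the test.  */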
8949 /* Return the cost of moving data of mode M between a
8950 register and memory. A value of 2 is the default; this cost is
8951 relative to those in `REGISTER_MOVE_COST'.
8953 If moving between registers and memory is more expensive than
8954 between two registers, you should define this macro to express the relative cost.
8957 Also model the increased cost of moving QImode values in non Q_REGS classes.  */
8961 ix86_memory_move_cost (mode, class, in)
8962 enum machine_mode mode;
8963 enum reg_class class;
8966 if (FLOAT_CLASS_P (class))
8984 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
8986 if (SSE_CLASS_P (class))
8989 switch (GET_MODE_SIZE (mode))
9003 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
9005 if (MMX_CLASS_P (class))
9008 switch (GET_MODE_SIZE (mode))
9019 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
9021 switch (GET_MODE_SIZE (mode))
9025 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
9026 : ix86_cost->movzbl_load);
9028 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
9029 : ix86_cost->int_store[0] + 4);
9032 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
9034 /* Compute the number of 32-bit moves needed.  TFmode is moved as XFmode.  */
9037 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
9038 * (int) GET_MODE_SIZE (mode) / 4);
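/* Illustrative arithmetic for the formula above (the cost-table values of 2
   are assumed for concreteness; only the formula comes from the code): for
   DImode, GET_MODE_SIZE is 8, so

       load cost  = int_load[2]  * 8 / 4 = 2 * 2 = 4
       store cost = int_store[2] * 8 / 4 = 2 * 2 = 4

   i.e. a 64-bit value is costed as two 32-bit moves.  */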