1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
3 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "insn-flags.h"
35 #include "insn-attr.h"
42 #include "basic-block.h"
/* Default stack-probe limit: -1 means "no limit known", i.e. probe
   unconditionally.  Subtargets may predefine a tighter value.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT -1
#endif
49 /* Processor costs (relative to an add) */
50 struct processor_costs i386_cost = { /* 386 specific costs */
51 1, /* cost of an add instruction */
52 1, /* cost of a lea instruction */
53 3, /* variable shift costs */
54 2, /* constant shift costs */
55 6, /* cost of starting a multiply */
56 1, /* cost of multiply per each bit set */
57 23, /* cost of a divide/mod */
58 15, /* "large" insn */
60 4, /* cost for loading QImode using movzbl */
61 {2, 4, 2}, /* cost of loading integer registers
62 in QImode, HImode and SImode.
63 Relative to reg-reg move (2). */
64 {2, 4, 2}, /* cost of storing integer registers */
65 2, /* cost of reg,reg fld/fst */
66 {8, 8, 8}, /* cost of loading fp registers
67 in SFmode, DFmode and XFmode */
68 {8, 8, 8}, /* cost of loading integer registers */
69 2, /* cost of moving MMX register */
70 {4, 8}, /* cost of loading MMX registers
71 in SImode and DImode */
72 {4, 8}, /* cost of storing MMX registers
73 in SImode and DImode */
74 2, /* cost of moving SSE register */
75 {4, 8, 16}, /* cost of loading SSE registers
76 in SImode, DImode and TImode */
77 {4, 8, 16}, /* cost of storing SSE registers
78 in SImode, DImode and TImode */
79 3, /* MMX or SSE register to integer */
82 struct processor_costs i486_cost = { /* 486 specific costs */
83 1, /* cost of an add instruction */
84 1, /* cost of a lea instruction */
85 3, /* variable shift costs */
86 2, /* constant shift costs */
87 12, /* cost of starting a multiply */
88 1, /* cost of multiply per each bit set */
89 40, /* cost of a divide/mod */
90 15, /* "large" insn */
92 4, /* cost for loading QImode using movzbl */
93 {2, 4, 2}, /* cost of loading integer registers
94 in QImode, HImode and SImode.
95 Relative to reg-reg move (2). */
96 {2, 4, 2}, /* cost of storing integer registers */
97 2, /* cost of reg,reg fld/fst */
98 {8, 8, 8}, /* cost of loading fp registers
99 in SFmode, DFmode and XFmode */
100 {8, 8, 8}, /* cost of loading integer registers */
101 2, /* cost of moving MMX register */
102 {4, 8}, /* cost of loading MMX registers
103 in SImode and DImode */
104 {4, 8}, /* cost of storing MMX registers
105 in SImode and DImode */
106 2, /* cost of moving SSE register */
107 {4, 8, 16}, /* cost of loading SSE registers
108 in SImode, DImode and TImode */
109 {4, 8, 16}, /* cost of storing SSE registers
110 in SImode, DImode and TImode */
111 3 /* MMX or SSE register to integer */
114 struct processor_costs pentium_cost = {
115 1, /* cost of an add instruction */
116 1, /* cost of a lea instruction */
117 4, /* variable shift costs */
118 1, /* constant shift costs */
119 11, /* cost of starting a multiply */
120 0, /* cost of multiply per each bit set */
121 25, /* cost of a divide/mod */
122 8, /* "large" insn */
124 6, /* cost for loading QImode using movzbl */
125 {2, 4, 2}, /* cost of loading integer registers
126 in QImode, HImode and SImode.
127 Relative to reg-reg move (2). */
128 {2, 4, 2}, /* cost of storing integer registers */
129 2, /* cost of reg,reg fld/fst */
130 {2, 2, 6}, /* cost of loading fp registers
131 in SFmode, DFmode and XFmode */
132 {4, 4, 6}, /* cost of loading integer registers */
133 8, /* cost of moving MMX register */
134 {8, 8}, /* cost of loading MMX registers
135 in SImode and DImode */
136 {8, 8}, /* cost of storing MMX registers
137 in SImode and DImode */
138 2, /* cost of moving SSE register */
139 {4, 8, 16}, /* cost of loading SSE registers
140 in SImode, DImode and TImode */
141 {4, 8, 16}, /* cost of storing SSE registers
142 in SImode, DImode and TImode */
143 3 /* MMX or SSE register to integer */
146 struct processor_costs pentiumpro_cost = {
147 1, /* cost of an add instruction */
148 1, /* cost of a lea instruction */
149 1, /* variable shift costs */
150 1, /* constant shift costs */
151 4, /* cost of starting a multiply */
152 0, /* cost of multiply per each bit set */
153 17, /* cost of a divide/mod */
154 8, /* "large" insn */
156 2, /* cost for loading QImode using movzbl */
157 {4, 4, 4}, /* cost of loading integer registers
158 in QImode, HImode and SImode.
159 Relative to reg-reg move (2). */
160 {2, 2, 2}, /* cost of storing integer registers */
161 2, /* cost of reg,reg fld/fst */
162 {2, 2, 6}, /* cost of loading fp registers
163 in SFmode, DFmode and XFmode */
164 {4, 4, 6}, /* cost of loading integer registers */
165 2, /* cost of moving MMX register */
166 {2, 2}, /* cost of loading MMX registers
167 in SImode and DImode */
168 {2, 2}, /* cost of storing MMX registers
169 in SImode and DImode */
170 2, /* cost of moving SSE register */
171 {2, 2, 8}, /* cost of loading SSE registers
172 in SImode, DImode and TImode */
173 {2, 2, 8}, /* cost of storing SSE registers
174 in SImode, DImode and TImode */
175 3 /* MMX or SSE register to integer */
178 struct processor_costs k6_cost = {
179 1, /* cost of an add instruction */
180 2, /* cost of a lea instruction */
181 1, /* variable shift costs */
182 1, /* constant shift costs */
183 3, /* cost of starting a multiply */
184 0, /* cost of multiply per each bit set */
185 18, /* cost of a divide/mod */
186 8, /* "large" insn */
188 3, /* cost for loading QImode using movzbl */
189 {4, 5, 4}, /* cost of loading integer registers
190 in QImode, HImode and SImode.
191 Relative to reg-reg move (2). */
192 {2, 3, 2}, /* cost of storing integer registers */
193 4, /* cost of reg,reg fld/fst */
194 {6, 6, 6}, /* cost of loading fp registers
195 in SFmode, DFmode and XFmode */
196 {4, 4, 4}, /* cost of loading integer registers */
197 2, /* cost of moving MMX register */
198 {2, 2}, /* cost of loading MMX registers
199 in SImode and DImode */
200 {2, 2}, /* cost of storing MMX registers
201 in SImode and DImode */
202 2, /* cost of moving SSE register */
203 {2, 2, 8}, /* cost of loading SSE registers
204 in SImode, DImode and TImode */
205 {2, 2, 8}, /* cost of storing SSE registers
206 in SImode, DImode and TImode */
207 6 /* MMX or SSE register to integer */
210 struct processor_costs athlon_cost = {
211 1, /* cost of an add instruction */
212 2, /* cost of a lea instruction */
213 1, /* variable shift costs */
214 1, /* constant shift costs */
215 5, /* cost of starting a multiply */
216 0, /* cost of multiply per each bit set */
217 42, /* cost of a divide/mod */
218 8, /* "large" insn */
220 4, /* cost for loading QImode using movzbl */
221 {4, 5, 4}, /* cost of loading integer registers
222 in QImode, HImode and SImode.
223 Relative to reg-reg move (2). */
224 {2, 3, 2}, /* cost of storing integer registers */
225 4, /* cost of reg,reg fld/fst */
226 {6, 6, 20}, /* cost of loading fp registers
227 in SFmode, DFmode and XFmode */
228 {4, 4, 16}, /* cost of loading integer registers */
229 2, /* cost of moving MMX register */
230 {2, 2}, /* cost of loading MMX registers
231 in SImode and DImode */
232 {2, 2}, /* cost of storing MMX registers
233 in SImode and DImode */
234 2, /* cost of moving SSE register */
235 {2, 2, 8}, /* cost of loading SSE registers
236 in SImode, DImode and TImode */
237 {2, 2, 8}, /* cost of storing SSE registers
238 in SImode, DImode and TImode */
239 6 /* MMX or SSE register to integer */
242 struct processor_costs pentium4_cost = {
243 1, /* cost of an add instruction */
244 1, /* cost of a lea instruction */
245 8, /* variable shift costs */
246 8, /* constant shift costs */
247 30, /* cost of starting a multiply */
248 0, /* cost of multiply per each bit set */
249 112, /* cost of a divide/mod */
250 16, /* "large" insn */
252 2, /* cost for loading QImode using movzbl */
253 {4, 5, 4}, /* cost of loading integer registers
254 in QImode, HImode and SImode.
255 Relative to reg-reg move (2). */
256 {2, 3, 2}, /* cost of storing integer registers */
257 2, /* cost of reg,reg fld/fst */
258 {2, 2, 6}, /* cost of loading fp registers
259 in SFmode, DFmode and XFmode */
260 {4, 4, 6}, /* cost of loading integer registers */
261 2, /* cost of moving MMX register */
262 {2, 2}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {2, 2}, /* cost of storing MMX registers
265 in SImode and DImode */
266 12, /* cost of moving SSE register */
267 {12, 12, 12}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {2, 2, 8}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
271 10, /* MMX or SSE register to integer */
274 struct processor_costs *ix86_cost = &pentium_cost;
276 /* Processor feature/optimization bitmasks. */
277 #define m_386 (1<<PROCESSOR_I386)
278 #define m_486 (1<<PROCESSOR_I486)
279 #define m_PENT (1<<PROCESSOR_PENTIUM)
280 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
281 #define m_K6 (1<<PROCESSOR_K6)
282 #define m_ATHLON (1<<PROCESSOR_ATHLON)
283 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
285 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
286 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
287 const int x86_zero_extend_with_and = m_486 | m_PENT;
288 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
289 const int x86_double_with_add = ~m_386;
290 const int x86_use_bit_test = m_386;
291 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
292 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
293 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
294 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
295 const int x86_partial_reg_stall = m_PPRO;
296 const int x86_use_loop = m_K6;
297 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
298 const int x86_use_mov0 = m_K6;
299 const int x86_use_cltd = ~(m_PENT | m_K6);
300 const int x86_read_modify_write = ~m_PENT;
301 const int x86_read_modify = ~(m_PENT | m_PPRO);
302 const int x86_split_long_moves = m_PPRO;
303 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
304 const int x86_single_stringop = m_386 | m_PENT4;
305 const int x86_qimode_math = ~(0);
306 const int x86_promote_qi_regs = 0;
307 const int x86_himode_math = ~(m_PPRO);
308 const int x86_promote_hi_regs = m_PPRO;
309 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
310 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
311 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
312 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
313 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
314 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
315 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
317 #define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
319 const char * const hi_reg_name[] = HI_REGISTER_NAMES;
320 const char * const qi_reg_name[] = QI_REGISTER_NAMES;
321 const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
323 /* Array of the smallest class containing reg number REGNO, indexed by
324 REGNO. Used by REGNO_REG_CLASS in i386.h. */
326 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
329 AREG, DREG, CREG, BREG,
331 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
333 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
334 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
337 /* flags, fpsr, dirflag, frame */
338 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
339 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
341 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
345 /* The "default" register map. */
347 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
349 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
350 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
351 -1, -1, -1, -1, /* arg, flags, fpsr, dir */
352 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
353 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
356 /* Define the register numbers to be used in Dwarf debugging information.
357 The SVR4 reference port C compiler uses the following register numbers
358 in its Dwarf output code:
359 0 for %eax (gcc regno = 0)
360 1 for %ecx (gcc regno = 2)
361 2 for %edx (gcc regno = 1)
362 3 for %ebx (gcc regno = 3)
363 4 for %esp (gcc regno = 7)
364 5 for %ebp (gcc regno = 6)
365 6 for %esi (gcc regno = 4)
366 7 for %edi (gcc regno = 5)
367 The following three DWARF register numbers are never generated by
368 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
369 believes these numbers have these meanings.
370 8 for %eip (no gcc equivalent)
371 9 for %eflags (gcc regno = 17)
372 10 for %trapno (no gcc equivalent)
373 It is not at all clear how we should number the FP stack registers
374 for the x86 architecture. If the version of SDB on x86/svr4 were
375 a bit less brain dead with respect to floating-point then we would
376 have a precedent to follow with respect to DWARF register numbers
377 for x86 FP registers, but the SDB on x86/svr4 is so completely
378 broken with respect to FP registers that it is hardly worth thinking
379 of it as something to strive for compatibility with.
380 The version of x86/svr4 SDB I have at the moment does (partially)
381 seem to believe that DWARF register number 11 is associated with
382 the x86 register %st(0), but that's about all. Higher DWARF
383 register numbers don't seem to be associated with anything in
384 particular, and even for DWARF regno 11, SDB only seems to under-
385 stand that it should say that a variable lives in %st(0) (when
386 asked via an `=' command) if we said it was in DWARF regno 11,
387 but SDB still prints garbage when asked for the value of the
388 variable in question (via a `/' command).
389 (Also note that the labels SDB prints for various FP stack regs
390 when doing an `x' command are all wrong.)
391 Note that these problems generally don't affect the native SVR4
392 C compiler because it doesn't allow the use of -O with -g and
393 because when it is *not* optimizing, it allocates a memory
394 location for each floating-point variable, and the memory
395 location is what gets described in the DWARF AT_location
396 attribute for the variable in question.
397 Regardless of the severe mental illness of the x86/svr4 SDB, we
398 do something sensible here and we use the following DWARF
399 register numbers. Note that these are all stack-top-relative
401 11 for %st(0) (gcc regno = 8)
402 12 for %st(1) (gcc regno = 9)
403 13 for %st(2) (gcc regno = 10)
404 14 for %st(3) (gcc regno = 11)
405 15 for %st(4) (gcc regno = 12)
406 16 for %st(5) (gcc regno = 13)
407 17 for %st(6) (gcc regno = 14)
408 18 for %st(7) (gcc regno = 15)
410 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
412 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
413 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
414 -1, 9, -1, -1, /* arg, flags, fpsr, dir */
415 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
416 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
419 /* Test and compare insns in i386.md store the information needed to
420 generate branch and scc insns here. */
422 struct rtx_def *ix86_compare_op0 = NULL_RTX;
423 struct rtx_def *ix86_compare_op1 = NULL_RTX;
425 #define MAX_386_STACK_LOCALS 2
427 /* Define the structure for the machine field in struct function. */
428 struct machine_function
430 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
431 int accesses_prev_frame;
434 #define ix86_stack_locals (cfun->machine->stack_locals)
436 /* Structure describing stack frame layout.
437 Stack grows downward:
443 saved frame pointer if frame_pointer_needed
444 <- HARD_FRAME_POINTER
450 > to_allocate <- FRAME_POINTER
461 int outgoing_arguments_size;
463 HOST_WIDE_INT to_allocate;
464 /* The offsets relative to ARG_POINTER. */
465 HOST_WIDE_INT frame_pointer_offset;
466 HOST_WIDE_INT hard_frame_pointer_offset;
467 HOST_WIDE_INT stack_pointer_offset;
470 /* which cpu are we scheduling for */
471 enum processor_type ix86_cpu;
473 /* which instruction set architecture to use. */
476 /* Strings to hold which cpu and instruction set architecture to use. */
477 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
478 const char *ix86_arch_string; /* for -march=<xxx> */
480 /* # of registers to use to pass arguments. */
481 const char *ix86_regparm_string;
483 /* ix86_regparm_string as a number */
486 /* Alignment to use for loops and jumps: */
488 /* Power of two alignment for loops. */
489 const char *ix86_align_loops_string;
491 /* Power of two alignment for non-loop jumps. */
492 const char *ix86_align_jumps_string;
494 /* Power of two alignment for stack boundary in bytes. */
495 const char *ix86_preferred_stack_boundary_string;
497 /* Preferred alignment for stack boundary in bits. */
498 int ix86_preferred_stack_boundary;
500 /* Values 1-5: see jump.c */
501 int ix86_branch_cost;
502 const char *ix86_branch_cost_string;
504 /* Power of two alignment for functions. */
505 int ix86_align_funcs;
506 const char *ix86_align_funcs_string;
508 /* Power of two alignment for loops. */
509 int ix86_align_loops;
511 /* Power of two alignment for non-loop jumps. */
512 int ix86_align_jumps;
514 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
515 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
517 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
518 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
520 static rtx gen_push PARAMS ((rtx));
521 static int memory_address_length PARAMS ((rtx addr));
522 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
523 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
524 static int ix86_safe_length PARAMS ((rtx));
525 static enum attr_memory ix86_safe_memory PARAMS ((rtx));
526 static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
527 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
528 static void ix86_dump_ppro_packet PARAMS ((FILE *));
529 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
530 static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
532 static void ix86_init_machine_status PARAMS ((struct function *));
533 static void ix86_mark_machine_status PARAMS ((struct function *));
534 static void ix86_free_machine_status PARAMS ((struct function *));
535 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
536 static int ix86_safe_length_prefix PARAMS ((rtx));
537 static int ix86_nsaved_regs PARAMS((void));
538 static void ix86_emit_save_regs PARAMS((void));
539 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int));
540 static void ix86_emit_epilogue_esp_adjustment PARAMS((int));
541 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
542 static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
543 static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
544 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
548 rtx base, index, disp;
552 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
554 struct builtin_description;
555 static rtx ix86_expand_sse_comi PARAMS ((struct builtin_description *, tree,
557 static rtx ix86_expand_sse_compare PARAMS ((struct builtin_description *, tree,
559 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
560 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
561 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
562 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
563 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
564 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
565 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
569 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
571 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
572 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
573 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
574 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
575 static int ix86_save_reg PARAMS ((int));
576 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
578 /* Sometimes certain combinations of command options do not make
579 sense on a particular target machine. You can define a macro
580 `OVERRIDE_OPTIONS' to take account of this. This macro, if
581 defined, is executed once just after all the command options have
584 Don't use this macro to turn on various extra optimizations for
585 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
591 /* Comes from final.c -- no real reason to change it. */
592 #define MAX_CODE_ALIGN 16
596 struct processor_costs *cost; /* Processor costs */
597 int target_enable; /* Target flags to enable. */
598 int target_disable; /* Target flags to disable. */
599 int align_loop; /* Default alignments. */
604 const processor_target_table[PROCESSOR_max] =
606 {&i386_cost, 0, 0, 2, 2, 2, 1},
607 {&i486_cost, 0, 0, 4, 4, 4, 1},
608 {&pentium_cost, 0, 0, -4, -4, -4, 1},
609 {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
610 {&k6_cost, 0, 0, -5, -5, 4, 1},
611 {&athlon_cost, 0, 0, 4, -4, 4, 1},
612 {&pentium4_cost, 0, 0, 2, 2, 2, 1}
617 const char *name; /* processor name or nickname. */
618 enum processor_type processor;
620 const processor_alias_table[] =
622 {"i386", PROCESSOR_I386},
623 {"i486", PROCESSOR_I486},
624 {"i586", PROCESSOR_PENTIUM},
625 {"pentium", PROCESSOR_PENTIUM},
626 {"i686", PROCESSOR_PENTIUMPRO},
627 {"pentiumpro", PROCESSOR_PENTIUMPRO},
628 {"k6", PROCESSOR_K6},
629 {"athlon", PROCESSOR_ATHLON},
630 {"pentium4", PROCESSOR_PENTIUM4},
633 int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
635 #ifdef SUBTARGET_OVERRIDE_OPTIONS
636 SUBTARGET_OVERRIDE_OPTIONS;
639 ix86_arch = PROCESSOR_I386;
640 ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
642 if (ix86_arch_string != 0)
644 for (i = 0; i < pta_size; i++)
645 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
647 ix86_arch = processor_alias_table[i].processor;
648 /* Default cpu tuning to the architecture. */
649 ix86_cpu = ix86_arch;
654 error ("bad value (%s) for -march= switch", ix86_arch_string);
657 if (ix86_cpu_string != 0)
659 for (i = 0; i < pta_size; i++)
660 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
662 ix86_cpu = processor_alias_table[i].processor;
666 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
669 ix86_cost = processor_target_table[ix86_cpu].cost;
670 target_flags |= processor_target_table[ix86_cpu].target_enable;
671 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
673 /* Arrange to set up i386_stack_locals for all functions. */
674 init_machine_status = ix86_init_machine_status;
675 mark_machine_status = ix86_mark_machine_status;
676 free_machine_status = ix86_free_machine_status;
678 /* Validate -mregparm= value. */
679 if (ix86_regparm_string)
681 i = atoi (ix86_regparm_string);
682 if (i < 0 || i > REGPARM_MAX)
683 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
688 /* Validate -malign-loops= value, or provide default. */
689 ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
690 if (ix86_align_loops_string)
692 i = atoi (ix86_align_loops_string);
693 if (i < 0 || i > MAX_CODE_ALIGN)
694 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
696 ix86_align_loops = i;
699 /* Validate -malign-jumps= value, or provide default. */
700 ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
701 if (ix86_align_jumps_string)
703 i = atoi (ix86_align_jumps_string);
704 if (i < 0 || i > MAX_CODE_ALIGN)
705 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
707 ix86_align_jumps = i;
710 /* Validate -malign-functions= value, or provide default. */
711 ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
712 if (ix86_align_funcs_string)
714 i = atoi (ix86_align_funcs_string);
715 if (i < 0 || i > MAX_CODE_ALIGN)
716 error ("-malign-functions=%d is not between 0 and %d",
719 ix86_align_funcs = i;
722 /* Validate -mpreferred-stack-boundary= value, or provide default.
723 The default of 128 bits is for Pentium III's SSE __m128. */
724 ix86_preferred_stack_boundary = 128;
725 if (ix86_preferred_stack_boundary_string)
727 i = atoi (ix86_preferred_stack_boundary_string);
729 error ("-mpreferred-stack-boundary=%d is not between 2 and 31", i);
731 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
734 /* Validate -mbranch-cost= value, or provide default. */
735 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
736 if (ix86_branch_cost_string)
738 i = atoi (ix86_branch_cost_string);
740 error ("-mbranch-cost=%d is not between 0 and 5", i);
742 ix86_branch_cost = i;
745 /* Keep nonleaf frame pointers. */
746 if (TARGET_OMIT_LEAF_FRAME_POINTER)
747 flag_omit_frame_pointer = 1;
749 /* If we're doing fast math, we don't care about comparison order
750 wrt NaNs. This lets us use a shorter comparison sequence. */
752 target_flags &= ~MASK_IEEE_FP;
754 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
757 target_flags |= MASK_MMX;
761 optimization_options (level, size)
763 int size ATTRIBUTE_UNUSED;
765 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
766 make the problem with not enough registers even worse. */
767 #ifdef INSN_SCHEDULING
769 flag_schedule_insns = 0;
773 /* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
774 attribute for DECL. The attributes in ATTRIBUTES have previously been
778 ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
779 tree decl ATTRIBUTE_UNUSED;
780 tree attributes ATTRIBUTE_UNUSED;
781 tree identifier ATTRIBUTE_UNUSED;
782 tree args ATTRIBUTE_UNUSED;
787 /* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
788 attribute for TYPE. The attributes in ATTRIBUTES have previously been
792 ix86_valid_type_attribute_p (type, attributes, identifier, args)
794 tree attributes ATTRIBUTE_UNUSED;
798 if (TREE_CODE (type) != FUNCTION_TYPE
799 && TREE_CODE (type) != METHOD_TYPE
800 && TREE_CODE (type) != FIELD_DECL
801 && TREE_CODE (type) != TYPE_DECL)
804 /* Stdcall attribute says callee is responsible for popping arguments
805 if they are not variable. */
806 if (is_attribute_p ("stdcall", identifier))
807 return (args == NULL_TREE);
809 /* Cdecl attribute says the callee is a normal C declaration. */
810 if (is_attribute_p ("cdecl", identifier))
811 return (args == NULL_TREE);
813 /* Regparm attribute specifies how many integer arguments are to be
814 passed in registers. */
815 if (is_attribute_p ("regparm", identifier))
819 if (! args || TREE_CODE (args) != TREE_LIST
820 || TREE_CHAIN (args) != NULL_TREE
821 || TREE_VALUE (args) == NULL_TREE)
824 cst = TREE_VALUE (args);
825 if (TREE_CODE (cst) != INTEGER_CST)
828 if (compare_tree_int (cst, REGPARM_MAX) > 0)
837 /* Return 0 if the attributes for two types are incompatible, 1 if they
838 are compatible, and 2 if they are nearly compatible (which causes a
839 warning to be generated). */
842 ix86_comp_type_attributes (type1, type2)
846 /* Check for mismatch of non-default calling convention. */
847 const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
849 if (TREE_CODE (type1) != FUNCTION_TYPE)
852 /* Check for mismatched return types (cdecl vs stdcall). */
853 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
854 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
859 /* Value is the number of bytes of arguments automatically
860 popped when returning from a subroutine call.
861 FUNDECL is the declaration node of the function (as a tree),
862 FUNTYPE is the data type of the function (as a tree),
863 or for a library call it is an identifier node for the subroutine name.
864 SIZE is the number of bytes of arguments passed on the stack.
866 On the 80386, the RTD insn may be used to pop them if the number
867 of args is fixed, but if the number is variable then the caller
868 must pop them all. RTD can't be used for library calls now
869 because the library is compiled with the Unix compiler.
870 Use of RTD is a selectable option, since it is incompatible with
871 standard Unix calling sequences. If the option is not selected,
872 the caller must always pop the args.
874 The attribute stdcall is equivalent to RTD on a per module basis. */
877 ix86_return_pops_args (fundecl, funtype, size)
882 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
884 /* Cdecl functions override -mrtd, and never pop the stack. */
885 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
887 /* Stdcall functions will pop the stack if not variable args. */
888 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
892 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
893 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
898 /* Lose any fake structure return argument. */
899 if (aggregate_value_p (TREE_TYPE (funtype)))
900 return GET_MODE_SIZE (Pmode);
905 /* Argument support functions. */
907 /* Initialize a variable CUM of type CUMULATIVE_ARGS
908 for a call to a function whose data type is FNTYPE.
909 For a library call, FNTYPE is 0. */
912 init_cumulative_args (cum, fntype, libname)
913 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
914 tree fntype; /* tree ptr for function decl */
915 rtx libname; /* SYMBOL_REF of library name or 0 */
917 static CUMULATIVE_ARGS zero_cum;
918 tree param, next_param;
920 if (TARGET_DEBUG_ARG)
922 fprintf (stderr, "\ninit_cumulative_args (");
924 fprintf (stderr, "fntype code = %s, ret code = %s",
925 tree_code_name[(int) TREE_CODE (fntype)],
926 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
928 fprintf (stderr, "no fntype");
931 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
936 /* Set up the number of registers to use for passing arguments. */
937 cum->nregs = ix86_regparm;
940 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
943 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
946 /* Determine if this function has variable arguments. This is
947 indicated by the last argument being 'void_type_mode' if there
948 are no variable arguments. If there are variable arguments, then
949 we won't pass anything in registers */
953 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
954 param != 0; param = next_param)
956 next_param = TREE_CHAIN (param);
957 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
962 if (TARGET_DEBUG_ARG)
963 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
968 /* Update the data in CUM to advance over an argument
969 of mode MODE and data type TYPE.
970 (TYPE is null for libcalls where that information may not be available.) */
/* NOTE(review): extract is elided — declarations of `bytes` and several
   braces/returns are not visible here. */
973 function_arg_advance (cum, mode, type, named)
974 CUMULATIVE_ARGS *cum; /* current arg information */
975 enum machine_mode mode; /* current arg mode */
976 tree type; /* type of the argument or 0 if lib support */
977 int named; /* whether or not the argument was named */
/* Argument size: BLKmode sizes come from the tree, others from the mode. */
980 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
981 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
982 if (TARGET_DEBUG_ARG)
985 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
986 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
/* TImode arguments use the separate SSE-register counters when SSE is on. */
987 if (TARGET_SSE && mode == TImode)
989 cum->sse_words += words;
992 if (cum->sse_nregs <= 0)
1000 cum->words += words;
1001 cum->nregs -= words;
1002 cum->regno += words;
1004 if (cum->nregs <= 0)
1013 /* Define where to put the arguments to a function.
1014 Value is zero to push the argument on the stack,
1015 or a hard register in which to store the argument.
1017 MODE is the argument's machine mode.
1018 TYPE is the data type of the argument (as a tree).
1019 This is null for libcalls where that information may
1021 CUM is a variable of type CUMULATIVE_ARGS which gives info about
1022 the preceding args and about the function being called.
1023 NAMED is nonzero if this argument is a named parameter
1024 (otherwise it is an extra parameter matching an ellipsis). */
/* NOTE(review): declarations of `ret`/`bytes` and the switch on MODE are
   elided from this extract. */
1027 function_arg (cum, mode, type, named)
1028 CUMULATIVE_ARGS *cum; /* current arg information */
1029 enum machine_mode mode; /* current arg mode */
1030 tree type; /* type of the argument or 0 if lib support */
1031 int named; /* != 0 for normal args, == 0 for ... args */
1035 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1036 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1040 /* For now, pass fp/complex values on the stack. */
/* Use an integer register only if the whole argument fits in what's left. */
1049 if (words <= cum->nregs)
1050 ret = gen_rtx_REG (mode, cum->regno);
1054 ret = gen_rtx_REG (mode, cum->sse_regno);
1058 if (TARGET_DEBUG_ARG)
1061 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
1062 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
1065 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
1067 fprintf (stderr, ", stack");
1069 fprintf (stderr, " )\n");
1076 /* Return nonzero if OP is (const_int 1), else return zero. */
/* MODE is ignored (ATTRIBUTE_UNUSED); only the rtx code and value matter. */
1079 const_int_1_operand (op, mode)
1081 enum machine_mode mode ATTRIBUTE_UNUSED;
1083 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
1086 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
1087 reference and a constant. */
/* NOTE(review): switch cases and returns are elided in this extract.
   UNSPEC numbers 6 and 7 appear to be the PIC @GOT/@GOTOFF unspecs
   (see "Only @GOTOFF gets offsets" below) — confirm against i386.md. */
1090 symbolic_operand (op, mode)
1092 enum machine_mode mode ATTRIBUTE_UNUSED;
1094 switch (GET_CODE (op))
1102 if (GET_CODE (op) == SYMBOL_REF
1103 || GET_CODE (op) == LABEL_REF
1104 || (GET_CODE (op) == UNSPEC
1105 && XINT (op, 1) >= 6
1106 && XINT (op, 1) <= 7))
1108 if (GET_CODE (op) != PLUS
1109 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1113 if (GET_CODE (op) == SYMBOL_REF
1114 || GET_CODE (op) == LABEL_REF)
1116 /* Only @GOTOFF gets offsets. */
1117 if (GET_CODE (op) != UNSPEC
1118 || XINT (op, 1) != 7)
/* Look through the UNSPEC wrapper at the underlying symbol/label. */
1121 op = XVECEXP (op, 0, 0);
1122 if (GET_CODE (op) == SYMBOL_REF
1123 || GET_CODE (op) == LABEL_REF)
1132 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
/* NOTE(review): the returns inside each branch are elided here; the
   structure mirrors symbolic_operand above (CONST wrapper, optional
   PLUS const_int, then an UNSPEC). */
1135 pic_symbolic_operand (op, mode)
1137 enum machine_mode mode ATTRIBUTE_UNUSED;
1139 if (GET_CODE (op) == CONST)
1142 if (GET_CODE (op) == UNSPEC)
1144 if (GET_CODE (op) != PLUS
1145 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1148 if (GET_CODE (op) == UNSPEC)
1154 /* Test for a valid operand for a call instruction. Don't allow the
1155 arg pointer register or virtual regs since they may decay into
1156 reg + const, which the patterns can't handle. */
1159 call_insn_operand (op, mode)
1161 enum machine_mode mode ATTRIBUTE_UNUSED;
1163 /* Disallow indirect through a virtual register. This leads to
1164 compiler aborts when trying to eliminate them. */
1165 if (GET_CODE (op) == REG
1166 && (op == arg_pointer_rtx
1167 || op == frame_pointer_rtx
1168 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
1169 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
1172 /* Disallow `call 1234'. Due to varying assembler lameness this
1173 gets either rejected or translated to `call .+1234'. */
1174 if (GET_CODE (op) == CONST_INT)
1177 /* Explicitly allow SYMBOL_REF even if pic. */
1178 if (GET_CODE (op) == SYMBOL_REF)
1181 /* Half-pic doesn't allow anything but registers and constants.
1182 We've just taken care of the latter. */
1184 return register_operand (op, Pmode);
1186 /* Otherwise we can allow any general_operand in the address. */
1187 return general_operand (op, Pmode);
/* Return 1 if OP is a constant call address: a SYMBOL_REF, possibly
   wrapped in (const (plus SYMBOL_REF const_int)). */
1191 constant_call_address_operand (op, mode)
1193 enum machine_mode mode ATTRIBUTE_UNUSED;
/* Strip a constant offset so we test the underlying symbol. */
1195 if (GET_CODE (op) == CONST
1196 && GET_CODE (XEXP (op, 0)) == PLUS
1197 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1198 op = XEXP (XEXP (op, 0), 0);
1199 return GET_CODE (op) == SYMBOL_REF;
1202 /* Match exactly zero and one. */
/* Note: unlike const1_operand below, this one does use MODE — the zero
   constant is mode-specific (CONST0_RTX). */
1205 const0_operand (op, mode)
1207 enum machine_mode mode;
1209 return op == CONST0_RTX (mode);
/* Return 1 if OP is the shared (const_int 1) rtx; MODE is ignored. */
1213 const1_operand (op, mode)
1215 enum machine_mode mode ATTRIBUTE_UNUSED;
1217 return op == const1_rtx;
1220 /* Match 2, 4, or 8. Used for leal multiplicands. */
1223 const248_operand (op, mode)
1225 enum machine_mode mode ATTRIBUTE_UNUSED;
/* These are the only scale factors the x86 SIB byte can encode. */
1227 return (GET_CODE (op) == CONST_INT
1228 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
1231 /* True if this is a constant appropriate for an increment or decrement. */
1234 incdec_operand (op, mode)
1236 enum machine_mode mode;
1238 /* On Pentium4, the inc and dec operations cause an extra dependency on flag
1239 registers, since carry flag is not set. */
1240 if (TARGET_PENTIUM4 && !optimize_size)
1242 if (op == const1_rtx || op == constm1_rtx)
1244 if (GET_CODE (op) != CONST_INT)
/* Mode-sized all-ones masks are -1 in that mode, so they also qualify. */
1246 if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
1248 if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
1250 if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
1255 /* Return false if this is the stack pointer, or any other fake
1256 register eliminable to the stack pointer. Otherwise, this is
1259 This is used to prevent esp from being used as an index reg.
1260 Which would only happen in pathological cases. */
/* NOTE(review): the declaration of `t` (presumably t = op) is elided. */
1263 reg_no_sp_operand (op, mode)
1265 enum machine_mode mode;
/* Look through a SUBREG so subregs of sp-like regs are also rejected. */
1268 if (GET_CODE (t) == SUBREG)
1270 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
1273 return register_operand (op, mode);
/* Return 1 if OP is an MMX hard register; MODE is ignored. */
1277 mmx_reg_operand (op, mode)
1279 enum machine_mode mode ATTRIBUTE_UNUSED;
1281 return MMX_REG_P (op);
1284 /* Return false if this is any eliminable register. Otherwise
/* NOTE(review): the tail of the comment above and the declaration of `t`
   are elided in this extract. */
1288 general_no_elim_operand (op, mode)
1290 enum machine_mode mode;
1293 if (GET_CODE (t) == SUBREG)
1295 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1296 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1297 || t == virtual_stack_dynamic_rtx)
1300 return general_operand (op, mode);
1303 /* Return false if this is any eliminable register. Otherwise
1304 register_operand or const_int. */
/* Same eliminable-register filter as general_no_elim_operand above,
   but the fallback accepts only registers and integer constants. */
1307 nonmemory_no_elim_operand (op, mode)
1309 enum machine_mode mode;
1312 if (GET_CODE (t) == SUBREG)
1314 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1315 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1316 || t == virtual_stack_dynamic_rtx)
1319 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
1322 /* Return true if op is a Q_REGS class register. */
1325 q_regs_operand (op, mode)
1327 enum machine_mode mode;
1329 if (mode != VOIDmode && GET_MODE (op) != mode)
/* Look at the underlying hard register of a subreg. */
1331 if (GET_CODE (op) == SUBREG)
1332 op = SUBREG_REG (op);
1333 return QI_REG_P (op);
1336 /* Return true if op is a NON_Q_REGS class register. */
/* Mirror image of q_regs_operand above. */
1339 non_q_regs_operand (op, mode)
1341 enum machine_mode mode;
1343 if (mode != VOIDmode && GET_MODE (op) != mode)
1345 if (GET_CODE (op) == SUBREG)
1346 op = SUBREG_REG (op);
1347 return NON_QI_REG_P (op);
1350 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
/* NOTE(review): the switch on `code` and its case labels are elided from
   this extract; only the shared tail of the two groups is visible. */
1353 sse_comparison_operator (op, mode)
1355 enum machine_mode mode ATTRIBUTE_UNUSED;
1357 enum rtx_code code = GET_CODE (op);
1360 /* Operations supported directly. */
1370 /* These are equivalent to ones above in non-IEEE comparisons. */
1377 return !TARGET_IEEE_FP;
1382 /* Return 1 if OP is a valid comparison operator in valid mode. */
1384 ix86_comparison_operator (op, mode)
1386 enum machine_mode mode;
1388 enum machine_mode inmode;
1389 enum rtx_code code = GET_CODE (op);
1390 if (mode != VOIDmode && GET_MODE (op) != mode)
1392 if (GET_RTX_CLASS (code) != '<')
/* The mode of the first comparison arm tells us which CC variant fed it. */
1394 inmode = GET_MODE (XEXP (op, 0));
1396 if (inmode == CCFPmode || inmode == CCFPUmode)
1398 enum rtx_code second_code, bypass_code;
/* Only FP comparisons that need no auxiliary jump are valid here. */
1399 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
1400 return (bypass_code == NIL && second_code == NIL);
1407 if (inmode == CCmode || inmode == CCGCmode
1408 || inmode == CCGOCmode || inmode == CCNOmode)
1411 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
1412 if (inmode == CCmode)
1416 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
1424 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
1427 fcmov_comparison_operator (op, mode)
1429 enum machine_mode mode;
1431 enum machine_mode inmode;
1432 enum rtx_code code = GET_CODE (op);
1433 if (mode != VOIDmode && GET_MODE (op) != mode)
1435 if (GET_RTX_CLASS (code) != '<')
1437 inmode = GET_MODE (XEXP (op, 0));
1438 if (inmode == CCFPmode || inmode == CCFPUmode)
1440 enum rtx_code second_code, bypass_code;
1441 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
1442 if (bypass_code != NIL || second_code != NIL)
/* Map the FP comparison onto the integer condition fcmov understands. */
1444 code = ix86_fp_compare_code_to_integer (code);
1446 /* i387 supports just limited amount of conditional codes. */
1449 case LTU: case GTU: case LEU: case GEU:
1450 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
1453 case ORDERED: case UNORDERED:
1461 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
/* NOTE(review): other switch cases (plausibly PLUS/AND/IOR/XOR/ASHIFT)
   are elided from this extract — confirm against the full source. */
1464 promotable_binary_operator (op, mode)
1466 enum machine_mode mode ATTRIBUTE_UNUSED;
1468 switch (GET_CODE (op))
1471 /* Modern CPUs have same latency for HImode and SImode multiply,
1472 but 386 and 486 do HImode multiply faster. */
1473 return ix86_cpu > PROCESSOR_I486;
1485 /* Nearly general operand, but accept any const_double, since we wish
1486 to be able to drop them into memory rather than have them get pulled
1490 cmp_fp_expander_operand (op, mode)
1492 enum machine_mode mode;
1494 if (mode != VOIDmode && mode != GET_MODE (op))
/* CONST_DOUBLEs are accepted unconditionally; see comment above. */
1496 if (GET_CODE (op) == CONST_DOUBLE)
1498 return general_operand (op, mode);
1501 /* Match an SI or HImode register for a zero_extract. */
1504 ext_register_operand (op, mode)
1506 enum machine_mode mode ATTRIBUTE_UNUSED;
1508 if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
/* Mode already checked above, so defer to register_operand with VOIDmode. */
1510 return register_operand (op, VOIDmode);
1513 /* Return 1 if this is a valid binary floating-point operation.
1514 OP is the expression matched, and MODE is its mode. */
/* NOTE(review): the switch's case labels (arithmetic codes) are elided. */
1517 binary_fp_operator (op, mode)
1519 enum machine_mode mode;
1521 if (mode != VOIDmode && mode != GET_MODE (op))
1524 switch (GET_CODE (op))
1530 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
/* Return 1 if OP is a MULT rtx; MODE is ignored. */
1538 mult_operator(op, mode)
1540 enum machine_mode mode ATTRIBUTE_UNUSED;
1542 return GET_CODE (op) == MULT;
/* Return 1 if OP is a DIV rtx; MODE is ignored. */
1546 div_operator(op, mode)
1548 enum machine_mode mode ATTRIBUTE_UNUSED;
1550 return GET_CODE (op) == DIV;
/* Return 1 if OP is any commutative ('c') or binary ('2') operator
   whose mode matches MODE (or MODE is VOIDmode). */
1554 arith_or_logical_operator (op, mode)
1556 enum machine_mode mode;
1558 return ((mode == VOIDmode || GET_MODE (op) == mode)
1559 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
1560 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
1563 /* Returns 1 if OP is memory operand with a displacement. */
1566 memory_displacement_operand (op, mode)
1568 enum machine_mode mode;
1570 struct ix86_address parts;
1572 if (! memory_operand (op, mode))
/* Decompose the address; failure means it isn't a valid x86 address. */
1575 if (! ix86_decompose_address (XEXP (op, 0), &parts))
1578 return parts.disp != NULL_RTX;
1581 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
1582 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
1584 ??? It seems likely that this will only work because cmpsi is an
1585 expander, and no actual insns use this. */
1588 cmpsi_operand (op, mode)
1590 enum machine_mode mode;
1592 if (general_operand (op, mode))
/* Also accept the testqi_ext shape: (and (zero_extract x 8 8) const). */
1595 if (GET_CODE (op) == AND
1596 && GET_MODE (op) == SImode
1597 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
1598 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
1599 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
1600 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
1601 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
1602 && GET_CODE (XEXP (op, 1)) == CONST_INT
1608 /* Returns 1 if OP is memory operand that can not be represented by the
/* NOTE(review): the tail of the comment above is elided — presumably
   "... by the shortest (zero-displacement) form"; confirm in full source. */
1612 long_memory_operand (op, mode)
1614 enum machine_mode mode;
1616 if (! memory_operand (op, mode))
/* Nonzero length means the address needs displacement/SIB bytes. */
1619 return memory_address_length (op) != 0;
1622 /* Return nonzero if the rtx is known aligned. */
1625 aligned_operand (op, mode)
1627 enum machine_mode mode;
1629 struct ix86_address parts;
1631 if (!general_operand (op, mode))
1634 /* Registers and immediate operands are always "aligned". */
1635 if (GET_CODE (op) != MEM)
1638 /* Don't even try to do any aligned optimizations with volatiles. */
1639 if (MEM_VOLATILE_P (op))
1644 /* Pushes and pops are only valid on the stack pointer. */
1645 if (GET_CODE (op) == PRE_DEC
1646 || GET_CODE (op) == POST_INC)
1649 /* Decode the address. */
1650 if (! ix86_decompose_address (op, &parts))
1653 /* Look for some component that isn't known to be aligned. */
/* Alignment is checked to 32 bits (4 bytes) for base, index and disp. */
1657 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
1662 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
1667 if (GET_CODE (parts.disp) != CONST_INT
1668 || (INTVAL (parts.disp) & 3) != 0)
1672 /* Didn't find one -- this must be an aligned address. */
1676 /* Return true if the constant is something that can be loaded with
1677 a special instruction. Only handle 0.0 and 1.0; others are less
1681 standard_80387_constant_p (x)
1684 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
1686 /* Note that on the 80387 there are other constants, such as pi, that we
1687 should support too. On some machines, these are much slower to load as a
1688 standard constant than to load from doubles in memory. */
/* NOTE(review): the distinct return values for 0.0 vs 1.0 are elided here. */
1689 if (x == CONST0_RTX (GET_MODE (x)))
1691 if (x == CONST1_RTX (GET_MODE (x)))
1696 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
/* Only the all-zero constant qualifies (loadable via xorps/pxor). */
1699 standard_sse_constant_p (x)
1702 if (GET_CODE (x) != CONST_DOUBLE)
1704 return (x == CONST0_RTX (GET_MODE (x)));
1707 /* Returns 1 if OP contains a symbol reference */
/* Recursive walk over the rtx: 'E' slots (vectors) and 'e' slots
   (sub-expressions) are searched; anything else is ignored. */
1710 symbolic_reference_mentioned_p (op)
1713 register const char *fmt;
1716 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
1719 fmt = GET_RTX_FORMAT (GET_CODE (op));
1720 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
1726 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
1727 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
1731 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
1738 /* Return 1 if it is appropriate to emit `ret' instructions in the
1739 body of a function. Do this only if the epilogue is simple, needing a
1740 couple of insns. Prior to reloading, we can't tell how many registers
1741 must be saved, so return 0 then. Return 0 if there is no frame
1742 marker to de-allocate.
1744 If NON_SAVING_SETJMP is defined and true, then it is not possible
1745 for the epilogue to be simple, so return 0. This is a special case
1746 since NON_SAVING_SETJMP will not cause regs_ever_live to change
1747 until final, but jump_optimize may need to know sooner if a
1751 ix86_can_use_return_insn_p ()
1753 struct ix86_frame frame;
1755 #ifdef NON_SAVING_SETJMP
1756 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
1759 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
1760 if (profile_block_flag == 2)
1764 if (! reload_completed || frame_pointer_needed)
1767 /* Don't allow more than 32 pop, since that's all we can do
1768 with one instruction. */
/* NOTE(review): the 32768 limit presumably reflects the 16-bit immediate
   of `ret $imm16' — confirm; the comment's "32" reads oddly against it. */
1769 if (current_function_pops_args
1770 && current_function_args_size >= 32768)
1773 ix86_compute_frame_layout (&frame);
1774 return frame.to_allocate == 0 && frame.nregs == 0;
1777 /* Value should be nonzero if functions must have frame pointers.
1778 Zero means the frame pointer need not be set up (and parms may
1779 be accessed via the stack pointer) in functions that seem suitable. */
1782 ix86_frame_pointer_required ()
1784 /* If we accessed previous frames, then the generated code expects
1785 to be able to access the saved ebp value in our frame. */
1786 if (cfun->machine->accesses_prev_frame)
1789 /* Several x86 os'es need a frame pointer for other reasons,
1790 usually pertaining to setjmp. */
1791 if (SUBTARGET_FRAME_POINTER_REQUIRED)
1794 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
1795 the frame pointer by default. Turn it back on now if we've not
1796 got a leaf function. */
1797 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
1803 /* Record that the current function accesses previous call frames. */
/* Setting this flag forces ix86_frame_pointer_required to return nonzero. */
1806 ix86_setup_frame_addresses ()
1808 cfun->machine->accesses_prev_frame = 1;
/* Label of the PIC thunk emitted at end of file; empty until the first
   -fPIC prologue asks for it (see load_pic_register below). */
1811 static char pic_label_name[32];
1813 /* This function generates code for -fpic that loads %ebx with
1814 the return address of the caller and then returns. */
1817 ix86_asm_file_end (file)
/* Nothing to emit unless deep-branch-prediction PIC was actually used. */
1822 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
1825 /* ??? Binutils 2.10 and earlier have a linkonce elimination bug related
1826 to updating relocations to a section being discarded such that this
1827 doesn't work. Ought to detect this at configure time. */
1828 #if 0 && defined (ASM_OUTPUT_SECTION_NAME)
1829 /* The trick here is to create a linkonce section containing the
1830 pic label thunk, but to refer to it with an internal label.
1831 Because the label is internal, we don't have inter-dso name
1832 binding issues on hosts that don't support ".hidden".
1834 In order to use these macros, however, we must create a fake
1837 tree decl = build_decl (FUNCTION_DECL,
1838 get_identifier ("i686.get_pc_thunk"),
1840 DECL_ONE_ONLY (decl) = 1;
1841 UNIQUE_SECTION (decl, 0);
1842 named_section (decl, NULL, 0);
1848 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
1849 internal (non-global) label that's being emitted, it didn't make
1850 sense to have .type information for local labels. This caused
1851 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
1852 me debug info for a label that you're declaring non-global?) this
1853 was changed to call ASM_OUTPUT_LABEL() instead. */
1855 ASM_OUTPUT_LABEL (file, pic_label_name);
/* Thunk body: load the return address (at the top of the stack) into
   the PIC register, then return. */
1857 xops[0] = pic_offset_table_rtx;
1858 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
1859 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
1860 output_asm_insn ("ret", xops);
/* Emit the prologue insns that load the PIC register with the address of
   the GOT.  With deep branch prediction, call a thunk (emitted later by
   ix86_asm_file_end); otherwise use the classic call/pop sequence. */
1864 load_pic_register ()
1868 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
1870 if (TARGET_DEEP_BRANCH_PREDICTION)
1872 if (! pic_label_name[0])
1873 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
1874 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name))
1878 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
1881 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
1883 if (! TARGET_DEEP_BRANCH_PREDICTION)
1884 emit_insn (gen_popsi1 (pic_offset_table_rtx));
1886 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
1889 /* Generate an SImode "push" pattern for input ARG. */
/* NOTE(review): the function header and ARG operand are elided here;
   the SET stores ARG through a pre-decremented stack pointer. */
1895 return gen_rtx_SET (VOIDmode,
1896 gen_rtx_MEM (SImode,
1897 gen_rtx_PRE_DEC (SImode,
1898 stack_pointer_rtx)),
1902 /* Return 1 if we need to save REGNO. */
1904 ix86_save_reg (regno)
1907 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1908 || current_function_uses_const_pool);
/* Save call-saved live registers, except the hard frame pointer when it
   is handled by the frame-pointer push; the PIC register is also saved
   whenever PIC addressing is in use. */
1909 return ((regs_ever_live[regno] && !call_used_regs[regno]
1910 && !fixed_regs[regno]
1911 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed))
1912 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used));
1916 /* Return number of registers to be saved on the stack. */
/* NOTE(review): the function header and the accumulator declaration are
   elided; this loop counts hard regs for which ix86_save_reg is true. */
1924 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1925 if (ix86_save_reg (regno))
1930 /* Return the offset between two registers, one to be eliminated, and the other
1931 its replacement, at the start of a routine. */
1934 ix86_initial_elimination_offset (from, to)
1938 struct ix86_frame frame;
1939 ix86_compute_frame_layout (&frame);
/* All offsets below come straight from the computed frame layout. */
1941 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
1942 return frame.hard_frame_pointer_offset;
1943 else if (from == FRAME_POINTER_REGNUM
1944 && to == HARD_FRAME_POINTER_REGNUM)
1945 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
1948 if (to != STACK_POINTER_REGNUM)
1950 else if (from == ARG_POINTER_REGNUM)
1951 return frame.stack_pointer_offset;
1952 else if (from != FRAME_POINTER_REGNUM)
1955 return frame.stack_pointer_offset - frame.frame_pointer_offset;
1959 /* Fill structure ix86_frame about frame of currently computed function. */
1962 ix86_compute_frame_layout (frame)
1963 struct ix86_frame *frame;
1965 HOST_WIDE_INT total_size;
1966 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
1968 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
1969 HOST_WIDE_INT size = get_frame_size ();
1971 frame->nregs = ix86_nsaved_regs ();
1974 /* Skip return value and save base pointer. */
1975 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
1977 frame->hard_frame_pointer_offset = offset;
1979 /* Do some sanity checking of stack_alignment_needed and
1980 preferred_alignment, since i386 port is the only using those features
1981 that may break easily. */
/* NOTE(review): the abort() calls paired with these checks are elided. */
1983 if (size && !stack_alignment_needed)
1985 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
1987 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
1989 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
1992 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
1993 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
1995 /* Register save area */
1996 offset += frame->nregs * UNITS_PER_WORD;
1998 /* Align start of frame for local function. */
1999 frame->padding1 = ((offset + stack_alignment_needed - 1)
2000 & -stack_alignment_needed) - offset;
2002 offset += frame->padding1;
2004 /* Frame pointer points here. */
2005 frame->frame_pointer_offset = offset;
2009 /* Add outgoing arguments area. */
2010 if (ACCUMULATE_OUTGOING_ARGS)
2012 offset += current_function_outgoing_args_size;
2013 frame->outgoing_arguments_size = current_function_outgoing_args_size;
2016 frame->outgoing_arguments_size = 0;
2018 /* Align stack boundary. */
2019 frame->padding2 = ((offset + preferred_alignment - 1)
2020 & -preferred_alignment) - offset;
2022 offset += frame->padding2;
2024 /* We've reached end of stack frame. */
2025 frame->stack_pointer_offset = offset;
2027 /* Size prologue needs to allocate. */
2028 frame->to_allocate =
2029 (size + frame->padding1 + frame->padding2
2030 + frame->outgoing_arguments_size);
/* Debug dump of the computed layout (the guard condition is elided). */
2033 fprintf (stderr, "nregs: %i\n", frame->nregs);
2034 fprintf (stderr, "size: %i\n", size);
2035 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
2036 fprintf (stderr, "padding1: %i\n", frame->padding1);
2037 fprintf (stderr, "padding2: %i\n", frame->padding2);
2038 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
2039 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
2040 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
2041 frame->hard_frame_pointer_offset);
2042 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
2046 /* Emit code to save registers in the prologue. */
2049 ix86_emit_save_regs ()
/* Push each to-be-saved register, highest regno first, and mark each
   push frame-related for DWARF unwind info. */
2054 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
2055 if (ix86_save_reg (regno))
2057 insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
2058 RTX_FRAME_RELATED_P (insn) = 1;
2062 /* Expand the prologue into a bunch of separate insns. */
2065 ix86_expand_prologue ()
2068 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
2069 || current_function_uses_const_pool);
2070 struct ix86_frame frame;
2072 ix86_compute_frame_layout (&frame);
2074 /* Note: AT&T enter does NOT have reversed args. Enter is probably
2075 slower on all targets. Also sdb doesn't like it. */
/* Standard push %ebp / mov %esp,%ebp frame setup. */
2077 if (frame_pointer_needed)
2079 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
2080 RTX_FRAME_RELATED_P (insn) = 1;
2082 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2083 RTX_FRAME_RELATED_P (insn) = 1;
2086 ix86_emit_save_regs ();
2088 if (frame.to_allocate == 0)
2090 else if (! TARGET_STACK_PROBE || frame.to_allocate < CHECK_STACK_LIMIT)
2092 if (frame_pointer_needed)
2093 insn = emit_insn (gen_pro_epilogue_adjust_stack
2094 (stack_pointer_rtx, stack_pointer_rtx,
2095 GEN_INT (-frame.to_allocate), hard_frame_pointer_rtx));
2097 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
2098 GEN_INT (-frame.to_allocate)));
2099 RTX_FRAME_RELATED_P (insn) = 1;
2103 /* ??? Is this only valid for Win32? */
/* Large allocation with stack probing: call _alloca with the size in
   register 0 (%eax) and record that use on the call. */
2107 arg0 = gen_rtx_REG (SImode, 0);
2108 emit_move_insn (arg0, GEN_INT (frame.to_allocate));
2110 sym = gen_rtx_MEM (FUNCTION_MODE,
2111 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
2112 insn = emit_call_insn (gen_call (sym, const0_rtx));
2114 CALL_INSN_FUNCTION_USAGE (insn)
2115 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
2116 CALL_INSN_FUNCTION_USAGE (insn));
2119 #ifdef SUBTARGET_PROLOGUE
2124 load_pic_register ();
2126 /* If we are profiling, make sure no instructions are scheduled before
2127 the call to mcount. However, if -fpic, the above call will have
2129 if ((profile_flag || profile_block_flag) && ! pic_reg_used)
2130 emit_insn (gen_blockage ());
2133 /* Emit code to add TSIZE to esp value. Use POP instruction when
/* NOTE(review): the rest of the comment, the header, and the GEN_INT
   operands of the two adjust insns are elided from this extract. */
2137 ix86_emit_epilogue_esp_adjustment (tsize)
2140 /* If a frame pointer is present, we must be sure to tie the sp
2141 to the fp so that we don't mis-schedule. */
2142 if (frame_pointer_needed)
2143 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2146 hard_frame_pointer_rtx));
2148 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
2152 /* Emit code to restore saved registers using MOV insns. First register
2153 is restored from POINTER + OFFSET. */
2155 ix86_emit_restore_regs_using_mov (pointer, offset)
/* Restore in ascending regno order, one word-sized load per register. */
2161 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2162 if (ix86_save_reg (regno))
2164 emit_move_insn (gen_rtx_REG (Pmode, regno),
2165 adj_offsettable_operand (gen_rtx_MEM (Pmode,
2168 offset += UNITS_PER_WORD;
2172 /* Restore function stack, frame, and registers. */
2175 ix86_expand_epilogue (emit_return)
2179 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
2180 struct ix86_frame frame;
2181 HOST_WIDE_INT offset;
2183 ix86_compute_frame_layout (&frame);
2185 /* Calculate start of saved registers relative to ebp. */
2186 offset = -frame.nregs * UNITS_PER_WORD;
2188 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
2189 if (profile_block_flag == 2)
2191 FUNCTION_BLOCK_PROFILER_EXIT;
2195 /* If we're only restoring one register and sp is not valid then
2196 using a move instruction to restore the register since it's
2197 less work than reloading sp and popping the register.
2199 The default code result in stack adjustment using add/lea instruction,
2200 while this code results in LEAVE instruction (or discrete equivalent),
2201 so it is profitable in some other cases as well. Especially when there
2202 are no registers to restore. We also use this code when TARGET_USE_LEAVE
2203 and there is exactly one register to pop. This heuristic may need some
2204 tuning in future. */
2205 if ((!sp_valid && frame.nregs <= 1)
2206 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
2207 || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
2208 && frame.nregs == 1))
2210 /* Restore registers. We can use ebp or esp to address the memory
2211 locations. If both are available, default to ebp, since offsets
2212 are known to be small. Only exception is esp pointing directly to the
2213 end of block of saved registers, where we may simplify addressing
2216 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
2217 ix86_emit_restore_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
2219 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, offset);
2221 if (!frame_pointer_needed)
2222 ix86_emit_epilogue_esp_adjustment (frame.to_allocate
2223 + frame.nregs * UNITS_PER_WORD);
2224 /* If not an i386, mov & pop is faster than "leave". */
2225 else if (TARGET_USE_LEAVE || optimize_size)
2226 emit_insn (gen_leave ());
2229 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2230 hard_frame_pointer_rtx,
2232 hard_frame_pointer_rtx));
2233 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
2238 /* First step is to deallocate the stack frame so that we can
2239 pop the registers. */
2242 if (!frame_pointer_needed)
2244 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2245 hard_frame_pointer_rtx,
2247 hard_frame_pointer_rtx));
2249 else if (frame.to_allocate)
2250 ix86_emit_epilogue_esp_adjustment (frame.to_allocate);
/* Pop saved registers in ascending regno order (reverse of the pushes). */
2252 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2253 if (ix86_save_reg (regno))
2254 emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
2255 if (frame_pointer_needed)
2256 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
2259 /* Sibcall epilogues don't want a return instruction. */
2263 if (current_function_pops_args && current_function_args_size)
2265 rtx popc = GEN_INT (current_function_pops_args);
2267 /* i386 can only pop 64K bytes. If asked to pop more, pop
2268 return address, do explicit add, and jump indirectly to the
2271 if (current_function_pops_args >= 65536)
2273 rtx ecx = gen_rtx_REG (SImode, 2);
2275 emit_insn (gen_popsi1 (ecx));
2276 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
2277 emit_jump_insn (gen_return_indirect_internal (ecx));
2280 emit_jump_insn (gen_return_pop_internal (popc));
2283 emit_jump_insn (gen_return_internal ());
2286 /* Extract the parts of an RTL expression that is a valid memory address
2287 for an instruction. Return false if the structure of the address is
2291 ix86_decompose_address (addr, out)
2293 struct ix86_address *out;
2295 rtx base = NULL_RTX;
2296 rtx index = NULL_RTX;
2297 rtx disp = NULL_RTX;
2298 HOST_WIDE_INT scale = 1;
2299 rtx scale_rtx = NULL_RTX;
/* Classify ADDR by its top-level rtx code: bare reg, PLUS tree, MULT,
   ASHIFT (lea), or plain displacement. */
2301 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
2303 else if (GET_CODE (addr) == PLUS)
2305 rtx op0 = XEXP (addr, 0);
2306 rtx op1 = XEXP (addr, 1);
2307 enum rtx_code code0 = GET_CODE (op0);
2308 enum rtx_code code1 = GET_CODE (op1);
2310 if (code0 == REG || code0 == SUBREG)
2312 if (code1 == REG || code1 == SUBREG)
2313 index = op0, base = op1; /* index + base */
2315 base = op0, disp = op1; /* base + displacement */
2317 else if (code0 == MULT)
2319 index = XEXP (op0, 0);
2320 scale_rtx = XEXP (op0, 1);
2321 if (code1 == REG || code1 == SUBREG)
2322 base = op1; /* index*scale + base */
2324 disp = op1; /* index*scale + disp */
2326 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
2328 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
2329 scale_rtx = XEXP (XEXP (op0, 0), 1);
2330 base = XEXP (op0, 1);
2333 else if (code0 == PLUS)
2335 index = XEXP (op0, 0); /* index + base + disp */
2336 base = XEXP (op0, 1);
2342 else if (GET_CODE (addr) == MULT)
2344 index = XEXP (addr, 0); /* index*scale */
2345 scale_rtx = XEXP (addr, 1);
2347 else if (GET_CODE (addr) == ASHIFT)
2351 /* We're called for lea too, which implements ashift on occasion. */
2352 index = XEXP (addr, 0);
2353 tmp = XEXP (addr, 1);
2354 if (GET_CODE (tmp) != CONST_INT)
2356 scale = INTVAL (tmp);
/* Shift counts 0..3 correspond to scales 1/2/4/8; anything else fails. */
2357 if ((unsigned HOST_WIDE_INT) scale > 3)
2362 disp = addr; /* displacement */
2364 /* Extract the integral value of scale. */
2367 if (GET_CODE (scale_rtx) != CONST_INT)
2369 scale = INTVAL (scale_rtx);
2372 /* Allow arg pointer and stack pointer as index if there is not scaling */
2373 if (base && index && scale == 1
2374 && (index == arg_pointer_rtx || index == frame_pointer_rtx
2375 || index == stack_pointer_rtx))
2382 /* Special case: %ebp cannot be encoded as a base without a displacement. */
2383 if ((base == hard_frame_pointer_rtx
2384 || base == frame_pointer_rtx
2385 || base == arg_pointer_rtx) && !disp)
2388 /* Special case: on K6, [%esi] makes the instruction vector decoded.
2389 Avoid this by transforming to [%esi+0]. */
2390 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
2391 && base && !index && !disp
2393 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
2396 /* Special case: encode reg+reg instead of reg*2. */
2397 if (!base && index && scale && scale == 2)
2398 base = index, scale = 1;
2400 /* Special case: scaling cannot be encoded without base or displacement. */
2401 if (!base && !disp && index && scale != 1)
2412 /* Return cost of the memory address x.
2413 For i386, it is better to use a complex address than let gcc copy
2414 the address into a reg and make a new pseudo. But not if the address
2415 requires two regs - that would mean more pseudos with longer
/* Return a cost estimate for memory address X (used by address
   selection).  More complex addresses -- nonzero displacement, both
   base and index present -- are preferred; addresses needing extra
   hard registers cost more, and K6-hostile ModR/M forms are penalized.
   NOTE(review): elided view -- the concrete cost constants returned
   by each branch are not visible here.  */
2418 ix86_address_cost (x)
2421 struct ix86_address parts;
2424 if (!ix86_decompose_address (x, &parts))
2427 /* More complex memory references are better. */
2428 if (parts.disp && parts.disp != const0_rtx)
2431 /* Attempt to minimize number of registers in the address. */
2433 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
2435 && (!REG_P (parts.index)
2436 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
2440 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
2442 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
2443 && parts.base != parts.index)
2446 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
2447 since its predecode logic can't detect the length of instructions
2448 and it degenerates to vector decoded. Increase cost of such
2449 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
2450 to split such addresses or even refuse such addresses at all.
2452 Following addressing modes are affected:
2457 The first and last case may be avoidable by explicitly coding the zero in
2458 memory address, but I don't have an AMD-K6 machine handy to check this
2462 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
2463 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
2464 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
2470 /* If X is a machine specific address (i.e. a symbol or label being
2471 referenced as a displacement from the GOT implemented using an
2472 UNSPEC), then return the base term. Otherwise return X. */
/* If X is a PIC address of the form (plus pic_reg (const ...)) whose
   constant is an UNSPEC 7 (@GOTOFF), possibly with an added integer
   offset, return the underlying SYMBOL_REF/LABEL_REF base term;
   otherwise return X.  Used by alias analysis via FIND_BASE_TERM.
   NOTE(review): the early "return x" lines are elided in this view.  */
2475 ix86_find_base_term (x)
2480 if (GET_CODE (x) != PLUS
2481 || XEXP (x, 0) != pic_offset_table_rtx
2482 || GET_CODE (XEXP (x, 1)) != CONST)
2485 term = XEXP (XEXP (x, 1), 0);
/* Strip an added constant offset: (plus term const_int).  */
2487 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
2488 term = XEXP (term, 0);
/* Only a single-element UNSPEC 7 (@GOTOFF) wraps an extractable term.  */
2490 if (GET_CODE (term) != UNSPEC
2491 || XVECLEN (term, 0) != 1
2492 || XINT (term, 1) != 7)
2495 term = XVECEXP (term, 0, 0);
2497 if (GET_CODE (term) != SYMBOL_REF
2498 && GET_CODE (term) != LABEL_REF)
2504 /* Determine if a given CONST RTX is a valid memory displacement
/* Return nonzero if CONST rtx DISP is a valid PIC memory displacement:
   a @GOT (UNSPEC 6) or @GOTOFF (UNSPEC 7) reference wrapping a symbol
   or label, optionally with a CONST_INT offset added.
   NOTE(review): the actual return statements are elided in this view.  */
2508 legitimate_pic_address_disp_p (disp)
2511 if (GET_CODE (disp) != CONST)
2513 disp = XEXP (disp, 0);
/* Peel off an added integer offset, if any.  */
2515 if (GET_CODE (disp) == PLUS)
2517 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
2519 disp = XEXP (disp, 0);
2522 if (GET_CODE (disp) != UNSPEC
2523 || XVECLEN (disp, 0) != 1)
2526 /* Must be @GOT or @GOTOFF. */
2527 if (XINT (disp, 1) != 6
2528 && XINT (disp, 1) != 7)
2531 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2532 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
2538 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
2539 memory address for an instruction. The MODE argument is the machine mode
2540 for the MEM expression that wants to use this address.
2542 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
2543 convert common non-canonical forms to canonical form so that they will
/* GO_IF_LEGITIMATE_ADDRESS worker: return nonzero if ADDR is a valid
   memory address for MODE.  STRICT selects strict vs. non-strict
   register checking (after vs. before reload).  On rejection, REASON
   and REASON_RTX describe the offending component for the
   TARGET_DEBUG_ADDR dump at the end.
   NOTE(review): elided view -- the success/failure return statements,
   labels, and several intermediate lines are not visible.  */
2547 legitimate_address_p (mode, addr, strict)
2548 enum machine_mode mode;
2552 struct ix86_address parts;
2553 rtx base, index, disp;
2554 HOST_WIDE_INT scale;
2555 const char *reason = NULL;
2556 rtx reason_rtx = NULL_RTX;
2558 if (TARGET_DEBUG_ADDR)
2561 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
2562 GET_MODE_NAME (mode), strict);
2566 if (! ix86_decompose_address (addr, &parts))
2568 reason = "decomposition failed";
2573 index = parts.index;
2575 scale = parts.scale;
2577 /* Validate base register.
2579 Don't allow SUBREG's here, it can lead to spill failures when the base
2580 is one word out of a two word structure, which is represented internally
2587 if (GET_CODE (base) != REG)
2589 reason = "base is not a register";
2593 if (GET_MODE (base) != Pmode)
2595 reason = "base is not in Pmode";
2599 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
2600 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
2602 reason = "base is not valid";
2607 /* Validate index register.
2609 Don't allow SUBREG's here, it can lead to spill failures when the index
2610 is one word out of a two word structure, which is represented internally
2617 if (GET_CODE (index) != REG)
2619 reason = "index is not a register";
2623 if (GET_MODE (index) != Pmode)
2625 reason = "index is not in Pmode";
2629 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
2630 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
2632 reason = "index is not valid";
2637 /* Validate scale factor. */
2640 reason_rtx = GEN_INT (scale);
2643 reason = "scale without index";
/* SIB encoding only supports scale factors of 1, 2, 4 and 8.  */
2647 if (scale != 2 && scale != 4 && scale != 8)
2649 reason = "scale is not a valid multiplier";
2654 /* Validate displacement. */
2659 if (!CONSTANT_ADDRESS_P (disp))
2661 reason = "displacement is not constant";
2665 if (GET_CODE (disp) == CONST_DOUBLE)
2667 reason = "displacement is a const_double";
2671 if (flag_pic && SYMBOLIC_CONST (disp))
2673 if (! legitimate_pic_address_disp_p (disp))
2675 reason = "displacement is an invalid pic construct";
2679 /* This code used to verify that a symbolic pic displacement
2680 includes the pic_offset_table_rtx register.
2682 While this is good idea, unfortunately these constructs may
2683 be created by "adds using lea" optimization for incorrect
2692 This code is nonsensical, but results in addressing
2693 GOT table with pic_offset_table_rtx base. We can't
2694 just refuse it easily, since it gets matched by
2695 "addsi3" pattern, that later gets split to lea in the
2696 case output register differs from input. While this
2697 can be handled by separate addsi pattern for this case
2698 that never results in lea, this seems to be easier and
2699 correct fix for crash to disable this test. */
2701 else if (HALF_PIC_P ())
2703 if (! HALF_PIC_ADDRESS_P (disp)
2704 || (base != NULL_RTX || index != NULL_RTX))
2706 reason = "displacement is an invalid half-pic reference";
2712 /* Everything looks valid. */
2713 if (TARGET_DEBUG_ADDR)
2714 fprintf (stderr, "Success.\n");
/* Rejection path: report why the address was refused.  */
2718 if (TARGET_DEBUG_ADDR)
2720 fprintf (stderr, "Error: %s\n", reason);
2721 debug_rtx (reason_rtx);
2726 /* Return an unique alias set for the GOT. */
/* Return a unique alias set for GOT references, cached in a
   function-local static so it is created only once.
   NOTE(review): the guard comparing SET against its -1 sentinel and
   the return statement are elided in this view.  */
2728 static HOST_WIDE_INT
2729 ix86_GOT_alias_set ()
2731 static HOST_WIDE_INT set = -1;
2733 set = new_alias_set ();
2737 /* Return a legitimate reference for ORIG (an address) using the
2738 register REG. If REG is 0, a new pseudo is generated.
2740 There are two types of references that must be handled:
2742 1. Global data references must load the address from the GOT, via
2743 the PIC reg. An insn is emitted to do this load, and the reg is
2746 2. Static data references, constant pool addresses, and code labels
2747 compute the address as an offset from the GOT, whose base is in
2748 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
2749 differentiate them from global data objects. The returned
2750 address is the PIC reg + an unspec constant.
2752 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
2753 reg also appears in the address. */
/* Return a legitimate PIC reference for address ORIG, using REG as the
   scratch/result register (a fresh pseudo is created when REG is 0).
   Local symbols, labels and constant-pool entries become
   pic_reg + @GOTOFF constants; other (global) symbols become loads
   from the GOT (a pic_reg + @GOT memory reference).  CONST/PLUS forms
   are handled recursively.
   NOTE(review): elided view -- several returns, declarations (addr,
   new, base) and braces are not visible here.  */
2756 legitimize_pic_address (orig, reg)
2764 if (GET_CODE (addr) == LABEL_REF
2765 || (GET_CODE (addr) == SYMBOL_REF
2766 && (CONSTANT_POOL_ADDRESS_P (addr)
2767 || SYMBOL_REF_FLAG (addr))))
2769 /* This symbol may be referenced via a displacement from the PIC
2770 base address (@GOTOFF). */
2772 current_function_uses_pic_offset_table = 1;
2773 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
2774 new = gen_rtx_CONST (Pmode, new);
2775 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2779 emit_move_insn (reg, new);
2783 else if (GET_CODE (addr) == SYMBOL_REF)
2785 /* This symbol must be referenced via a load from the
2786 Global Offset Table (@GOT). */
2788 current_function_uses_pic_offset_table = 1;
2789 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
2790 new = gen_rtx_CONST (Pmode, new);
2791 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2792 new = gen_rtx_MEM (Pmode, new);
/* GOT slots don't change after loading; mark the MEM unchanging and
   give it the dedicated GOT alias set.  */
2793 RTX_UNCHANGING_P (new) = 1;
2794 MEM_ALIAS_SET (new) = ix86_GOT_alias_set ();
2797 reg = gen_reg_rtx (Pmode);
2798 emit_move_insn (reg, new);
2803 if (GET_CODE (addr) == CONST)
2805 addr = XEXP (addr, 0);
2806 if (GET_CODE (addr) == UNSPEC)
2808 /* Check that the unspec is one of the ones we generate? */
2810 else if (GET_CODE (addr) != PLUS)
2813 if (GET_CODE (addr) == PLUS)
2815 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
2817 /* Check first to see if this is a constant offset from a @GOTOFF
2818 symbol reference. */
2819 if ((GET_CODE (op0) == LABEL_REF
2820 || (GET_CODE (op0) == SYMBOL_REF
2821 && (CONSTANT_POOL_ADDRESS_P (op0)
2822 || SYMBOL_REF_FLAG (op0))))
2823 && GET_CODE (op1) == CONST_INT)
2825 current_function_uses_pic_offset_table = 1;
2826 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
2827 new = gen_rtx_PLUS (Pmode, new, op1);
2828 new = gen_rtx_CONST (Pmode, new);
2829 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2833 emit_move_insn (reg, new);
/* Otherwise legitimize both halves of the PLUS recursively and
   recombine them, folding constant terms where possible.  */
2839 base = legitimize_pic_address (XEXP (addr, 0), reg);
2840 new = legitimize_pic_address (XEXP (addr, 1),
2841 base == reg ? NULL_RTX : reg);
2843 if (GET_CODE (new) == CONST_INT)
2844 new = plus_constant (base, INTVAL (new));
2847 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
2849 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
2850 new = XEXP (new, 1);
2852 new = gen_rtx_PLUS (Pmode, base, new);
2860 /* Try machine-dependent ways of modifying an illegitimate address
2861 to be legitimate. If we find one, return the new, valid address.
2862 This macro is used in only one place: `memory_address' in explow.c.
2864 OLDX is the address as it was before break_out_memory_refs was called.
2865 In some cases it is useful to look at this to decide what needs to be done.
2867 MODE and WIN are passed so that this macro can use
2868 GO_IF_LEGITIMATE_ADDRESS.
2870 It is always safe for this macro to do nothing. It exists to recognize
2871 opportunities to optimize the output.
2873 For the 80386, we handle X+REG by loading X into a register R and
2874 using R+REG. R will go in a general reg and indexing will be used.
2875 However, if REG is a broken-out memory address or multiplication,
2876 nothing needs to be done because REG can certainly go in a general reg.
2878 When -fpic is used, special handling is needed for symbolic references.
2879 See comments by legitimize_pic_address in i386.c for details. */
/* LEGITIMIZE_ADDRESS worker: try machine-dependent transforms to turn
   X into a legitimate address for MODE.  Canonicalizes small shifts
   into multiplies, reassociates nested PLUS trees, delegates symbolic
   constants to legitimize_pic_address, and as a last resort forces
   subexpressions into registers.  OLDX is unused.
   NOTE(review): elided view -- the declarations of `log'/`changed',
   several returns and closing braces are not visible here.  */
2882 legitimize_address (x, oldx, mode)
2884 register rtx oldx ATTRIBUTE_UNUSED;
2885 enum machine_mode mode;
2890 if (TARGET_DEBUG_ADDR)
2892 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
2893 GET_MODE_NAME (mode));
2897 if (flag_pic && SYMBOLIC_CONST (x))
2898 return legitimize_pic_address (x, 0);
2900 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
2901 if (GET_CODE (x) == ASHIFT
2902 && GET_CODE (XEXP (x, 1)) == CONST_INT
2903 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
2906 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
2907 GEN_INT (1 << log));
2910 if (GET_CODE (x) == PLUS)
2912 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
2914 if (GET_CODE (XEXP (x, 0)) == ASHIFT
2915 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2916 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
2919 XEXP (x, 0) = gen_rtx_MULT (Pmode,
2920 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
2921 GEN_INT (1 << log));
2924 if (GET_CODE (XEXP (x, 1)) == ASHIFT
2925 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
2926 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
2929 XEXP (x, 1) = gen_rtx_MULT (Pmode,
2930 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
2931 GEN_INT (1 << log));
2934 /* Put multiply first if it isn't already. */
2935 if (GET_CODE (XEXP (x, 1)) == MULT)
2937 rtx tmp = XEXP (x, 0);
2938 XEXP (x, 0) = XEXP (x, 1);
2943 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
2944 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
2945 created by virtual register instantiation, register elimination, and
2946 similar optimizations. */
2947 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
2950 x = gen_rtx_PLUS (Pmode,
2951 gen_rtx_PLUS (Pmode, XEXP (x, 0),
2952 XEXP (XEXP (x, 1), 0)),
2953 XEXP (XEXP (x, 1), 1));
2957 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
2958 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
2959 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
2960 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
2961 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
2962 && CONSTANT_P (XEXP (x, 1)))
2965 rtx other = NULL_RTX;
/* Exactly one of the two constants must be a CONST_INT; fold it into
   `other' via plus_constant below.  */
2967 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2969 constant = XEXP (x, 1);
2970 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
2972 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
2974 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
2975 other = XEXP (x, 1);
2983 x = gen_rtx_PLUS (Pmode,
2984 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
2985 XEXP (XEXP (XEXP (x, 0), 1), 0)),
2986 plus_constant (other, INTVAL (constant)));
2990 if (changed && legitimate_address_p (mode, x, FALSE))
/* Force remaining MULT subexpressions into registers.  */
2993 if (GET_CODE (XEXP (x, 0)) == MULT)
2996 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
2999 if (GET_CODE (XEXP (x, 1)) == MULT)
3002 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
3006 && GET_CODE (XEXP (x, 1)) == REG
3007 && GET_CODE (XEXP (x, 0)) == REG)
3010 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
3013 x = legitimize_pic_address (x, 0);
3016 if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: force one side into a fresh register.  */
3019 if (GET_CODE (XEXP (x, 0)) == REG)
3021 register rtx temp = gen_reg_rtx (Pmode);
3022 register rtx val = force_operand (XEXP (x, 1), temp);
3024 emit_move_insn (temp, val);
3030 else if (GET_CODE (XEXP (x, 1)) == REG)
3032 register rtx temp = gen_reg_rtx (Pmode);
3033 register rtx val = force_operand (XEXP (x, 0), temp);
3035 emit_move_insn (temp, val);
3045 /* Print an integer constant expression in assembler syntax. Addition
3046 and subtraction are the only arithmetic that may appear in these
3047 expressions. FILE is the stdio stream to write to, X is the rtx, and
3048 CODE is the operand print code from the output string. */
/* Print the PIC-aware constant expression X to FILE in assembler
   syntax; CODE is the operand print code ('P' appends @PLT to
   non-flagged symbols).  Handles symbols, labels, CONST_INT,
   CONST_DOUBLE, CONST, PLUS/MINUS, and the GOT UNSPECs
   (6 = @GOT, 7 = @GOTOFF, 8 = @PLT -- per the case bodies below).
   NOTE(review): elided view -- the switch case labels and break
   statements are not visible here.  */
3051 output_pic_addr_const (file, x, code)
3058 switch (GET_CODE (x))
3068 assemble_name (file, XSTR (x, 0));
3069 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
3070 fputs ("@PLT", file);
3077 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
3078 assemble_name (asm_out_file, buf);
3082 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3086 /* This used to output parentheses around the expression,
3087 but that does not work on the 386 (either ATT or BSD assembler). */
3088 output_pic_addr_const (file, XEXP (x, 0), code);
3092 if (GET_MODE (x) == VOIDmode)
3094 /* We can use %d if the number is <32 bits and positive. */
3095 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
3096 fprintf (file, "0x%lx%08lx",
3097 (unsigned long) CONST_DOUBLE_HIGH (x),
3098 (unsigned long) CONST_DOUBLE_LOW (x));
3100 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
3103 /* We can't handle floating point constants;
3104 PRINT_OPERAND must handle them. */
3105 output_operand_lossage ("floating constant misused");
3109 /* Some assemblers need integer constants to appear first. */
3110 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
3112 output_pic_addr_const (file, XEXP (x, 0), code);
3114 output_pic_addr_const (file, XEXP (x, 1), code);
3116 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
3118 output_pic_addr_const (file, XEXP (x, 1), code);
3120 output_pic_addr_const (file, XEXP (x, 0), code);
/* MINUS: brackets differ between AT&T (dialect 0) and Intel.  */
3127 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
3128 output_pic_addr_const (file, XEXP (x, 0), code);
3130 output_pic_addr_const (file, XEXP (x, 1), code);
3131 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
3135 if (XVECLEN (x, 0) != 1)
3137 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
3138 switch (XINT (x, 1))
3141 fputs ("@GOT", file);
3144 fputs ("@GOTOFF", file);
3147 fputs ("@PLT", file);
3150 output_operand_lossage ("invalid UNSPEC as operand");
3156 output_operand_lossage ("invalid expression as operand");
3160 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
3161 We need to handle our special PIC relocations. */
/* Emit X as a dwarf address constant (called from dwarfout.c via
   ASM_OUTPUT_DWARF_ADDR_CONST), routing through output_pic_addr_const
   so our special PIC relocations are handled.
   NOTE(review): the flag_pic test selecting between the two output
   calls is elided in this view.  */
3164 i386_dwarf_output_addr_const (file, x)
3168 fprintf (file, "%s", INT_ASM_OP);
3170 output_pic_addr_const (file, x, '\0');
3172 output_addr_const (file, x);
3176 /* In the name of slightly smaller debug output, and to cater to
3177 general assembler losage, recognize PIC+GOTOFF and turn it back
3178 into a direct symbol reference. */
/* For debug output: recognize pic_reg + @GOT/@GOTOFF (UNSPEC 6 or 7)
   constructs in ORIG_X and fold them back to the plain symbol,
   optionally plus a constant offset -- smaller debug output and works
   around assembler lossage.
   NOTE(review): the declaration of `x' and the fallback return of
   ORIG_X are elided in this view.  */
3181 i386_simplify_dwarf_addr (orig_x)
3186 if (GET_CODE (x) != PLUS
3187 || GET_CODE (XEXP (x, 0)) != REG
3188 || GET_CODE (XEXP (x, 1)) != CONST)
3191 x = XEXP (XEXP (x, 1), 0);
/* Plain @GOT/@GOTOFF symbol: return it directly.  */
3192 if (GET_CODE (x) == UNSPEC
3193 && (XINT (x, 1) == 6
3194 || XINT (x, 1) == 7))
3195 return XVECEXP (x, 0, 0);
/* Symbol plus constant offset: rebuild as symbol + offset.  */
3197 if (GET_CODE (x) == PLUS
3198 && GET_CODE (XEXP (x, 0)) == UNSPEC
3199 && GET_CODE (XEXP (x, 1)) == CONST_INT
3200 && (XINT (XEXP (x, 0), 1) == 6
3201 || XINT (XEXP (x, 0), 1) == 7))
3202 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
/* Print to FILE the condition-code suffix for comparison CODE in mode
   MODE.  REVERSE inverts the condition first; FP selects the
   fcmov-style spellings ("nbe" vs. "a", etc.).  FP comparison modes
   are remapped to integer condition codes before the dispatch.
   NOTE(review): elided view -- the switch statement, most case labels
   and many suffix assignments are not visible.  */
3208 put_condition_code (code, mode, reverse, fp, file)
3210 enum machine_mode mode;
3216 if (mode == CCFPmode || mode == CCFPUmode)
3218 enum rtx_code second_code, bypass_code;
3219 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
/* A comparison needing a second or bypass jump can't be a suffix.  */
3220 if (bypass_code != NIL || second_code != NIL)
3222 code = ix86_fp_compare_code_to_integer (code);
3226 code = reverse_condition (code);
3237 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
3242 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
3243 Those same assemblers have the same but opposite losage on cmov. */
3246 suffix = fp ? "nbe" : "a";
3249 if (mode == CCNOmode || mode == CCGOCmode)
3251 else if (mode == CCmode || mode == CCGCmode)
3262 if (mode == CCNOmode || mode == CCGOCmode)
3264 else if (mode == CCmode || mode == CCGCmode)
3273 suffix = fp ? "nb" : "ae";
3276 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
3286 suffix = fp ? "u" : "p";
3289 suffix = fp ? "nu" : "np";
3294 fputs (suffix, file);
/* Print register X to FILE.  CODE selects an explicit size/name class
   ('b' byte, 'k' SImode, 'y' "st(0)" form, 'h' high byte, 'm' MMX);
   otherwise the size is derived from the register's machine mode.
   Names come from the qi/qi-high/hi register name tables.
   NOTE(review): elided view -- aborts, the size-dispatch switch labels
   and several branch bodies are not visible here.  */
3298 print_reg (x, code, file)
/* Internal-only registers must never reach the assembler output.  */
3303 if (REGNO (x) == ARG_POINTER_REGNUM
3304 || REGNO (x) == FRAME_POINTER_REGNUM
3305 || REGNO (x) == FLAGS_REG
3306 || REGNO (x) == FPSR_REG)
3309 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
3314 else if (code == 'b')
3316 else if (code == 'k')
3318 else if (code == 'y')
3320 else if (code == 'h')
3322 else if (code == 'm' || MMX_REG_P (x))
/* Default: derive the operand size from the machine mode.  */
3325 code = GET_MODE_SIZE (GET_MODE (x));
3330 fputs (hi_reg_name[REGNO (x)], file);
3333 if (STACK_TOP_P (x))
3335 fputs ("st(0)", file);
3342 if (! ANY_FP_REG_P (x))
3347 fputs (hi_reg_name[REGNO (x)], file);
3350 fputs (qi_reg_name[REGNO (x)], file);
3353 fputs (qi_high_reg_name[REGNO (x)], file);
3361 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
3362 C -- print opcode suffix for set/cmov insn.
3363 c -- like C, but print reversed condition
3364 R -- print the prefix for register names.
3365 z -- print the opcode suffix for the size of the current operand.
3366 * -- print a star (in certain assembler syntax)
3367 A -- print an absolute memory reference.
3368 w -- print the operand as if it's a "word" (HImode) even if it isn't.
3369 s -- print a shift double count, followed by the assemblers argument
3371 b -- print the QImode name of the register for the indicated operand.
3372 %b0 would print %al if operands[0] is reg 0.
3373 w -- likewise, print the HImode name of the register.
3374 k -- likewise, print the SImode name of the register.
3375 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
3376 y -- print "st(0)" instead of "st" as a register.
3377 m -- print "st(n)" as an mmx register.
3378 D -- print condition for SSE cmp instruction.
/* PRINT_OPERAND worker: output operand X to FILE according to print
   code CODE (documented in the operand-code table in the comment
   preceding this function).  Dispatches on the operand's rtx class:
   registers, memory (with Intel-dialect size prefixes), FP constants
   and immediates.
   NOTE(review): heavily elided view -- many case labels, breaks,
   returns and local declarations (str, size, l, r, dstr) are not
   visible here.  */
3382 print_operand (file, x, code)
3392 if (ASSEMBLER_DIALECT == 0)
3397 if (ASSEMBLER_DIALECT == 0)
3399 else if (ASSEMBLER_DIALECT == 1)
3401 /* Intel syntax. For absolute addresses, registers should not
3402 be surrounded by braces. */
3403 if (GET_CODE (x) != REG)
3406 PRINT_OPERAND (file, x, 0);
3412 PRINT_OPERAND (file, x, 0);
3417 if (ASSEMBLER_DIALECT == 0)
3422 if (ASSEMBLER_DIALECT == 0)
3427 if (ASSEMBLER_DIALECT == 0)
3432 if (ASSEMBLER_DIALECT == 0)
3437 if (ASSEMBLER_DIALECT == 0)
3442 if (ASSEMBLER_DIALECT == 0)
3447 /* 387 opcodes don't get size suffixes if the operands are
3450 if (STACK_REG_P (x))
3453 /* this is the size of op from size of operand */
3454 switch (GET_MODE_SIZE (GET_MODE (x)))
3457 #ifdef HAVE_GAS_FILDS_FISTS
3463 if (GET_MODE (x) == SFmode)
3478 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
3480 #ifdef GAS_MNEMONICS
3506 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
3508 PRINT_OPERAND (file, x, 0);
3514 /* Little bit of braindamage here. The SSE compare instructions
3515 use completely different names for the comparisons than the
3516 fp conditional moves do. */
3517 switch (GET_CODE (x))
3532 fputs ("unord", file);
3536 fputs ("neq", file);
3540 fputs ("nlt", file);
3544 fputs ("nle", file);
3547 fputs ("ord", file);
/* 'C'/'F': set/cmov condition suffix; 'c'/'f': reversed condition.  */
3555 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
3558 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
3561 /* Like above, but reverse condition */
3563 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
3566 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
3572 sprintf (str, "invalid operand code `%c'", code);
3573 output_operand_lossage (str);
/* No (or handled) print code: dispatch on operand class.  */
3578 if (GET_CODE (x) == REG)
3580 PRINT_REG (x, code, file);
3583 else if (GET_CODE (x) == MEM)
3585 /* No `byte ptr' prefix for call instructions. */
3586 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
3589 switch (GET_MODE_SIZE (GET_MODE (x)))
3591 case 1: size = "BYTE"; break;
3592 case 2: size = "WORD"; break;
3593 case 4: size = "DWORD"; break;
3594 case 8: size = "QWORD"; break;
3595 case 12: size = "XWORD"; break;
3596 case 16: size = "XMMWORD"; break;
3601 /* Check for explicit size override (codes 'b', 'w' and 'k') */
3604 else if (code == 'w')
3606 else if (code == 'k')
3610 fputs (" PTR ", file);
3614 if (flag_pic && CONSTANT_ADDRESS_P (x))
3615 output_pic_addr_const (file, x, code);
3620 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
3625 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3626 REAL_VALUE_TO_TARGET_SINGLE (r, l);
3628 if (ASSEMBLER_DIALECT == 0)
3630 fprintf (file, "0x%lx", l);
3633 /* These float cases don't actually occur as immediate operands. */
3634 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
3639 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3640 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3641 fprintf (file, "%s", dstr);
3644 else if (GET_CODE (x) == CONST_DOUBLE
3645 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
3650 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3651 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3652 fprintf (file, "%s", dstr);
/* Immediates: emit the dialect-specific marker, then the constant.  */
3658 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
3660 if (ASSEMBLER_DIALECT == 0)
3663 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
3664 || GET_CODE (x) == LABEL_REF)
3666 if (ASSEMBLER_DIALECT == 0)
3669 fputs ("OFFSET FLAT:", file);
3672 if (GET_CODE (x) == CONST_INT)
3673 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3675 output_pic_addr_const (file, x, code);
3677 output_addr_const (file, x);
3681 /* Print a memory operand whose address is ADDR. */
/* Print memory address ADDR to FILE, after decomposing it into base,
   index, displacement and scale via ix86_decompose_address.  Handles
   the AT&T dialect (disp(base,index,scale)) and the Intel dialect
   ([base+index*scale+disp]); displacement-only addresses get special
   treatment.
   NOTE(review): elided view -- abort paths, some punctuation output
   and the dialect branch structure are partially missing.  */
3684 print_operand_address (file, addr)
3688 struct ix86_address parts;
3689 rtx base, index, disp;
3692 if (! ix86_decompose_address (addr, &parts))
3696 index = parts.index;
3698 scale = parts.scale;
3700 if (!base && !index)
3702 /* Displacement only requires special attention. */
3704 if (GET_CODE (disp) == CONST_INT)
3706 if (ASSEMBLER_DIALECT != 0)
3708 if (USER_LABEL_PREFIX[0] == 0)
3710 fputs ("ds:", file);
3712 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
3715 output_pic_addr_const (file, addr, 0);
3717 output_addr_const (file, addr);
/* AT&T dialect: disp(base,index,scale).  */
3721 if (ASSEMBLER_DIALECT == 0)
3726 output_pic_addr_const (file, disp, 0);
3727 else if (GET_CODE (disp) == LABEL_REF)
3728 output_asm_label (disp);
3730 output_addr_const (file, disp);
3735 PRINT_REG (base, 0, file);
3739 PRINT_REG (index, 0, file);
3741 fprintf (file, ",%d", scale);
/* Intel dialect: [base+index*scale+offset].  */
3747 rtx offset = NULL_RTX;
3751 /* Pull out the offset of a symbol; print any symbol itself. */
3752 if (GET_CODE (disp) == CONST
3753 && GET_CODE (XEXP (disp, 0)) == PLUS
3754 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
3756 offset = XEXP (XEXP (disp, 0), 1);
3757 disp = gen_rtx_CONST (VOIDmode,
3758 XEXP (XEXP (disp, 0), 0));
3762 output_pic_addr_const (file, disp, 0);
3763 else if (GET_CODE (disp) == LABEL_REF)
3764 output_asm_label (disp);
3765 else if (GET_CODE (disp) == CONST_INT)
3768 output_addr_const (file, disp);
3774 PRINT_REG (base, 0, file);
/* Sign of the offset decides the printed '+'/'-' separator
   (the separator lines themselves are elided here).  */
3777 if (INTVAL (offset) >= 0)
3779 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3783 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3790 PRINT_REG (index, 0, file);
3792 fprintf (file, "*%d", scale);
3799 /* Split one or more DImode RTL references into pairs of SImode
3800 references. The RTL can be REG, offsettable MEM, integer constant, or
3801 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
3802 split and "num" is its length. lo_half and hi_half are output arrays
3803 that parallel "operands". */
/* Split NUM DImode rtx's in OPERANDS into SImode low/high pairs,
   stored into the parallel output arrays LO_HALF and HI_HALF.
   Accepts constants, pseudos (before reload completes), hard
   registers (consecutive regno pair), and offsettable MEMs (address
   and address+4).
   NOTE(review): the loop header iterating NUM down through the
   operands is elided in this view.  */
3806 split_di (operands, num, lo_half, hi_half)
3809 rtx lo_half[], hi_half[];
3813 rtx op = operands[num];
3814 if (CONSTANT_P (op))
3815 split_double (op, &lo_half[num], &hi_half[num]);
3816 else if (! reload_completed)
3818 lo_half[num] = gen_lowpart (SImode, op);
3819 hi_half[num] = gen_highpart (SImode, op);
3821 else if (GET_CODE (op) == REG)
/* After reload: a DImode hard reg occupies two consecutive regs.  */
3823 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
3824 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
3826 else if (offsettable_memref_p (op))
3828 rtx lo_addr = XEXP (op, 0);
3829 rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
3830 lo_half[num] = change_address (op, SImode, lo_addr);
3831 hi_half[num] = change_address (op, SImode, hi_addr);
3838 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
3839 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
3840 is the expression of the binary operation. The output may either be
3841 emitted here, or returned to the caller, like all output_* functions.
3843 There is no guarantee that the operands are the same mode, as they
3844 might be within FLOAT or FLOAT_EXTEND expressions. */
3846 #ifndef SYSV386_COMPAT
3847 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
3848 wants to fix the assemblers because that causes incompatibility
3849 with gcc. No-one wants to fix gcc because that causes
3850 incompatibility with assemblers... You can use the option of
3851 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
3852 #define SYSV386_COMPAT 1
/* Return the assembler template for the 387 (or SSE scalar) binary
   operation in INSN: PLUS, MINUS, MULT or DIV, given by operands[3].
   Chooses between reg/mem forms, popping and non-popping variants,
   and the reversed fsub{r}/fdiv{r} spellings, honoring the
   SYSV386_COMPAT assembler quirk described below.
   NOTE(review): elided view -- the opcode-stem strcpy/strcat lines,
   case labels, breaks and the final return are not visible here.  */
3856 output_387_binary_op (insn, operands)
3860 static char buf[30];
3863 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
3865 #ifdef ENABLE_CHECKING
3866 /* Even if we do not want to check the inputs, this documents input
3867 constraints. Which helps in understanding the following code. */
3868 if (STACK_REG_P (operands[0])
3869 && ((REG_P (operands[1])
3870 && REGNO (operands[0]) == REGNO (operands[1])
3871 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
3872 || (REG_P (operands[2])
3873 && REGNO (operands[0]) == REGNO (operands[2])
3874 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
3875 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
/* Pick the opcode stem from the operation code; integer-mode operands
   select the fi* (integer) forms.  */
3881 switch (GET_CODE (operands[3]))
3884 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3885 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3893 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3894 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3902 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3903 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3911 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3912 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
/* SSE path: append scalar-single or scalar-double suffix + operands.  */
3926 if (GET_MODE (operands[0]) == SFmode)
3927 strcat (buf, "ss\t{%2, %0|%0, %2}");
3929 strcat (buf, "sd\t{%2, %0|%0, %2}");
3934 switch (GET_CODE (operands[3]))
/* Commutative ops: canonicalize so operands[0] == operands[1].  */
3938 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
3940 rtx temp = operands[2];
3941 operands[2] = operands[1];
3945 /* know operands[0] == operands[1]. */
3947 if (GET_CODE (operands[2]) == MEM)
3953 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
3955 if (STACK_TOP_P (operands[0]))
3956 /* How is it that we are storing to a dead operand[2]?
3957 Well, presumably operands[1] is dead too. We can't
3958 store the result to st(0) as st(0) gets popped on this
3959 instruction. Instead store to operands[2] (which I
3960 think has to be st(1)). st(1) will be popped later.
3961 gcc <= 2.8.1 didn't have this check and generated
3962 assembly code that the Unixware assembler rejected. */
3963 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
3965 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
3969 if (STACK_TOP_P (operands[0]))
3970 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
3972 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (MINUS/DIV).  */
3977 if (GET_CODE (operands[1]) == MEM)
3983 if (GET_CODE (operands[2]) == MEM)
3989 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
3992 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
3993 derived assemblers, confusingly reverse the direction of
3994 the operation for fsub{r} and fdiv{r} when the
3995 destination register is not st(0). The Intel assembler
3996 doesn't have this brain damage. Read !SYSV386_COMPAT to
3997 figure out what the hardware really does. */
3998 if (STACK_TOP_P (operands[0]))
3999 p = "{p\t%0, %2|rp\t%2, %0}";
4001 p = "{rp\t%2, %0|p\t%0, %2}";
4003 if (STACK_TOP_P (operands[0]))
4004 /* As above for fmul/fadd, we can't store to st(0). */
4005 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
4007 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
4012 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
4015 if (STACK_TOP_P (operands[0]))
4016 p = "{rp\t%0, %1|p\t%1, %0}";
4018 p = "{p\t%1, %0|rp\t%0, %1}";
4020 if (STACK_TOP_P (operands[0]))
4021 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
4023 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
4028 if (STACK_TOP_P (operands[0]))
4030 if (STACK_TOP_P (operands[1]))
4031 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
4033 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
4036 else if (STACK_TOP_P (operands[1]))
4039 p = "{\t%1, %0|r\t%0, %1}";
4041 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
4047 p = "{r\t%2, %0|\t%0, %2}";
4049 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
4062 /* Output code for INSN to convert a float to a signed int. OPERANDS
4063 are the insn operands. The output may be [HSD]Imode and the input
4064 operand may be [SDX]Fmode. */
4067 output_fix_trunc (insn, operands)
4071 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
4072 int dimode_p = GET_MODE (operands[0]) == DImode;
4075 /* Jump through a hoop or two for DImode, since the hardware has no
4076 non-popping instruction. We used to do this a different way, but
4077 that was somewhat fragile and broke with post-reload splitters. */
4078 if (dimode_p && !stack_top_dies)
4079 output_asm_insn ("fld\t%y1", operands);
4081 if (! STACK_TOP_P (operands[1]))
4084 xops[0] = GEN_INT (12);
4085 xops[1] = adj_offsettable_operand (operands[2], 1);
4086 xops[1] = change_address (xops[1], QImode, NULL_RTX);
4088 xops[2] = operands[0];
4089 if (GET_CODE (operands[0]) != MEM)
4090 xops[2] = operands[3];
4092 output_asm_insn ("fnstcw\t%2", operands);
4093 output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
4094 output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
4095 output_asm_insn ("fldcw\t%2", operands);
4096 output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);
4098 if (stack_top_dies || dimode_p)
4099 output_asm_insn ("fistp%z2\t%2", xops);
4101 output_asm_insn ("fist%z2\t%2", xops);
4103 output_asm_insn ("fldcw\t%2", operands);
4105 if (GET_CODE (operands[0]) != MEM)
4109 split_di (operands+0, 1, xops+0, xops+1);
4110 split_di (operands+3, 1, xops+2, xops+3);
4111 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4112 output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
4114 else if (GET_MODE (operands[0]) == SImode)
4115 output_asm_insn ("mov{l}\t{%3, %0|%0, %3}", operands);
4117 output_asm_insn ("mov{w}\t{%3, %0|%0, %3}", operands);
4123 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
4124 should be used and 2 when fnstsw should be used. UNORDERED_P is true
4125 when fucom should be used. */
/* Emit the assembly for an FP compare.  EFLAGS_P selects fcomi (1) or
   fnstsw (2); UNORDERED_P selects the fucom family.
   FIX(review): the Intel-syntax half of the ordered SSE compare templates
   read "%y" with no operand number, which is not a valid operand
   reference; it must be "%1" to match the AT&T half (compare the
   ucomis[sd] templates immediately above each).  */
4128 output_fp_compare (insn, operands, eflags_p, unordered_p)
4131      int eflags_p, unordered_p;
4134   rtx cmp_op0 = operands[0];
4135   rtx cmp_op1 = operands[1];
4136   int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
4141     cmp_op1 = operands[2];
/* SSE compares: [u]comiss/[u]comisd set EFLAGS directly.  */
4145       if (GET_MODE (operands[0]) == SFmode)
4147 	  return "ucomiss\t{%1, %0|%0, %1}";
4149 	  return "comiss\t{%1, %0|%0, %1}";
4152 	  return "ucomisd\t{%1, %0|%0, %1}";
4154 	  return "comisd\t{%1, %0|%0, %1}";
/* 387 compares require op0 on the stack top.  */
4157   if (! STACK_TOP_P (cmp_op0))
4160   stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
4162   if (STACK_REG_P (cmp_op1)
4164       && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
4165       && REGNO (cmp_op1) != FIRST_STACK_REG)
4167       /* If both the top of the 387 stack dies, and the other operand
4168 	 is also a stack register that dies, then this must be a
4169 	 `fcompp' float compare */
4173 	  /* There is no double popping fcomi variant.  Fortunately,
4174 	     eflags is immune from the fstp's cc clobbering.  */
4176 	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
4178 	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
4186 	    return "fucompp\n\tfnstsw\t%0";
4188 	    return "fcompp\n\tfnstsw\t%0";
4201       /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
4203       static const char * const alt[24] =
4215 	"fcomi\t{%y1, %0|%0, %y1}",
4216 	"fcomip\t{%y1, %0|%0, %y1}",
4217 	"fucomi\t{%y1, %0|%0, %y1}",
4218 	"fucomip\t{%y1, %0|%0, %y1}",
4225 	"fcom%z2\t%y2\n\tfnstsw\t%0",
4226 	"fcomp%z2\t%y2\n\tfnstsw\t%0",
4227 	"fucom%z2\t%y2\n\tfnstsw\t%0",
4228 	"fucomp%z2\t%y2\n\tfnstsw\t%0",
4230 	"ficom%z2\t%y2\n\tfnstsw\t%0",
4231 	"ficomp%z2\t%y2\n\tfnstsw\t%0",
/* Select the template per the encoding documented at line 4201.  */
4239       mask  = eflags_p << 3;
4240       mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
4241       mask |= unordered_p << 1;
4242       mask |= stack_top_dies;
4254 /* Output assembler code to FILE to initialize basic-block profiling.
4256 If profile_block_flag == 2
4258 Output code to call the subroutine `__bb_init_trace_func'
4259 and pass two parameters to it. The first parameter is
4260 the address of a block allocated in the object module.
4261 The second parameter is the number of the first basic block
4264 The name of the block is a local symbol made with this statement:
4266 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4268 Of course, since you are writing the definition of
4269 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4270 can take a short cut in the definition of this macro and use the
4271 name that you know will result.
4273 The number of the first basic block of the function is
4274 passed to the macro in BLOCK_OR_LABEL.
4276 If described in a virtual assembler language the code to be
4280 parameter2 <- BLOCK_OR_LABEL
4281 call __bb_init_trace_func
4283 else if profile_block_flag != 0
4285 Output code to call the subroutine `__bb_init_func'
4286 and pass one single parameter to it, which is the same
4287 as the first parameter to `__bb_init_trace_func'.
4289 The first word of this parameter is a flag which will be nonzero if
4290 the object module has already been initialized. So test this word
4291 first, and do not call `__bb_init_func' if the flag is nonzero.
4292 Note: When profile_block_flag == 2 the test need not be done
4293 but `__bb_init_trace_func' *must* be called.
4295 BLOCK_OR_LABEL may be used to generate a label number as a
4296 branch destination in case `__bb_init_func' will not be called.
4298 If described in a virtual assembler language the code to be
/* Emit the basic-block-profiling prologue code described in the comment
   above: call __bb_init_trace_func (profile_block_flag == 2) or, after an
   initialized-flag test, __bb_init_func.
   FIX(review): in the second arm the lea template's Intel-syntax half said
   "%a2" while the AT&T half addresses operand 1 (the LPBX block table);
   %a2 is the LPBZ false-label MEM, so Intel-mode output loaded the wrong
   address.  Corrected to "%a1", matching the identical lea in the first
   arm (line 4338).  */
4309 ix86_output_function_block_profiler (file, block_or_label)
4313   static int num_func = 0;
4315   char block_table[80], false_label[80];
/* LPBX0 is the per-module profiling block allocated elsewhere.  */
4317   ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
4319   xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4320   xops[5] = stack_pointer_rtx;
4321   xops[7] = gen_rtx_REG (Pmode, 0);	/* eax */
4323   CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
4325   switch (profile_block_flag)
/* Trace mode: push block number and block address, call the init-trace
   routine, then pop the two argument words (8 bytes).  */
4328       xops[2] = GEN_INT (block_or_label);
4329       xops[3] = gen_rtx_MEM (Pmode,
4330 			     gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
4331       xops[6] = GEN_INT (8);
4333       output_asm_insn ("push{l}\t%2", xops);
4335 	output_asm_insn ("push{l}\t%1", xops);
/* PIC: materialize the table address in %eax first.  */
4338 	  output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4339 	  output_asm_insn ("push{l}\t%7", xops);
4341       output_asm_insn ("call\t%P3", xops);
4342       output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
/* Non-trace mode: skip the call when the module's first word says it is
   already initialized; one argument word (4 bytes) to pop.  */
4346       ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
4348       xops[0] = const0_rtx;
4349       xops[2] = gen_rtx_MEM (Pmode,
4350 			     gen_rtx_SYMBOL_REF (VOIDmode, false_label));
4351       xops[3] = gen_rtx_MEM (Pmode,
4352 			     gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
4353       xops[4] = gen_rtx_MEM (Pmode, xops[1]);
4354       xops[6] = GEN_INT (4);
4356       CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
4358       output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
4359       output_asm_insn ("jne\t%2", xops);
4362 	output_asm_insn ("push{l}\t%1", xops);
4365 	  output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4366 	  output_asm_insn ("push{l}\t%7", xops);
4368       output_asm_insn ("call\t%P3", xops);
4369       output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4370       ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
4376 /* Output assembler code to FILE to increment a counter associated
4377 with basic block number BLOCKNO.
4379 If profile_block_flag == 2
4381 Output code to initialize the global structure `__bb' and
4382 call the function `__bb_trace_func' which will increment the
4385 `__bb' consists of two words. In the first word the number
4386 of the basic block has to be stored. In the second word
4387 the address of a block allocated in the object module
4390 The basic block number is given by BLOCKNO.
4392 The address of the block is given by the label created with
4394 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4396 by FUNCTION_BLOCK_PROFILER.
4398 Of course, since you are writing the definition of
4399 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4400 can take a short cut in the definition of this macro and use the
4401 name that you know will result.
4403 If described in a virtual assembler language the code to be
4406 move BLOCKNO -> (__bb)
4407 move LPBX0 -> (__bb+4)
4408 call __bb_trace_func
4410 Note that function `__bb_trace_func' must not change the
4411 machine state, especially the flag register. To grant
4412 this, you must output code to save and restore registers
4413 either in this macro or in the macros MACHINE_STATE_SAVE
4414 and MACHINE_STATE_RESTORE. The last two macros will be
4415 used in the function `__bb_trace_func', so you must make
4416 sure that the function prologue does not change any
4417 register prior to saving it with MACHINE_STATE_SAVE.
4419 else if profile_block_flag != 0
4421 Output code to increment the counter directly.
4422 Basic blocks are numbered separately from zero within each
4423 compiled object module. The count associated with block number
4424 BLOCKNO is at index BLOCKNO in an array of words; the name of
4425 this array is a local symbol made with this statement:
4427 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
4429 Of course, since you are writing the definition of
4430 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4431 can take a short cut in the definition of this macro and use the
4432 name that you know will result.
4434 If described in a virtual assembler language the code to be
4437 inc (LPBX2+4*BLOCKNO)
/* Emit per-basic-block counting code (see the comment block above):
   either store BLOCKNO and the table address into __bb and call
   __bb_trace_func, or directly increment the LPBX2 counter array.
   NOTE(review): listing has gaps; code kept byte-identical.  */
4441 ix86_output_block_profiler (file, blockno)
4442      FILE *file ATTRIBUTE_UNUSED;
4445   rtx xops[8], cnt_rtx;
4447   char *block_table = counts;
4449   switch (profile_block_flag)
/* Trace mode: fill __bb (blockno) and __bb+4 (table address), call.  */
4452       ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
4454       xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4455       xops[2] = GEN_INT (blockno);
4456       xops[3] = gen_rtx_MEM (Pmode,
4457 			     gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
4458       xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
4459       xops[5] = plus_constant (xops[4], 4);
4460       xops[0] = gen_rtx_MEM (SImode, xops[4]);
4461       xops[6] = gen_rtx_MEM (SImode, xops[5]);
4463       CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
/* Save flags around the call; __bb_trace_func must not change state.  */
4465       output_asm_insn ("pushf", xops);
4466       output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
/* PIC: go through %eax to compute the table address.  */
4469 	  xops[7] = gen_rtx_REG (Pmode, 0);	/* eax */
4470 	  output_asm_insn ("push{l}\t%7", xops);
4471 	  output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4472 	  output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
4473 	  output_asm_insn ("pop{l}\t%7", xops);
4476 	output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
4477       output_asm_insn ("call\t%P3", xops);
4478       output_asm_insn ("popf", xops);
/* Direct mode: inc (LPBX2 + 4*blockno), PIC-adjusted if needed.  */
4483       ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
4484       cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
4485       SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
4488 	cnt_rtx = plus_constant (cnt_rtx, blockno*4);
4491 	cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
4493       xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
4494       output_asm_insn ("inc{l}\t%0", xops);
/* Expand a move of MODE from operands[1] to operands[0], legitimizing
   PIC symbols and avoiding mem->mem moves the machine cannot do.
   NOTE(review): listing has gaps (else-arms, braces missing); code kept
   byte-identical.  */
4501 ix86_expand_move (mode, operands)
4502      enum machine_mode mode;
4505   int strict = (reload_in_progress || reload_completed);
/* Symbolic addresses under -fPIC must go through the PIC register.  */
4508   if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
4510       /* Emit insns to move operands[1] into operands[0].  */
4512       if (GET_CODE (operands[0]) == MEM)
4513 	operands[1] = force_reg (Pmode, operands[1]);
4516 	  rtx temp = operands[0];
4517 	  if (GET_CODE (temp) != REG)
4518 	    temp = gen_reg_rtx (Pmode);
4519 	  temp = legitimize_pic_address (operands[1], temp);
4520 	  if (temp == operands[0])
/* No mem->mem moves (except pushes of non-QImode values).  */
4527       if (GET_CODE (operands[0]) == MEM
4528 	  && (GET_MODE (operands[0]) == QImode
4529 	      || !push_operand (operands[0], mode))
4530 	  && GET_CODE (operands[1]) == MEM)
4531 	operands[1] = force_reg (mode, operands[1]);
4533       if (push_operand (operands[0], mode)
4534 	  && ! general_no_elim_operand (operands[1], mode))
4535 	operands[1] = copy_to_mode_reg (mode, operands[1]);
4537       if (FLOAT_MODE_P (mode))
4539 	  /* If we are loading a floating point constant to a register,
4540 	     force the value to memory now, since we'll get better code
4541 	     out the back end.  */
4545 	  else if (GET_CODE (operands[1]) == CONST_DOUBLE
4546 		   && register_operand (operands[0], mode))
4547 	    operands[1] = validize_mem (force_const_mem (mode, operands[1]));
4551   insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
4556 /* Attempt to expand a binary operator. Make the expansion closer to the
4557 actual machine, then just general_operand, which will allow 3 separate
4558 memory references (one output, two input) in a single insn. */
4561 ix86_expand_binary_operator (code, mode, operands)
4563      enum machine_mode mode;
4566   int matching_memory;
4567   rtx src1, src2, dst, op, clob;
4573   /* Recognize <var1> = <value> <op> <var1> for commutative operators */
4574   if (GET_RTX_CLASS (code) == 'c'
4575       && (rtx_equal_p (dst, src2)
4576 	  || immediate_operand (src1, mode)))
4583   /* If the destination is memory, and we do not have matching source
4584      operands, do things in registers.  */
4585   matching_memory = 0;
4586   if (GET_CODE (dst) == MEM)
4588       if (rtx_equal_p (dst, src1))
4589 	matching_memory = 1;
4590       else if (GET_RTX_CLASS (code) == 'c'
4591 	       && rtx_equal_p (dst, src2))
4592 	matching_memory = 2;
4594 	dst = gen_reg_rtx (mode);
4597   /* Both source operands cannot be in memory.  */
4598   if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
4600       if (matching_memory != 2)
4601 	src2 = force_reg (mode, src2);
4603 	src1 = force_reg (mode, src1);
4606   /* If the operation is not commutable, source 1 cannot be a constant
4607      or non-matching memory.  */
4608   if ((CONSTANT_P (src1)
4609        || (!matching_memory && GET_CODE (src1) == MEM))
4610       && GET_RTX_CLASS (code) != 'c')
4611     src1 = force_reg (mode, src1);
4613   /* If optimizing, copy to regs to improve CSE */
4614   if (optimize && ! no_new_pseudos)
4616       if (GET_CODE (dst) == MEM)
4617 	dst = gen_reg_rtx (mode);
4618       if (GET_CODE (src1) == MEM)
4619 	src1 = force_reg (mode, src1);
4620       if (GET_CODE (src2) == MEM)
4621 	src2 = force_reg (mode, src2);
4624   /* Emit the instruction.  */
4626   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
4627   if (reload_in_progress)
4629       /* Reload doesn't know about the flags register, and doesn't know that
4630          it doesn't want to clobber it.  We can only do this with PLUS.  */
/* Normal case: the insn pattern clobbers EFLAGS explicitly.  */
4637       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4638       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4641   /* Fix up the destination if needed.  */
4642   if (dst != operands[0])
4643     emit_move_insn (operands[0], dst);
4646 /* Return TRUE or FALSE depending on whether the binary operator meets the
4647 appropriate constraints. */
/* Predicate: do OPERANDS already satisfy the constraints that
   ix86_expand_binary_operator would otherwise fix up?  Returns FALSE on
   any violation (return lines are absent from this gapped listing).  */
4650 ix86_binary_operator_ok (code, mode, operands)
4652      enum machine_mode mode ATTRIBUTE_UNUSED;
4655   /* Both source operands cannot be in memory.  */
4656   if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
4658   /* If the operation is not commutable, source 1 cannot be a constant.  */
4659   if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
4661   /* If the destination is memory, we must have a matching source operand.  */
4662   if (GET_CODE (operands[0]) == MEM
4663       && ! (rtx_equal_p (operands[0], operands[1])
4664 	    || (GET_RTX_CLASS (code) == 'c'
4665 		&& rtx_equal_p (operands[0], operands[2]))))
4667   /* If the operation is not commutable and the source 1 is memory, we must
4668      have a matching destionation.  */
4669   if (GET_CODE (operands[1]) == MEM
4670       && GET_RTX_CLASS (code) != 'c'
4671       && ! rtx_equal_p (operands[0], operands[1]))
4676 /* Attempt to expand a unary operator. Make the expansion closer to the
4677 actual machine, then just general_operand, which will allow 2 separate
4678 memory references (one output, one input) in a single insn. */
4681 ix86_expand_unary_operator (code, mode, operands)
4683      enum machine_mode mode;
4686   int matching_memory;
4687   rtx src, dst, op, clob;
4692   /* If the destination is memory, and we do not have matching source
4693      operands, do things in registers.  */
4694   matching_memory = 0;
4695   if (GET_CODE (dst) == MEM)
4697       if (rtx_equal_p (dst, src))
4698 	matching_memory = 1;
4700 	dst = gen_reg_rtx (mode);
4703   /* When source operand is memory, destination must match.  */
4704   if (!matching_memory && GET_CODE (src) == MEM)
4705     src = force_reg (mode, src);
4707   /* If optimizing, copy to regs to improve CSE */
4708   if (optimize && ! no_new_pseudos)
4710       if (GET_CODE (dst) == MEM)
4711 	dst = gen_reg_rtx (mode);
4712       if (GET_CODE (src) == MEM)
4713 	src = force_reg (mode, src);
4716   /* Emit the instruction.  */
4718   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
/* NOT does not clobber flags, hence the extra condition here.  */
4719   if (reload_in_progress || code == NOT)
4721       /* Reload doesn't know about the flags register, and doesn't know that
4722          it doesn't want to clobber it.  */
4729       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4730       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4733   /* Fix up the destination if needed.  */
4734   if (dst != operands[0])
4735     emit_move_insn (operands[0], dst);
4738 /* Return TRUE or FALSE depending on whether the unary operator meets the
4739 appropriate constraints. */
/* Predicate counterpart to ix86_expand_unary_operator.  */
4742 ix86_unary_operator_ok (code, mode, operands)
4743      enum rtx_code code ATTRIBUTE_UNUSED;
4744      enum machine_mode mode ATTRIBUTE_UNUSED;
4745      rtx operands[2] ATTRIBUTE_UNUSED;
4747   /* If one of operands is memory, source and destination must match.  */
4748   if ((GET_CODE (operands[0]) == MEM
4749        || GET_CODE (operands[1]) == MEM)
4750       && ! rtx_equal_p (operands[0], operands[1]))
4755 /* Return TRUE or FALSE depending on whether the first SET in INSN
4756 has source and destination with matching CC modes, and that the
4757 CC mode is at least as constrained as REQ_MODE. */
4760 ix86_match_ccmode (insn, req_mode)
4762      enum machine_mode req_mode;
4765   enum machine_mode set_mode;
/* Dig the first SET out of INSN, unwrapping a PARALLEL if present.  */
4767   set = PATTERN (insn);
4768   if (GET_CODE (set) == PARALLEL)
4769     set = XVECEXP (set, 0, 0);
4770   if (GET_CODE (set) != SET)
4772   if (GET_CODE (SET_SRC (set)) != COMPARE)
4775   set_mode = GET_MODE (SET_DEST (set));
/* Each CC mode below accepts itself plus the modes it subsumes; the
   accept/reject arms between these tests are absent from this listing.  */
4779       if (req_mode != CCNOmode
4780 	  && (req_mode != CCmode
4781 	      || XEXP (SET_SRC (set), 1) != const0_rtx))
4785       if (req_mode == CCGCmode)
4789       if (req_mode == CCGOCmode || req_mode == CCNOmode)
4793       if (req_mode == CCZmode)
4803   return (GET_MODE (SET_SRC (set)) == set_mode);
4806 /* Generate insn patterns to do an integer compare of OPERANDS. */
4809 ix86_expand_int_compare (code, op0, op1)
4813   enum machine_mode cmpmode;
/* Pick the least constrained CC mode that still captures CODE.  */
4816   cmpmode = SELECT_CC_MODE (code, op0, op1);
4817   flags = gen_rtx_REG (cmpmode, FLAGS_REG);
4819   /* This is very simple, but making the interface the same as in the
4820      FP case makes the rest of the code easier.  */
4821   tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
4822   emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
4824   /* Return the test that should be put into the flags user, i.e.
4825      the bcc, scc, or cmov instruction.  */
4826   return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
4829 /* Figure out whether to use ordered or unordered fp comparisons.
4830 Return the appropriate mode to use. */
4833 ix86_fp_compare_mode (code)
4834      enum rtx_code code ATTRIBUTE_UNUSED;
4836   /* ??? In order to make all comparisons reversible, we do all comparisons
4837      non-trapping when compiling for IEEE.  Once gcc is able to distinguish
4838      all forms trapping and nontrapping comparisons, we can make inequality
4839      comparisons trapping again, since it results in better code when using
4840      FCOM based compares.  */
4841   return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Choose the CC mode for a comparison of OP0 with OP1 under CODE.
   NOTE(review): the return statements for each case group are missing
   from this gapped listing; only the case labels survive.  */
4845 ix86_cc_mode (code, op0, op1)
4849   if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
4850     return ix86_fp_compare_mode (code);
4853       /* Only zero flag is needed.  */
4855     case NE:			/* ZF!=0 */
4857       /* Codes needing carry flag.  */
4858     case GEU:			/* CF=0 */
4859     case GTU:			/* CF=0 & ZF=0 */
4860     case LTU:			/* CF=1 */
4861     case LEU:			/* CF=1 | ZF=1 */
4863       /* Codes possibly doable only with sign flag when
4864          comparing against zero.  */
4865     case GE:			/* SF=OF   or   SF=0 */
4866     case LT:			/* SF<>OF  or   SF=1 */
4867       if (op1 == const0_rtx)
4870 	/* For other cases Carry flag is not required.  */
4872       /* Codes doable only with sign flag when comparing
4873          against zero, but we miss jump instruction for it
4874          so we need to use relational tests agains overflow
4875          that thus needs to be zero.  */
4876     case GT:			/* ZF=0 & SF=OF */
4877     case LE:			/* ZF=1 | SF<>OF */
4878       if (op1 == const0_rtx)
4887 /* Return true if we should use an FCOMI instruction for this fp comparison. */
4890 ix86_use_fcomi_compare (code)
4891      enum rtx_code code ATTRIBUTE_UNUSED;
/* fcomi wins when it is the cheapest strategy for CODE in either
   operand order (the args may later be swapped).  */
4893   enum rtx_code swapped_code = swap_condition (code);
4894   return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
4895 	  || (ix86_fp_comparison_cost (swapped_code)
4896 	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
4899 /* Swap, force into registers, or otherwise massage the two operands
4900 to a fp comparison. The operands are updated in place; the new
4901 comparsion code is returned. */
static enum rtx_code
4904 ix86_prepare_fp_compare_args (code, pop0, pop1)
4908   enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
4909   rtx op0 = *pop0, op1 = *pop1;
4910   enum machine_mode op_mode = GET_MODE (op0);
4911   int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
4913   /* All of the unordered compare instructions only work on registers.
4914      The same is true of the XFmode compare instructions.  The same is
4915      true of the fcomi compare instructions.  */
4918       && (fpcmp_mode == CCFPUmode
4919 	  || op_mode == XFmode
4920 	  || op_mode == TFmode
4921 	  || ix86_use_fcomi_compare (code)))
4923       op0 = force_reg (op_mode, op0);
4924       op1 = force_reg (op_mode, op1);
4928       /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
4929 	 things around if they appear profitable, otherwise force op0
/* Swap so the cheap-to-load operand ends up as op1.  */
4932       if (standard_80387_constant_p (op0) == 0
4933 	  || (GET_CODE (op0) == MEM
4934 	      && ! (standard_80387_constant_p (op1) == 0
4935 		    || GET_CODE (op1) == MEM)))
4938 	  tmp = op0, op0 = op1, op1 = tmp;
4939 	  code = swap_condition (code);
4942       if (GET_CODE (op0) != REG)
4943 	op0 = force_reg (op_mode, op0);
4945       if (CONSTANT_P (op1))
/* Standard 387 constants (0.0, 1.0, ...) load via fldz/fld1 etc.;
   anything else goes through the constant pool.  */
4947 	  if (standard_80387_constant_p (op1))
4948 	    op1 = force_reg (op_mode, op1);
4950 	    op1 = validize_mem (force_const_mem (op_mode, op1));
4954   /* Try to rearrange the comparison to make it cheaper.  */
4955   if (ix86_fp_comparison_cost (code)
4956       > ix86_fp_comparison_cost (swap_condition (code))
4957       && (GET_CODE (op0) == REG || !reload_completed))
4960       tmp = op0, op0 = op1, op1 = tmp;
4961       code = swap_condition (code);
4962       if (GET_CODE (op0) != REG)
4963 	op0 = force_reg (op_mode, op0);
4971 /* Convert comparison codes we use to represent FP comparison to integer
4972 code that will result in proper branch. Return UNKNOWN if no such code
static enum rtx_code
/* NOTE(review): the body of this function (original lines 4976-5003) is
   entirely absent from this listing; only the signature survives.  */
4975 ix86_fp_compare_code_to_integer (code)
5005 /* Split comparison code CODE into comparisons we can do using branch
5006 instructions. BYPASS_CODE is comparison code for branch that will
5007 branch around FIRST_CODE and SECOND_CODE. If some of branches
5008 is not required, set value to NIL.
5009 We never require more than two branches. */
5011 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
5012      enum rtx_code code, *bypass_code, *first_code, *second_code;
5018   /* The fcomi comparison sets flags as follows:
/* Group 1: codes handled by a single branch (assignment lines are
   absent from this gapped listing).  */
5028     case GT:			/* GTU - CF=0 & ZF=0 */
5029     case GE:			/* GEU - CF=0 */
5030     case ORDERED:		/* PF=0 */
5031     case UNORDERED:		/* PF=1 */
5032     case UNEQ:			/* EQ - ZF=1 */
5033     case UNLT:			/* LTU - CF=1 */
5034     case UNLE:			/* LEU - CF=1 | ZF=1 */
5035     case LTGT:			/* EQ - ZF=0 */
/* Group 2: codes that mis-fire on unordered input and need a bypass
   branch around the main test, ...  */
5037     case LT:			/* LTU - CF=1 - fails on unordered */
5039       *bypass_code = UNORDERED;
5041     case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
5043       *bypass_code = UNORDERED;
5045     case EQ:			/* EQ - ZF=1 - fails on unordered */
5047       *bypass_code = UNORDERED;
/* ... or a second branch taken when the input is unordered.  */
5049     case NE:			/* NE - ZF=0 - fails on unordered */
5051       *second_code = UNORDERED;
5053     case UNGE:			/* GEU - CF=0 - fails on unordered */
5055       *second_code = UNORDERED;
5057     case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
5059       *second_code = UNORDERED;
/* Without IEEE conformance the unordered fixups can be dropped.  */
5064   if (!TARGET_IEEE_FP)
5071 /* Return cost of comparison done fcom + arithmetics operations on AX.
5072 All following functions do use number of instructions as an cost metrics.
5073 In future this should be tweaked to compute bytes for optimize_size and
5074 take into account performance of various instructions on various CPUs. */
5076 ix86_fp_comparison_arithmetics_cost (code)
5079   if (!TARGET_IEEE_FP)
5081   /* The cost of code output by ix86_expand_fp_compare.  */
5109 /* Return cost of comparison done using fcomi operation.
5110 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5112 ix86_fp_comparison_fcomi_cost (code)
5115   enum rtx_code bypass_code, first_code, second_code;
5116   /* Return arbitarily high cost when instruction is not supported - this
5117      prevents gcc from using it.  */
5120   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* fcomi itself plus the branch(es): +1 if a bypass or second test is
   needed, on top of a base cost of 2.  */
5121   return (bypass_code != NIL || second_code != NIL) + 2;
5124 /* Return cost of comparison done using sahf operation.
5125 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5127 ix86_fp_comparison_sahf_cost (code)
5130   enum rtx_code bypass_code, first_code, second_code;
5131   /* Return arbitarily high cost when instruction is not preferred - this
5132      avoids gcc from using it.  */
5133   if (!TARGET_USE_SAHF && !optimize_size)
5135   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* fcom + fnstsw + sahf = base cost 3, +1 for any extra branch.  */
5136   return (bypass_code != NIL || second_code != NIL) + 3;
5139 /* Compute cost of the comparison done using any method.
5140 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5142 ix86_fp_comparison_cost (code)
5145   int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
5148   fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
5149   sahf_cost = ix86_fp_comparison_sahf_cost (code);
/* min over the three strategies; min is returned (return line absent
   from this gapped listing).  */
5151   min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
5152   if (min > sahf_cost)
5154   if (min > fcomi_cost)
5159 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* Expand an FP comparison.  Chooses among fcomi, fnstsw+sahf, and
   fnstsw+AH arithmetic based on cost; may set *SECOND_TEST and
   *BYPASS_TEST for codes needing extra branches.
   NOTE(review): listing has gaps; code kept byte-identical.  */
5162 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
5164      rtx op0, op1, scratch;
5168   enum machine_mode fpcmp_mode, intcmp_mode;
5170   int cost = ix86_fp_comparison_cost (code);
5171   enum rtx_code bypass_code, first_code, second_code;
5173   fpcmp_mode = ix86_fp_compare_mode (code);
5174   code = ix86_prepare_fp_compare_args (code, &op0, &op1);
5177     *second_test = NULL_RTX;
5179     *bypass_test = NULL_RTX;
5181   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5183   /* Do fcomi/sahf based test when profitable.  */
5184   if ((bypass_code == NIL || bypass_test)
5185       && (second_code == NIL || second_test)
5186       && ix86_fp_comparison_arithmetics_cost (code) > cost)
/* fcomi branch: compare straight into the flags register.  */
5190 	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5191 	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* sahf branch: fnstsw into a scratch, then sahf to load flags.  */
5197 	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5198 	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
5200 	    scratch = gen_reg_rtx (HImode);
5201 	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
5202 	  emit_insn (gen_x86_sahf_1 (scratch));
5205       /* The FP codes work out to act like unsigned.  */
5206       intcmp_mode = fpcmp_mode;
5208       if (bypass_code != NIL)
5209 	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
5210 				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
5212       if (second_code != NIL)
5213 	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
5214 				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
5219       /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
5220       tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5221       tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
5223 	scratch = gen_reg_rtx (HImode);
5224       emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
5226       /* In the unordered case, we have to check C2 for NaN's, which
5227          doesn't happen to work out to anything nice combination-wise.
5228          So do some bit twiddling on the value we've got in AH to come
5229          up with an appropriate set of condition codes.  */
5231       intcmp_mode = CCNOmode;
/* Per-code AH masks: 0x45 = C3|C2|C0, 0x40 = C3, 0x05 = C2|C0,
   0x04 = C2, 0x01 = C0.  The switch/case labels between these arms are
   absent from this gapped listing.  */
5236 	  if (code == GT || !TARGET_IEEE_FP)
5238 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
5243 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5244 	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
5245 	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
5246 	      intcmp_mode = CCmode;
5252 	  if (code == LT && TARGET_IEEE_FP)
5254 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5255 	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
5256 	      intcmp_mode = CCmode;
5261 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
5267 	  if (code == GE || !TARGET_IEEE_FP)
5269 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
5274 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5275 	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
5282 	  if (code == LE && TARGET_IEEE_FP)
5284 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5285 	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
5286 	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
5287 	      intcmp_mode = CCmode;
5292 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
5298 	  if (code == EQ && TARGET_IEEE_FP)
5300 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5301 	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
5302 	      intcmp_mode = CCmode;
5307 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
5314 	  if (code == NE && TARGET_IEEE_FP)
5316 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5317 	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
5323 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
5329 	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
5333 	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
5342   /* Return the test that should be put into the flags user, i.e.
5343      the bcc, scc, or cmov instruction.  */
5344   return gen_rtx_fmt_ee (code, VOIDmode,
5345 			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Dispatch a compare of the saved ix86_compare_op0/op1 to the FP or
   integer expander; returns the flags-user test rtx.  */
5350 ix86_expand_compare (code, second_test, bypass_test)
5352      rtx *second_test, *bypass_test;
5355   op0 = ix86_compare_op0;
5356   op1 = ix86_compare_op1;
5359     *second_test = NULL_RTX;
5361     *bypass_test = NULL_RTX;
5363   if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5364     ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
5365 				  second_test, bypass_test);
5367     ret = ix86_expand_int_compare (code, op0, op1);
/* Expand a conditional branch on the saved comparison operands to LABEL,
   switching on the operand mode: simple jump for integer modes, a
   clobber-laden parallel for FP modes, and a multi-branch decomposition
   for DImode.  NOTE(review): listing has gaps; code kept byte-identical.  */
5373 ix86_expand_branch (code, label)
5379   switch (GET_MODE (ix86_compare_op0))
/* Integer modes: expand the compare now and emit one jump.  */
5384       tmp = ix86_expand_compare (code, NULL, NULL);
5385       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5386 				  gen_rtx_LABEL_REF (VOIDmode, label),
5388       emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5395       /* Don't expand the comparison early, so that we get better code
5396          when jump or whoever decides to reverse the comparison.  */
5401 	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
5404 	tmp = gen_rtx_fmt_ee (code, VOIDmode,
5405 			      ix86_compare_op0, ix86_compare_op1);
5406 	tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5407 				    gen_rtx_LABEL_REF (VOIDmode, label),
5409 	tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
/* FP branches clobber FPSR (reg 18) and flags (reg 17); non-fcomi
   variants additionally need a HImode scratch for fnstsw.  */
5411 	use_fcomi = ix86_use_fcomi_compare (code);
5412 	vec = rtvec_alloc (3 + !use_fcomi);
5413 	RTVEC_ELT (vec, 0) = tmp;
5415 	  = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
5417 	  = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
5420 	    = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
5422 	emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
5427       /* Expand DImode branch into multiple compare+branch.  */
5429 	rtx lo[2], hi[2], label2;
5430 	enum rtx_code code1, code2, code3;
/* Canonicalize so any constant ends up as operand 1.  */
5432 	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
5434 	    tmp = ix86_compare_op0;
5435 	    ix86_compare_op0 = ix86_compare_op1;
5436 	    ix86_compare_op1 = tmp;
5437 	    code = swap_condition (code);
5439 	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
5440 	split_di (&ix86_compare_op1, 1, lo+1, hi+1);
5442 	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
5443 	   avoid two branches.  This costs one extra insn, so disable when
5444 	   optimizing for size.  */
5446 	if ((code == EQ || code == NE)
5448 		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
/* XOR against zero is a no-op, so skip it for zero halves.  */
5453 	    if (hi[1] != const0_rtx)
5454 	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
5455 				   NULL_RTX, 0, OPTAB_WIDEN);
5458 	    if (lo[1] != const0_rtx)
5459 	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
5460 				   NULL_RTX, 0, OPTAB_WIDEN);
5462 	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
5463 				NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse as an SImode compare of the OR against zero.  */
5465 	    ix86_compare_op0 = tmp;
5466 	    ix86_compare_op1 = const0_rtx;
5467 	    ix86_expand_branch (code, label);
5471 	/* Otherwise, if we are doing less-than or greater-or-equal-than,
5472 	   op1 is a constant and the low word is zero, then we can just
5473 	   examine the high word.  */
5475 	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
5478 	    case LT: case LTU: case GE: case GEU:
5479 	      ix86_compare_op0 = hi[0];
5480 	      ix86_compare_op1 = hi[1];
5481 	      ix86_expand_branch (code, label);
5487 	/* Otherwise, we need two or three jumps.  */
5489 	label2 = gen_label_rtx ();
5492 	code2 = swap_condition (code);
5493 	code3 = unsigned_condition (code);
5497 	  case LT: case GT: case LTU: case GTU:
5500 	  case LE: code1 = LT;  code2 = GT;  break;
5501 	  case GE: code1 = GT;  code2 = LT;  break;
5502 	  case LEU: code1 = LTU; code2 = GTU; break;
5503 	  case GEU: code1 = GTU; code2 = LTU; break;
5505 	  case EQ: code1 = NIL; code2 = NE;  break;
5506 	  case NE: code2 = NIL; break;
5514 	 * if (hi(a) < hi(b)) goto true;
5515 	 * if (hi(a) > hi(b)) goto false;
5516 	 * if (lo(a) < lo(b)) goto true;
/* Compare high words first, falling back to the low words.  */
5520 	ix86_compare_op0 = hi[0];
5521 	ix86_compare_op1 = hi[1];
5524 	  ix86_expand_branch (code1, label);
5526 	  ix86_expand_branch (code2, label2);
5528 	ix86_compare_op0 = lo[0];
5529 	ix86_compare_op1 = lo[1];
5530 	ix86_expand_branch (code3, label);
5533 	  emit_label (label2);
5542 /* Split branch based on floating point condition.  Emits the FP compare
   (via ix86_expand_fp_compare) plus up to three jumps: an optional
   "bypass" jump over the main branch, the main conditional jump, and an
   optional "second" test jump.  TARGET2 == pc_rtx means a normal
   fall-through branch; otherwise the condition is reversed so the jump
   goes to TARGET2's label instead.
   NOTE(review): listing shows only selected lines of this function.  */
5544 ix86_split_fp_branch (condition, op1, op2, target1, target2, tmp)
5545 rtx condition, op1, op2, target1, target2, tmp;
5548 rtx label = NULL_RTX;
5549 enum rtx_code code = GET_CODE (condition);
5551 if (target2 != pc_rtx)
5554 code = reverse_condition_maybe_unordered (code);
5559 condition = ix86_expand_fp_compare (code, op1, op2,
5560 tmp, &second, &bypass);
/* A bypass test skips over the branch entirely when it holds.  */
5561 if (bypass != NULL_RTX)
5563 label = gen_label_rtx ();
5564 emit_jump_insn (gen_rtx_SET
5566 gen_rtx_IF_THEN_ELSE (VOIDmode,
5568 gen_rtx_LABEL_REF (VOIDmode,
5572 /* AMD Athlon and probably other CPUs too have fast bypass path between the
5573 comparison and first branch.  The second branch takes longer to execute
5574 so place first branch the worse predictable one if possible. */
5575 if (second != NULL_RTX
5576 && (GET_CODE (second) == UNORDERED || GET_CODE (second) == ORDERED))
5578 rtx tmp = condition;
5582 emit_jump_insn (gen_rtx_SET
5584 gen_rtx_IF_THEN_ELSE (VOIDmode,
5585 condition, target1, target2)));
5586 if (second != NULL_RTX)
5587 emit_jump_insn (gen_rtx_SET
5589 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1, target2)));
/* Land the bypass jump here, after both branches.  */
5590 if (label != NULL_RTX)
/* Expand a store-flag (setcc) of comparison CODE into DEST.  Returns
   1 on success ("DONE"), 0 when the expander must FAIL (DImode
   compares are not handled here).  Handles combining a second/bypass
   FP test with and/or into the final QImode flag, then zero-extends
   into DEST when DEST is wider than QImode.
   NOTE(review): listing shows only selected lines of this function.  */
5595 ix86_expand_setcc (code, dest)
5599 rtx ret, tmp, tmpreg;
5600 rtx second_test, bypass_test;
5603 if (GET_MODE (ix86_compare_op0) == DImode)
5604 return 0; /* FAIL */
5606 /* Three modes of generation:
5607 0 -- destination does not overlap compare sources:
5608 clear dest first, emit strict_low_part setcc.
5609 1 -- destination does overlap compare sources:
5610 emit subreg setcc, zero extend.
5611 2 -- destination is in QImode:
5617 if (GET_MODE (dest) == QImode)
5619 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
5620 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
5624 emit_move_insn (dest, const0_rtx);
5626 ret = ix86_expand_compare (code, &second_test, &bypass_test);
5627 PUT_MODE (ret, QImode);
5633 tmp = gen_lowpart (QImode, dest);
5635 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
/* Prefer a fresh pseudo before CSE so the flag value can be shared.  */
5639 if (!cse_not_expected)
5640 tmp = gen_reg_rtx (QImode);
5642 tmp = gen_lowpart (QImode, dest);
5646 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
/* FP compares may need a second condition folded in: setcc it into
   TMP2, then AND (bypass) or OR (second) it with the first result.  */
5647 if (bypass_test || second_test)
5649 rtx test = second_test;
5651 rtx tmp2 = gen_reg_rtx (QImode);
5658 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
5660 PUT_MODE (test, QImode);
5661 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
5664 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
5666 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
/* Widen the QImode flag into DEST; zero_extend clobbers flags on x86,
   hence the explicit CLOBBER in the PARALLEL.  */
5673 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
5674 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
5675 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
5676 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5680 return 1; /* DONE */
/* Expand an integer conditional move: operands[0] = operands[1] (a
   comparison) ? operands[2] : operands[3].  Returns 1 when insns were
   emitted ("DONE") and 0 to make the expander FAIL and fall back.
   Tries, in order: sbb-based tricks for two-constant arms, lea/setcc
   arithmetic when the constants differ by a small scalable amount,
   setcc+and+add when no cmove is available, masking tricks for
   constant-and-variable arms, and finally a real cmove.
   NOTE(review): listing shows only selected lines of this function.  */
5684 ix86_expand_int_movcc (operands)
5687 enum rtx_code code = GET_CODE (operands[1]), compare_code;
5688 rtx compare_seq, compare_op;
5689 rtx second_test, bypass_test;
5691 /* When the compare code is not LTU or GEU, we can not use sbbl case.
5692 In case comparison is done with immediate, we can convert it to LTU or
5693 GEU by altering the integer. */
5695 if ((code == LEU || code == GTU)
5696 && GET_CODE (ix86_compare_op1) == CONST_INT
5697 && GET_MODE (operands[0]) != HImode
5698 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
5699 && GET_CODE (operands[2]) == CONST_INT
5700 && GET_CODE (operands[3]) == CONST_INT)
5706 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
/* Build the compare in a sequence so it can be re-emitted later.  */
5710 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
5711 compare_seq = gen_sequence ();
5714 compare_code = GET_CODE (compare_op);
5716 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
5717 HImode insns, we'd be swallowed in word prefix ops. */
5719 if (GET_MODE (operands[0]) != HImode
5720 && GET_CODE (operands[2]) == CONST_INT
5721 && GET_CODE (operands[3]) == CONST_INT)
5723 rtx out = operands[0];
5724 HOST_WIDE_INT ct = INTVAL (operands[2]);
5725 HOST_WIDE_INT cf = INTVAL (operands[3]);
/* sbb trick: LTU/GEU materializes 0/-1 directly via movsicc_0_m1.  */
5728 if ((compare_code == LTU || compare_code == GEU)
5729 && !second_test && !bypass_test)
5732 /* Detect overlap between destination and compare sources. */
5735 /* To simplify rest of code, restrict to the GEU case. */
5736 if (compare_code == LTU)
5741 compare_code = reverse_condition (compare_code);
5742 code = reverse_condition (code);
5746 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
5747 || reg_overlap_mentioned_p (out, ix86_compare_op1))
5748 tmp = gen_reg_rtx (SImode);
5750 emit_insn (compare_seq);
5751 emit_insn (gen_x86_movsicc_0_m1 (tmp));
5763 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5774 emit_insn (gen_iorsi3 (out, out, GEN_INT (ct)));
5776 else if (diff == -1 && ct)
5786 emit_insn (gen_one_cmplsi2 (tmp, tmp));
5788 emit_insn (gen_addsi3 (out, out, GEN_INT (cf)));
5795 * andl cf - ct, dest
5800 emit_insn (gen_andsi3 (out, out, GEN_INT (cf - ct)));
5802 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5806 emit_move_insn (out, tmp);
5808 return 1; /* DONE */
/* Swap arms (and reverse the condition) so diff = ct - cf > 0.  */
5815 tmp = ct, ct = cf, cf = tmp;
5817 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
5819 /* We may be reversing unordered compare to normal compare, that
5820 is not valid in general (we may convert non-trapping condition
5821 to trapping one), however on i386 we currently emit all
5822 comparisons unordered. */
5823 compare_code = reverse_condition_maybe_unordered (compare_code);
5824 code = reverse_condition_maybe_unordered (code);
5828 compare_code = reverse_condition (compare_code);
5829 code = reverse_condition (code);
/* diff in {1,2,3,4,5,8,9}: expressible as an lea scale+index.  */
5832 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
5833 || diff == 3 || diff == 5 || diff == 9)
5839 * lea cf(dest*(ct-cf)),dest
5843 * This also catches the degenerate setcc-only case.
5849 out = emit_store_flag (out, code, ix86_compare_op0,
5850 ix86_compare_op1, VOIDmode, 0, 1);
5857 tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
5861 tmp = gen_rtx_PLUS (SImode, tmp, out);
5867 tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));
5873 emit_move_insn (out, tmp);
5878 clob = gen_rtx_REG (CCmode, FLAGS_REG);
5879 clob = gen_rtx_CLOBBER (VOIDmode, clob);
5881 tmp = gen_rtx_SET (VOIDmode, out, tmp);
5882 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5886 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
5888 if (out != operands[0])
5889 emit_move_insn (operands[0], out);
5891 return 1; /* DONE */
5895 * General case: Jumpful:
5896 * xorl dest,dest cmpl op1, op2
5897 * cmpl op1, op2 movl ct, dest
5899 * decl dest movl cf, dest
5900 * andl (cf-ct),dest 1:
5905 * This is reasonably steep, but branch mispredict costs are
5906 * high on modern cpus, so consider failing only if optimizing
5909 * %%% Parameterize branch_cost on the tuning architecture, then
5910 * use that. The 80386 couldn't care less about mispredicts.
5913 if (!optimize_size && !TARGET_CMOVE)
5919 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
5921 /* We may be reversing unordered compare to normal compare,
5922 that is not valid in general (we may convert non-trapping
5923 condition to trapping one), however on i386 we currently
5924 emit all comparisons unordered. */
5925 compare_code = reverse_condition_maybe_unordered (compare_code);
5926 code = reverse_condition_maybe_unordered (code);
5930 compare_code = reverse_condition (compare_code);
5931 code = reverse_condition (code);
/* setcc; dec (0 -> -1 mask); and with (cf-ct); add ct.  */
5935 out = emit_store_flag (out, code, ix86_compare_op0,
5936 ix86_compare_op1, VOIDmode, 0, 1);
5938 emit_insn (gen_addsi3 (out, out, constm1_rtx));
5939 emit_insn (gen_andsi3 (out, out, GEN_INT (cf-ct)));
5941 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5942 if (out != operands[0])
5943 emit_move_insn (operands[0], out);
5945 return 1; /* DONE */
5951 /* Try a few things more with specific constants and a variable. */
5954 rtx var, orig_out, out, tmp;
5957 return 0; /* FAIL */
5959 /* If one of the two operands is an interesting constant, load a
5960 constant with the above and mask it in with a logical operation. */
5962 if (GET_CODE (operands[2]) == CONST_INT)
5965 if (INTVAL (operands[2]) == 0)
5966 operands[3] = constm1_rtx, op = and_optab;
5967 else if (INTVAL (operands[2]) == -1)
5968 operands[3] = const0_rtx, op = ior_optab;
5970 return 0; /* FAIL */
5972 else if (GET_CODE (operands[3]) == CONST_INT)
5975 if (INTVAL (operands[3]) == 0)
5976 operands[2] = constm1_rtx, op = and_optab;
5977 else if (INTVAL (operands[3]) == -1)
5978 operands[2] = const0_rtx, op = ior_optab;
5980 return 0; /* FAIL */
5983 return 0; /* FAIL */
5985 orig_out = operands[0];
5986 tmp = gen_reg_rtx (GET_MODE (orig_out));
5989 /* Recurse to get the constant loaded. */
5990 if (ix86_expand_int_movcc (operands) == 0)
5991 return 0; /* FAIL */
5993 /* Mask in the interesting variable. */
5994 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
5996 if (out != orig_out)
5997 emit_move_insn (orig_out, out);
5999 return 1; /* DONE */
6003 * For comparison with above,
/* Real cmove path: both arms must be register/memory operands.  */
6013 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
6014 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
6015 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
6016 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
/* Extra FP tests re-read the arms, so copy any arm the destination
   would clobber before the first cmove runs.  */
6018 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
6020 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
6021 emit_move_insn (tmp, operands[3]);
6024 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
6026 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
6027 emit_move_insn (tmp, operands[2]);
6031 emit_insn (compare_seq);
6032 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6033 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6034 compare_op, operands[2],
6037 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6038 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6043 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6044 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6049 return 1; /* DONE */
/* Expand a floating-point conditional move.  First tries SSE min/max
   or sse_movsfcc/movdfcc patterns for SF/DF destinations compared in
   the same mode; otherwise falls back to fcmov, converting conditions
   fcmov cannot encode into an integer setcc compared against zero.
   NOTE(review): listing shows only selected lines of this function.  */
6053 ix86_expand_fp_movcc (operands)
6058 rtx compare_op, second_test, bypass_test;
6060 /* For SF/DFmode conditional moves based on comparisons
6061 in same mode, we may want to use SSE min/max instructions. */
6062 if (((TARGET_SSE && GET_MODE (operands[0]) == SFmode)
6063 || (TARGET_SSE2 && GET_MODE (operands[0]) == DFmode))
6064 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
6065 /* We may be called from the post-reload splitter. */
6066 && (!REG_P (operands[0])
6067 || SSE_REG_P (operands[0])
6068 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
6070 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
6071 code = GET_CODE (operands[1]);
6073 /* See if we have (cross) match between comparison operands and
6074 conditional move operands. */
6075 if (rtx_equal_p (operands[2], op1))
6080 code = reverse_condition_maybe_unordered (code);
6082 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
6084 /* Check for min operation. */
6087 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
6088 if (memory_operand (op0, VOIDmode))
6089 op0 = force_reg (GET_MODE (operands[0]), op0);
6090 if (GET_MODE (operands[0]) == SFmode)
6091 emit_insn (gen_minsf3 (operands[0], op0, op1));
6093 emit_insn (gen_mindf3 (operands[0], op0, op1));
6096 /* Check for max operation. */
6099 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
6100 if (memory_operand (op0, VOIDmode))
6101 op0 = force_reg (GET_MODE (operands[0]), op0);
6102 if (GET_MODE (operands[0]) == SFmode)
6103 emit_insn (gen_maxsf3 (operands[0], op0, op1));
6105 emit_insn (gen_maxdf3 (operands[0], op0, op1));
6109 /* Manage condition to be sse_comparison_operator.  In case we are
6110 in non-ieee mode, try to canonicalize the destination operand
6111 to be first in the comparison - this helps reload to avoid extra
6113 if (!sse_comparison_operator (operands[1], VOIDmode)
6114 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
6116 rtx tmp = ix86_compare_op0;
6117 ix86_compare_op0 = ix86_compare_op1;
6118 ix86_compare_op1 = tmp;
6119 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
6120 VOIDmode, ix86_compare_op0,
6123 /* Similarly try to manage result to be first operand of conditional
6125 if (rtx_equal_p (operands[0], operands[3]))
6127 rtx tmp = operands[2];
6128 operands[2] = operands[3];
6130 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
6131 (GET_CODE (operands[1])),
6132 VOIDmode, ix86_compare_op0,
6135 if (GET_MODE (operands[0]) == SFmode)
6136 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
6137 operands[2], operands[3],
6138 ix86_compare_op0, ix86_compare_op1));
6140 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
6141 operands[2], operands[3],
6142 ix86_compare_op0, ix86_compare_op1));
6146 /* The floating point conditional move instructions don't directly
6147 support conditions resulting from a signed integer comparison. */
6149 code = GET_CODE (operands[1]);
6150 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
6152 /* The floating point conditional move instructions don't directly
6153 support signed integer comparisons. */
6155 if (!fcmov_comparison_operator (compare_op, VOIDmode))
6157 if (second_test != NULL || bypass_test != NULL)
/* Reduce to a QImode flag and compare it against zero instead.  */
6159 tmp = gen_reg_rtx (QImode);
6160 ix86_expand_setcc (code, tmp);
6162 ix86_compare_op0 = tmp;
6163 ix86_compare_op1 = const0_rtx;
6164 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
/* Copy any arm the destination overlaps before chained fcmovs.  */
6166 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
6168 tmp = gen_reg_rtx (GET_MODE (operands[0]));
6169 emit_move_insn (tmp, operands[3]);
6172 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
6174 tmp = gen_reg_rtx (GET_MODE (operands[0]));
6175 emit_move_insn (tmp, operands[2]);
6179 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6180 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6185 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6186 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6191 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6192 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6200 /* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
6201 works for floating point parameters and nonoffsetable memories.
6202 For pushes, it returns just stack offsets; the values will be saved
6203 in the right order.  Maximally three parts are generated.
   Returns the number of parts (2 for 8-byte modes, 3 for TFmode).
   NOTE(review): listing shows only selected lines of this function.  */
6206 ix86_split_to_parts (operand, parts, mode)
6209 enum machine_mode mode;
6211 int size = mode == TFmode ? 3 : GET_MODE_SIZE (mode) / 4;
6213 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
6215 if (size < 2 || size > 3)
6218 /* Optimize constant pool reference to immediates.  This is used by fp moves,
6219 that force all constants to memory to allow combining. */
6221 if (GET_CODE (operand) == MEM
6222 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
6223 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
6224 operand = get_pool_constant (XEXP (operand, 0));
6226 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
6228 /* The only non-offsetable memories we handle are pushes. */
6229 if (! push_operand (operand, VOIDmode))
6232 PUT_MODE (operand, SImode);
6233 parts[0] = parts[1] = parts[2] = operand;
6238 split_di (&operand, 1, &parts[0], &parts[1]);
6241 if (REG_P (operand))
/* Hard regs come in consecutive-regno groups only before reload.  */
6243 if (!reload_completed)
6245 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
6246 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
6248 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
6250 else if (offsettable_memref_p (operand))
6252 PUT_MODE (operand, SImode);
6254 parts[1] = adj_offsettable_operand (operand, 4);
6256 parts[2] = adj_offsettable_operand (operand, 8);
/* FP constants are decomposed into their target word images.  */
6258 else if (GET_CODE (operand) == CONST_DOUBLE)
6263 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
6268 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
6269 parts[2] = GEN_INT (l[2]);
6272 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
6277 parts[1] = GEN_INT (l[1]);
6278 parts[0] = GEN_INT (l[0]);
6288 /* Emit insns to perform a move or push of DI, DF, and XF values.
6289 Return false when normal moves are needed; true when all required
6290 insns have been emitted.  Operands 2-4 contain the input values
6291 int the correct order; operands 5-7 contain the output values.
   Splits both operands into SImode parts, reorders parts to avoid
   clobbering a source address register, and handles pushes directly.
   NOTE(review): listing shows only selected lines of this function.  */
6294 ix86_split_long_move (operands1)
6303 /* Make our own copy to avoid clobbering the operands. */
6304 operands[0] = copy_rtx (operands1[0]);
6305 operands[1] = copy_rtx (operands1[1]);
6307 /* The only non-offsettable memory we handle is push. */
6308 if (push_operand (operands[0], VOIDmode))
6310 else if (GET_CODE (operands[0]) == MEM
6311 && ! offsettable_memref_p (operands[0]))
6314 size = ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
6315 ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));
6317 /* When emitting push, take care for source operands on the stack. */
6318 if (push && GET_CODE (operands[1]) == MEM
6319 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
6322 part[1][1] = part[1][2];
6323 part[1][0] = part[1][1];
6326 /* We need to do copy in the right order in case an address register
6327 of the source overlaps the destination. */
6328 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
6330 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
6332 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
6335 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
6338 /* Collision in the middle part can be handled by reordering. */
6339 if (collisions == 1 && size == 3
6340 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
6343 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
6344 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
6347 /* If there are more collisions, we can't handle it by reordering.
6348 Do an lea to the last part and use only one colliding move. */
6349 else if (collisions > 1)
6352 emit_insn (gen_rtx_SET (VOIDmode, part[0][size - 1],
6353 XEXP (part[1][0], 0)));
6354 part[1][0] = change_address (part[1][0], SImode, part[0][size - 1]);
6355 part[1][1] = adj_offsettable_operand (part[1][0], 4);
6357 part[1][2] = adj_offsettable_operand (part[1][0], 8);
6365 /* We use only first 12 bytes of TFmode value, but for pushing we
6366 are required to adjust stack as if we were pushing real 16byte
6368 if (GET_MODE (operands1[0]) == TFmode)
6369 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
/* Push highest part first so the words land in memory order.  */
6371 emit_insn (gen_push (part[1][2]));
6373 emit_insn (gen_push (part[1][1]));
6374 emit_insn (gen_push (part[1][0]));
6378 /* Choose correct order to not overwrite the source before it is copied. */
6379 if ((REG_P (part[0][0])
6380 && REG_P (part[1][1])
6381 && (REGNO (part[0][0]) == REGNO (part[1][1])
6383 && REGNO (part[0][0]) == REGNO (part[1][2]))))
6385 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Emit the moves high-to-low via operands 2-7.  */
6389 operands1[2] = part[0][2];
6390 operands1[3] = part[0][1];
6391 operands1[4] = part[0][0];
6392 operands1[5] = part[1][2];
6393 operands1[6] = part[1][1];
6394 operands1[7] = part[1][0];
6398 operands1[2] = part[0][1];
6399 operands1[3] = part[0][0];
6400 operands1[5] = part[1][1];
6401 operands1[6] = part[1][0];
/* Otherwise low-to-high order is safe.  */
6408 operands1[2] = part[0][0];
6409 operands1[3] = part[0][1];
6410 operands1[4] = part[0][2];
6411 operands1[5] = part[1][0];
6412 operands1[6] = part[1][1];
6413 operands1[7] = part[1][2];
6417 operands1[2] = part[0][0];
6418 operands1[3] = part[0][1];
6419 operands1[5] = part[1][0];
6420 operands1[6] = part[1][1];
/* Split a DImode left shift into SImode insns.  Constant counts are
   expanded directly (count >= 32: move low into high and clear low;
   otherwise shld + shl).  Variable counts use shld/shl plus a
   fix-up for counts >= 32, using cmove when available (SCRATCH may
   hold the zero), else a conditional-adjust pattern.
   NOTE(review): listing shows only selected lines of this function.  */
6428 ix86_split_ashldi (operands, scratch)
6429 rtx *operands, scratch;
6431 rtx low[2], high[2];
6434 if (GET_CODE (operands[2]) == CONST_INT)
6436 split_di (operands, 2, low, high);
6437 count = INTVAL (operands[2]) & 63;
6441 emit_move_insn (high[0], low[1]);
6442 emit_move_insn (low[0], const0_rtx);
6445 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
6449 if (!rtx_equal_p (operands[0], operands[1]))
6450 emit_move_insn (operands[0], operands[1]);
6451 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
6452 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
6457 if (!rtx_equal_p (operands[0], operands[1]))
6458 emit_move_insn (operands[0], operands[1]);
6460 split_di (operands, 1, low, high);
6462 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
6463 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
/* count >= 32 fix-up: with cmove, conditionally swap in zero.  */
6465 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
6467 if (! no_new_pseudos)
6468 scratch = force_reg (SImode, const0_rtx);
6470 emit_move_insn (scratch, const0_rtx);
6472 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
6476 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a DImode arithmetic right shift into SImode insns.  Constant
   counts: count >= 32 moves high into low and sign-fills high with
   a 31-bit ashr; otherwise shrd + sar.  Variable counts use
   shrd/sar plus a fix-up for counts >= 32 (SCRATCH holds the sign
   word for the cmove path).
   NOTE(review): listing shows only selected lines of this function.  */
6481 ix86_split_ashrdi (operands, scratch)
6482 rtx *operands, scratch;
6484 rtx low[2], high[2];
6487 if (GET_CODE (operands[2]) == CONST_INT)
6489 split_di (operands, 2, low, high);
6490 count = INTVAL (operands[2]) & 63;
6494 emit_move_insn (low[0], high[1]);
6496 if (! reload_completed)
6497 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
6500 emit_move_insn (high[0], low[0]);
6501 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
6505 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
6509 if (!rtx_equal_p (operands[0], operands[1]))
6510 emit_move_insn (operands[0], operands[1]);
6511 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
6512 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
6517 if (!rtx_equal_p (operands[0], operands[1]))
6518 emit_move_insn (operands[0], operands[1]);
6520 split_di (operands, 1, low, high);
6522 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
6523 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
/* count >= 32 fix-up: scratch = sign extension of high word.  */
6525 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
6527 if (! no_new_pseudos)
6528 scratch = gen_reg_rtx (SImode);
6529 emit_move_insn (scratch, high[0]);
6530 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
6531 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
6535 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a DImode logical right shift into SImode insns.  Mirrors
   ix86_split_ashrdi but zero-fills: count >= 32 moves high into low
   and clears high; otherwise shrd + shr.  Variable counts reuse the
   shift_adj patterns with the argument order reversed.
   NOTE(review): listing shows only selected lines of this function.  */
6540 ix86_split_lshrdi (operands, scratch)
6541 rtx *operands, scratch;
6543 rtx low[2], high[2];
6546 if (GET_CODE (operands[2]) == CONST_INT)
6548 split_di (operands, 2, low, high);
6549 count = INTVAL (operands[2]) & 63;
6553 emit_move_insn (low[0], high[1]);
6554 emit_move_insn (high[0], const0_rtx);
6557 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
6561 if (!rtx_equal_p (operands[0], operands[1]))
6562 emit_move_insn (operands[0], operands[1]);
6563 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
6564 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
6569 if (!rtx_equal_p (operands[0], operands[1]))
6570 emit_move_insn (operands[0], operands[1]);
6572 split_di (operands, 1, low, high);
6574 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
6575 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
6577 /* Heh.  By reversing the arguments, we can reuse this pattern. */
6578 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
6580 if (! no_new_pseudos)
6581 scratch = force_reg (SImode, const0_rtx);
6583 emit_move_insn (scratch, const0_rtx);
6585 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
6589 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
6593 /* Expand the appropriate insns for doing strlen if not just doing
6596 out = result, initialized with the start address
6597 align_rtx = alignment of the address.
6598 scratch = scratch register, initialized with the startaddress when
6599 not aligned, otherwise undefined
6601 This is just the body.  It needs the initialisations mentioned above and
6602 some address computing at the end.  These things are done in i386.md.
   Strategy: byte-compare up to 3 leading bytes until OUT is 4-aligned,
   then loop on whole words using the (x - 0x01010101) & ~x & 0x80808080
   zero-byte test, finally locate the exact zero byte within the word.
   NOTE(review): listing shows only selected lines of this function.  */
6605 ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
6606 rtx out, align_rtx, scratch;
6610 rtx align_2_label = NULL_RTX;
6611 rtx align_3_label = NULL_RTX;
6612 rtx align_4_label = gen_label_rtx ();
6613 rtx end_0_label = gen_label_rtx ();
6615 rtx tmpreg = gen_reg_rtx (SImode);
6618 if (GET_CODE (align_rtx) == CONST_INT)
6619 align = INTVAL (align_rtx);
6621 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
6623 /* Is there a known alignment and is it less than 4? */
6626 /* Is there a known alignment and is it not 2? */
6629 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
6630 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
6632 /* Leave just the 3 lower bits. */
6633 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
6634 NULL_RTX, 0, OPTAB_WIDEN);
6636 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
6637 SImode, 1, 0, align_4_label);
6638 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
6639 SImode, 1, 0, align_2_label);
6640 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
6641 SImode, 1, 0, align_3_label);
6645 /* Since the alignment is 2, we have to check 2 or 0 bytes;
6646 check if is aligned to 4 - byte. */
6648 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
6649 NULL_RTX, 0, OPTAB_WIDEN);
6651 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
6652 SImode, 1, 0, align_4_label);
6655 mem = gen_rtx_MEM (QImode, out);
6657 /* Now compare the bytes. */
6659 /* Compare the first n unaligned byte on a byte per byte basis. */
6660 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6661 QImode, 1, 0, end_0_label);
6663 /* Increment the address. */
6664 emit_insn (gen_addsi3 (out, out, const1_rtx));
6666 /* Not needed with an alignment of 2 */
6669 emit_label (align_2_label);
6671 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6672 QImode, 1, 0, end_0_label);
6674 emit_insn (gen_addsi3 (out, out, const1_rtx));
6676 emit_label (align_3_label);
6679 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6680 QImode, 1, 0, end_0_label);
6682 emit_insn (gen_addsi3 (out, out, const1_rtx));
6685 /* Generate loop to check 4 bytes at a time.  It is not a good idea to
6686 align this loop.  It gives only huge programs, but does not help to
6688 emit_label (align_4_label);
6690 mem = gen_rtx_MEM (SImode, out);
6691 emit_move_insn (scratch, mem);
6692 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
6694 /* This formula yields a nonzero result iff one of the bytes is zero.
6695 This saves three branches inside loop and many cycles. */
6697 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
6698 emit_insn (gen_one_cmplsi2 (scratch, scratch));
6699 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
6700 emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
6701 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0,
6702 SImode, 1, 0, align_4_label);
/* Branch-free variant: use cmoves to step past non-zero half-words.  */
6706 rtx reg = gen_reg_rtx (SImode);
6707 emit_move_insn (reg, tmpreg);
6708 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
6710 /* If zero is not in the first two bytes, move two bytes forward. */
6711 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
6712 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6713 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
6714 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
6715 gen_rtx_IF_THEN_ELSE (SImode, tmp,
6718 /* Emit lea manually to avoid clobbering of flags. */
6719 emit_insn (gen_rtx_SET (SImode, reg,
6720 gen_rtx_PLUS (SImode, out, GEN_INT (2))));
6722 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6723 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
6724 emit_insn (gen_rtx_SET (VOIDmode, out,
6725 gen_rtx_IF_THEN_ELSE (SImode, tmp,
/* Branching variant for targets without cmove.  */
6732 rtx end_2_label = gen_label_rtx ();
6733 /* Is zero in the first two bytes? */
6735 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
6736 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6737 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
6738 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
6739 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
6741 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
6742 JUMP_LABEL (tmp) = end_2_label;
6744 /* Not in the first two.  Move two bytes forward. */
6745 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
6746 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
6748 emit_label (end_2_label);
6752 /* Avoid branch in fixing the byte. */
6753 tmpreg = gen_lowpart (QImode, tmpreg);
6754 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
6755 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
6757 emit_label (end_0_label);
6760 /* Clear stack slot assignments remembered from previous functions.
6761 This is called from INIT_EXPANDERS once before RTL is emitted for each
   function; allocates a zeroed per-function machine_function record.  */
6765 ix86_init_machine_status (p)
6768 p->machine = (struct machine_function *)
6769 xcalloc (1, sizeof (struct machine_function));
6772 /* Mark machine specific bits of P for GC: walks every (mode, slot)
   entry of the per-function stack_locals table and marks its rtx.  */
6774 ix86_mark_machine_status (p)
6777 struct machine_function *machine = p->machine;
6778 enum machine_mode mode;
6784 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
6785 mode = (enum machine_mode) ((int) mode + 1))
6786 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
6787 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
/* Free the machine-specific function data of P.
   NOTE(review): body not visible in this sampled listing.  */
6791 ix86_free_machine_status (p)
6798 /* Return a MEM corresponding to a stack slot with mode MODE.
6799 Allocate a new slot if necessary.
6801 The RTL for a function can have several slots available: N is
6802 which slot to use.  Slots are cached per (mode, N) in
   ix86_stack_locals and reused on later calls.  */
6805 assign_386_stack_local (mode, n)
6806 enum machine_mode mode;
6809 if (n < 0 || n >= MAX_386_STACK_LOCALS)
6812 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
6813 ix86_stack_locals[(int) mode][n]
6814 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
6816 return ix86_stack_locals[(int) mode][n];
6819 /* Calculate the length of the memory address in the instruction
6820 encoding.  Does not include the one-byte modrm, opcode, or prefix.
   Decomposes ADDR with ix86_decompose_address and sums the bytes
   needed for SIB, disp8/disp32 as the visible cases show.  */
6823 memory_address_length (addr)
6826 struct ix86_address parts;
6827 rtx base, index, disp;
6830 if (GET_CODE (addr) == PRE_DEC
6831 || GET_CODE (addr) == POST_INC)
6834 if (! ix86_decompose_address (addr, &parts))
6838 index = parts.index;
6842 /* Register Indirect. */
6843 if (base && !index && !disp)
6845 /* Special cases: ebp and esp need the two-byte modrm form. */
6846 if (addr == stack_pointer_rtx
6847 || addr == arg_pointer_rtx
6848 || addr == frame_pointer_rtx
6849 || addr == hard_frame_pointer_rtx)
6853 /* Direct Addressing. */
6854 else if (disp && !base && !index)
6859 /* Find the length of the displacement constant. */
/* 'K' constraint: displacement fits in a signed 8-bit immediate.  */
6862 if (GET_CODE (disp) == CONST_INT
6863 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
6869 /* An index requires the two-byte modrm form. */
6877 /* Compute default value for "length_immediate" attribute.  When SHORTFORM
6878 is set, expect that the insn has an 8-bit immediate alternative. */
6880 ix86_attr_length_immediate_default (insn, shortform)
6886 extract_insn_cached (insn);
/* Scan operands last-to-first looking for the immediate.  */
6887 for (i = recog_data.n_operands - 1; i >= 0; --i)
6888 if (CONSTANT_P (recog_data.operand[i]))
/* 'K' constraint: value fits in a signed 8-bit immediate.  */
6893 && GET_CODE (recog_data.operand[i]) == CONST_INT
6894 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
/* Otherwise the immediate width follows the insn's mode attribute.  */
6898 switch (get_attr_mode (insn))
6910 fatal_insn ("Unknown insn mode", insn);
6916 /* Compute default value for "length_address" attribute. */
6918 ix86_attr_length_address_default (insn)
6922 extract_insn_cached (insn);
/* Use the first MEM operand found (scanning backwards); x86 insns have
   at most one memory operand.  */
6923 for (i = recog_data.n_operands - 1; i >= 0; --i)
6924 if (GET_CODE (recog_data.operand[i]) == MEM)
6926 return memory_address_length (XEXP (recog_data.operand[i], 0));
6932 /* Return the maximum number of instructions a cpu can issue. */
/* Per-processor issue widths; the actual return values for each case
   are not visible here.  */
6939 case PROCESSOR_PENTIUM:
6943 case PROCESSOR_PENTIUMPRO:
6944 case PROCESSOR_PENTIUM4:
6945 case PROCESSOR_ATHLON:
6953 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
6954 by DEP_INSN and nothing set by DEP_INSN. */
6957 ix86_flags_dependant (insn, dep_insn, insn_type)
6959 enum attr_type insn_type;
6963 /* Simplify the test for uninteresting insns. */
6964 if (insn_type != TYPE_SETCC
6965 && insn_type != TYPE_ICMOV
6966 && insn_type != TYPE_FCMOV
6967 && insn_type != TYPE_IBR)
6970 if ((set = single_set (dep_insn)) != 0)
6972 set = SET_DEST (set);
6975 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
6976 && XVECLEN (PATTERN (dep_insn), 0) == 2
6977 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
6978 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
6980 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
6981 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
6986 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
6989 /* This test is true if the dependant insn reads the flags but
6990 not any other potentially set register. */
6991 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
6994 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
7000 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
7001 address with operands set by DEP_INSN. */
7004 ix86_agi_dependant (insn, dep_insn, insn_type)
7006 enum attr_type insn_type;
/* LEA computes an address without a MEM; pull the address expression
   straight out of the SET_SRC of its pattern.  */
7010 if (insn_type == TYPE_LEA)
7012 addr = PATTERN (insn);
7013 if (GET_CODE (addr) == SET)
7015 else if (GET_CODE (addr) == PARALLEL
7016 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
7017 addr = XVECEXP (addr, 0, 0);
7020 addr = SET_SRC (addr);
/* For everything else, find the (at most one) MEM operand and use its
   address.  */
7025 extract_insn_cached (insn);
7026 for (i = recog_data.n_operands - 1; i >= 0; --i)
7027 if (GET_CODE (recog_data.operand[i]) == MEM)
7029 addr = XEXP (recog_data.operand[i], 0);
/* AGI stall exists iff DEP_INSN writes a register used in the address.  */
7036 return modified_in_p (addr, dep_insn);
/* Scheduler hook: adjust the latency COST of the dependency LINK between
   DEP_INSN (producer) and INSN (consumer) for the target CPU.  */
7040 ix86_adjust_cost (insn, link, dep_insn, cost)
7041 rtx insn, link, dep_insn;
7044 enum attr_type insn_type, dep_insn_type;
7045 enum attr_memory memory;
7047 int dep_insn_code_number;
7049 /* Anti and output dependencies have zero cost on all CPUs. */
7050 if (REG_NOTE_KIND (link) != 0)
7053 dep_insn_code_number = recog_memoized (dep_insn);
7055 /* If we can't recognize the insns, we can't really do anything. */
7056 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
7059 insn_type = get_attr_type (insn);
7060 dep_insn_type = get_attr_type (dep_insn);
7062 /* Prologue and epilogue allocators can have a false dependency on ebp.
7063 This results in one cycle extra stall on Pentium prologue scheduling,
7064 so handle this important case manually. */
7065 if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
7066 && dep_insn_type == TYPE_ALU
7067 && !reg_mentioned_p (stack_pointer_rtx, insn))
7072 case PROCESSOR_PENTIUM:
7073 /* Address Generation Interlock adds a cycle of latency. */
7074 if (ix86_agi_dependant (insn, dep_insn, insn_type))
7077 /* ??? Compares pair with jump/setcc. */
7078 if (ix86_flags_dependant (insn, dep_insn, insn_type))
7081 /* Floating point stores require value to be ready one cycle earlier. */
7082 if (insn_type == TYPE_FMOV
7083 && get_attr_memory (insn) == MEMORY_STORE
7084 && !ix86_agi_dependant (insn, dep_insn, insn_type))
7088 case PROCESSOR_PENTIUMPRO:
7089 /* Since we can't represent delayed latencies of load+operation,
7090 increase the cost here for non-imov insns. */
/* BUG(review): precedence -- "==" binds tighter than "=", so this
   stores the *result of the comparison* (0 or 1) into MEMORY instead
   of the attribute value, and "memory == MEMORY_BOTH" below can never
   test the real attribute.  Should read
   "((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD)"; compare
   the correctly parenthesized PROCESSOR_ATHLON case further down.  */
7091 if (dep_insn_type != TYPE_IMOV
7092 && dep_insn_type != TYPE_FMOV
7093 && ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
7094 || memory == MEMORY_BOTH))
7097 /* INT->FP conversion is expensive. */
7098 if (get_attr_fp_int_src (dep_insn))
7101 /* There is one cycle extra latency between an FP op and a store. */
7102 if (insn_type == TYPE_FMOV
7103 && (set = single_set (dep_insn)) != NULL_RTX
7104 && (set2 = single_set (insn)) != NULL_RTX
7105 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
7106 && GET_CODE (SET_DEST (set2)) == MEM)
7111 /* The esp dependency is resolved before the instruction is really
7113 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
7114 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
7117 /* Since we can't represent delayed latencies of load+operation,
7118 increase the cost here for non-imov insns. */
/* BUG(review): same missing parentheses as the PENTIUMPRO case above --
   MEMORY receives the comparison result, not the attribute.  */
7119 if ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
7120 || memory == MEMORY_BOTH)
7121 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
7123 /* INT->FP conversion is expensive. */
7124 if (get_attr_fp_int_src (dep_insn))
7128 case PROCESSOR_ATHLON:
/* Correct parenthesization: assign the attribute first, then compare.  */
7129 if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
7130 || memory == MEMORY_BOTH)
7132 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
/* Scheduling state for the PPro decoder model: which insns occupy the
   three decoders this cycle, and how many insns have issued.  */
7147 struct ppro_sched_data
7150 int issued_this_cycle;
/* Like get_attr_length, but safe on unrecognizable insns (falls back to
   a default when recog fails; fallback value not shown here).  */
7155 ix86_safe_length (insn)
7158 if (recog_memoized (insn) >= 0)
7159 return get_attr_length(insn);
/* NOTE(review): despite the name, this returns the *full* insn length
   via get_attr_length -- identical to ix86_safe_length above.  A
   prefix-length attribute accessor was probably intended; confirm
   against the callers in ix86_pent_find_pair, which use it as
   "length <= 7 + prefix length".  */
7165 ix86_safe_length_prefix (insn)
7168 if (recog_memoized (insn) >= 0)
7169 return get_attr_length(insn);
/* Like get_attr_memory, but safe on unrecognizable insns: fall back to
   the conservative MEMORY_UNKNOWN.  */
7174 static enum attr_memory
7175 ix86_safe_memory (insn)
7178 if (recog_memoized (insn) >= 0)
7179 return get_attr_memory(insn);
7181 return MEMORY_UNKNOWN;
/* Like get_attr_pent_pair, but safe on unrecognizable insns: treat them
   as non-pairable (PENT_PAIR_NP).  */
7184 static enum attr_pent_pair
7185 ix86_safe_pent_pair (insn)
7188 if (recog_memoized (insn) >= 0)
7189 return get_attr_pent_pair(insn);
7191 return PENT_PAIR_NP;
/* Like get_attr_ppro_uops, but safe on unrecognizable insns: assume the
   worst case (PPRO_UOPS_MANY, decoder 0 only).  */
7194 static enum attr_ppro_uops
7195 ix86_safe_ppro_uops (insn)
7198 if (recog_memoized (insn) >= 0)
7199 return get_attr_ppro_uops (insn);
7201 return PPRO_UOPS_MANY;
/* Debug helper: print the UIDs of the insns currently occupying the
   three PPro decoders to the scheduler DUMP file.  Prints nothing when
   decoder 0 is empty (the packet is empty in that case).  */
7205 ix86_dump_ppro_packet (dump)
7208 if (ix86_sched_data.ppro.decode[0])
7210 fprintf (dump, "PPRO packet: %d",
7211 INSN_UID (ix86_sched_data.ppro.decode[0]));
7212 if (ix86_sched_data.ppro.decode[1])
7213 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
7214 if (ix86_sched_data.ppro.decode[2])
7215 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
7220 /* We're beginning a new block. Initialize data structures as necessary. */
7223 ix86_sched_init (dump, sched_verbose)
7224 FILE *dump ATTRIBUTE_UNUSED;
7225 int sched_verbose ATTRIBUTE_UNUSED;
/* Reset all per-block scheduling state (decoder slots, issue counts).  */
7227 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
7230 /* Shift INSN to SLOT, and shift everything else down.
   INSNP points into the ready array; the element is moved to SLOT and
   the entries in between slide one position toward INSNP.  */
7233 ix86_reorder_insn (insnp, slot)
7240 insnp[0] = insnp[1];
7241 while (++insnp != slot);
7246 /* Find an instruction with given pairability and minimal amount of cycles
7247 lost by the fact that the CPU waits for both pipelines to finish before
7248 reading next instructions. Also take care that both instructions together
7249 can not exceed 7 bytes. */
7252 ix86_pent_find_pair (e_ready, ready, type, first)
7255 enum attr_pent_pair type;
7258 int mincycles, cycles;
7259 enum attr_pent_pair tmp;
7260 enum attr_memory memory;
7261 rtx *insnp, *bestinsnp = NULL;
/* FIRST itself must fit the 7-byte pairing window.  */
7263 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
7266 memory = ix86_safe_memory (first);
7267 cycles = result_ready_cost (first);
7268 mincycles = INT_MAX;
/* Scan the ready list backwards for a candidate of the requested
   pairability; stop early once a zero-cost pairing is found.  */
7270 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
7271 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
7272 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
7274 enum attr_memory second_memory;
7275 int secondcycles, currentcycles;
7277 second_memory = ix86_safe_memory (*insnp);
7278 secondcycles = result_ready_cost (*insnp);
/* Cost of the pairing = pipelines' latency mismatch ...  */
7279 currentcycles = abs (cycles - secondcycles);
7281 if (secondcycles >= 1 && cycles >= 1)
7283 /* Two read/modify/write instructions together takes two
7285 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
7288 /* Read modify/write instruction followed by read/modify
7289 takes one cycle longer. */
7290 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
7291 && tmp != PENT_PAIR_UV
7292 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
/* Keep the candidate with the fewest lost cycles.  */
7295 if (currentcycles < mincycles)
7296 bestinsnp = insnp, mincycles = currentcycles;
7302 /* Subroutines of ix86_sched_reorder. */
/* Pentium U/V-pipe pairing: try to place a pairable partner next to the
   insn at the end of the ready queue (the one issued first).  */
7305 ix86_sched_reorder_pentium (ready, e_ready)
7309 enum attr_pent_pair pair1, pair2;
7312 /* This wouldn't be necessary if Haifa knew that static insn ordering
7313 is important to which pipe an insn is issued to. So we have to make
7314 some minor rearrangements. */
7316 pair1 = ix86_safe_pent_pair (*e_ready);
7318 /* If the first insn is non-pairable, let it be. */
7319 if (pair1 == PENT_PAIR_NP)
7322 pair2 = PENT_PAIR_NP;
7325 /* If the first insn is UV or PV pairable, search for a PU
7327 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
7329 insnp = ix86_pent_find_pair (e_ready-1, ready,
7330 PENT_PAIR_PU, *e_ready);
7332 pair2 = PENT_PAIR_PU;
7335 /* If the first insn is PU or UV pairable, search for a PV
7337 if (pair2 == PENT_PAIR_NP
7338 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
7340 insnp = ix86_pent_find_pair (e_ready-1, ready,
7341 PENT_PAIR_PV, *e_ready);
7343 pair2 = PENT_PAIR_PV;
7346 /* If the first insn is pairable, search for a UV
7348 if (pair2 == PENT_PAIR_NP)
7350 insnp = ix86_pent_find_pair (e_ready-1, ready,
7351 PENT_PAIR_UV, *e_ready);
7353 pair2 = PENT_PAIR_UV;
7356 if (pair2 == PENT_PAIR_NP)
7359 /* Found something! Decide if we need to swap the order. */
/* The U pipe must get the U-only (PU) insn; two UV insns prefer the
   load before the read/modify/write.  */
7360 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
7361 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
7362 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
7363 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
7364 ix86_reorder_insn (insnp, e_ready);
7366 ix86_reorder_insn (insnp, e_ready - 1);
/* PPro decoder scheduling: fill the 4-1-1 decoder template.  Decoder 0
   accepts any insn; decoders 1 and 2 accept only single-uop insns.  */
7370 ix86_sched_reorder_ppro (ready, e_ready)
7375 enum attr_ppro_uops cur_uops;
7376 int issued_this_cycle;
7380 /* At this point .ppro.decode contains the state of the three
7381 decoders from last "cycle". That is, those insns that were
7382 actually independent. But here we're scheduling for the
7383 decoder, and we may find things that are decodable in the
7386 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
7387 issued_this_cycle = 0;
7390 cur_uops = ix86_safe_ppro_uops (*insnp);
7392 /* If the decoders are empty, and we've a complex insn at the
7393 head of the priority queue, let it issue without complaint. */
7394 if (decode[0] == NULL)
7396 if (cur_uops == PPRO_UOPS_MANY)
7402 /* Otherwise, search for a 2-4 uop insn to issue. */
7403 while (cur_uops != PPRO_UOPS_FEW)
7407 cur_uops = ix86_safe_ppro_uops (*--insnp);
7410 /* If so, move it to the head of the line. */
7411 if (cur_uops == PPRO_UOPS_FEW)
7412 ix86_reorder_insn (insnp, e_ready);
7414 /* Issue the head of the queue. */
7415 issued_this_cycle = 1;
7416 decode[0] = *e_ready--;
7419 /* Look for simple insns to fill in the other two slots. */
7420 for (i = 1; i < 3; ++i)
7421 if (decode[i] == NULL)
7423 if (ready >= e_ready)
7427 cur_uops = ix86_safe_ppro_uops (*insnp);
7428 while (cur_uops != PPRO_UOPS_ONE)
7432 cur_uops = ix86_safe_ppro_uops (*--insnp);
7435 /* Found one. Move it to the head of the queue and issue it. */
7436 if (cur_uops == PPRO_UOPS_ONE)
7438 ix86_reorder_insn (insnp, e_ready);
7439 decode[i] = *e_ready--;
7440 issued_this_cycle++;
7444 /* ??? Didn't find one. Ideally, here we would do a lazy split
7445 of 2-uop insns, issue one and queue the other. */
/* Always report at least one issue so ix86_variable_issue's
   decrement does not go negative.  */
7449 if (issued_this_cycle == 0)
7450 issued_this_cycle = 1;
7451 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
7454 /* We are about to begin issuing insns for this clock cycle.
7455 Override the default sort algorithm to better slot instructions. */
7457 ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
7458 FILE *dump ATTRIBUTE_UNUSED;
7459 int sched_verbose ATTRIBUTE_UNUSED;
7462 int clock_var ATTRIBUTE_UNUSED;
/* E_READY points at the last element of the ready queue -- the insn
   the scheduler will issue first.  */
7464 rtx *e_ready = ready + n_ready - 1;
7474 case PROCESSOR_PENTIUM:
7475 ix86_sched_reorder_pentium (ready, e_ready);
7478 case PROCESSOR_PENTIUMPRO:
7479 ix86_sched_reorder_ppro (ready, e_ready);
7484 return ix86_issue_rate ();
7487 /* We are about to issue INSN. Return the number of insns left on the
7488 ready queue that can be issued this cycle. */
7491 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
/* Default for CPUs without a decoder model: plain countdown.  */
7501 return can_issue_more - 1;
7503 case PROCESSOR_PENTIUMPRO:
7505 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
/* A many-uop insn monopolizes decoder 0; flush the current packet.  */
7507 if (uops == PPRO_UOPS_MANY)
7510 ix86_dump_ppro_packet (dump);
7511 ix86_sched_data.ppro.decode[0] = insn;
7512 ix86_sched_data.ppro.decode[1] = NULL;
7513 ix86_sched_data.ppro.decode[2] = NULL;
7515 ix86_dump_ppro_packet (dump);
7516 ix86_sched_data.ppro.decode[0] = NULL;
/* A 2-4 uop insn starts a fresh packet in decoder 0.  */
7518 else if (uops == PPRO_UOPS_FEW)
7521 ix86_dump_ppro_packet (dump);
7522 ix86_sched_data.ppro.decode[0] = insn;
7523 ix86_sched_data.ppro.decode[1] = NULL;
7524 ix86_sched_data.ppro.decode[2] = NULL;
/* Single-uop insns fill the first free decoder slot; when all three
   are full, the packet is complete -- dump it and start over.  */
7528 for (i = 0; i < 3; ++i)
7529 if (ix86_sched_data.ppro.decode[i] == NULL)
7531 ix86_sched_data.ppro.decode[i] = insn;
7539 ix86_dump_ppro_packet (dump);
7540 ix86_sched_data.ppro.decode[0] = NULL;
7541 ix86_sched_data.ppro.decode[1] = NULL;
7542 ix86_sched_data.ppro.decode[2] = NULL;
7546 return --ix86_sched_data.ppro.issued_this_cycle;
7550 /* Walk through INSNS and look for MEM references whose address is DSTREG or
7551 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
7555 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
7557 rtx dstref, srcref, dstreg, srcreg;
/* Delegate the pattern walk for each insn to the recursive helper.  */
7561 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
7563 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
7567 /* Subroutine of above to actually do the updating by recursively walking
   the rtx X.  MEMs whose address is exactly DSTREG/SRCREG (pointer
   equality) inherit the attributes of DSTREF/SRCREF respectively.  */
7571 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
7573 rtx dstref, srcref, dstreg, srcreg;
7575 enum rtx_code code = GET_CODE (x);
7576 const char *format_ptr = GET_RTX_FORMAT (code);
7579 if (code == MEM && XEXP (x, 0) == dstreg)
7580 MEM_COPY_ATTRIBUTES (x, dstref);
7581 else if (code == MEM && XEXP (x, 0) == srcreg)
7582 MEM_COPY_ATTRIBUTES (x, srcref);
/* Recurse into every rtx ('e') and rtx-vector ('E') operand.  */
7584 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
7586 if (*format_ptr == 'e')
7587 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
7589 else if (*format_ptr == 'E')
7590 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7591 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
7596 /* Compute the alignment given to a constant that is being placed in memory.
7597 EXP is the constant and ALIGN is the alignment that the object would
7599 The value of this function is used instead of that alignment to align
/* Doubles want 64-bit alignment; 128-bit modes want 16-byte alignment;
   long strings are raised too (presumably to speed up block copies --
   the returned values are not visible here).  */
7603 ix86_constant_alignment (exp, align)
7607 if (TREE_CODE (exp) == REAL_CST)
7609 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
7611 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
7614 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
7621 /* Compute the alignment for a static variable.
7622 TYPE is the data type, and ALIGN is the alignment that
7623 the object would ordinarily have. The value of this function is used
7624 instead of that alignment to align the object. */
7627 ix86_data_alignment (type, align)
/* Large aggregates (>= 256 bits, by the constant-size test) get raised
   alignment.  */
7631 if (AGGREGATE_TYPE_P (type)
7633 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
7634 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
7635 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
/* Element/field mode drives the alignment for arrays, complex types,
   records and scalars alike: DFmode -> 64, 128-bit modes -> 128.  */
7638 if (TREE_CODE (type) == ARRAY_TYPE)
7640 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
7642 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
7645 else if (TREE_CODE (type) == COMPLEX_TYPE)
7648 if (TYPE_MODE (type) == DCmode && align < 64)
7650 if (TYPE_MODE (type) == XCmode && align < 128)
7653 else if ((TREE_CODE (type) == RECORD_TYPE
7654 || TREE_CODE (type) == UNION_TYPE
7655 || TREE_CODE (type) == QUAL_UNION_TYPE)
7656 && TYPE_FIELDS (type))
7658 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
7660 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
7663 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
7664 || TREE_CODE (type) == INTEGER_TYPE)
7666 if (TYPE_MODE (type) == DFmode && align < 64)
7668 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
7675 /* Compute the alignment for a local variable.
7676 TYPE is the data type, and ALIGN is the alignment that
7677 the object would ordinarily have. The value of this macro is used
7678 instead of that alignment to align the object. */
/* Same mode-driven rules as ix86_data_alignment, minus the large-
   aggregate boost (stack slots are cheaper to keep tight).  */
7681 ix86_local_alignment (type, align)
7685 if (TREE_CODE (type) == ARRAY_TYPE)
7687 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
7689 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
7692 else if (TREE_CODE (type) == COMPLEX_TYPE)
7694 if (TYPE_MODE (type) == DCmode && align < 64)
7696 if (TYPE_MODE (type) == XCmode && align < 128)
7699 else if ((TREE_CODE (type) == RECORD_TYPE
7700 || TREE_CODE (type) == UNION_TYPE
7701 || TREE_CODE (type) == QUAL_UNION_TYPE)
7702 && TYPE_FIELDS (type))
7704 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
7706 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
7709 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
7710 || TREE_CODE (type) == INTEGER_TYPE)
7713 if (TYPE_MODE (type) == DFmode && align < 64)
7715 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
/* Shorthand for registering a machine-dependent builtin function.  */
7721 #define def_builtin(NAME, TYPE, CODE) \
7722 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL_PTR)
/* One table row per builtin: the insn pattern to expand to, the
   builtin's source-level name, its enum code, and (for compares) the
   rtx comparison code.  */
7723 struct builtin_description
7725 enum insn_code icode;
7727 enum ix86_builtins code;
7728 enum rtx_code comparison;
/* SSE comis/ucomis builtins.  GT/GE are expressed as LT/LE with the
   final table field set to 1, meaning "swap the operands".  */
7732 static struct builtin_description bdesc_comi[] =
7734 { CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
7735 { CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
7736 { CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
7737 { CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
7738 { CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
7739 { CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
7740 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
7741 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
7742 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
7743 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
7744 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
7745 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
/* Two-operand SSE and MMX builtins.  Rows with a zero name field are
   registered elsewhere under special argument types; the trailing 1 in
   the compare rows again means "swap operands" (GT via LT etc.).  */
7748 static struct builtin_description bdesc_2arg[] =
7751 { CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
7752 { CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
7753 { CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
7754 { CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
7755 { CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
7756 { CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
7757 { CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
7758 { CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
7760 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
7761 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
7762 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
7763 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
7764 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
7765 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
7766 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
7767 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
7768 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
7769 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
7770 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
7771 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
7772 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
7773 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
7774 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
7775 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
7776 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
7777 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
7778 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
7779 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
7780 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
7781 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
7782 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
7783 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
7785 { CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
7786 { CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
7787 { CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
7788 { CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
7790 { CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
7791 { CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
7792 { CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
7793 { CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
7795 { CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
7796 { CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
7797 { CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
7798 { CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
7799 { CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
/* MMX integer arithmetic from here on.  */
7802 { CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
7803 { CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
7804 { CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
7805 { CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
7806 { CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
7807 { CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
7809 { CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
7810 { CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
7811 { CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
7812 { CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
7813 { CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
7814 { CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
7815 { CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
7816 { CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
7818 { CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
7819 { CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
7820 { CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
7822 { CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
7823 { CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
7824 { CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
7825 { CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
7827 { CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
7828 { CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
7830 { CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
7831 { CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
7832 { CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
7833 { CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
7834 { CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
7835 { CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
7837 { CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
7838 { CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
7839 { CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
7840 { CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
7842 { CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
7843 { CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
7844 { CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
7845 { CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
7846 { CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
7847 { CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
/* Nameless rows: registered explicitly with special signatures.  */
7850 { CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
7851 { CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
7852 { CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
7854 { CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
7855 { CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
7857 { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
7858 { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
7859 { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
7860 { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
7861 { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
7862 { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
7864 { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
7865 { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
7866 { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
7867 { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
7868 { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
7869 { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
7871 { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
7872 { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
7873 { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
7874 { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
7876 { CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
7877 { CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
/* One-operand SSE/MMX builtins (movemasks, sqrt/rsqrt/rcp, cvt).  All
   rows are nameless here; they are registered with special signatures.  */
7881 static struct builtin_description bdesc_1arg[] =
7883 { CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
7884 { CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
7886 { CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
7887 { CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
7888 { CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
7890 { CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
7891 { CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
7892 { CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
7893 { CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
7897 /* Expand all the target specific builtins. This is not called if TARGET_MMX
7898 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
7901 ix86_init_builtins ()
7903 struct builtin_description * d;
7905 tree endlink = void_list_node;
7907 tree pchar_type_node = build_pointer_type (char_type_node);
7908 tree pfloat_type_node = build_pointer_type (float_type_node);
7909 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
7910 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
7913 tree int_ftype_v4sf_v4sf
7914 = build_function_type (integer_type_node,
7915 tree_cons (NULL_TREE, V4SF_type_node,
7916 tree_cons (NULL_TREE,
7919 tree v4si_ftype_v4sf_v4sf
7920 = build_function_type (V4SI_type_node,
7921 tree_cons (NULL_TREE, V4SF_type_node,
7922 tree_cons (NULL_TREE,
7925 /* MMX/SSE/integer conversions. */
7926 tree int_ftype_v4sf_int
7927 = build_function_type (integer_type_node,
7928 tree_cons (NULL_TREE, V4SF_type_node,
7929 tree_cons (NULL_TREE,
7933 = build_function_type (integer_type_node,
7934 tree_cons (NULL_TREE, V4SF_type_node,
7937 = build_function_type (integer_type_node,
7938 tree_cons (NULL_TREE, V8QI_type_node,
7941 = build_function_type (integer_type_node,
7942 tree_cons (NULL_TREE, V2SI_type_node,
7945 = build_function_type (V2SI_type_node,
7946 tree_cons (NULL_TREE, integer_type_node,
7948 tree v4sf_ftype_v4sf_int
7949 = build_function_type (integer_type_node,
7950 tree_cons (NULL_TREE, V4SF_type_node,
7951 tree_cons (NULL_TREE, integer_type_node,
7953 tree v4sf_ftype_v4sf_v2si
7954 = build_function_type (V4SF_type_node,
7955 tree_cons (NULL_TREE, V4SF_type_node,
7956 tree_cons (NULL_TREE, V2SI_type_node,
7958 tree int_ftype_v4hi_int
7959 = build_function_type (integer_type_node,
7960 tree_cons (NULL_TREE, V4HI_type_node,
7961 tree_cons (NULL_TREE, integer_type_node,
7963 tree v4hi_ftype_v4hi_int_int
7964 = build_function_type (V4HI_type_node,
7965 tree_cons (NULL_TREE, V4HI_type_node,
7966 tree_cons (NULL_TREE, integer_type_node,
7967 tree_cons (NULL_TREE,
7970 /* Miscellaneous. */
7971 tree v8qi_ftype_v4hi_v4hi
7972 = build_function_type (V8QI_type_node,
7973 tree_cons (NULL_TREE, V4HI_type_node,
7974 tree_cons (NULL_TREE, V4HI_type_node,
7976 tree v4hi_ftype_v2si_v2si
7977 = build_function_type (V4HI_type_node,
7978 tree_cons (NULL_TREE, V2SI_type_node,
7979 tree_cons (NULL_TREE, V2SI_type_node,
7981 tree v4sf_ftype_v4sf_v4sf_int
7982 = build_function_type (V4SF_type_node,
7983 tree_cons (NULL_TREE, V4SF_type_node,
7984 tree_cons (NULL_TREE, V4SF_type_node,
7985 tree_cons (NULL_TREE,
7988 tree v4hi_ftype_v8qi_v8qi
7989 = build_function_type (V4HI_type_node,
7990 tree_cons (NULL_TREE, V8QI_type_node,
7991 tree_cons (NULL_TREE, V8QI_type_node,
7993 tree v2si_ftype_v4hi_v4hi
7994 = build_function_type (V2SI_type_node,
7995 tree_cons (NULL_TREE, V4HI_type_node,
7996 tree_cons (NULL_TREE, V4HI_type_node,
7998 tree v4hi_ftype_v4hi_int
7999 = build_function_type (V4HI_type_node,
8000 tree_cons (NULL_TREE, V4HI_type_node,
8001 tree_cons (NULL_TREE, integer_type_node,
8003 tree di_ftype_di_int
8004 = build_function_type (long_long_unsigned_type_node,
8005 tree_cons (NULL_TREE, long_long_unsigned_type_node,
8006 tree_cons (NULL_TREE, integer_type_node,
8008 tree v8qi_ftype_v8qi_di
8009 = build_function_type (V8QI_type_node,
8010 tree_cons (NULL_TREE, V8QI_type_node,
8011 tree_cons (NULL_TREE,
8012 long_long_integer_type_node,
8014 tree v4hi_ftype_v4hi_di
8015 = build_function_type (V4HI_type_node,
8016 tree_cons (NULL_TREE, V4HI_type_node,
8017 tree_cons (NULL_TREE,
8018 long_long_integer_type_node,
8020 tree v2si_ftype_v2si_di
8021 = build_function_type (V2SI_type_node,
8022 tree_cons (NULL_TREE, V2SI_type_node,
8023 tree_cons (NULL_TREE,
8024 long_long_integer_type_node,
8026 tree void_ftype_void
8027 = build_function_type (void_type_node, endlink);
8028 tree void_ftype_pchar_int
8029 = build_function_type (void_type_node,
8030 tree_cons (NULL_TREE, pchar_type_node,
8031 tree_cons (NULL_TREE, integer_type_node,
8033 tree void_ftype_unsigned
8034 = build_function_type (void_type_node,
8035 tree_cons (NULL_TREE, unsigned_type_node,
8037 tree unsigned_ftype_void
8038 = build_function_type (unsigned_type_node, endlink);
8040 = build_function_type (long_long_unsigned_type_node, endlink);
8042 = build_function_type (intTI_type_node, endlink);
8043 tree v2si_ftype_v4sf
8044 = build_function_type (V2SI_type_node,
8045 tree_cons (NULL_TREE, V4SF_type_node,
8048 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
8049 tree_cons (NULL_TREE, V8QI_type_node,
8050 tree_cons (NULL_TREE,
8053 tree void_ftype_v8qi_v8qi_pchar
8054 = build_function_type (void_type_node, maskmovq_args);
8055 tree v4sf_ftype_pfloat
8056 = build_function_type (V4SF_type_node,
8057 tree_cons (NULL_TREE, pfloat_type_node,
8059 tree v4sf_ftype_float
8060 = build_function_type (V4SF_type_node,
8061 tree_cons (NULL_TREE, float_type_node,
8063 tree v4sf_ftype_float_float_float_float
8064 = build_function_type (V4SF_type_node,
8065 tree_cons (NULL_TREE, float_type_node,
8066 tree_cons (NULL_TREE, float_type_node,
8067 tree_cons (NULL_TREE,
8069 tree_cons (NULL_TREE,
8072 /* @@@ the type is bogus */
8073 tree v4sf_ftype_v4sf_pv2si
8074 = build_function_type (V4SF_type_node,
8075 tree_cons (NULL_TREE, V4SF_type_node,
8076 tree_cons (NULL_TREE, pv2si_type_node,
8078 tree v4sf_ftype_pv2si_v4sf
8079 = build_function_type (V4SF_type_node,
8080 tree_cons (NULL_TREE, V4SF_type_node,
8081 tree_cons (NULL_TREE, pv2si_type_node,
8083 tree void_ftype_pfloat_v4sf
8084 = build_function_type (void_type_node,
8085 tree_cons (NULL_TREE, pfloat_type_node,
8086 tree_cons (NULL_TREE, V4SF_type_node,
8088 tree void_ftype_pdi_di
8089 = build_function_type (void_type_node,
8090 tree_cons (NULL_TREE, pdi_type_node,
8091 tree_cons (NULL_TREE,
8092 long_long_unsigned_type_node,
8094 /* Normal vector unops. */
8095 tree v4sf_ftype_v4sf
8096 = build_function_type (V4SF_type_node,
8097 tree_cons (NULL_TREE, V4SF_type_node,
8100 /* Normal vector binops. */
8101 tree v4sf_ftype_v4sf_v4sf
8102 = build_function_type (V4SF_type_node,
8103 tree_cons (NULL_TREE, V4SF_type_node,
8104 tree_cons (NULL_TREE, V4SF_type_node,
8106 tree v8qi_ftype_v8qi_v8qi
8107 = build_function_type (V8QI_type_node,
8108 tree_cons (NULL_TREE, V8QI_type_node,
8109 tree_cons (NULL_TREE, V8QI_type_node,
8111 tree v4hi_ftype_v4hi_v4hi
8112 = build_function_type (V4HI_type_node,
8113 tree_cons (NULL_TREE, V4HI_type_node,
8114 tree_cons (NULL_TREE, V4HI_type_node,
8116 tree v2si_ftype_v2si_v2si
8117 = build_function_type (V2SI_type_node,
8118 tree_cons (NULL_TREE, V2SI_type_node,
8119 tree_cons (NULL_TREE, V2SI_type_node,
8122 = build_function_type (intTI_type_node,
8123 tree_cons (NULL_TREE, intTI_type_node,
8124 tree_cons (NULL_TREE, intTI_type_node,
8127 = build_function_type (long_long_unsigned_type_node,
8128 tree_cons (NULL_TREE, long_long_unsigned_type_node,
8129 tree_cons (NULL_TREE,
8130 long_long_unsigned_type_node,
8133 /* Add all builtins that are more or less simple operations on two
8135 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
8137 /* Use one of the operands; the target can have a different mode for
8138 mask-generating compares. */
8139 enum machine_mode mode;
8144 mode = insn_data[d->icode].operand[1].mode;
8146 if (! TARGET_SSE && ! VALID_MMX_REG_MODE (mode))
8152 type = v4sf_ftype_v4sf_v4sf;
8155 type = v8qi_ftype_v8qi_v8qi;
8158 type = v4hi_ftype_v4hi_v4hi;
8161 type = v2si_ftype_v2si_v2si;
8164 type = ti_ftype_ti_ti;
8167 type = di_ftype_di_di;
8174 /* Override for comparisons. */
8175 if (d->icode == CODE_FOR_maskcmpv4sf3
8176 || d->icode == CODE_FOR_maskncmpv4sf3
8177 || d->icode == CODE_FOR_vmmaskcmpv4sf3
8178 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
8179 type = v4si_ftype_v4sf_v4sf;
8181 def_builtin (d->name, type, d->code);
8184 /* Add the remaining MMX insns with somewhat more complicated types. */
8185 def_builtin ("__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
8186 def_builtin ("__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
8187 def_builtin ("__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
8188 def_builtin ("__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
8189 def_builtin ("__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
8190 def_builtin ("__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
8191 def_builtin ("__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
8192 def_builtin ("__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
8193 def_builtin ("__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
8195 def_builtin ("__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
8196 def_builtin ("__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
8197 def_builtin ("__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
8199 def_builtin ("__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
8200 def_builtin ("__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
8202 def_builtin ("__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
8203 def_builtin ("__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
8205 /* Everything beyond this point is SSE only. */
8209 /* comi/ucomi insns. */
8210 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
8211 def_builtin (d->name, int_ftype_v4sf_v4sf, d->code);
8213 def_builtin ("__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
8214 def_builtin ("__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
8215 def_builtin ("__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
8217 def_builtin ("__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
8218 def_builtin ("__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
8219 def_builtin ("__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
8220 def_builtin ("__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
8221 def_builtin ("__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
8222 def_builtin ("__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
8224 def_builtin ("__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
8225 def_builtin ("__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
8227 def_builtin ("__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
8229 def_builtin ("__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
8230 def_builtin ("__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
8231 def_builtin ("__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
8232 def_builtin ("__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
8233 def_builtin ("__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
8234 def_builtin ("__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
8236 def_builtin ("__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
8237 def_builtin ("__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
8238 def_builtin ("__builtin_ia32_storehps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
8239 def_builtin ("__builtin_ia32_storelps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
8241 def_builtin ("__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
8242 def_builtin ("__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
8243 def_builtin ("__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
8244 def_builtin ("__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
8246 def_builtin ("__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
8247 def_builtin ("__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
8249 def_builtin ("__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
8251 def_builtin ("__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
8252 def_builtin ("__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
8253 def_builtin ("__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
8254 def_builtin ("__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
8255 def_builtin ("__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
8256 def_builtin ("__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
8258 def_builtin ("__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
8260 /* Composite intrinsics. */
8261 def_builtin ("__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
8262 def_builtin ("__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
8263 def_builtin ("__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
8264 def_builtin ("__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
8265 def_builtin ("__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
8266 def_builtin ("__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
8267 def_builtin ("__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
8270 /* Errors in the source file can cause expand_expr to return const0_rtx
8271    where we expect a vector.  To avoid crashing, use one of the vector
8272    clear instructions.
8273
8274    NOTE(review): several original lines are absent from this excerpt
8275    (return type, `rtx x;` declaration, braces, the early-return path);
8276    the visible lines are kept byte-identical.  */
8274 safe_vector_operand (x, mode)
8276      enum machine_mode mode;
8278   if (x != const0_rtx)
8280   x = gen_reg_rtx (mode);
8282   if (VALID_MMX_REG_MODE (mode))
8283     emit_insn (gen_mmx_clrdi (mode == DImode ? x
8284 				      : gen_rtx_SUBREG (DImode, x, 0)));
8286     emit_insn (gen_sse_clrti (mode == TImode ? x
8287 				      : gen_rtx_SUBREG (TImode, x, 0)));
8291 /* Subroutine of ix86_expand_builtin to take care of binop insns.
     Expands a two-operand builtin described by ICODE: the two arguments
     in ARGLIST become operands 1 and 2 of the insn, and the result goes
     into TARGET (a fresh register if TARGET is unsuitable).
     NOTE(review): this excerpt is missing some original lines (parameter
     declarations, the final emit/return); visible code is unchanged.  */
8294 ix86_expand_binop_builtin (icode, arglist, target)
8295      enum insn_code icode;
8300   tree arg0 = TREE_VALUE (arglist);
8301   tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8302   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8303   rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8304   enum machine_mode tmode = insn_data[icode].operand[0].mode;
8305   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
8306   enum machine_mode mode1 = insn_data[icode].operand[2].mode;
     /* Guard against const0_rtx appearing where a vector is expected
        (see safe_vector_operand).  */
8308   if (VECTOR_MODE_P (mode0))
8309     op0 = safe_vector_operand (op0, mode0);
8310   if (VECTOR_MODE_P (mode1))
8311     op1 = safe_vector_operand (op1, mode1);
     /* Use TARGET only if it already has the right mode and satisfies
        the insn's output predicate; otherwise allocate a new register.  */
8314       || GET_MODE (target) != tmode
8315       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8316     target = gen_reg_rtx (tmode);
8318   /* In case the insn wants input operands in modes different from
8319      the result, abort.  */
8320   if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
     /* Force operands into registers when the predicates reject them.  */
8323   if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8324     op0 = copy_to_mode_reg (mode0, op0);
8325   if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8326     op1 = copy_to_mode_reg (mode1, op1);
8328   pat = GEN_FCN (icode) (target, op0, op1);
8335 /* Subroutine of ix86_expand_builtin to take care of stores.
     ARG0 is the address to store to, ARG1 the value.  SHUFFLE >= 0
     requests a sse_shufps of the value with itself (using SHUFFLE as the
     immediate mask) before the store; SHUFFLE == -1 means no shuffle.
     NOTE(review): some original lines (declarations, emit/return tail)
     are missing from this excerpt; visible code is unchanged.  */
8338 ix86_expand_store_builtin (icode, arglist, shuffle)
8339      enum insn_code icode;
8344   tree arg0 = TREE_VALUE (arglist);
8345   tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8346   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8347   rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8348   enum machine_mode mode0 = insn_data[icode].operand[0].mode;
8349   enum machine_mode mode1 = insn_data[icode].operand[1].mode;
8351   if (VECTOR_MODE_P (mode1))
8352     op1 = safe_vector_operand (op1, mode1);
     /* Destination is a MEM at the address computed from ARG0.  */
8354   op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
     /* A shuffle clobbers its operand, so always copy to a register
        in that case, not just when the predicate demands it.  */
8355   if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
8356     op1 = copy_to_mode_reg (mode1, op1);
8358     emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
8359   pat = GEN_FCN (icode) (op0, op1);
8365 /* Subroutine of ix86_expand_builtin to take care of unop insns.
     Expands a one-operand builtin ICODE into TARGET.  When DO_LOAD is
     nonzero the single argument is treated as an address and wrapped in
     a MEM of the input mode instead of being used as a value directly.
     NOTE(review): missing lines in this excerpt include the parameter
     declarations and the emit/return tail; visible code is unchanged.  */
8368 ix86_expand_unop_builtin (icode, arglist, target, do_load)
8369      enum insn_code icode;
8375   tree arg0 = TREE_VALUE (arglist);
8376   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8377   enum machine_mode tmode = insn_data[icode].operand[0].mode;
8378   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
     /* Reuse TARGET only when mode and output predicate allow it.  */
8381       || GET_MODE (target) != tmode
8382       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8383     target = gen_reg_rtx (tmode);
     /* do_load path: operand is an address — build a MEM from it.  */
8385     op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8388       if (VECTOR_MODE_P (mode0))
8389 	op0 = safe_vector_operand (op0, mode0);
8391       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8392 	op0 = copy_to_mode_reg (mode0, op0);
8395   pat = GEN_FCN (icode) (target, op0);
8402 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
8403    sqrtss, rsqrtss, rcpss.
     These scalar SSE insns take the same register twice (operand 1 and 2),
     hence the GEN_FCN call with OP0 duplicated below.
     NOTE(review): some declarations and the emit/return tail are missing
     from this excerpt; visible code is unchanged.  */
8406 ix86_expand_unop1_builtin (icode, arglist, target)
8407      enum insn_code icode;
8412   tree arg0 = TREE_VALUE (arglist);
8413   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8414   enum machine_mode tmode = insn_data[icode].operand[0].mode;
8415   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
8418       || GET_MODE (target) != tmode
8419       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8420     target = gen_reg_rtx (tmode);
8422   if (VECTOR_MODE_P (mode0))
8423     op0 = safe_vector_operand (op0, mode0);
8425   if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8426     op0 = copy_to_mode_reg (mode0, op0);
     /* Same register passed for both input operands — see comment above.  */
8428   pat = GEN_FCN (icode) (target, op0, op0);
8435 /* Subroutine of ix86_expand_builtin to take care of comparison insns.
     D describes the builtin (icode plus the rtx comparison code).  The
     comparison rtx built at the end becomes an explicit operand of the
     mask-compare insn.  When the hardware lacks the requested comparison
     the operands are swapped and the condition reversed (the controlling
     test for the swap is among the lines missing from this excerpt).
     NOTE(review): several original lines are absent here (declarations,
     parts of the swap logic, emit/return tail); visible code unchanged.  */
8438 ix86_expand_sse_compare (d, arglist, target)
8439      struct builtin_description *d;
8444   tree arg0 = TREE_VALUE (arglist);
8445   tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8446   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8447   rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8449   enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
8450   enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
8451   enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
8452   enum rtx_code comparison = d->comparison;
8454   if (VECTOR_MODE_P (mode0))
8455     op0 = safe_vector_operand (op0, mode0);
8456   if (VECTOR_MODE_P (mode1))
8457     op1 = safe_vector_operand (op1, mode1);
8459   /* Swap operands if we have a comparison that isn't available in
8463       target = gen_reg_rtx (tmode);
8464       emit_move_insn (target, op1);
8467       comparison = swap_condition (comparison);
8470       || GET_MODE (target) != tmode
8471       || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
8472     target = gen_reg_rtx (tmode);
8474   if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
8475     op0 = copy_to_mode_reg (mode0, op0);
8476   if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
8477     op1 = copy_to_mode_reg (mode1, op1);
     /* Build the comparison rtx itself and feed it as the last operand.  */
8479   op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
8480   pat = GEN_FCN (d->icode) (target, op0, op1, op2);
8487 /* Subroutine of ix86_expand_builtin to take care of comi insns.
     comi/ucomi set EFLAGS; the result is materialized as a QImode
     setcc into the low byte of an SImode register pre-cleared to 0.
     NOTE(review): lines are missing from this excerpt (declarations,
     the swap-condition guard, the final return); visible code unchanged.  */
8490 ix86_expand_sse_comi (d, arglist, target)
8491      struct builtin_description *d;
8496   tree arg0 = TREE_VALUE (arglist);
8497   tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8498   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8499   rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8501   enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
8502   enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
8503   enum rtx_code comparison = d->comparison;
8505   if (VECTOR_MODE_P (mode0))
8506     op0 = safe_vector_operand (op0, mode0);
8507   if (VECTOR_MODE_P (mode1))
8508     op1 = safe_vector_operand (op1, mode1);
8510   /* Swap operands if we have a comparison that isn't available in
8517       comparison = swap_condition (comparison);
     /* Zero the full SImode result first, then set only the low byte.  */
8520   target = gen_reg_rtx (SImode);
8521   emit_move_insn (target, const0_rtx);
8522   target = gen_rtx_SUBREG (QImode, target, 0);
8524   if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
8525     op0 = copy_to_mode_reg (mode0, op0);
8526   if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
8527     op1 = copy_to_mode_reg (mode1, op1);
8529   op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
8530   pat = GEN_FCN (d->icode) (op0, op1, op2);
8534   emit_insn (gen_setcc_2 (target, op2));
8539 /* Expand an expression EXP that calls a built-in function,
8540    with result going to TARGET if that's convenient
8541    (and in mode MODE if that's convenient).
8542    SUBTARGET may be used as the target for computing one of EXP's operands.
8543    IGNORE is nonzero if the value is to be ignored.
     Builtins with special operand handling are expanded inline in the
     switch; everything else falls through to the generic tables
     (bdesc_2arg, bdesc_1arg, bdesc_comi) at the bottom.
     NOTE(review): this excerpt drops many original lines (braces,
     `return target;` statements, pattern-NULL checks); the visible
     lines are kept byte-identical.  */
8546 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
8549      rtx subtarget ATTRIBUTE_UNUSED;
8550      enum machine_mode mode ATTRIBUTE_UNUSED;
8551      int ignore ATTRIBUTE_UNUSED;
8553   struct builtin_description *d;
8555   enum insn_code icode;
8556   tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8557   tree arglist = TREE_OPERAND (exp, 1);
8558   tree arg0, arg1, arg2, arg3;
8559   rtx op0, op1, op2, pat;
8560   enum machine_mode tmode, mode0, mode1, mode2;
8561   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
     /* No-operand state-changing insns.  */
8565     case IX86_BUILTIN_EMMS:
8566       emit_insn (gen_emms ());
8569     case IX86_BUILTIN_SFENCE:
8570       emit_insn (gen_sfence ());
     /* int <-> __m64 moves via SImode SUBREG of a DImode register.  */
8573     case IX86_BUILTIN_M_FROM_INT:
8574       target = gen_reg_rtx (DImode);
8575       op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8576       emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
8579     case IX86_BUILTIN_M_TO_INT:
8580       op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8581       op0 = copy_to_mode_reg (DImode, op0);
8582       target = gen_reg_rtx (SImode);
8583       emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
     /* pextrw: second operand must be an immediate selector.  */
8586     case IX86_BUILTIN_PEXTRW:
8587       icode = CODE_FOR_mmx_pextrw;
8588       arg0 = TREE_VALUE (arglist);
8589       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8590       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8591       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8592       tmode = insn_data[icode].operand[0].mode;
8593       mode0 = insn_data[icode].operand[1].mode;
8594       mode1 = insn_data[icode].operand[2].mode;
8596       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8597 	op0 = copy_to_mode_reg (mode0, op0);
8598       if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8600 	  /* @@@ better error message */
8601 	  error ("selector must be an immediate");
8605 	  || GET_MODE (target) != tmode
8606 	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8607 	target = gen_reg_rtx (tmode);
8608       pat = GEN_FCN (icode) (target, op0, op1);
     /* pinsrw: third operand must be an immediate selector.  */
8614     case IX86_BUILTIN_PINSRW:
8615       icode = CODE_FOR_mmx_pinsrw;
8616       arg0 = TREE_VALUE (arglist);
8617       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8618       arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8619       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8620       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8621       op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8622       tmode = insn_data[icode].operand[0].mode;
8623       mode0 = insn_data[icode].operand[1].mode;
8624       mode1 = insn_data[icode].operand[2].mode;
8625       mode2 = insn_data[icode].operand[3].mode;
8627       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8628 	op0 = copy_to_mode_reg (mode0, op0);
8629       if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8630 	op1 = copy_to_mode_reg (mode1, op1);
8631       if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
8633 	  /* @@@ better error message */
8634 	  error ("selector must be an immediate");
8638 	  || GET_MODE (target) != tmode
8639 	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8640 	target = gen_reg_rtx (tmode);
8641       pat = GEN_FCN (icode) (target, op0, op1, op2);
8647     case IX86_BUILTIN_MASKMOVQ:
8648       icode = CODE_FOR_mmx_maskmovq;
8649       /* Note the arg order is different from the operand order.  */
8650       arg1 = TREE_VALUE (arglist);
8651       arg2 = TREE_VALUE (TREE_CHAIN (arglist));
8652       arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8653       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8654       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8655       op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8656       mode0 = insn_data[icode].operand[0].mode;
8657       mode1 = insn_data[icode].operand[1].mode;
8658       mode2 = insn_data[icode].operand[2].mode;
     /* NOTE(review): op0 is checked against operand[1]'s predicate here,
        not operand[0]'s — looks suspicious, but the dropped surrounding
        lines prevent confirming intent; flagging rather than changing.  */
8660       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8661 	op0 = copy_to_mode_reg (mode0, op0);
8662       if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
8663 	op1 = copy_to_mode_reg (mode1, op1);
8664       if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
8665 	op2 = copy_to_mode_reg (mode2, op2);
8666       pat = GEN_FCN (icode) (op0, op1, op2);
     /* Scalar SSE unops sharing the duplicated-operand pattern.  */
8672     case IX86_BUILTIN_SQRTSS:
8673       return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
8674     case IX86_BUILTIN_RSQRTSS:
8675       return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
8676     case IX86_BUILTIN_RCPSS:
8677       return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
     /* Loads/stores delegate to the generic helpers; do_load=1 means the
        argument is an address, shuffle=-1 means store without shuffling.  */
8679     case IX86_BUILTIN_LOADAPS:
8680       return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
8682     case IX86_BUILTIN_LOADUPS:
8683       return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
8685     case IX86_BUILTIN_STOREAPS:
8686       return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
8687     case IX86_BUILTIN_STOREUPS:
8688       return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);
8690     case IX86_BUILTIN_LOADSS:
8691       return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
8693     case IX86_BUILTIN_STORESS:
8694       return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
     /* movhps/movlps load: operand 1 is a vector, operand 2 a memory
        address — expanded by hand since no helper fits this shape.  */
8696     case IX86_BUILTIN_LOADHPS:
8697     case IX86_BUILTIN_LOADLPS:
8698       icode = (fcode == IX86_BUILTIN_LOADHPS
8699 	       ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
8700       arg0 = TREE_VALUE (arglist);
8701       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8702       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8703       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8704       tmode = insn_data[icode].operand[0].mode;
8705       mode0 = insn_data[icode].operand[1].mode;
8706       mode1 = insn_data[icode].operand[2].mode;
8708       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8709 	op0 = copy_to_mode_reg (mode0, op0);
8710       op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
8712 	  || GET_MODE (target) != tmode
8713 	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8714 	target = gen_reg_rtx (tmode);
8715       pat = GEN_FCN (icode) (target, op0, op1);
     /* movhps/movlps store: destination MEM doubles as insn output.  */
8721     case IX86_BUILTIN_STOREHPS:
8722     case IX86_BUILTIN_STORELPS:
8723       icode = (fcode == IX86_BUILTIN_STOREHPS
8724 	       ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
8725       arg0 = TREE_VALUE (arglist);
8726       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8727       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8728       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8729       mode0 = insn_data[icode].operand[1].mode;
8730       mode1 = insn_data[icode].operand[2].mode;
8732       op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8733       if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8734 	op1 = copy_to_mode_reg (mode1, op1);
8736       pat = GEN_FCN (icode) (op0, op0, op1);
8742     case IX86_BUILTIN_MOVNTPS:
8743       return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
8744     case IX86_BUILTIN_MOVNTQ:
8745       return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);
     /* MXCSR access goes through a stack slot: ldmxcsr/stmxcsr take a
        memory operand.  */
8747     case IX86_BUILTIN_LDMXCSR:
8748       op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8749       target = assign_386_stack_local (SImode, 0);
8750       emit_move_insn (target, op0);
8751       emit_insn (gen_ldmxcsr (target));
8754     case IX86_BUILTIN_STMXCSR:
8755       target = assign_386_stack_local (SImode, 0);
8756       emit_insn (gen_stmxcsr (target));
8757       return copy_to_mode_reg (SImode, target);
8759     case IX86_BUILTIN_PREFETCH:
8760       icode = CODE_FOR_prefetch;
8761       arg0 = TREE_VALUE (arglist);
8762       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8763       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8764       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8765       mode0 = insn_data[icode].operand[0].mode;
8766       mode1 = insn_data[icode].operand[1].mode;
8768       if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
8770 	  /* @@@ better error message */
8771 	  error ("selector must be an immediate");
8775       op0 = copy_to_mode_reg (Pmode, op0);
8776       pat = GEN_FCN (icode) (op0, op1);
     /* shufps: third operand must be an immediate mask.  */
8782     case IX86_BUILTIN_SHUFPS:
8783       icode = CODE_FOR_sse_shufps;
8784       arg0 = TREE_VALUE (arglist);
8785       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8786       arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8787       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8788       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8789       op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8790       tmode = insn_data[icode].operand[0].mode;
8791       mode0 = insn_data[icode].operand[1].mode;
8792       mode1 = insn_data[icode].operand[2].mode;
8793       mode2 = insn_data[icode].operand[3].mode;
8795       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8796 	op0 = copy_to_mode_reg (mode0, op0);
8797       if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8798 	op1 = copy_to_mode_reg (mode1, op1);
8799       if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
8801 	  /* @@@ better error message */
8802 	  error ("mask must be an immediate");
8806 	  || GET_MODE (target) != tmode
8807 	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8808 	target = gen_reg_rtx (tmode);
8809       pat = GEN_FCN (icode) (target, op0, op1, op2);
     /* pshufw: note operand indices 2/3 feed mode0/mode1 and the pattern
        takes TARGET twice (operand 1 is tied to the output).  */
8815     case IX86_BUILTIN_PSHUFW:
8816       icode = CODE_FOR_mmx_pshufw;
8817       arg0 = TREE_VALUE (arglist);
8818       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8819       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8820       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8821       tmode = insn_data[icode].operand[0].mode;
8822       mode0 = insn_data[icode].operand[2].mode;
8823       mode1 = insn_data[icode].operand[3].mode;
8825       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8826 	op0 = copy_to_mode_reg (mode0, op0);
8827       if (! (*insn_data[icode].operand[3].predicate) (op1, mode1))
8829 	  /* @@@ better error message */
8830 	  error ("mask must be an immediate");
8834 	  || GET_MODE (target) != tmode
8835 	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8836 	target = gen_reg_rtx (tmode);
8837       pat = GEN_FCN (icode) (target, target, op0, op1);
8843       /* Composite intrinsics.  */
     /* setps1: spill the scalar to a stack slot, loadss it, then
        broadcast lane 0 with shufps(0).  */
8844     case IX86_BUILTIN_SETPS1:
8845       target = assign_386_stack_local (SFmode, 0);
8846       arg0 = TREE_VALUE (arglist);
8847       emit_move_insn (change_address (target, SFmode, XEXP (target, 0)),
8848 		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
8849       op0 = gen_reg_rtx (V4SFmode);
8850       emit_insn (gen_sse_loadss (op0, change_address (target, V4SFmode,
8851 						      XEXP (target, 0))));
8852       emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
     /* setps: write the four floats to consecutive stack slots, then
        movaps the whole vector into a register.  */
8855     case IX86_BUILTIN_SETPS:
8856       target = assign_386_stack_local (V4SFmode, 0);
8857       op0 = change_address (target, SFmode, XEXP (target, 0));
8858       arg0 = TREE_VALUE (arglist);
8859       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8860       arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8861       arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
8862       emit_move_insn (op0,
8863 		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
8864       emit_move_insn (adj_offsettable_operand (op0, 4),
8865 		      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
8866       emit_move_insn (adj_offsettable_operand (op0, 8),
8867 		      expand_expr (arg2, NULL_RTX, VOIDmode, 0));
8868       emit_move_insn (adj_offsettable_operand (op0, 12),
8869 		      expand_expr (arg3, NULL_RTX, VOIDmode, 0));
8870       op0 = gen_reg_rtx (V4SFmode);
8871       emit_insn (gen_sse_movaps (op0, target));
8874     case IX86_BUILTIN_CLRPS:
8875       target = gen_reg_rtx (TImode);
8876       emit_insn (gen_sse_clrti (target));
     /* loadrps: plain load then lane-reverse via shufps(0x1b).  */
8879     case IX86_BUILTIN_LOADRPS:
8880       target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
8881 					 gen_reg_rtx (V4SFmode), 1);
8882       emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
     /* loadps1: loadss then broadcast lane 0.  */
8885     case IX86_BUILTIN_LOADPS1:
8886       target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
8887 					 gen_reg_rtx (V4SFmode), 1);
8888       emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
8891     case IX86_BUILTIN_STOREPS1:
8892       return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
8893     case IX86_BUILTIN_STORERPS:
8894       return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);
8896     case IX86_BUILTIN_MMX_ZERO:
8897       target = gen_reg_rtx (DImode);
8898       emit_insn (gen_mmx_clrdi (target));
     /* Fall through to the generic descriptor tables.  */
8905   for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
8906     if (d->code == fcode)
8908 	/* Compares are treated specially.  */
8909 	if (d->icode == CODE_FOR_maskcmpv4sf3
8910 	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
8911 	    || d->icode == CODE_FOR_maskncmpv4sf3
8912 	    || d->icode == CODE_FOR_vmmaskncmpv4sf3)
8913 	  return ix86_expand_sse_compare (d, arglist, target);
8915 	return ix86_expand_binop_builtin (d->icode, arglist, target);
8918   for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
8919     if (d->code == fcode)
8920       return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
8922   for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
8923     if (d->code == fcode)
8924       return ix86_expand_sse_comi (d, arglist, target);
8926   /* @@@ Should really do something sensible here.  */
8930 /* Store OPERAND to the memory after reload is completed.  This means
8931    that we can't easilly use assign_stack_local.
     The operand is pushed onto the stack with PRE_DEC stores (two SImode
     pushes for DImode, split via split_di) and a MEM at the new stack
     pointer is returned; pair with ix86_free_from_memory to pop it.
     NOTE(review): mode-dispatch lines (the switch/case structure and some
     XVECEXP wrappers) are missing from this excerpt; code unchanged.  */
8933 ix86_force_to_memory (mode, operand)
8934      enum machine_mode mode;
8937   if (!reload_completed)
     /* DImode: split into two SImode words and push each.  */
8944 	split_di (&operand, 1, operands, operands+1);
8946 			gen_rtx_SET (VOIDmode,
8947 				     gen_rtx_MEM (SImode,
8948 						  gen_rtx_PRE_DEC (Pmode,
8949 								   stack_pointer_rtx)),
8952 			gen_rtx_SET (VOIDmode,
8953 				     gen_rtx_MEM (SImode,
8954 						  gen_rtx_PRE_DEC (Pmode,
8955 								   stack_pointer_rtx)),
8960 	/* It is better to store HImodes as SImodes.  */
8961 	if (!TARGET_PARTIAL_REG_STALL)
8962 	  operand = gen_lowpart (SImode, operand);
8966 			gen_rtx_SET (VOIDmode,
8967 				     gen_rtx_MEM (GET_MODE (operand),
8968 						  gen_rtx_PRE_DEC (SImode,
8969 								   stack_pointer_rtx)),
8975   return gen_rtx_MEM (mode, stack_pointer_rtx);
8978 /* Free operand from the memory.
     Releases the stack space pushed by ix86_force_to_memory by bumping
     the stack pointer with an add expressed as a SET (LEA-able form).
     NOTE(review): the size expression's arms are partially missing in
     this excerpt (DImode presumably frees 8 bytes, SImode 4 — confirm
     against the full source); visible code unchanged.  */
8980 ix86_free_from_memory (mode)
8981      enum machine_mode mode;
8983   /* Use LEA to deallocate stack space.  In peephole2 it will be converted
8984      to pop or add instruction if registers are available.  */
8985   emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8986 			  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8987 					GEN_INT (mode == DImode
8989 						 : mode == HImode && TARGET_PARTIAL_REG_STALL
8994 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
8995    QImode must go into class Q_REGS.
8996    Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
8997    movdf to do mem-to-mem moves through integer regs.
     NOTE(review): the return values of several branches (NO_REGS, class,
     Q_REGS) are among the lines missing from this excerpt; visible code
     unchanged.  */
8999 ix86_preferred_reload_class (x, class)
9001      enum reg_class class;
     /* Non-integral CONST_DOUBLE handling.  */
9003   if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
9005       /* SSE can't load any constant directly yet.  */
9006       if (SSE_CLASS_P (class))
9008       /* Floats can load 0 and 1.  */
9009       if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
9011       /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
9012       if (MAYBE_SSE_CLASS_P (class))
9013 	return (reg_class_subset_p (class, GENERAL_REGS)
9014 		? GENERAL_REGS : FLOAT_REGS);
9018   /* General regs can load everything.  */
9019   if (reg_class_subset_p (class, GENERAL_REGS))
9020     return GENERAL_REGS;
9021   /* In case we haven't resolved FLOAT or SSE yet, give up.  */
9022   if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
     /* MMX registers cannot hold constants directly.  */
9025   if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
     /* QImode values need a byte-addressable register.  */
9027   if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
9032 /* If we are copying between general and FP registers, we need a memory
9033 location. The same is true for SSE and MMX registers.
9035 The macro can't work reliably when one of the CLASSES is class containing
9036 registers from multiple units (SSE, MMX, integer). We avoid this by never
9037 combining those units in single alternative in the machine description.
9038 Ensure that this constraint holds to avoid unexpected surprises.
9040 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
9041 enforce these sanity checks. */
/* Implementation of the SECONDARY_MEMORY_NEEDED target macro: returns
   nonzero when a CLASS1 <-> CLASS2 copy of MODE must bounce through
   memory.  NOTE(review): the consequent of the sanity-check `if` below
   (presumably an abort, gated on STRICT) is elided in this excerpt.  */
9043 ix86_secondary_memory_needed (class1, class2, mode, strict)
9044 enum reg_class class1, class2;
9045 enum machine_mode mode;
/* Sanity check: each class must be purely one unit (FP, SSE, MMX) or
   purely not — MAYBE_*_CLASS_P differing from *_CLASS_P means a mixed
   class, which this predicate cannot answer reliably.  */
9048 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
9049 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
9050 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
9051 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
9052 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
9053 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
/* Memory is needed when crossing the x87 boundary in either direction,
   or crossing the SSE/MMX <-> integer boundary except for SImode moves
   (which have direct movd-style instructions).  */
9060 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
9061 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
9062 && (mode) != SImode)
9063 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
9064 && (mode) != SImode));
9066 /* Return the cost of moving data from a register in class CLASS1 to
9067 one in class CLASS2.
9069 It is not required that the cost always equal 2 when FROM is the same as TO;
9070 on some machines it is expensive to move between registers if they are not
9071 general registers. */
/* Implementation of the REGISTER_MOVE_COST target macro.  Note STRICT==0
   is passed to ix86_secondary_memory_needed below: sanity checks are
   suppressed because this is a cost query, not a correctness one.
   NOTE(review): the final return (general reg-reg cost) and the body of
   the NREGS-mismatch branch are elided in this excerpt.  */
9073 ix86_register_move_cost (mode, class1, class2)
9074 enum machine_mode mode;
9075 enum reg_class class1, class2;
9077 /* In case we require secondary memory, compute cost of the store followed
9078 by load. In case of copying from general_purpose_register we may emit
9079 multiple stores followed by single load causing memory size mismatch
9080 stall. Count this as arbitrarily high cost of 20. */
9081 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
/* More registers on the source side than the destination implies the
   multi-store / single-load mismatch described above.  */
9083 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
/* Ordinary secondary-memory case: cost of one store plus one load.  */
9085 return (MEMORY_MOVE_COST (mode, class1, 0)
9086 + MEMORY_MOVE_COST (mode, class2, 1));
9088 /* Moves between SSE/MMX and integer unit are expensive.
9089 ??? We should make this cost CPU specific. */
9090 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
9091 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
9092 return ix86_cost->mmxsse_to_integer;
/* Within a single unit, use the per-processor tuning table.  */
9093 if (MAYBE_FLOAT_CLASS_P (class1))
9094 return ix86_cost->fp_move;
9095 if (MAYBE_SSE_CLASS_P (class1))
9096 return ix86_cost->sse_move;
9097 if (MAYBE_MMX_CLASS_P (class1))
9098 return ix86_cost->mmx_move;
9102 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* Implementation of the HARD_REGNO_MODE_OK target macro.  NOTE(review):
   some return statements (the early-reject after the MODE_CC/RANDOM/
   PARTIAL_INT test, and the reject after the VALID_INT/FP test) are
   elided in this excerpt — presumably `return 0`.  */
9104 ix86_hard_regno_mode_ok (regno, mode)
9106 enum machine_mode mode;
9108 /* Flags and only flags can only hold CCmode values. */
9109 if (CC_REGNO_P (regno))
9110 return GET_MODE_CLASS (mode) == MODE_CC;
/* Conversely, no other register may hold CC (or the odd RANDOM /
   PARTIAL_INT classes).  */
9111 if (GET_MODE_CLASS (mode) == MODE_CC
9112 || GET_MODE_CLASS (mode) == MODE_RANDOM
9113 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
/* Each special-purpose register file validates against its own set of
   modes.  */
9115 if (FP_REGNO_P (regno))
9116 return VALID_FP_MODE_P (mode);
9117 if (SSE_REGNO_P (regno))
9118 return VALID_SSE_REG_MODE (mode);
9119 if (MMX_REGNO_P (regno))
9120 return VALID_MMX_REG_MODE (mode);
9121 /* We handle both integer and floats in the general purpose registers.
9122 In future we should be able to handle vector modes as well. */
9123 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
9125 /* Take care for QImode values - they can be in non-QI regs, but then
9126 they do cause partial register stalls. */
/* regno < 4 == %eax..%ebx, the byte-addressable registers.  */
9127 if (regno < 4 || mode != QImode)
/* QImode in a non-Q register: allow it during/after reload (no choice
   then), or when the target doesn't suffer partial-register stalls.  */
9129 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
9132 /* Return the cost of moving data of mode M between a
9133 register and memory. A value of 2 is the default; this cost is
9134 relative to those in `REGISTER_MOVE_COST'.
9136 If moving between registers and memory is more expensive than
9137 between two registers, you should define this macro to express the
9140 Model also increased moving costs of QImode registers in non
9144 ix86_memory_move_cost (mode, class, in)
9145 enum machine_mode mode;
9146 enum reg_class class;
9149 if (FLOAT_CLASS_P (class))
9167 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
9169 if (SSE_CLASS_P (class))
9172 switch (GET_MODE_SIZE (mode))
9186 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
9188 if (MMX_CLASS_P (class))
9191 switch (GET_MODE_SIZE (mode))
9202 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
9204 switch (GET_MODE_SIZE (mode))
9208 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
9209 : ix86_cost->movzbl_load);
9211 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
9212 : ix86_cost->int_store[0] + 4);
9215 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
9217 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
9220 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
9221 * (int) GET_MODE_SIZE (mode) / 4);