1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
3 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "insn-flags.h"
35 #include "insn-attr.h"
42 #include "basic-block.h"
45 #ifndef CHECK_STACK_LIMIT
46 #define CHECK_STACK_LIMIT -1
/* Processor costs (relative to an add) */
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,				/* cost of an add instruction */
  1,				/* cost of a lea instruction */
  3,				/* variable shift costs */
  2,				/* constant shift costs */
  6,				/* cost of starting a multiply */
  1,				/* cost of multiply per each bit set */
  23,				/* cost of a divide/mod */
  15,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {8, 8, 8},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {8, 8, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode
				   (comment fixed: this slot follows the fp
				   load row, so it is the fp store cost) */
  2,				/* cost of moving MMX register */
  {4, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 8, 16},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 8, 16},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,				/* cost of an add instruction */
  1,				/* cost of a lea instruction */
  3,				/* variable shift costs */
  2,				/* constant shift costs */
  12,				/* cost of starting a multiply */
  1,				/* cost of multiply per each bit set */
  40,				/* cost of a divide/mod */
  15,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {8, 8, 8},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {8, 8, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 8, 16},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 8, 16},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3				/* MMX or SSE register to integer */
struct processor_costs pentium_cost = {
  1,				/* cost of an add instruction */
  1,				/* cost of a lea instruction */
  4,				/* variable shift costs */
  1,				/* constant shift costs */
  11,				/* cost of starting a multiply */
  0,				/* cost of multiply per each bit set */
  25,				/* cost of a divide/mod */
  8,				/* "large" insn */
  6,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  8,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 8, 16},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 8, 16},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3				/* MMX or SSE register to integer */
struct processor_costs pentiumpro_cost = {
  1,				/* cost of an add instruction */
  1,				/* cost of a lea instruction */
  1,				/* variable shift costs */
  1,				/* constant shift costs */
  4,				/* cost of starting a multiply */
  0,				/* cost of multiply per each bit set */
  17,				/* cost of a divide/mod */
  8,				/* "large" insn */
  2,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 2, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {2, 2, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3				/* MMX or SSE register to integer */
struct processor_costs k6_cost = {
  1,				/* cost of an add instruction */
  2,				/* cost of a lea instruction */
  1,				/* variable shift costs */
  1,				/* constant shift costs */
  3,				/* cost of starting a multiply */
  0,				/* cost of multiply per each bit set */
  18,				/* cost of a divide/mod */
  8,				/* "large" insn */
  3,				/* cost for loading QImode using movzbl */
  {4, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 3, 2},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {6, 6, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 4},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {2, 2, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  6				/* MMX or SSE register to integer */
struct processor_costs athlon_cost = {
  1,				/* cost of an add instruction */
  2,				/* cost of a lea instruction */
  1,				/* variable shift costs */
  1,				/* constant shift costs */
  5,				/* cost of starting a multiply */
  0,				/* cost of multiply per each bit set */
  42,				/* cost of a divide/mod */
  8,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {4, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 3, 2},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {6, 6, 20},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 16},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {2, 2, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  6				/* MMX or SSE register to integer */
struct processor_costs pentium4_cost = {
  1,				/* cost of an add instruction */
  1,				/* cost of a lea instruction */
  8,				/* variable shift costs */
  8,				/* constant shift costs */
  30,				/* cost of starting a multiply */
  0,				/* cost of multiply per each bit set */
  112,				/* cost of a divide/mod */
  16,				/* "large" insn */
  2,				/* cost for loading QImode using movzbl */
  {4, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 3, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  12,				/* cost of moving SSE register */
  {12, 12, 12},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  10,				/* MMX or SSE register to integer */
274 struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  Each m_* macro is the bit for
   one PROCESSOR_* enumerator; each x86_* constant below is the set of
   processors for which the named transformation or feature is profitable
   (presumably tested against the bit for the active CPU via TARGET_*
   macros in i386.h -- not visible here).  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
/* A memory reference in MODE addressed through the hard frame pointer.  */
#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))

/* Register name tables, indexed by gcc hard register number; the
   initializer macros come from i386.h.  */
const char * const hi_reg_name[] = HI_REGISTER_NAMES;
const char * const qi_reg_name[] = QI_REGISTER_NAMES;
const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */
enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
  AREG, DREG, CREG, BREG,	/* ax, dx, cx, bx */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,	/* si, di, plus two non-QImode regs */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,	/* fp stack regs */
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,	/* extended regs */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
/* The "default" register map used in 32bit mode.  Maps gcc hard register
   numbers to debugger register numbers; -1 marks registers with no
   debugger equivalent.  */
int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
362 /* The "default" register map used in 64bit mode. */
363 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
365 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
366 33, 34, 35, 36, 37, 38, 39, 40 /* fp regs */
367 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
368 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
369 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
370 8,9,10,11,12,13,14,15, /* extended integer registers */
371 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
374 /* Define the register numbers to be used in Dwarf debugging information.
375 The SVR4 reference port C compiler uses the following register numbers
376 in its Dwarf output code:
377 0 for %eax (gcc regno = 0)
378 1 for %ecx (gcc regno = 2)
379 2 for %edx (gcc regno = 1)
380 3 for %ebx (gcc regno = 3)
381 4 for %esp (gcc regno = 7)
382 5 for %ebp (gcc regno = 6)
383 6 for %esi (gcc regno = 4)
384 7 for %edi (gcc regno = 5)
385 The following three DWARF register numbers are never generated by
386 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
387 believes these numbers have these meanings.
388 8 for %eip (no gcc equivalent)
389 9 for %eflags (gcc regno = 17)
390 10 for %trapno (no gcc equivalent)
391 It is not at all clear how we should number the FP stack registers
392 for the x86 architecture. If the version of SDB on x86/svr4 were
393 a bit less brain dead with respect to floating-point then we would
394 have a precedent to follow with respect to DWARF register numbers
395 for x86 FP registers, but the SDB on x86/svr4 is so completely
396 broken with respect to FP registers that it is hardly worth thinking
397 of it as something to strive for compatibility with.
398 The version of x86/svr4 SDB I have at the moment does (partially)
399 seem to believe that DWARF register number 11 is associated with
400 the x86 register %st(0), but that's about all. Higher DWARF
401 register numbers don't seem to be associated with anything in
402 particular, and even for DWARF regno 11, SDB only seems to under-
403 stand that it should say that a variable lives in %st(0) (when
404 asked via an `=' command) if we said it was in DWARF regno 11,
405 but SDB still prints garbage when asked for the value of the
406 variable in question (via a `/' command).
407 (Also note that the labels SDB prints for various FP stack regs
408 when doing an `x' command are all wrong.)
409 Note that these problems generally don't affect the native SVR4
410 C compiler because it doesn't allow the use of -O with -g and
411 because when it is *not* optimizing, it allocates a memory
412 location for each floating-point variable, and the memory
413 location is what gets described in the DWARF AT_location
414 attribute for the variable in question.
415 Regardless of the severe mental illness of the x86/svr4 SDB, we
416 do something sensible here and we use the following DWARF
417 register numbers. Note that these are all stack-top-relative
419 11 for %st(0) (gcc regno = 8)
420 12 for %st(1) (gcc regno = 9)
421 13 for %st(2) (gcc regno = 10)
422 14 for %st(3) (gcc regno = 11)
423 15 for %st(4) (gcc regno = 12)
424 16 for %st(5) (gcc regno = 13)
425 17 for %st(6) (gcc regno = 14)
426 18 for %st(7) (gcc regno = 15)
/* Map gcc hard register numbers to the SVR4 DWARF register numbers
   described in the comment above; -1 marks registers that are never
   described in DWARF output.  */
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */
struct rtx_def *ix86_compare_op0 = NULL_RTX;
struct rtx_def *ix86_compare_op1 = NULL_RTX;

/* Number of per-function stack slots cached in machine_function.  */
#define MAX_386_STACK_LOCALS 2

/* Define the structure for the machine field in struct function.  */
struct machine_function
  /* Cached stack slots, one per mode per slot index -- assumption: lazily
     allocated scratch stack locations; confirm against assign_386_stack_local
     (not visible in this view).  */
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
  /* Nonzero if the function accesses a previous frame (e.g. via
     __builtin_return_address) -- presumably forces a frame pointer.  */
  int accesses_prev_frame;

/* Convenience accessor for the current function's cached stack slots.  */
#define ix86_stack_locals (cfun->machine->stack_locals)
/* Structure describing stack frame layout.
   Stack grows downward:
   saved frame pointer if frame_pointer_needed
						<- HARD_FRAME_POINTER
   > to_allocate				<- FRAME_POINTER
  /* Size of the outgoing-arguments area.  */
  int outgoing_arguments_size;
  /* Bytes to subtract from the stack pointer in the prologue
     (presumably locals plus outgoing args; confirm in
     ix86_compute_frame_layout).  */
  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed code model (set from ix86_cmodel_string during option
   processing).  */
enum cmodel ix86_cmodel;

/* which cpu are we scheduling for */
enum processor_type ix86_cpu;

/* which instruction set architecture to use.  */

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* ix86_regparm_string as a number */

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
int ix86_align_funcs;
const char *ix86_align_funcs_string;

/* Power of two alignment for loops.  */
int ix86_align_loops;

/* Power of two alignment for non-loop jumps.  */
int ix86_align_jumps;
539 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
540 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
542 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
543 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
545 static rtx gen_push PARAMS ((rtx));
546 static int memory_address_length PARAMS ((rtx addr));
547 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
548 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
549 static int ix86_safe_length PARAMS ((rtx));
550 static enum attr_memory ix86_safe_memory PARAMS ((rtx));
551 static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
552 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
553 static void ix86_dump_ppro_packet PARAMS ((FILE *));
554 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
555 static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
557 static void ix86_init_machine_status PARAMS ((struct function *));
558 static void ix86_mark_machine_status PARAMS ((struct function *));
559 static void ix86_free_machine_status PARAMS ((struct function *));
560 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
561 static int ix86_safe_length_prefix PARAMS ((rtx));
562 static int ix86_nsaved_regs PARAMS((void));
563 static void ix86_emit_save_regs PARAMS((void));
564 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int));
565 static void ix86_emit_epilogue_esp_adjustment PARAMS((int));
566 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
567 static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
568 static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
569 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
573 rtx base, index, disp;
577 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
579 struct builtin_description;
580 static rtx ix86_expand_sse_comi PARAMS ((struct builtin_description *, tree,
582 static rtx ix86_expand_sse_compare PARAMS ((struct builtin_description *, tree,
584 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
585 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
586 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
587 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
588 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
589 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
590 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
594 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
596 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
597 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
598 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
599 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
600 static int ix86_save_reg PARAMS ((int));
601 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
603 /* Sometimes certain combinations of command options do not make
604 sense on a particular target machine. You can define a macro
605 `OVERRIDE_OPTIONS' to take account of this. This macro, if
606 defined, is executed once just after all the command options have
609 Don't use this macro to turn on various extra optimizations for
610 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
616 /* Comes from final.c -- no real reason to change it. */
617 #define MAX_CODE_ALIGN 16
621 struct processor_costs *cost; /* Processor costs */
622 int target_enable; /* Target flags to enable. */
623 int target_disable; /* Target flags to disable. */
624 int align_loop; /* Default alignments. */
629 const processor_target_table[PROCESSOR_max] =
631 {&i386_cost, 0, 0, 2, 2, 2, 1},
632 {&i486_cost, 0, 0, 4, 4, 4, 1},
633 {&pentium_cost, 0, 0, -4, -4, -4, 1},
634 {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
635 {&k6_cost, 0, 0, -5, -5, 4, 1},
636 {&athlon_cost, 0, 0, 4, -4, 4, 1},
637 {&pentium4_cost, 0, 0, 2, 2, 2, 1}
642 const char *name; /* processor name or nickname. */
643 enum processor_type processor;
645 const processor_alias_table[] =
647 {"i386", PROCESSOR_I386},
648 {"i486", PROCESSOR_I486},
649 {"i586", PROCESSOR_PENTIUM},
650 {"pentium", PROCESSOR_PENTIUM},
651 {"i686", PROCESSOR_PENTIUMPRO},
652 {"pentiumpro", PROCESSOR_PENTIUMPRO},
653 {"k6", PROCESSOR_K6},
654 {"athlon", PROCESSOR_ATHLON},
655 {"pentium4", PROCESSOR_PENTIUM4},
658 int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
660 #ifdef SUBTARGET_OVERRIDE_OPTIONS
661 SUBTARGET_OVERRIDE_OPTIONS;
664 ix86_arch = PROCESSOR_I386;
665 ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
667 if (ix86_cmodel_string != 0)
669 if (!strcmp (ix86_cmodel_string, "small"))
670 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
672 sorry ("Code model %s not supported in PIC mode", ix86_cmodel_string);
673 else if (!strcmp (ix86_cmodel_string, "32"))
675 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
676 ix86_cmodel = CM_KERNEL;
677 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
678 ix86_cmodel = CM_MEDIUM;
679 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
680 ix86_cmodel = CM_LARGE;
682 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
688 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
690 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
691 error ("Code model `%s' not supported in the %s bit mode.",
692 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
693 if (ix86_cmodel == CM_LARGE)
694 sorry ("Code model `large' not supported yet.");
696 if (ix86_arch_string != 0)
698 for (i = 0; i < pta_size; i++)
699 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
701 ix86_arch = processor_alias_table[i].processor;
702 /* Default cpu tuning to the architecture. */
703 ix86_cpu = ix86_arch;
708 error ("bad value (%s) for -march= switch", ix86_arch_string);
711 if (ix86_cpu_string != 0)
713 for (i = 0; i < pta_size; i++)
714 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
716 ix86_cpu = processor_alias_table[i].processor;
720 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
723 ix86_cost = processor_target_table[ix86_cpu].cost;
724 target_flags |= processor_target_table[ix86_cpu].target_enable;
725 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
727 /* Arrange to set up i386_stack_locals for all functions. */
728 init_machine_status = ix86_init_machine_status;
729 mark_machine_status = ix86_mark_machine_status;
730 free_machine_status = ix86_free_machine_status;
732 /* Validate -mregparm= value. */
733 if (ix86_regparm_string)
735 i = atoi (ix86_regparm_string);
736 if (i < 0 || i > REGPARM_MAX)
737 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
742 /* Validate -malign-loops= value, or provide default. */
743 ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
744 if (ix86_align_loops_string)
746 i = atoi (ix86_align_loops_string);
747 if (i < 0 || i > MAX_CODE_ALIGN)
748 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
750 ix86_align_loops = i;
753 /* Validate -malign-jumps= value, or provide default. */
754 ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
755 if (ix86_align_jumps_string)
757 i = atoi (ix86_align_jumps_string);
758 if (i < 0 || i > MAX_CODE_ALIGN)
759 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
761 ix86_align_jumps = i;
764 /* Validate -malign-functions= value, or provide default. */
765 ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
766 if (ix86_align_funcs_string)
768 i = atoi (ix86_align_funcs_string);
769 if (i < 0 || i > MAX_CODE_ALIGN)
770 error ("-malign-functions=%d is not between 0 and %d",
773 ix86_align_funcs = i;
776 /* Validate -mpreferred-stack-boundary= value, or provide default.
777 The default of 128 bits is for Pentium III's SSE __m128. */
778 ix86_preferred_stack_boundary = 128;
779 if (ix86_preferred_stack_boundary_string)
781 i = atoi (ix86_preferred_stack_boundary_string);
783 error ("-mpreferred-stack-boundary=%d is not between 2 and 31", i);
785 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
788 /* Validate -mbranch-cost= value, or provide default. */
789 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
790 if (ix86_branch_cost_string)
792 i = atoi (ix86_branch_cost_string);
794 error ("-mbranch-cost=%d is not between 0 and 5", i);
796 ix86_branch_cost = i;
799 /* Keep nonleaf frame pointers. */
800 if (TARGET_OMIT_LEAF_FRAME_POINTER)
801 flag_omit_frame_pointer = 1;
803 /* If we're doing fast math, we don't care about comparison order
804 wrt NaNs. This lets us use a shorter comparison sequence. */
805 if (flag_unsafe_math_optimizations)
806 target_flags &= ~MASK_IEEE_FP;
808 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
811 target_flags |= MASK_MMX;
815 optimization_options (level, size)
817 int size ATTRIBUTE_UNUSED;
819 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
820 make the problem with not enough registers even worse. */
821 #ifdef INSN_SCHEDULING
823 flag_schedule_insns = 0;
827 /* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
828 attribute for DECL. The attributes in ATTRIBUTES have previously been
832 ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
833 tree decl ATTRIBUTE_UNUSED;
834 tree attributes ATTRIBUTE_UNUSED;
835 tree identifier ATTRIBUTE_UNUSED;
836 tree args ATTRIBUTE_UNUSED;
841 /* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
842 attribute for TYPE. The attributes in ATTRIBUTES have previously been
846 ix86_valid_type_attribute_p (type, attributes, identifier, args)
848 tree attributes ATTRIBUTE_UNUSED;
852 if (TREE_CODE (type) != FUNCTION_TYPE
853 && TREE_CODE (type) != METHOD_TYPE
854 && TREE_CODE (type) != FIELD_DECL
855 && TREE_CODE (type) != TYPE_DECL)
858 /* Stdcall attribute says callee is responsible for popping arguments
859 if they are not variable. */
860 if (is_attribute_p ("stdcall", identifier))
861 return (args == NULL_TREE);
863 /* Cdecl attribute says the callee is a normal C declaration. */
864 if (is_attribute_p ("cdecl", identifier))
865 return (args == NULL_TREE);
867 /* Regparm attribute specifies how many integer arguments are to be
868 passed in registers. */
869 if (is_attribute_p ("regparm", identifier))
873 if (! args || TREE_CODE (args) != TREE_LIST
874 || TREE_CHAIN (args) != NULL_TREE
875 || TREE_VALUE (args) == NULL_TREE)
878 cst = TREE_VALUE (args);
879 if (TREE_CODE (cst) != INTEGER_CST)
882 if (compare_tree_int (cst, REGPARM_MAX) > 0)
891 /* Return 0 if the attributes for two types are incompatible, 1 if they
892 are compatible, and 2 if they are nearly compatible (which causes a
893 warning to be generated). */
896 ix86_comp_type_attributes (type1, type2)
900 /* Check for mismatch of non-default calling convention. */
901 const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
903 if (TREE_CODE (type1) != FUNCTION_TYPE)
906 /* Check for mismatched return types (cdecl vs stdcall). */
907 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
908 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
913 /* Value is the number of bytes of arguments automatically
914 popped when returning from a subroutine call.
915 FUNDECL is the declaration node of the function (as a tree),
916 FUNTYPE is the data type of the function (as a tree),
917 or for a library call it is an identifier node for the subroutine name.
918 SIZE is the number of bytes of arguments passed on the stack.
920 On the 80386, the RTD insn may be used to pop them if the number
921 of args is fixed, but if the number is variable then the caller
922 must pop them all. RTD can't be used for library calls now
923 because the library is compiled with the Unix compiler.
924 Use of RTD is a selectable option, since it is incompatible with
925 standard Unix calling sequences. If the option is not selected,
926 the caller must always pop the args.
928 The attribute stdcall is equivalent to RTD on a per module basis. */
931 ix86_return_pops_args (fundecl, funtype, size)
936 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
938 /* Cdecl functions override -mrtd, and never pop the stack. */
939 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
941 /* Stdcall functions will pop the stack if not variable args. */
942 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
946 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
947 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
952 /* Lose any fake structure return argument. */
953 if (aggregate_value_p (TREE_TYPE (funtype)))
954 return GET_MODE_SIZE (Pmode);
959 /* Argument support functions. */
961 /* Initialize a variable CUM of type CUMULATIVE_ARGS
962 for a call to a function whose data type is FNTYPE.
963 For a library call, FNTYPE is 0. */
966 init_cumulative_args (cum, fntype, libname)
967 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
968 tree fntype; /* tree ptr for function decl */
969 rtx libname; /* SYMBOL_REF of library name or 0 */
/* zero_cum: all-zero template; presumably assigned to *cum to reset it
   (the assignment is not visible in this extract -- confirm). */
971 static CUMULATIVE_ARGS zero_cum;
972 tree param, next_param;
974 if (TARGET_DEBUG_ARG)
976 fprintf (stderr, "\ninit_cumulative_args (");
978 fprintf (stderr, "fntype code = %s, ret code = %s",
979 tree_code_name[(int) TREE_CODE (fntype)],
980 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
982 fprintf (stderr, "no fntype");
985 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
990 /* Set up the number of registers to use for passing arguments;
   default comes from -mregparm, overridden by a regparm attribute. */
991 cum->nregs = ix86_regparm;
994 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
997 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1000 /* Determine if this function has variable arguments. This is
1001 indicated by the last argument being 'void_type_node' if there
1002 are no variable arguments. If there are variable arguments, then
1003 we won't pass anything in registers */
1007 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1008 param != 0; param = next_param)
1010 next_param = TREE_CHAIN (param);
1011 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1016 if (TARGET_DEBUG_ARG)
1017 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1022 /* Update the data in CUM to advance over an argument
1023 of mode MODE and data type TYPE.
1024 (TYPE is null for libcalls where that information may not be available.) */
1027 function_arg_advance (cum, mode, type, named)
1028 CUMULATIVE_ARGS *cum; /* current arg information */
1029 enum machine_mode mode; /* current arg mode */
1030 tree type; /* type of the argument or 0 if lib support */
1031 int named; /* whether or not the argument was named */
1034 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
/* Round the byte size up to whole words. */
1035 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1037 if (TARGET_DEBUG_ARG)
1039 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
1040 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
/* TImode arguments are tracked in the separate SSE-register counters. */
1041 if (TARGET_SSE && mode == TImode)
1043 cum->sse_words += words;
1044 cum->sse_nregs -= 1;
1045 cum->sse_regno += 1;
1046 if (cum->sse_nregs <= 0)
/* Otherwise consume WORDS integer registers (may go negative; checked below). */
1054 cum->words += words;
1055 cum->nregs -= words;
1056 cum->regno += words;
1058 if (cum->nregs <= 0)
1067 /* Define where to put the arguments to a function.
1068 Value is zero to push the argument on the stack,
1069 or a hard register in which to store the argument.
1071 MODE is the argument's machine mode.
1072 TYPE is the data type of the argument (as a tree).
1073 This is null for libcalls where that information may
1075 CUM is a variable of type CUMULATIVE_ARGS which gives info about
1076 the preceding args and about the function being called.
1077 NAMED is nonzero if this argument is a named parameter
1078 (otherwise it is an extra parameter matching an ellipsis). */
1081 function_arg (cum, mode, type, named)
1082 CUMULATIVE_ARGS *cum; /* current arg information */
1083 enum machine_mode mode; /* current arg mode */
1084 tree type; /* type of the argument or 0 if lib support */
1085 int named; /* != 0 for normal args, == 0 for ... args */
1089 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
/* Round the byte size up to whole words (same formula as function_arg_advance). */
1090 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1094 /* For now, pass fp/complex values on the stack. */
/* Argument fits in the remaining integer registers: use the next regno. */
1103 if (words <= cum->nregs)
1104 ret = gen_rtx_REG (mode, cum->regno);
1108 ret = gen_rtx_REG (mode, cum->sse_regno);
1112 if (TARGET_DEBUG_ARG)
1115 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
1116 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
1119 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
1121 fprintf (stderr, ", stack");
1123 fprintf (stderr, " )\n");
1130 /* Return nonzero if OP is (const_int 1), else return zero. */
1133 const_int_1_operand (op, mode)
1135 enum machine_mode mode ATTRIBUTE_UNUSED;
1137 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
1140 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
1141 reference and a constant. */
1144 symbolic_operand (op, mode)
1146 enum machine_mode mode ATTRIBUTE_UNUSED;
1148 switch (GET_CODE (op))
/* NOTE(review): UNSPEC numbers 6 and 7 presumably encode @GOT/@GOTOFF
   references -- confirm against the UNSPEC definitions in i386.md. */
1156 if (GET_CODE (op) == SYMBOL_REF
1157 || GET_CODE (op) == LABEL_REF
1158 || (GET_CODE (op) == UNSPEC
1159 && XINT (op, 1) >= 6
1160 && XINT (op, 1) <= 7))
/* Beyond a bare symbol, only symbol+CONST_INT sums are considered. */
1162 if (GET_CODE (op) != PLUS
1163 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1167 if (GET_CODE (op) == SYMBOL_REF
1168 || GET_CODE (op) == LABEL_REF)
1170 /* Only @GOTOFF gets offsets. */
1171 if (GET_CODE (op) != UNSPEC
1172 || XINT (op, 1) != 7)
1175 op = XVECEXP (op, 0, 0);
1176 if (GET_CODE (op) == SYMBOL_REF
1177 || GET_CODE (op) == LABEL_REF)
1186 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
1189 pic_symbolic_operand (op, mode)
1191 enum machine_mode mode ATTRIBUTE_UNUSED;
/* Such references always appear wrapped in a CONST, either as a bare
   UNSPEC or as (plus UNSPEC const_int). */
1193 if (GET_CODE (op) == CONST)
1196 if (GET_CODE (op) == UNSPEC)
1198 if (GET_CODE (op) != PLUS
1199 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1202 if (GET_CODE (op) == UNSPEC)
1208 /* Test for a valid operand for a call instruction. Don't allow the
1209 arg pointer register or virtual regs since they may decay into
1210 reg + const, which the patterns can't handle. */
1213 call_insn_operand (op, mode)
1215 enum machine_mode mode ATTRIBUTE_UNUSED;
1217 /* Disallow indirect through a virtual register. This leads to
1218 compiler aborts when trying to eliminate them. */
1219 if (GET_CODE (op) == REG
1220 && (op == arg_pointer_rtx
1221 || op == frame_pointer_rtx
1222 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
1223 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
1226 /* Disallow `call 1234'. Due to varying assembler lameness this
1227 gets either rejected or translated to `call .+1234'. */
1228 if (GET_CODE (op) == CONST_INT)
1231 /* Explicitly allow SYMBOL_REF even if pic. */
1232 if (GET_CODE (op) == SYMBOL_REF)
1235 /* Half-pic doesn't allow anything but registers and constants.
1236 We've just taken care of the latter. */
1238 return register_operand (op, Pmode);
1240 /* Otherwise we can allow any general_operand in the address. */
1241 return general_operand (op, Pmode);
/* Like call_insn_operand, but only match a SYMBOL_REF, possibly offset by
   a constant: (const (plus SYMBOL_REF const_int)). */
1245 constant_call_address_operand (op, mode)
1247 enum machine_mode mode ATTRIBUTE_UNUSED;
1249 if (GET_CODE (op) == CONST
1250 && GET_CODE (XEXP (op, 0)) == PLUS
1251 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1252 op = XEXP (XEXP (op, 0), 0);
1253 return GET_CODE (op) == SYMBOL_REF;
1256 /* Match exactly zero and one. */
1259 const0_operand (op, mode)
1261 enum machine_mode mode;
1263 return op == CONST0_RTX (mode);
1267 const1_operand (op, mode)
1269 enum machine_mode mode ATTRIBUTE_UNUSED;
1271 return op == const1_rtx;
1274 /* Match 2, 4, or 8. Used for leal multiplicands. */
1277 const248_operand (op, mode)
1279 enum machine_mode mode ATTRIBUTE_UNUSED;
1281 return (GET_CODE (op) == CONST_INT
1282 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
1285 /* True if this is a constant appropriate for an increment or decrement. */
1288 incdec_operand (op, mode)
1290 enum machine_mode mode;
1292 /* On Pentium4, the inc and dec operations cause an extra dependency on flag
1293 registers, since carry flag is not set. */
1294 if (TARGET_PENTIUM4 && !optimize_size)
1296 if (op == const1_rtx || op == constm1_rtx)
1298 if (GET_CODE (op) != CONST_INT)
/* A mode-sized all-ones constant is -1 in that mode, i.e. a decrement. */
1300 if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
1302 if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
1304 if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
1309 /* Return false if this is the stack pointer, or any other fake
1310 register eliminable to the stack pointer. Otherwise, this is
1313 This is used to prevent esp from being used as an index reg.
1314 Which would only happen in pathological cases. */
1317 reg_no_sp_operand (op, mode)
1319 enum machine_mode mode;
/* Look through a SUBREG before checking the underlying register. */
1322 if (GET_CODE (t) == SUBREG)
1324 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
1327 return register_operand (op, mode);
/* Match a hard MMX register. */
1331 mmx_reg_operand (op, mode)
1333 enum machine_mode mode ATTRIBUTE_UNUSED;
1335 return MMX_REG_P (op);
1338 /* Return false if this is any eliminable register. Otherwise
1342 general_no_elim_operand (op, mode)
1344 enum machine_mode mode;
1347 if (GET_CODE (t) == SUBREG)
1349 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1350 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1351 || t == virtual_stack_dynamic_rtx)
1354 return general_operand (op, mode);
1357 /* Return false if this is any eliminable register. Otherwise
1358 register_operand or const_int. */
1361 nonmemory_no_elim_operand (op, mode)
1363 enum machine_mode mode;
1366 if (GET_CODE (t) == SUBREG)
1368 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1369 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1370 || t == virtual_stack_dynamic_rtx)
1373 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
1376 /* Return true if op is a Q_REGS class register. */
1379 q_regs_operand (op, mode)
1381 enum machine_mode mode;
1383 if (mode != VOIDmode && GET_MODE (op) != mode)
1385 if (GET_CODE (op) == SUBREG)
1386 op = SUBREG_REG (op);
1387 return QI_REG_P (op);
1390 /* Return true if op is a NON_Q_REGS class register. */
1393 non_q_regs_operand (op, mode)
1395 enum machine_mode mode;
1397 if (mode != VOIDmode && GET_MODE (op) != mode)
1399 if (GET_CODE (op) == SUBREG)
1400 op = SUBREG_REG (op);
1401 return NON_QI_REG_P (op);
1404 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
1407 sse_comparison_operator (op, mode)
1409 enum machine_mode mode ATTRIBUTE_UNUSED;
1411 enum rtx_code code = GET_CODE (op);
1414 /* Operations supported directly. */
1424 /* These are equivalent to ones above in non-IEEE comparisons. */
1431 return !TARGET_IEEE_FP;
1436 /* Return 1 if OP is a valid comparison operator in valid mode. */
1438 ix86_comparison_operator (op, mode)
1440 enum machine_mode mode;
1442 enum machine_mode inmode;
1443 enum rtx_code code = GET_CODE (op);
1444 if (mode != VOIDmode && GET_MODE (op) != mode)
1446 if (GET_RTX_CLASS (code) != '<')
/* inmode is the mode of the flags value being compared. */
1448 inmode = GET_MODE (XEXP (op, 0));
1450 if (inmode == CCFPmode || inmode == CCFPUmode)
1452 enum rtx_code second_code, bypass_code;
/* Valid only if the FP comparison needs no splitting into extra jumps. */
1453 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
1454 return (bypass_code == NIL && second_code == NIL);
1461 if (inmode == CCmode || inmode == CCGCmode
1462 || inmode == CCGOCmode || inmode == CCNOmode)
1465 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
1466 if (inmode == CCmode)
1470 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
1478 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
1481 fcmov_comparison_operator (op, mode)
1483 enum machine_mode mode;
1485 enum machine_mode inmode;
1486 enum rtx_code code = GET_CODE (op);
1487 if (mode != VOIDmode && GET_MODE (op) != mode)
1489 if (GET_RTX_CLASS (code) != '<')
1491 inmode = GET_MODE (XEXP (op, 0));
1492 if (inmode == CCFPmode || inmode == CCFPUmode)
1494 enum rtx_code second_code, bypass_code;
1495 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
1496 if (bypass_code != NIL || second_code != NIL)
1498 code = ix86_fp_compare_code_to_integer (code);
1500 /* i387 supports just a limited set of condition codes. */
1503 case LTU: case GTU: case LEU: case GEU:
1504 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
1507 case ORDERED: case UNORDERED:
1515 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
1518 promotable_binary_operator (op, mode)
1520 enum machine_mode mode ATTRIBUTE_UNUSED;
1522 switch (GET_CODE (op))
1525 /* Modern CPUs have same latency for HImode and SImode multiply,
1526 but 386 and 486 do HImode multiply faster. */
1527 return ix86_cpu > PROCESSOR_I486;
1539 /* Nearly general operand, but accept any const_double, since we wish
1540 to be able to drop them into memory rather than have them get pulled
1544 cmp_fp_expander_operand (op, mode)
1546 enum machine_mode mode;
1548 if (mode != VOIDmode && mode != GET_MODE (op))
1550 if (GET_CODE (op) == CONST_DOUBLE)
1552 return general_operand (op, mode);
1555 /* Match an SI or HImode register for a zero_extract. */
1558 ext_register_operand (op, mode)
1560 enum machine_mode mode ATTRIBUTE_UNUSED;
1562 if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
1564 return register_operand (op, VOIDmode);
1567 /* Return 1 if this is a valid binary floating-point operation.
1568 OP is the expression matched, and MODE is its mode. */
1571 binary_fp_operator (op, mode)
1573 enum machine_mode mode;
1575 if (mode != VOIDmode && mode != GET_MODE (op))
1578 switch (GET_CODE (op))
1584 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
/* Match a MULT rtx, any mode. */
1592 mult_operator(op, mode)
1594 enum machine_mode mode ATTRIBUTE_UNUSED;
1596 return GET_CODE (op) == MULT;
/* Match a DIV rtx, any mode. */
1600 div_operator(op, mode)
1602 enum machine_mode mode ATTRIBUTE_UNUSED;
1604 return GET_CODE (op) == DIV;
/* Match any commutative ('c') or binary ('2') rtx of the given mode. */
1608 arith_or_logical_operator (op, mode)
1610 enum machine_mode mode;
1612 return ((mode == VOIDmode || GET_MODE (op) == mode)
1613 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
1614 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
1617 /* Returns 1 if OP is memory operand with a displacement. */
1620 memory_displacement_operand (op, mode)
1622 enum machine_mode mode;
1624 struct ix86_address parts;
1626 if (! memory_operand (op, mode))
1629 if (! ix86_decompose_address (XEXP (op, 0), &parts))
1632 return parts.disp != NULL_RTX;
1635 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
1636 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
1638 ??? It seems likely that this will only work because cmpsi is an
1639 expander, and no actual insns use this. */
1642 cmpsi_operand (op, mode)
1644 enum machine_mode mode;
1646 if (general_operand (op, mode))
/* Also accept the (and (zero_extract ... 8 8) const_int) shape produced
   for byte-high-register tests (e.g. testing %ah). */
1649 if (GET_CODE (op) == AND
1650 && GET_MODE (op) == SImode
1651 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
1652 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
1653 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
1654 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
1655 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
1656 && GET_CODE (XEXP (op, 1)) == CONST_INT
1662 /* Returns 1 if OP is memory operand that can not be represented by the
1666 long_memory_operand (op, mode)
1668 enum machine_mode mode;
1670 if (! memory_operand (op, mode))
1673 return memory_address_length (op) != 0;
1676 /* Return nonzero if the rtx is known aligned. */
1679 aligned_operand (op, mode)
1681 enum machine_mode mode;
1683 struct ix86_address parts;
1685 if (!general_operand (op, mode))
1688 /* Registers and immediate operands are always "aligned". */
1689 if (GET_CODE (op) != MEM)
1692 /* Don't even try to do any aligned optimizations with volatiles. */
1693 if (MEM_VOLATILE_P (op))
1698 /* Pushes and pops are only valid on the stack pointer. */
1699 if (GET_CODE (op) == PRE_DEC
1700 || GET_CODE (op) == POST_INC)
1703 /* Decode the address. */
1704 if (! ix86_decompose_address (op, &parts))
1707 /* Look for some component that isn't known to be aligned. */
1711 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
1716 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
/* A displacement must be a multiple of 4 to keep 32-bit alignment. */
1721 if (GET_CODE (parts.disp) != CONST_INT
1722 || (INTVAL (parts.disp) & 3) != 0)
1726 /* Didn't find one -- this must be an aligned address. */
1730 /* Return true if the constant is something that can be loaded with
1731 a special instruction. Only handle 0.0 and 1.0; others are less
1735 standard_80387_constant_p (x)
1738 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
1740 /* Note that on the 80387 there are other constants, such as pi, that we should
1741 support too. On some machines, these are much slower to load as standard
1742 constant than to load from doubles in memory. */
1743 if (x == CONST0_RTX (GET_MODE (x)))
1745 if (x == CONST1_RTX (GET_MODE (x)))
1750 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
1753 standard_sse_constant_p (x)
/* Only all-zero (loadable via xorps/pxor) qualifies. */
1756 if (GET_CODE (x) != CONST_DOUBLE)
1758 return (x == CONST0_RTX (GET_MODE (x)));
1761 /* Returns 1 if OP contains a symbol reference */
1764 symbolic_reference_mentioned_p (op)
1767 register const char *fmt;
1770 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
/* Recurse through every rtx ('e') and rtx-vector ('E') operand. */
1773 fmt = GET_RTX_FORMAT (GET_CODE (op));
1774 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
1780 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
1781 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
1785 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
1792 /* Return 1 if it is appropriate to emit `ret' instructions in the
1793 body of a function. Do this only if the epilogue is simple, needing a
1794 couple of insns. Prior to reloading, we can't tell how many registers
1795 must be saved, so return 0 then. Return 0 if there is no frame
1796 marker to de-allocate.
1798 If NON_SAVING_SETJMP is defined and true, then it is not possible
1799 for the epilogue to be simple, so return 0. This is a special case
1800 since NON_SAVING_SETJMP will not cause regs_ever_live to change
1801 until final, but jump_optimize may need to know sooner if a
1805 ix86_can_use_return_insn_p ()
1807 struct ix86_frame frame;
1809 #ifdef NON_SAVING_SETJMP
1810 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
1813 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
1814 if (profile_block_flag == 2)
1818 if (! reload_completed || frame_pointer_needed)
1821 /* Don't allow more than 32 pop, since that's all we can do
1822 with one instruction.
   NOTE(review): the code actually enforces args_size < 32768 bytes;
   "32 pop" reads garbled -- confirm against the `ret $imm16` immediate range. */
1823 if (current_function_pops_args
1824 && current_function_args_size >= 32768)
/* Simple epilogue = nothing to deallocate and no registers to restore. */
1827 ix86_compute_frame_layout (&frame);
1828 return frame.to_allocate == 0 && frame.nregs == 0;
1831 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
1833 x86_64_sign_extended_value (value)
1836 switch (GET_CODE (value))
1838 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
1839 to be at least 32, so all acceptable constants are
1840 represented as CONST_INT. */
1842 if (HOST_BITS_PER_WIDE_INT == 32)
/* Check that the DImode value survives a round-trip through SImode,
   i.e. it fits a sign-extended 32-bit immediate. */
1846 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
1847 return trunc_int_for_mode (val, SImode) == val;
1851 /* For certain code models, the symbolic references are known to fit. */
1853 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
1855 /* For certain code models, the code is near as well. */
1857 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
1859 /* We also may accept the offsetted memory references in certain special
1862 if (GET_CODE (XEXP (value, 0)) == UNSPEC
1863 && XVECLEN (XEXP (value, 0), 0) == 1
1864 && XINT (XEXP (value, 0), 1) == 15)
1866 else if (GET_CODE (XEXP (value, 0)) == PLUS)
1868 rtx op1 = XEXP (XEXP (value, 0), 0);
1869 rtx op2 = XEXP (XEXP (value, 0), 1);
1870 HOST_WIDE_INT offset;
1872 if (ix86_cmodel == CM_LARGE)
1874 if (GET_CODE (op2) != CONST_INT)
1876 offset = trunc_int_for_mode (INTVAL (op2), DImode);
1877 switch (GET_CODE (op1))
1880 /* For CM_SMALL assume that latest object is 1MB before
1881 end of 31bits boundary. We may also accept pretty
1882 large negative constants knowing that all objects are
1883 in the positive half of address space. */
1884 if (ix86_cmodel == CM_SMALL
1885 && offset < 1024*1024*1024
1886 && trunc_int_for_mode (offset, SImode) == offset)
1888 /* For CM_KERNEL we know that all objects reside in the
1889 negative half of 32bits address space. We may not
1890 accept negative offsets, since they may be just off
1891 and we may accept pretty large positive ones. */
1892 if (ix86_cmodel == CM_KERNEL
1894 && trunc_int_for_mode (offset, SImode) == offset)
1898 /* These conditions are similar to SYMBOL_REF ones, just the
1899 constraints for code models differ. */
1900 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
1901 && offset < 1024*1024*1024
1902 && trunc_int_for_mode (offset, SImode) == offset)
1904 if (ix86_cmodel == CM_KERNEL
1906 && trunc_int_for_mode (offset, SImode) == offset)
1919 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
1921 x86_64_zero_extended_value (value)
1924 switch (GET_CODE (value))
1927 if (HOST_BITS_PER_WIDE_INT == 32)
1928 return (GET_MODE (value) == VOIDmode
1929 && !CONST_DOUBLE_HIGH (value));
1933 if (HOST_BITS_PER_WIDE_INT == 32)
1934 return INTVAL (value) >= 0;
/* Fits iff no bits above the low 32 are set. */
1936 return !(INTVAL (value) & ~(HOST_WIDE_INT)0xffffffff);
1939 /* For certain code models, the symbolic references are known to fit. */
1941 return ix86_cmodel == CM_SMALL;
1943 /* For certain code models, the code is near as well. */
1945 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
1947 /* We also may accept the offsetted memory references in certain special
1950 if (GET_CODE (XEXP (value, 0)) == PLUS)
1952 rtx op1 = XEXP (XEXP (value, 0), 0);
1953 rtx op2 = XEXP (XEXP (value, 0), 1);
1955 if (ix86_cmodel == CM_LARGE)
1957 switch (GET_CODE (op1))
1961 /* For small code model we may accept pretty large positive
1962 offsets, since one bit is available for free. Negative
1963 offsets are limited by the size of NULL pointer area
1964 specified by the ABI. */
1965 if (ix86_cmodel == CM_SMALL
1966 && GET_CODE (op2) == CONST_INT
1967 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
1968 && (trunc_int_for_mode (INTVAL (op2), SImode)
1971 /* ??? For the kernel, we may accept adjustment of
1972 -0x10000000, since we know that it will just convert
1973 negative address space to positive, but perhaps this
1974 is not worthwhile. */
1977 /* These conditions are similar to SYMBOL_REF ones, just the
1978 constraints for code models differ. */
1979 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
1980 && GET_CODE (op2) == CONST_INT
1981 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
1982 && (trunc_int_for_mode (INTVAL (op2), SImode)
1996 /* Value should be nonzero if functions must have frame pointers.
1997 Zero means the frame pointer need not be set up (and parms may
1998 be accessed via the stack pointer) in functions that seem suitable. */
2001 ix86_frame_pointer_required ()
2003 /* If we accessed previous frames, then the generated code expects
2004 to be able to access the saved ebp value in our frame. */
2005 if (cfun->machine->accesses_prev_frame)
2008 /* Several x86 os'es need a frame pointer for other reasons,
2009 usually pertaining to setjmp. */
2010 if (SUBTARGET_FRAME_POINTER_REQUIRED)
2013 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
2014 the frame pointer by default. Turn it back on now if we've not
2015 got a leaf function. */
2016 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
2022 /* Record that the current function accesses previous call frames,
   so ix86_frame_pointer_required will force a frame pointer. */
2025 ix86_setup_frame_addresses ()
2027 cfun->machine->accesses_prev_frame = 1;
2030 static char pic_label_name[32];
2032 /* This function generates code for -fpic that loads %ebx with
2033 the return address of the caller and then returns. */
2036 ix86_asm_file_end (file)
/* Nothing to emit unless the deep-branch-prediction PC thunk was requested
   (pic_label_name is set by load_pic_register). */
2041 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
2044 /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related
2045 to updating relocations to a section being discarded such that this
2046 doesn't work. Ought to detect this at configure time. */
2047 #if 0 && defined (ASM_OUTPUT_SECTION_NAME)
2048 /* The trick here is to create a linkonce section containing the
2049 pic label thunk, but to refer to it with an internal label.
2050 Because the label is internal, we don't have inter-dso name
2051 binding issues on hosts that don't support ".hidden".
2053 In order to use these macros, however, we must create a fake
2056 tree decl = build_decl (FUNCTION_DECL,
2057 get_identifier ("i686.get_pc_thunk"),
2059 DECL_ONE_ONLY (decl) = 1;
2060 UNIQUE_SECTION (decl, 0);
2061 named_section (decl, NULL, 0);
2067 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
2068 internal (non-global) label that's being emitted, it didn't make
2069 sense to have .type information for local labels. This caused
2070 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
2071 me debug info for a label that you're declaring non-global?) this
2072 was changed to call ASM_OUTPUT_LABEL() instead. */
2074 ASM_OUTPUT_LABEL (file, pic_label_name);
/* Thunk body: load the return address (top of stack) into the PIC
   register, then return. */
2076 xops[0] = pic_offset_table_rtx;
2077 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
2078 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
2079 output_asm_insn ("ret", xops);
/* Emit prologue code to set up the PIC register with the address of
   the GOT, via either the deep-branch-prediction thunk or an inline
   call/pop sequence. */
2083 load_pic_register ()
2087 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
2089 if (TARGET_DEEP_BRANCH_PREDICTION)
/* Lazily create the thunk label; ix86_asm_file_end emits its body. */
2091 if (! pic_label_name[0])
2092 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
2093 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
2097 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
2100 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
2102 if (! TARGET_DEEP_BRANCH_PREDICTION)
2103 emit_insn (gen_popsi1 (pic_offset_table_rtx));
2105 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
2108 /* Generate an SImode "push" pattern for input ARG:
   (set (mem:SI (pre_dec:SI sp)) arg). */
2114 return gen_rtx_SET (VOIDmode,
2115 gen_rtx_MEM (SImode,
2116 gen_rtx_PRE_DEC (SImode,
2117 stack_pointer_rtx)),
2121 /* Return 1 if we need to save REGNO. */
2123 ix86_save_reg (regno)
/* The PIC register must also be saved whenever PIC addressing is live. */
2126 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
2127 || current_function_uses_const_pool);
2128 return ((regs_ever_live[regno] && !call_used_regs[regno]
2129 && !fixed_regs[regno]
2130 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed))
2131 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used));
2135 /* Return number of registers to be saved on the stack. */
2143 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
2144 if (ix86_save_reg (regno))
2149 /* Return the offset between two registers, one to be eliminated, and the other
2150 its replacement, at the start of a routine. */
2153 ix86_initial_elimination_offset (from, to)
2157 struct ix86_frame frame;
2158 ix86_compute_frame_layout (&frame);
2160 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
2161 return frame.hard_frame_pointer_offset;
2162 else if (from == FRAME_POINTER_REGNUM
2163 && to == HARD_FRAME_POINTER_REGNUM)
2164 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
2167 if (to != STACK_POINTER_REGNUM)
2169 else if (from == ARG_POINTER_REGNUM)
2170 return frame.stack_pointer_offset;
2171 else if (from != FRAME_POINTER_REGNUM)
2174 return frame.stack_pointer_offset - frame.frame_pointer_offset;
2178 /* Fill structure ix86_frame about frame of currently computed function. */
2181 ix86_compute_frame_layout (frame)
2182 struct ix86_frame *frame;
2184 HOST_WIDE_INT total_size;
2185 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
2187 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
2188 HOST_WIDE_INT size = get_frame_size ();
2190 frame->nregs = ix86_nsaved_regs ();
2193 /* Skip return value and save base pointer. */
2194 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
2196 frame->hard_frame_pointer_offset = offset;
2198 /* Do some sanity checking of stack_alignment_needed and
2199 preferred_alignment, since the i386 port is the only one using these
2200 features that may break easily. */
2202 if (size && !stack_alignment_needed)
2204 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
2206 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
2208 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
2211 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
2212 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
2214 /* Register save area */
2215 offset += frame->nregs * UNITS_PER_WORD;
2217 /* Align start of frame for local function. */
2218 frame->padding1 = ((offset + stack_alignment_needed - 1)
2219 & -stack_alignment_needed) - offset;
2221 offset += frame->padding1;
2223 /* Frame pointer points here. */
2224 frame->frame_pointer_offset = offset;
2228 /* Add outgoing arguments area. */
2229 if (ACCUMULATE_OUTGOING_ARGS)
2231 offset += current_function_outgoing_args_size;
2232 frame->outgoing_arguments_size = current_function_outgoing_args_size;
2235 frame->outgoing_arguments_size = 0;
2237 /* Align stack boundary. */
2238 frame->padding2 = ((offset + preferred_alignment - 1)
2239 & -preferred_alignment) - offset;
2241 offset += frame->padding2;
2243 /* We've reached end of stack frame. */
2244 frame->stack_pointer_offset = offset;
2246 /* Size prologue needs to allocate. */
2247 frame->to_allocate =
2248 (size + frame->padding1 + frame->padding2
2249 + frame->outgoing_arguments_size);
2252 fprintf (stderr, "nregs: %i\n", frame->nregs);
2253 fprintf (stderr, "size: %i\n", size);
2254 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
2255 fprintf (stderr, "padding1: %i\n", frame->padding1);
2256 fprintf (stderr, "padding2: %i\n", frame->padding2);
2257 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
2258 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
2259 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
2260 frame->hard_frame_pointer_offset);
2261 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
2265 /* Emit code to save registers in the prologue.  Pushes run from the
   highest register number down; each push is marked frame-related for
   unwind/debug info. */
2268 ix86_emit_save_regs ()
2273 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
2274 if (ix86_save_reg (regno))
2276 insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
2277 RTX_FRAME_RELATED_P (insn) = 1;
2281 /* Expand the prologue into a bunch of separate insns. */
2284 ix86_expand_prologue ()
2287 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
2288 || current_function_uses_const_pool);
2289 struct ix86_frame frame;
2291 ix86_compute_frame_layout (&frame);
2293 /* Note: AT&T enter does NOT have reversed args. Enter is probably
2294 slower on all targets. Also sdb doesn't like it. */
2296 if (frame_pointer_needed)
/* push %ebp; mov %esp, %ebp -- both frame-related for unwind info. */
2298 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
2299 RTX_FRAME_RELATED_P (insn) = 1;
2301 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2302 RTX_FRAME_RELATED_P (insn) = 1;
2305 ix86_emit_save_regs ();
2307 if (frame.to_allocate == 0)
2309 else if (! TARGET_STACK_PROBE || frame.to_allocate < CHECK_STACK_LIMIT)
2311 if (frame_pointer_needed)
2312 insn = emit_insn (gen_pro_epilogue_adjust_stack
2313 (stack_pointer_rtx, stack_pointer_rtx,
2314 GEN_INT (-frame.to_allocate), hard_frame_pointer_rtx));
2316 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
2317 GEN_INT (-frame.to_allocate)));
2318 RTX_FRAME_RELATED_P (insn) = 1;
2322 /* ??? Is this only valid for Win32?  Large allocations go through
   _alloca so each page is probed (stack-probe requirement). */
2326 arg0 = gen_rtx_REG (SImode, 0);
2327 emit_move_insn (arg0, GEN_INT (frame.to_allocate));
2329 sym = gen_rtx_MEM (FUNCTION_MODE,
2330 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
2331 insn = emit_call_insn (gen_call (sym, const0_rtx));
2333 CALL_INSN_FUNCTION_USAGE (insn)
2334 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
2335 CALL_INSN_FUNCTION_USAGE (insn));
2338 #ifdef SUBTARGET_PROLOGUE
2343 load_pic_register ();
2345 /* If we are profiling, make sure no instructions are scheduled before
2346 the call to mcount. However, if -fpic, the above call will have
2348 if ((profile_flag || profile_block_flag) && ! pic_reg_used)
2349 emit_insn (gen_blockage ());
2352 /* Emit code to add TSIZE to esp value. Use POP instruction when
/* NOTE(review): the header comment above is truncated in this excerpt;
   the POP-based variant it mentions is not visible here.  */
2356 ix86_emit_epilogue_esp_adjustment (tsize)
2359 /* If a frame pointer is present, we must be sure to tie the sp
2360 to the fp so that we don't mis-schedule. */
2361 if (frame_pointer_needed)
2362 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2365 hard_frame_pointer_rtx))
2371 /* Emit code to restore saved registers using MOV insns. First register
2372 is restored from POINTER + OFFSET. */
2374 ix86_emit_restore_regs_using_mov (pointer, offset)
/* Walk all hard registers; each one that was saved in the prologue is
   reloaded from successive word-sized slots at POINTER + OFFSET.  */
2380 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2381 if (ix86_save_reg (regno))
2383 emit_move_insn (gen_rtx_REG (Pmode, regno),
2384 adj_offsettable_operand (gen_rtx_MEM (Pmode,
2387 offset += UNITS_PER_WORD;
2391 /* Restore function stack, frame, and registers. */
/* NOTE(review): fragmentary excerpt — else-arms and braces are missing
   between the numbered lines; comments describe only what is visible.  */
2394 ix86_expand_epilogue (emit_return)
2398 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
2399 struct ix86_frame frame;
2400 HOST_WIDE_INT offset;
2402 ix86_compute_frame_layout (&frame);
2404 /* Calculate start of saved registers relative to ebp. */
2405 offset = -frame.nregs * UNITS_PER_WORD;
2407 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
2408 if (profile_block_flag == 2)
2410 FUNCTION_BLOCK_PROFILER_EXIT;
2414 /* If we're only restoring one register and sp is not valid then
2415 using a move instruction to restore the register since it's
2416 less work than reloading sp and popping the register.
2418 The default code results in stack adjustment using add/lea instruction,
2419 while this code results in LEAVE instruction (or discrete equivalent),
2420 so it is profitable in some other cases as well. Especially when there
2421 are no registers to restore. We also use this code when TARGET_USE_LEAVE
2422 and there is exactly one register to pop. This heuristic may need some
2423 tuning in future. */
2424 if ((!sp_valid && frame.nregs <= 1)
2425 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
2426 || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
2427 && frame.nregs == 1))
2429 /* Restore registers. We can use ebp or esp to address the memory
2430 locations. If both are available, default to ebp, since offsets
2431 are known to be small. Only exception is esp pointing directly to the
2432 end of block of saved registers, where we may simplify addressing
2435 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
2436 ix86_emit_restore_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
2438 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, offset);
2440 if (!frame_pointer_needed)
2441 ix86_emit_epilogue_esp_adjustment (frame.to_allocate
2442 + frame.nregs * UNITS_PER_WORD);
2443 /* If not an i386, mov & pop is faster than "leave". */
2444 else if (TARGET_USE_LEAVE || optimize_size)
2445 emit_insn (gen_leave ());
2448 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2449 hard_frame_pointer_rtx,
2451 hard_frame_pointer_rtx));
2452 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
2457 /* First step is to deallocate the stack frame so that we can
2458 pop the registers. */
2461 if (!frame_pointer_needed)
2463 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2464 hard_frame_pointer_rtx,
2466 hard_frame_pointer_rtx));
2468 else if (frame.to_allocate)
2469 ix86_emit_epilogue_esp_adjustment (frame.to_allocate);
/* Pop every register saved by the prologue, then the frame pointer.  */
2471 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2472 if (ix86_save_reg (regno))
2473 emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
2474 if (frame_pointer_needed)
2475 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
2478 /* Sibcall epilogues don't want a return instruction. */
2482 if (current_function_pops_args && current_function_args_size)
2484 rtx popc = GEN_INT (current_function_pops_args);
2486 /* i386 can only pop 64K bytes. If asked to pop more, pop
2487 return address, do explicit add, and jump indirectly to the
2490 if (current_function_pops_args >= 65536)
/* %ecx (reg 2) receives the popped return address for the indirect jump.  */
2492 rtx ecx = gen_rtx_REG (SImode, 2);
2494 emit_insn (gen_popsi1 (ecx));
2495 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
2496 emit_jump_insn (gen_return_indirect_internal (ecx));
2499 emit_jump_insn (gen_return_pop_internal (popc));
2502 emit_jump_insn (gen_return_internal ());
2505 /* Extract the parts of an RTL expression that is a valid memory address
2506 for an instruction. Return false if the structure of the address is
/* Decompose ADDR into base register, index register, scale factor and
   displacement, filling *OUT.  Handles the canonical x86 forms:
   reg, base+disp, index*scale+base[+disp], shift-as-scale (for lea),
   and bare displacement.  (Fragmentary excerpt — some branches and the
   final stores into *OUT are not visible.)  */
2510 ix86_decompose_address (addr, out)
2512 struct ix86_address *out;
2514 rtx base = NULL_RTX;
2515 rtx index = NULL_RTX;
2516 rtx disp = NULL_RTX;
2517 HOST_WIDE_INT scale = 1;
2518 rtx scale_rtx = NULL_RTX;
2520 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
2522 else if (GET_CODE (addr) == PLUS)
2524 rtx op0 = XEXP (addr, 0);
2525 rtx op1 = XEXP (addr, 1);
2526 enum rtx_code code0 = GET_CODE (op0);
2527 enum rtx_code code1 = GET_CODE (op1);
2529 if (code0 == REG || code0 == SUBREG)
2531 if (code1 == REG || code1 == SUBREG)
2532 index = op0, base = op1; /* index + base */
2534 base = op0, disp = op1; /* base + displacement */
2536 else if (code0 == MULT)
2538 index = XEXP (op0, 0);
2539 scale_rtx = XEXP (op0, 1);
2540 if (code1 == REG || code1 == SUBREG)
2541 base = op1; /* index*scale + base */
2543 disp = op1; /* index*scale + disp */
2545 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
2547 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
2548 scale_rtx = XEXP (XEXP (op0, 0), 1);
2549 base = XEXP (op0, 1);
2552 else if (code0 == PLUS)
2554 index = XEXP (op0, 0); /* index + base + disp */
2555 base = XEXP (op0, 1);
2561 else if (GET_CODE (addr) == MULT)
2563 index = XEXP (addr, 0); /* index*scale */
2564 scale_rtx = XEXP (addr, 1);
2566 else if (GET_CODE (addr) == ASHIFT)
2570 /* We're called for lea too, which implements ashift on occasion. */
2571 index = XEXP (addr, 0);
2572 tmp = XEXP (addr, 1);
2573 if (GET_CODE (tmp) != CONST_INT)
/* A shift count of 0..3 corresponds to a scale of 1/2/4/8.  */
2575 scale = INTVAL (tmp);
2576 if ((unsigned HOST_WIDE_INT) scale > 3)
2581 disp = addr; /* displacement */
2583 /* Extract the integral value of scale. */
2586 if (GET_CODE (scale_rtx) != CONST_INT)
2588 scale = INTVAL (scale_rtx);
2591 /* Allow arg pointer and stack pointer as index if there is no scaling */
2592 if (base && index && scale == 1
2593 && (index == arg_pointer_rtx || index == frame_pointer_rtx
2594 || index == stack_pointer_rtx))
2601 /* Special case: %ebp cannot be encoded as a base without a displacement. */
2602 if ((base == hard_frame_pointer_rtx
2603 || base == frame_pointer_rtx
2604 || base == arg_pointer_rtx) && !disp
2607 /* Special case: on K6, [%esi] makes the instruction vector decoded.
2608 Avoid this by transforming to [%esi+0]. */
2609 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
2610 && base && !index && !disp
2612 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
2615 /* Special case: encode reg+reg instead of reg*2. */
2616 if (!base && index && scale && scale == 2)
2617 base = index, scale = 1;
2619 /* Special case: scaling cannot be encoded without base or displacement. */
2620 if (!base && !disp && index && scale != 1)
2631 /* Return cost of the memory address x.
2632 For i386, it is better to use a complex address than let gcc copy
2633 the address into a reg and make a new pseudo. But not if the address
2634 requires two regs - that would mean more pseudos with longer
2637 ix86_address_cost (x)
2640 struct ix86_address parts;
/* Decompose the address first; the cost heuristics below are phrased in
   terms of its base/index/disp/scale parts.  */
2643 if (!ix86_decompose_address (x, &parts))
2646 /* More complex memory references are better. */
2647 if (parts.disp && parts.disp != const0_rtx)
2650 /* Attempt to minimize number of registers in the address. */
2652 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
2654 && (!REG_P (parts.index)
2655 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
2659 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
2661 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
2662 && parts.base != parts.index)
2665 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
2666 since its predecode logic can't detect the length of instructions
2667 and it degenerates to vector decoded. Increase cost of such
2668 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
2669 to split such addresses or even refuse such addresses at all.
2671 Following addressing modes are affected:
2676 The first and last case may be avoidable by explicitly coding the zero in
2677 memory address, but I don't have AMD-K6 machine handy to check this
2681 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
2682 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
2683 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
2689 /* If X is a machine specific address (i.e. a symbol or label being
2690 referenced as a displacement from the GOT implemented using an
2691 UNSPEC), then return the base term. Otherwise return X. */
2694 ix86_find_base_term (x)
/* Only pic_reg + CONST shapes carry a GOT-style displacement here.  */
2699 if (GET_CODE (x) != PLUS
2700 || XEXP (x, 0) != pic_offset_table_rtx
2701 || GET_CODE (XEXP (x, 1)) != CONST)
2704 term = XEXP (XEXP (x, 1), 0);
/* Strip an optional integer offset wrapped around the UNSPEC.  */
2706 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
2707 term = XEXP (term, 0);
/* UNSPEC number 7 matches the @GOTOFF unspec generated elsewhere in
   this file (see legitimize_pic_address).  */
2709 if (GET_CODE (term) != UNSPEC
2710 || XVECLEN (term, 0) != 1
2711 || XINT (term, 1) != 7)
2714 term = XVECEXP (term, 0, 0);
2716 if (GET_CODE (term) != SYMBOL_REF
2717 && GET_CODE (term) != LABEL_REF)
2723 /* Determine if a given CONST RTX is a valid memory displacement
/* Accepts (const (unspec [sym] 6|7)) with an optional added CONST_INT,
   i.e. @GOT or @GOTOFF references to a symbol or label.  */
2727 legitimate_pic_address_disp_p (disp)
2730 if (GET_CODE (disp) != CONST)
2732 disp = XEXP (disp, 0);
/* Peel off an optional integer offset.  */
2734 if (GET_CODE (disp) == PLUS)
2736 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
2738 disp = XEXP (disp, 0);
2741 if (GET_CODE (disp) != UNSPEC
2742 || XVECLEN (disp, 0) != 1)
2745 /* Must be @GOT or @GOTOFF. */
2746 if (XINT (disp, 1) != 6
2747 && XINT (disp, 1) != 7)
2750 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2751 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
2757 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
2758 memory address for an instruction. The MODE argument is the machine mode
2759 for the MEM expression that wants to use this address.
2761 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
2762 convert common non-canonical forms to canonical form so that they will
/* Validate the base, index, scale and displacement parts in turn; on any
   failure REASON/REASON_RTX are set and (in the dropped lines) control
   jumps to the shared error report at the bottom.  */
2766 legitimate_address_p (mode, addr, strict)
2767 enum machine_mode mode;
2771 struct ix86_address parts;
2772 rtx base, index, disp;
2773 HOST_WIDE_INT scale;
2774 const char *reason = NULL;
2775 rtx reason_rtx = NULL_RTX;
2777 if (TARGET_DEBUG_ADDR)
2780 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
2781 GET_MODE_NAME (mode), strict);
2785 if (! ix86_decompose_address (addr, &parts))
2787 reason = "decomposition failed";
2792 index = parts.index;
2794 scale = parts.scale;
2796 /* Validate base register.
2798 Don't allow SUBREG's here, it can lead to spill failures when the base
2799 is one word out of a two word structure, which is represented internally
2806 if (GET_CODE (base) != REG)
2808 reason = "base is not a register";
2812 if (GET_MODE (base) != Pmode)
2814 reason = "base is not in Pmode";
/* STRICT distinguishes reload-time checking (hard regs only) from the
   earlier, laxer check that also accepts pseudos.  */
2818 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
2819 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
2821 reason = "base is not valid";
2826 /* Validate index register.
2828 Don't allow SUBREG's here, it can lead to spill failures when the index
2829 is one word out of a two word structure, which is represented internally
2836 if (GET_CODE (index) != REG)
2838 reason = "index is not a register";
2842 if (GET_MODE (index) != Pmode)
2844 reason = "index is not in Pmode";
2848 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
2849 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
2851 reason = "index is not valid";
2856 /* Validate scale factor. */
2859 reason_rtx = GEN_INT (scale);
2862 reason = "scale without index";
2866 if (scale != 2 && scale != 4 && scale != 8)
2868 reason = "scale is not a valid multiplier";
2873 /* Validate displacement. */
2878 if (!CONSTANT_ADDRESS_P (disp))
2880 reason = "displacement is not constant";
2884 if (GET_CODE (disp) == CONST_DOUBLE)
2886 reason = "displacement is a const_double";
2890 if (flag_pic && SYMBOLIC_CONST (disp))
2892 if (! legitimate_pic_address_disp_p (disp))
2894 reason = "displacement is an invalid pic construct";
2898 /* This code used to verify that a symbolic pic displacement
2899 includes the pic_offset_table_rtx register.
2901 While this is a good idea, unfortunately these constructs may
2902 be created by "adds using lea" optimization for incorrect
2911 This code is nonsensical, but results in addressing
2912 GOT table with pic_offset_table_rtx base. We can't
2913 just refuse it easily, since it gets matched by
2914 "addsi3" pattern, that later gets split to lea in the
2915 case output register differs from input. While this
2916 can be handled by separate addsi pattern for this case
2917 that never results in lea, this seems to be easier and
2918 correct fix for crash to disable this test. */
2920 else if (HALF_PIC_P ())
2922 if (! HALF_PIC_ADDRESS_P (disp)
2923 || (base != NULL_RTX || index != NULL_RTX))
2925 reason = "displacement is an invalid half-pic reference";
2931 /* Everything looks valid. */
2932 if (TARGET_DEBUG_ADDR)
2933 fprintf (stderr, "Success.\n");
2937 if (TARGET_DEBUG_ADDR)
2939 fprintf (stderr, "Error: %s\n", reason);
2940 debug_rtx (reason_rtx);
2945 /* Return an unique alias set for the GOT. */
2947 static HOST_WIDE_INT
2948 ix86_GOT_alias_set ()
/* Lazily allocated once; -1 marks "not yet created".  */
2950 static HOST_WIDE_INT set = -1;
2952 set = new_alias_set ();
2956 /* Return a legitimate reference for ORIG (an address) using the
2957 register REG. If REG is 0, a new pseudo is generated.
2959 There are two types of references that must be handled:
2961 1. Global data references must load the address from the GOT, via
2962 the PIC reg. An insn is emitted to do this load, and the reg is
2965 2. Static data references, constant pool addresses, and code labels
2966 compute the address as an offset from the GOT, whose base is in
2967 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
2968 differentiate them from global data objects. The returned
2969 address is the PIC reg + an unspec constant.
2971 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
2972 reg also appears in the address. */
2975 legitimize_pic_address (orig, reg)
2983 if (GET_CODE (addr) == LABEL_REF
2984 || (GET_CODE (addr) == SYMBOL_REF
2985 && (CONSTANT_POOL_ADDRESS_P (addr)
2986 || SYMBOL_REF_FLAG (addr))))
2988 /* This symbol may be referenced via a displacement from the PIC
2989 base address (@GOTOFF). */
2991 current_function_uses_pic_offset_table = 1;
/* UNSPEC number 7 is the @GOTOFF marker used throughout this file.  */
2992 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
2993 new = gen_rtx_CONST (Pmode, new);
2994 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2998 emit_move_insn (reg, new);
3002 else if (GET_CODE (addr) == SYMBOL_REF)
3004 /* This symbol must be referenced via a load from the
3005 Global Offset Table (@GOT). */
3007 current_function_uses_pic_offset_table = 1;
/* UNSPEC number 6 is the @GOT marker; the resulting MEM is a read-only
   GOT slot tagged with the GOT alias set.  */
3008 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
3009 new = gen_rtx_CONST (Pmode, new);
3010 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3011 new = gen_rtx_MEM (Pmode, new);
3012 RTX_UNCHANGING_P (new) = 1;
3013 MEM_ALIAS_SET (new) = ix86_GOT_alias_set ();
3016 reg = gen_reg_rtx (Pmode);
3017 emit_move_insn (reg, new);
3022 if (GET_CODE (addr) == CONST)
3024 addr = XEXP (addr, 0);
3025 if (GET_CODE (addr) == UNSPEC)
3027 /* Check that the unspec is one of the ones we generate? */
3029 else if (GET_CODE (addr) != PLUS)
3032 if (GET_CODE (addr) == PLUS)
3034 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
3036 /* Check first to see if this is a constant offset from a @GOTOFF
3037 symbol reference. */
3038 if ((GET_CODE (op0) == LABEL_REF
3039 || (GET_CODE (op0) == SYMBOL_REF
3040 && (CONSTANT_POOL_ADDRESS_P (op0)
3041 || SYMBOL_REF_FLAG (op0))))
3042 && GET_CODE (op1) == CONST_INT)
3044 current_function_uses_pic_offset_table = 1;
3045 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
3046 new = gen_rtx_PLUS (Pmode, new, op1);
3047 new = gen_rtx_CONST (Pmode, new);
3048 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3052 emit_move_insn (reg, new);
/* General PLUS: legitimize both halves recursively, then fold constant
   parts back together.  */
3058 base = legitimize_pic_address (XEXP (addr, 0), reg);
3059 new = legitimize_pic_address (XEXP (addr, 1),
3060 base == reg ? NULL_RTX : reg);
3062 if (GET_CODE (new) == CONST_INT)
3063 new = plus_constant (base, INTVAL (new));
3066 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
3068 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
3069 new = XEXP (new, 1);
3071 new = gen_rtx_PLUS (Pmode, base, new);
3079 /* Try machine-dependent ways of modifying an illegitimate address
3080 to be legitimate. If we find one, return the new, valid address.
3081 This macro is used in only one place: `memory_address' in explow.c.
3083 OLDX is the address as it was before break_out_memory_refs was called.
3084 In some cases it is useful to look at this to decide what needs to be done.
3086 MODE and WIN are passed so that this macro can use
3087 GO_IF_LEGITIMATE_ADDRESS.
3089 It is always safe for this macro to do nothing. It exists to recognize
3090 opportunities to optimize the output.
3092 For the 80386, we handle X+REG by loading X into a register R and
3093 using R+REG. R will go in a general reg and indexing will be used.
3094 However, if REG is a broken-out memory address or multiplication,
3095 nothing needs to be done because REG can certainly go in a general reg.
3097 When -fpic is used, special handling is needed for symbolic references.
3098 See comments by legitimize_pic_address in i386.c for details. */
3101 legitimize_address (x, oldx, mode)
3103 register rtx oldx ATTRIBUTE_UNUSED;
3104 enum machine_mode mode;
3109 if (TARGET_DEBUG_ADDR)
3111 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
3112 GET_MODE_NAME (mode));
3116 if (flag_pic && SYMBOLIC_CONST (x))
3117 return legitimize_pic_address (x, 0);
3119 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
3120 if (GET_CODE (x) == ASHIFT
3121 && GET_CODE (XEXP (x, 1)) == CONST_INT
3122 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
3125 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
3126 GEN_INT (1 << log));
3129 if (GET_CODE (x) == PLUS)
3131 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
3133 if (GET_CODE (XEXP (x, 0)) == ASHIFT
3134 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3135 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
3138 XEXP (x, 0) = gen_rtx_MULT (Pmode,
3139 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
3140 GEN_INT (1 << log));
3143 if (GET_CODE (XEXP (x, 1)) == ASHIFT
3144 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
3145 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
3148 XEXP (x, 1) = gen_rtx_MULT (Pmode,
3149 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
3150 GEN_INT (1 << log));
3153 /* Put multiply first if it isn't already. */
3154 if (GET_CODE (XEXP (x, 1)) == MULT)
3156 rtx tmp = XEXP (x, 0);
3157 XEXP (x, 0) = XEXP (x, 1);
3162 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
3163 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
3164 created by virtual register instantiation, register elimination, and
3165 similar optimizations. */
3166 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
3169 x = gen_rtx_PLUS (Pmode,
3170 gen_rtx_PLUS (Pmode, XEXP (x, 0),
3171 XEXP (XEXP (x, 1), 0)),
3172 XEXP (XEXP (x, 1), 1));
3176 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
3177 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
3178 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
3179 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
3180 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
3181 && CONSTANT_P (XEXP (x, 1)))
3184 rtx other = NULL_RTX;
/* Exactly one of the two constants must be a CONST_INT; fold it into
   the other term via plus_constant below.  */
3186 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
3188 constant = XEXP (x, 1);
3189 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
3191 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
3193 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
3194 other = XEXP (x, 1);
3202 x = gen_rtx_PLUS (Pmode,
3203 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
3204 XEXP (XEXP (XEXP (x, 0), 1), 0)),
3205 plus_constant (other, INTVAL (constant)));
3209 if (changed && legitimate_address_p (mode, x, FALSE))
3212 if (GET_CODE (XEXP (x, 0)) == MULT)
3215 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
3218 if (GET_CODE (XEXP (x, 1)) == MULT)
3221 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
3225 && GET_CODE (XEXP (x, 1)) == REG
3226 && GET_CODE (XEXP (x, 0)) == REG)
3229 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
3232 x = legitimize_pic_address (x, 0);
3235 if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: force the non-register half into a fresh pseudo so the
   address becomes reg+reg.  */
3238 if (GET_CODE (XEXP (x, 0)) == REG)
3240 register rtx temp = gen_reg_rtx (Pmode);
3241 register rtx val = force_operand (XEXP (x, 1), temp);
3243 emit_move_insn (temp, val);
3249 else if (GET_CODE (XEXP (x, 1)) == REG)
3251 register rtx temp = gen_reg_rtx (Pmode);
3252 register rtx val = force_operand (XEXP (x, 0), temp);
3254 emit_move_insn (temp, val);
3264 /* Print an integer constant expression in assembler syntax. Addition
3265 and subtraction are the only arithmetic that may appear in these
3266 expressions. FILE is the stdio stream to write to, X is the rtx, and
3267 CODE is the operand print code from the output string. */
3270 output_pic_addr_const (file, x, code)
3277 switch (GET_CODE (x))
3287 assemble_name (file, XSTR (x, 0));
/* 'P' requests a @PLT suffix for non-local symbols.  */
3288 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
3289 fputs ("@PLT", file);
3296 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
3297 assemble_name (asm_out_file, buf);
3301 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3305 /* This used to output parentheses around the expression,
3306 but that does not work on the 386 (either ATT or BSD assembler). */
3307 output_pic_addr_const (file, XEXP (x, 0), code);
3311 if (GET_MODE (x) == VOIDmode)
3313 /* We can use %d if the number is <32 bits and positive. */
3314 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
3315 fprintf (file, "0x%lx%08lx",
3316 (unsigned long) CONST_DOUBLE_HIGH (x),
3317 (unsigned long) CONST_DOUBLE_LOW (x));
3319 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
3322 /* We can't handle floating point constants;
3323 PRINT_OPERAND must handle them. */
3324 output_operand_lossage ("floating constant misused");
3328 /* Some assemblers need integer constants to appear first. */
3329 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
3331 output_pic_addr_const (file, XEXP (x, 0), code);
3333 output_pic_addr_const (file, XEXP (x, 1), code);
3335 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
3337 output_pic_addr_const (file, XEXP (x, 1), code);
3339 output_pic_addr_const (file, XEXP (x, 0), code);
/* MINUS: Intel dialect brackets the difference with parens, AT&T with
   square brackets.  */
3346 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
3347 output_pic_addr_const (file, XEXP (x, 0), code);
3349 output_pic_addr_const (file, XEXP (x, 1), code);
3350 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
/* UNSPEC: emit the wrapped symbol followed by its relocation suffix.  */
3354 if (XVECLEN (x, 0) != 1)
3356 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
3357 switch (XINT (x, 1))
3360 fputs ("@GOT", file);
3363 fputs ("@GOTOFF", file);
3366 fputs ("@PLT", file);
3369 output_operand_lossage ("invalid UNSPEC as operand");
3375 output_operand_lossage ("invalid expression as operand");
3379 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
3380 We need to handle our special PIC relocations. */
3383 i386_dwarf_output_addr_const (file, x)
3387 fprintf (file, "%s", INT_ASM_OP);
/* PIC constants need the relocation-aware printer; otherwise the plain
   constant printer suffices.  */
3389 output_pic_addr_const (file, x, '\0');
3391 output_addr_const (file, x);
3395 /* In the name of slightly smaller debug output, and to cater to
3396 general assembler lossage, recognize PIC+GOTOFF and turn it back
3397 into a direct symbol reference. */
3400 i386_simplify_dwarf_addr (orig_x)
/* Only (plus reg (const ...)) shapes can hide a GOT/GOTOFF unspec.  */
3405 if (GET_CODE (x) != PLUS
3406 || GET_CODE (XEXP (x, 0)) != REG
3407 || GET_CODE (XEXP (x, 1)) != CONST)
3410 x = XEXP (XEXP (x, 1), 0);
/* UNSPEC 6 is @GOT, 7 is @GOTOFF (matching legitimize_pic_address).  */
3411 if (GET_CODE (x) == UNSPEC
3412 && (XINT (x, 1) == 6
3413 || XINT (x, 1) == 7))
3414 return XVECEXP (x, 0, 0);
3416 if (GET_CODE (x) == PLUS
3417 && GET_CODE (XEXP (x, 0)) == UNSPEC
3418 && GET_CODE (XEXP (x, 1)) == CONST_INT
3419 && (XINT (XEXP (x, 0), 1) == 6
3420 || XINT (XEXP (x, 0), 1) == 7))
3421 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
/* Write the assembler condition-code suffix for CODE in MODE to FILE.
   REVERSE inverts the condition; FP selects the fcmov-style spellings.
   (Fragmentary excerpt — most case labels are missing between the
   numbered lines.)  */
3427 put_condition_code (code, mode, reverse, fp, file)
3429 enum machine_mode mode;
3435 if (mode == CCFPmode || mode == CCFPUmode)
3437 enum rtx_code second_code, bypass_code;
3438 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3439 if (bypass_code != NIL || second_code != NIL)
3441 code = ix86_fp_compare_code_to_integer (code);
3445 code = reverse_condition (code);
3456 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
3461 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
3462 Those same assemblers have the same but opposite lossage on cmov. */
3465 suffix = fp ? "nbe" : "a";
3468 if (mode == CCNOmode || mode == CCGOCmode)
3470 else if (mode == CCmode || mode == CCGCmode)
3481 if (mode == CCNOmode || mode == CCGOCmode)
3483 else if (mode == CCmode || mode == CCGCmode)
3492 suffix = fp ? "nb" : "ae";
3495 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
3505 suffix = fp ? "u" : "p";
3508 suffix = fp ? "nu" : "np";
3513 fputs (suffix, file);
/* Print the assembler name of register X to FILE, sized/qualified by the
   operand code ('b', 'w', 'k', 'q', 'y', 'h', 'm' — see print_operand's
   table).  Fragmentary excerpt; several branches are missing.  */
3517 print_reg (x, code, file)
/* Internal-only registers must never reach the assembler output.  */
3522 if (REGNO (x) == ARG_POINTER_REGNUM
3523 || REGNO (x) == FRAME_POINTER_REGNUM
3524 || REGNO (x) == FLAGS_REG
3525 || REGNO (x) == FPSR_REG)
3528 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
/* Map the operand code to an operand size (stored back into CODE as a
   byte count further below).  */
3533 else if (code == 'b')
3535 else if (code == 'k')
3537 else if (code == 'q')
3539 else if (code == 'y')
3541 else if (code == 'h')
3543 else if (code == 'm' || MMX_REG_P (x))
3546 code = GET_MODE_SIZE (GET_MODE (x));
3548 /* Irritatingly, AMD extended registers use different naming convention
3549 from the normal registers. */
3550 if (REX_INT_REG_P (x))
3555 error ("Extended registers have no high halves\n");
3558 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
3561 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
3564 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
3567 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
3570 error ("Unsupported operand size for extended register.\n");
3578 fputs (hi_reg_name[REGNO (x)], file);
3581 if (STACK_TOP_P (x))
3583 fputs ("st(0)", file);
/* 32-bit integer registers get an 'e' prefix, 8-byte ones 'r'.  */
3590 if (! ANY_FP_REG_P (x))
3591 putc (code == 8 ? 'r' : 'e', file);
3595 fputs (hi_reg_name[REGNO (x)], file);
3598 fputs (qi_reg_name[REGNO (x)], file);
3601 fputs (qi_high_reg_name[REGNO (x)], file);
3609 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
3610 C -- print opcode suffix for set/cmov insn.
3611 c -- like C, but print reversed condition
3612 R -- print the prefix for register names.
3613 z -- print the opcode suffix for the size of the current operand.
3614 * -- print a star (in certain assembler syntax)
3615 A -- print an absolute memory reference.
3616 w -- print the operand as if it's a "word" (HImode) even if it isn't.
3617 s -- print a shift double count, followed by the assembler's argument
3619 b -- print the QImode name of the register for the indicated operand.
3620 %b0 would print %al if operands[0] is reg 0.
3621 w -- likewise, print the HImode name of the register.
3622 k -- likewise, print the SImode name of the register.
3623 q -- likewise, print the DImode name of the register.
3624 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
3625 y -- print "st(0)" instead of "st" as a register.
3626 m -- print "st(n)" as an mmx register.
3627 D -- print condition for SSE cmp instruction.
/* Print operand X to FILE using the operand CODE documented above.
   Fragmentary excerpt — many case labels and else-arms are missing.  */
3631 print_operand (file, x, code)
3641 if (ASSEMBLER_DIALECT == 0)
3646 if (ASSEMBLER_DIALECT == 0)
3648 else if (ASSEMBLER_DIALECT == 1)
3650 /* Intel syntax. For absolute addresses, registers should not
3651 be surrounded by braces. */
3652 if (GET_CODE (x) != REG)
3655 PRINT_OPERAND (file, x, 0);
3661 PRINT_OPERAND (file, x, 0);
3666 if (ASSEMBLER_DIALECT == 0)
3671 if (ASSEMBLER_DIALECT == 0)
3676 if (ASSEMBLER_DIALECT == 0)
3681 if (ASSEMBLER_DIALECT == 0)
3686 if (ASSEMBLER_DIALECT == 0)
3691 if (ASSEMBLER_DIALECT == 0)
3696 /* 387 opcodes don't get size suffixes if the operands are
3699 if (STACK_REG_P (x))
3702 /* this is the size of op from size of operand */
3703 switch (GET_MODE_SIZE (GET_MODE (x)))
3706 #ifdef HAVE_GAS_FILDS_FISTS
3712 if (GET_MODE (x) == SFmode)
3727 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
3729 #ifdef GAS_MNEMONICS
3756 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
3758 PRINT_OPERAND (file, x, 0);
3764 /* Little bit of braindamage here. The SSE compare instructions
3765 do use completely different names for the comparisons that the
3766 fp conditional moves. */
3767 switch (GET_CODE (x))
3782 fputs ("unord", file);
3786 fputs ("neq", file);
3790 fputs ("nlt", file);
3794 fputs ("nle", file);
3797 fputs ("ord", file);
3805 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
3808 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
3811 /* Like above, but reverse condition */
3813 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
3816 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
3822 sprintf (str, "invalid operand code `%c'", code);
3823 output_operand_lossage (str);
3828 if (GET_CODE (x) == REG)
3830 PRINT_REG (x, code, file);
3833 else if (GET_CODE (x) == MEM)
3835 /* No `byte ptr' prefix for call instructions. */
3836 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
/* Intel dialect wants an explicit "<SIZE> PTR" prefix on memory
   operands, derived from the operand mode (or overridden below).  */
3839 switch (GET_MODE_SIZE (GET_MODE (x)))
3841 case 1: size = "BYTE"; break;
3842 case 2: size = "WORD"; break;
3843 case 4: size = "DWORD"; break;
3844 case 8: size = "QWORD"; break;
3845 case 12: size = "XWORD"; break;
3846 case 16: size = "XMMWORD"; break;
3851 /* Check for explicit size override (codes 'b', 'w' and 'k') */
3854 else if (code == 'w')
3856 else if (code == 'k')
3860 fputs (" PTR ", file);
3864 if (flag_pic && CONSTANT_ADDRESS_P (x))
3865 output_pic_addr_const (file, x, code);
3870 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
3875 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3876 REAL_VALUE_TO_TARGET_SINGLE (r, l);
3878 if (ASSEMBLER_DIALECT == 0)
3880 fprintf (file, "0x%lx", l);
3883 /* These float cases don't actually occur as immediate operands. */
3884 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
3889 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3890 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3891 fprintf (file, "%s", dstr);
3894 else if (GET_CODE (x) == CONST_DOUBLE
3895 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
3900 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3901 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3902 fprintf (file, "%s", dstr);
3908 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
3910 if (ASSEMBLER_DIALECT == 0)
3913 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
3914 || GET_CODE (x) == LABEL_REF)
3916 if (ASSEMBLER_DIALECT == 0)
3919 fputs ("OFFSET FLAT:", file);
3922 if (GET_CODE (x) == CONST_INT)
3923 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3925 output_pic_addr_const (file, x, code);
3927 output_addr_const (file, x);
3931 /* Print a memory operand whose address is ADDR. */
3934 print_operand_address (file, addr)
3938 struct ix86_address parts;
3939 rtx base, index, disp;
3942 if (! ix86_decompose_address (addr, &parts))
3946 index = parts.index;
3948 scale = parts.scale;
/* Pure displacement (no base, no index) is printed on its own.  */
3950 if (!base && !index)
3952 /* Displacement only requires special attention. */
3954 if (GET_CODE (disp) == CONST_INT)
3956 if (ASSEMBLER_DIALECT != 0)
3958 if (USER_LABEL_PREFIX[0] == 0)
3960 fputs ("ds:", file);
3962 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
3965 output_pic_addr_const (file, addr, 0);
3967 output_addr_const (file, addr);
/* AT&T dialect: disp(base,index,scale).  */
3971 if (ASSEMBLER_DIALECT == 0)
3976 output_pic_addr_const (file, disp, 0);
3977 else if (GET_CODE (disp) == LABEL_REF)
3978 output_asm_label (disp);
3980 output_addr_const (file, disp);
3985 PRINT_REG (base, 0, file);
3989 PRINT_REG (index, 0, file);
3991 fprintf (file, ",%d", scale);
/* Intel dialect: sym[base+index*scale+offset]; split a CONST plus into
   symbol part (printed outside the brackets) and integer offset.  */
3997 rtx offset = NULL_RTX;
4001 /* Pull out the offset of a symbol; print any symbol itself. */
4002 if (GET_CODE (disp) == CONST
4003 && GET_CODE (XEXP (disp, 0)) == PLUS
4004 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
4006 offset = XEXP (XEXP (disp, 0), 1);
4007 disp = gen_rtx_CONST (VOIDmode,
4008 XEXP (XEXP (disp, 0), 0));
4012 output_pic_addr_const (file, disp, 0);
4013 else if (GET_CODE (disp) == LABEL_REF)
4014 output_asm_label (disp);
4015 else if (GET_CODE (disp) == CONST_INT)
4018 output_addr_const (file, disp);
4024 PRINT_REG (base, 0, file);
4027 if (INTVAL (offset) >= 0)
4029 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
4033 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
4040 PRINT_REG (index, 0, file);
4042 fprintf (file, "*%d", scale);
4049 /* Split one or more DImode RTL references into pairs of SImode
4050 references. The RTL can be REG, offsettable MEM, integer constant, or
4051 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
4052 split and "num" is its length. lo_half and hi_half are output arrays
4053 that parallel "operands". */
/* Split each DImode value in OPERANDS[0..num-1] into an SImode low/high
   pair stored at the same index of LO_HALF/HI_HALF.  Handles constants,
   pseudo registers (before reload), hard registers and offsettable MEMs.  */
4056 split_di (operands, num, lo_half, hi_half)
4059 rtx lo_half[], hi_half[];
4063 rtx op = operands[num];
4064 if (CONSTANT_P (op))
4065 split_double (op, &lo_half[num], &hi_half[num]);
4066 else if (! reload_completed)
4068 lo_half[num] = gen_lowpart (SImode, op);
4069 hi_half[num] = gen_highpart (SImode, op);
/* After reload: a DImode hard register occupies two consecutive
   SImode hard registers, so the high half is simply REGNO + 1.  */
4071 else if (GET_CODE (op) == REG)
4073 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
4074 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
4076 else if (offsettable_memref_p (op))
/* The high word lives 4 bytes past the low word in memory.  */
4078 rtx lo_addr = XEXP (op, 0);
4079 rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
4080 lo_half[num] = change_address (op, SImode, lo_addr);
4081 hi_half[num] = change_address (op, SImode, hi_addr);
4088 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
4089 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
4090 is the expression of the binary operation. The output may either be
4091 emitted here, or returned to the caller, like all output_* functions.
4093 There is no guarantee that the operands are the same mode, as they
4094 might be within FLOAT or FLOAT_EXTEND expressions. */
4096 #ifndef SYSV386_COMPAT
4097 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
4098 wants to fix the assemblers because that causes incompatibility
4099 with gcc. No-one wants to fix gcc because that causes
4100 incompatibility with assemblers... You can use the option of
4101 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
4102 #define SYSV386_COMPAT 1
/* See the comment above: emit (or return) the assembler template for a
   387 or SSE binary PLUS/MINUS/MULT/DIV in operands[3].  The template is
   built up in BUF; `p` holds the operand/popping suffix portion.
   NOTE(review): templates use the {att|intel} dual-dialect syntax.  */
4106 output_387_binary_op (insn, operands)
4110 static char buf[30];
4113 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
4115 #ifdef ENABLE_CHECKING
4116 /* Even if we do not want to check the inputs, this documents input
4117 constraints. Which helps in understanding the following code. */
4118 if (STACK_REG_P (operands[0])
4119 && ((REG_P (operands[1])
4120 && REGNO (operands[0]) == REGNO (operands[1])
4121 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
4122 || (REG_P (operands[2])
4123 && REGNO (operands[0]) == REGNO (operands[2])
4124 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
4125 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
/* Pick the base mnemonic from the rtx code of the operation; integer
   operands select the fi* (integer-operand) forms.  */
4131 switch (GET_CODE (operands[3]))
4134 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4135 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4143 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4144 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4152 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4153 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4161 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4162 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
/* SSE case: scalar single (ss) vs scalar double (sd) suffix.  */
4176 if (GET_MODE (operands[0]) == SFmode)
4177 strcat (buf, "ss\t{%2, %0|%0, %2}");
4179 strcat (buf, "sd\t{%2, %0|%0, %2}");
4184 switch (GET_CODE (operands[3]))
/* Commutative ops: canonicalize so operands[0] == operands[1].  */
4188 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
4190 rtx temp = operands[2];
4191 operands[2] = operands[1];
4195 /* know operands[0] == operands[1]. */
4197 if (GET_CODE (operands[2]) == MEM)
4203 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
4205 if (STACK_TOP_P (operands[0]))
4206 /* How is it that we are storing to a dead operand[2]?
4207 Well, presumably operands[1] is dead too. We can't
4208 store the result to st(0) as st(0) gets popped on this
4209 instruction. Instead store to operands[2] (which I
4210 think has to be st(1)). st(1) will be popped later.
4211 gcc <= 2.8.1 didn't have this check and generated
4212 assembly code that the Unixware assembler rejected. */
4213 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
4215 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
4219 if (STACK_TOP_P (operands[0]))
4220 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
4222 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (MINUS/DIV): operand order matters, and the
   reversed (r) forms come into play below.  */
4227 if (GET_CODE (operands[1]) == MEM)
4233 if (GET_CODE (operands[2]) == MEM)
4239 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
4242 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
4243 derived assemblers, confusingly reverse the direction of
4244 the operation for fsub{r} and fdiv{r} when the
4245 destination register is not st(0). The Intel assembler
4246 doesn't have this brain damage. Read !SYSV386_COMPAT to
4247 figure out what the hardware really does. */
4248 if (STACK_TOP_P (operands[0]))
4249 p = "{p\t%0, %2|rp\t%2, %0}";
4251 p = "{rp\t%2, %0|p\t%0, %2}";
4253 if (STACK_TOP_P (operands[0]))
4254 /* As above for fmul/fadd, we can't store to st(0). */
4255 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
4257 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
4262 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
4265 if (STACK_TOP_P (operands[0]))
4266 p = "{rp\t%0, %1|p\t%1, %0}";
4268 p = "{p\t%1, %0|rp\t%0, %1}";
4270 if (STACK_TOP_P (operands[0]))
4271 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
4273 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
4278 if (STACK_TOP_P (operands[0]))
4280 if (STACK_TOP_P (operands[1]))
4281 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
4283 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
4286 else if (STACK_TOP_P (operands[1]))
4289 p = "{\t%1, %0|r\t%0, %1}";
4291 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
4297 p = "{r\t%2, %0|\t%0, %2}";
4299 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
4312 /* Output code for INSN to convert a float to a signed int. OPERANDS
4313 are the insn operands. The output may be [HSD]Imode and the input
4314 operand may be [SDX]Fmode. */
/* Emit assembly for a float -> signed integer truncation.  The 387
   rounding mode must be temporarily forced to truncate: save the control
   word, set the RC bits, fist[p], then restore the control word.
   NOTE(review): operands[2]/operands[3] appear to be scratch slots
   supplied by the insn pattern — confirm against i386.md.  */
4317 output_fix_trunc (insn, operands)
4321 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
4322 int dimode_p = GET_MODE (operands[0]) == DImode;
4325 /* Jump through a hoop or two for DImode, since the hardware has no
4326 non-popping instruction. We used to do this a different way, but
4327 that was somewhat fragile and broke with post-reload splitters. */
4328 if (dimode_p && !stack_top_dies)
4329 output_asm_insn ("fld\t%y1", operands);
4331 if (! STACK_TOP_P (operands[1]))
/* 12 = 0b1100: the RC (rounding control) bits in the high byte of the
   control word; OR-ing them in selects round-toward-zero.  */
4334 xops[0] = GEN_INT (12);
4335 xops[1] = adj_offsettable_operand (operands[2], 1);
4336 xops[1] = change_address (xops[1], QImode, NULL_RTX);
4338 xops[2] = operands[0];
4339 if (GET_CODE (operands[0]) != MEM)
4340 xops[2] = operands[3];
/* Save control word, patch in truncation mode, reload it.  */
4342 output_asm_insn ("fnstcw\t%2", operands);
4343 output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
4344 output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
4345 output_asm_insn ("fldcw\t%2", operands);
4346 output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);
/* DImode only has the popping form, so pop there as well as when the
   stack top is dead anyway.  */
4348 if (stack_top_dies || dimode_p)
4349 output_asm_insn ("fistp%z2\t%2", xops);
4351 output_asm_insn ("fist%z2\t%2", xops);
/* Restore the caller's rounding mode.  */
4353 output_asm_insn ("fldcw\t%2", operands);
/* If the result was stored into the scratch, copy it to the real
   (register) destination.  */
4355 if (GET_CODE (operands[0]) != MEM)
4359 split_di (operands+0, 1, xops+0, xops+1);
4360 split_di (operands+3, 1, xops+2, xops+3);
4361 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4362 output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
4364 else if (GET_MODE (operands[0]) == SImode)
4365 output_asm_insn ("mov{l}\t{%3, %0|%0, %3}", operands);
4367 output_asm_insn ("mov{w}\t{%3, %0|%0, %3}", operands);
4373 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
4374 should be used and 2 when fnstsw should be used. UNORDERED_P is true
4375 when fucom should be used. */
/* Emit (or return) the assembler template for a floating point compare.
   EFLAGS_P is 1 for fcomi-style compares and 2 for fnstsw-based ones;
   UNORDERED_P selects the non-trapping fucom family.  SSE operands use
   [u]comiss/[u]comisd instead of the 387 instructions.  */
4378 output_fp_compare (insn, operands, eflags_p, unordered_p)
4381 int eflags_p, unordered_p;
4384 rtx cmp_op0 = operands[0];
4385 rtx cmp_op1 = operands[1];
4386 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
4391 cmp_op1 = operands[2];
4395 if (GET_MODE (operands[0]) == SFmode)
4397 return "ucomiss\t{%1, %0|%0, %1}";
/* Was "%0, %y" — a bare %y has no operand number, which
   output_asm_insn rejects; the Intel side must name operand 1
   just like the AT&T side and the sibling templates do.  */
4399 return "comiss\t{%1, %0|%0, %1}";
4402 return "ucomisd\t{%1, %0|%0, %1}";
/* Same fix as the comiss template above: %y -> %1.  */
4404 return "comisd\t{%1, %0|%0, %1}";
/* 387 compares require one operand at the top of the register stack.  */
4407 if (! STACK_TOP_P (cmp_op0))
4410 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
4412 if (STACK_REG_P (cmp_op1)
4414 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
4415 && REGNO (cmp_op1) != FIRST_STACK_REG)
4417 /* If both the top of the 387 stack dies, and the other operand
4418 is also a stack register that dies, then this must be a
4419 `fcompp' float compare */
4423 /* There is no double popping fcomi variant. Fortunately,
4424 eflags is immune from the fstp's cc clobbering. */
4426 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
4428 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
4436 return "fucompp\n\tfnstsw\t%0";
4438 return "fcompp\n\tfnstsw\t%0";
4451 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
4453 static const char * const alt[24] =
4465 "fcomi\t{%y1, %0|%0, %y1}",
4466 "fcomip\t{%y1, %0|%0, %y1}",
4467 "fucomi\t{%y1, %0|%0, %y1}",
4468 "fucomip\t{%y1, %0|%0, %y1}",
4475 "fcom%z2\t%y2\n\tfnstsw\t%0",
4476 "fcomp%z2\t%y2\n\tfnstsw\t%0",
4477 "fucom%z2\t%y2\n\tfnstsw\t%0",
4478 "fucomp%z2\t%y2\n\tfnstsw\t%0",
4480 "ficom%z2\t%y2\n\tfnstsw\t%0",
4481 "ficomp%z2\t%y2\n\tfnstsw\t%0",
/* Build the table index from the four selector bits described above.  */
4489 mask = eflags_p << 3;
4490 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
4491 mask |= unordered_p << 1;
4492 mask |= stack_top_dies;
4504 /* Output assembler code to FILE to initialize basic-block profiling.
4506 If profile_block_flag == 2
4508 Output code to call the subroutine `__bb_init_trace_func'
4509 and pass two parameters to it. The first parameter is
4510 the address of a block allocated in the object module.
4511 The second parameter is the number of the first basic block
4514 The name of the block is a local symbol made with this statement:
4516 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4518 Of course, since you are writing the definition of
4519 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4520 can take a short cut in the definition of this macro and use the
4521 name that you know will result.
4523 The number of the first basic block of the function is
4524 passed to the macro in BLOCK_OR_LABEL.
4526 If described in a virtual assembler language the code to be
4530 parameter2 <- BLOCK_OR_LABEL
4531 call __bb_init_trace_func
4533 else if profile_block_flag != 0
4535 Output code to call the subroutine `__bb_init_func'
4536 and pass one single parameter to it, which is the same
4537 as the first parameter to `__bb_init_trace_func'.
4539 The first word of this parameter is a flag which will be nonzero if
4540 the object module has already been initialized. So test this word
4541 first, and do not call `__bb_init_func' if the flag is nonzero.
4542 Note: When profile_block_flag == 2 the test need not be done
4543 but `__bb_init_trace_func' *must* be called.
4545 BLOCK_OR_LABEL may be used to generate a label number as a
4546 branch destination in case `__bb_init_func' will not be called.
4548 If described in a virtual assembler language the code to be
/* See the block comment above: emit the per-function basic-block
   profiling prologue, calling __bb_init_trace_func (profile_block_flag
   == 2) or __bb_init_func (otherwise) with the LPBX0 block table.  */
4559 ix86_output_function_block_profiler (file, block_or_label)
4563 static int num_func = 0;
4565 char block_table[80], false_label[80];
4567 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
4569 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4570 xops[5] = stack_pointer_rtx;
4571 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
4573 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
4575 switch (profile_block_flag)
/* Trace mode: push BLOCK_OR_LABEL and &LPBX0, call the trace
   initializer, then pop both arguments (8 bytes).  */
4578 xops[2] = GEN_INT (block_or_label);
4579 xops[3] = gen_rtx_MEM (Pmode,
4580 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
4581 xops[6] = GEN_INT (8);
4583 output_asm_insn ("push{l}\t%2", xops);
4585 output_asm_insn ("push{l}\t%1", xops);
4588 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4589 output_asm_insn ("push{l}\t%7", xops);
4591 output_asm_insn ("call\t%P3", xops);
4592 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
/* Plain mode: skip the call when the module is already initialized
   (first word of the table nonzero), otherwise pass &LPBX0.  */
4596 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
4598 xops[0] = const0_rtx;
4599 xops[2] = gen_rtx_MEM (Pmode,
4600 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
4601 xops[3] = gen_rtx_MEM (Pmode,
4602 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
4603 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
4604 xops[6] = GEN_INT (4);
4606 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
4608 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
4609 output_asm_insn ("jne\t%2", xops);
4612 output_asm_insn ("push{l}\t%1", xops);
/* Was "{%a1, %7|%7, %a2}": the Intel half named operand 2 (the jump
   target MEM) while the AT&T half named operand 1 (the block table).
   Both dialect halves must reference the same operand — see the
   identical lea in the profile_block_flag == 2 arm above.  */
4615 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4616 output_asm_insn ("push{l}\t%7", xops);
4618 output_asm_insn ("call\t%P3", xops);
4619 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4620 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
4626 /* Output assembler code to FILE to increment a counter associated
4627 with basic block number BLOCKNO.
4629 If profile_block_flag == 2
4631 Output code to initialize the global structure `__bb' and
4632 call the function `__bb_trace_func' which will increment the
4635 `__bb' consists of two words. In the first word the number
4636 of the basic block has to be stored. In the second word
4637 the address of a block allocated in the object module
4640 The basic block number is given by BLOCKNO.
4642 The address of the block is given by the label created with
4644 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4646 by FUNCTION_BLOCK_PROFILER.
4648 Of course, since you are writing the definition of
4649 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4650 can take a short cut in the definition of this macro and use the
4651 name that you know will result.
4653 If described in a virtual assembler language the code to be
4656 move BLOCKNO -> (__bb)
4657 move LPBX0 -> (__bb+4)
4658 call __bb_trace_func
4660 Note that function `__bb_trace_func' must not change the
4661 machine state, especially the flag register. To grant
4662 this, you must output code to save and restore registers
4663 either in this macro or in the macros MACHINE_STATE_SAVE
4664 and MACHINE_STATE_RESTORE. The last two macros will be
4665 used in the function `__bb_trace_func', so you must make
4666 sure that the function prologue does not change any
4667 register prior to saving it with MACHINE_STATE_SAVE.
4669 else if profile_block_flag != 0
4671 Output code to increment the counter directly.
4672 Basic blocks are numbered separately from zero within each
4673 compiled object module. The count associated with block number
4674 BLOCKNO is at index BLOCKNO in an array of words; the name of
4675 this array is a local symbol made with this statement:
4677 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
4679 Of course, since you are writing the definition of
4680 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4681 can take a short cut in the definition of this macro and use the
4682 name that you know will result.
4684 If described in a virtual assembler language the code to be
4687 inc (LPBX2+4*BLOCKNO)
/* See the block comment above: emit code to record entry into basic
   block BLOCKNO — either by filling the global `__bb` struct and calling
   __bb_trace_func (flag == 2), or by directly incrementing the LPBX2
   counter word for this block.  */
4691 ix86_output_block_profiler (file, blockno)
4692 FILE *file ATTRIBUTE_UNUSED;
4695 rtx xops[8], cnt_rtx;
4697 char *block_table = counts;
4699 switch (profile_block_flag)
4702 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
4704 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4705 xops[2] = GEN_INT (blockno);
4706 xops[3] = gen_rtx_MEM (Pmode,
4707 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
4708 xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
4709 xops[5] = plus_constant (xops[4], 4);
4710 xops[0] = gen_rtx_MEM (SImode, xops[4]);
4711 xops[6] = gen_rtx_MEM (SImode, xops[5]);
4713 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
/* __bb_trace_func must not observe changed flags: protect EFLAGS
   with pushf/popf around the whole sequence.  */
4715 output_asm_insn ("pushf", xops);
4716 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
/* PIC: compute the table address in a scratch (eax), preserving
   eax around the computation.  */
4719 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
4720 output_asm_insn ("push{l}\t%7", xops);
4721 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4722 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
4723 output_asm_insn ("pop{l}\t%7", xops);
4726 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
4727 output_asm_insn ("call\t%P3", xops);
4728 output_asm_insn ("popf", xops);
/* Direct counting: inc{l} the 32-bit counter at LPBX2 + 4*BLOCKNO.  */
4733 ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
4734 cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
4735 SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
4738 cnt_rtx = plus_constant (cnt_rtx, blockno*4);
4741 cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
4743 xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
4744 output_asm_insn ("inc{l}\t%0", xops);
/* Expander for mov<mode> patterns: massage OPERANDS so the resulting
   SET is something the machine description can match (no mem->mem
   moves, legitimized PIC addresses, FP constants in memory).  */
4751 ix86_expand_move (mode, operands)
4752 enum machine_mode mode;
4755 int strict = (reload_in_progress || reload_completed);
/* Symbolic addresses under -fPIC must go through the PIC register.  */
4758 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
4760 /* Emit insns to move operands[1] into operands[0]. */
4762 if (GET_CODE (operands[0]) == MEM)
4763 operands[1] = force_reg (Pmode, operands[1]);
4766 rtx temp = operands[0];
4767 if (GET_CODE (temp) != REG)
4768 temp = gen_reg_rtx (Pmode);
4769 temp = legitimize_pic_address (operands[1], temp);
4770 if (temp == operands[0])
/* i386 has no mem->mem move; copy the source through a register
   (pushes of memory are allowed except in QImode).  */
4777 if (GET_CODE (operands[0]) == MEM
4778 && (GET_MODE (operands[0]) == QImode
4779 || !push_operand (operands[0], mode))
4780 && GET_CODE (operands[1]) == MEM)
4781 operands[1] = force_reg (mode, operands[1]);
4783 if (push_operand (operands[0], mode)
4784 && ! general_no_elim_operand (operands[1], mode))
4785 operands[1] = copy_to_mode_reg (mode, operands[1]);
4787 if (FLOAT_MODE_P (mode))
4789 /* If we are loading a floating point constant to a register,
4790 force the value to memory now, since we'll get better code
4791 out the back end. */
4795 else if (GET_CODE (operands[1]) == CONST_DOUBLE
4796 && register_operand (operands[0], mode))
4797 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
4801 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
4806 /* Attempt to expand a binary operator. Make the expansion closer to the
4807 actual machine, then just general_operand, which will allow 3 separate
4808 memory references (one output, two input) in a single insn. */
/* See the comment above: expand a two-address binary operator CODE in
   MODE, forcing operands into the shapes ix86_binary_operator_ok
   accepts, then emit the SET (with a FLAGS_REG clobber when safe).  */
4811 ix86_expand_binary_operator (code, mode, operands)
4813 enum machine_mode mode;
4816 int matching_memory;
4817 rtx src1, src2, dst, op, clob;
4823 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
4824 if (GET_RTX_CLASS (code) == 'c'
4825 && (rtx_equal_p (dst, src2)
4826 || immediate_operand (src1, mode)))
4833 /* If the destination is memory, and we do not have matching source
4834 operands, do things in registers. */
4835 matching_memory = 0;
4836 if (GET_CODE (dst) == MEM)
4838 if (rtx_equal_p (dst, src1))
4839 matching_memory = 1;
4840 else if (GET_RTX_CLASS (code) == 'c'
4841 && rtx_equal_p (dst, src2))
4842 matching_memory = 2;
4844 dst = gen_reg_rtx (mode);
4847 /* Both source operands cannot be in memory. */
4848 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
4850 if (matching_memory != 2)
4851 src2 = force_reg (mode, src2);
4853 src1 = force_reg (mode, src1);
4856 /* If the operation is not commutable, source 1 cannot be a constant
4857 or non-matching memory. */
4858 if ((CONSTANT_P (src1)
4859 || (!matching_memory && GET_CODE (src1) == MEM))
4860 && GET_RTX_CLASS (code) != 'c')
4861 src1 = force_reg (mode, src1);
4863 /* If optimizing, copy to regs to improve CSE */
4864 if (optimize && ! no_new_pseudos)
4866 if (GET_CODE (dst) == MEM)
4867 dst = gen_reg_rtx (mode);
4868 if (GET_CODE (src1) == MEM)
4869 src1 = force_reg (mode, src1);
4870 if (GET_CODE (src2) == MEM)
4871 src2 = force_reg (mode, src2);
4874 /* Emit the instruction. */
4876 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
4877 if (reload_in_progress)
4879 /* Reload doesn't know about the flags register, and doesn't know that
4880 it doesn't want to clobber it. We can only do this with PLUS. */
/* Normal case: wrap the SET with an explicit EFLAGS clobber.  */
4887 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4888 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4891 /* Fix up the destination if needed. */
4892 if (dst != operands[0])
4893 emit_move_insn (operands[0], dst);
4896 /* Return TRUE or FALSE depending on whether the binary operator meets the
4897 appropriate constraints. */
/* Predicate used by the md patterns: TRUE iff OPERANDS satisfy the
   two-address constraints ix86_expand_binary_operator establishes.  */
4900 ix86_binary_operator_ok (code, mode, operands)
4902 enum machine_mode mode ATTRIBUTE_UNUSED;
4905 /* Both source operands cannot be in memory. */
4906 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
4908 /* If the operation is not commutable, source 1 cannot be a constant. */
4909 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
4911 /* If the destination is memory, we must have a matching source operand. */
4912 if (GET_CODE (operands[0]) == MEM
4913 && ! (rtx_equal_p (operands[0], operands[1])
4914 || (GET_RTX_CLASS (code) == 'c'
4915 && rtx_equal_p (operands[0], operands[2]))))
4917 /* If the operation is not commutable and the source 1 is memory, we must
4918 have a matching destination. */
4919 if (GET_CODE (operands[1]) == MEM
4920 && GET_RTX_CLASS (code) != 'c'
4921 && ! rtx_equal_p (operands[0], operands[1]))
4926 /* Attempt to expand a unary operator. Make the expansion closer to the
4927 actual machine, then just general_operand, which will allow 2 separate
4928 memory references (one output, one input) in a single insn. */
/* See the comment above: expand a unary operator CODE in MODE.
   Mirrors ix86_expand_binary_operator, but with a single source.  */
4931 ix86_expand_unary_operator (code, mode, operands)
4933 enum machine_mode mode;
4936 int matching_memory;
4937 rtx src, dst, op, clob;
4942 /* If the destination is memory, and we do not have matching source
4943 operands, do things in registers. */
4944 matching_memory = 0;
4945 if (GET_CODE (dst) == MEM)
4947 if (rtx_equal_p (dst, src))
4948 matching_memory = 1;
4950 dst = gen_reg_rtx (mode);
4953 /* When source operand is memory, destination must match. */
4954 if (!matching_memory && GET_CODE (src) == MEM)
4955 src = force_reg (mode, src);
4957 /* If optimizing, copy to regs to improve CSE */
4958 if (optimize && ! no_new_pseudos)
4960 if (GET_CODE (dst) == MEM)
4961 dst = gen_reg_rtx (mode);
4962 if (GET_CODE (src) == MEM)
4963 src = force_reg (mode, src);
4966 /* Emit the instruction. */
4968 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
/* NOT does not touch the flags, so no clobber is added for it.  */
4969 if (reload_in_progress || code == NOT)
4971 /* Reload doesn't know about the flags register, and doesn't know that
4972 it doesn't want to clobber it. */
4979 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4980 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4983 /* Fix up the destination if needed. */
4984 if (dst != operands[0])
4985 emit_move_insn (operands[0], dst);
4988 /* Return TRUE or FALSE depending on whether the unary operator meets the
4989 appropriate constraints. */
/* Predicate used by the md patterns: TRUE iff a unary operation on
   OPERANDS obeys the two-address constraint.  */
4992 ix86_unary_operator_ok (code, mode, operands)
4993 enum rtx_code code ATTRIBUTE_UNUSED;
4994 enum machine_mode mode ATTRIBUTE_UNUSED;
4995 rtx operands[2] ATTRIBUTE_UNUSED;
4997 /* If one of operands is memory, source and destination must match. */
4998 if ((GET_CODE (operands[0]) == MEM
4999 || GET_CODE (operands[1]) == MEM)
5000 && ! rtx_equal_p (operands[0], operands[1]))
5005 /* Return TRUE or FALSE depending on whether the first SET in INSN
5006 has source and destination with matching CC modes, and that the
5007 CC mode is at least as constrained as REQ_MODE. */
/* See the comment above: check that INSN's first SET is a COMPARE whose
   destination CC mode is at least as constrained as REQ_MODE, and that
   source and destination CC modes agree.  */
5010 ix86_match_ccmode (insn, req_mode)
5012 enum machine_mode req_mode;
5015 enum machine_mode set_mode;
5017 set = PATTERN (insn);
5018 if (GET_CODE (set) == PARALLEL)
5019 set = XVECEXP (set, 0, 0);
5020 if (GET_CODE (set) != SET)
5022 if (GET_CODE (SET_SRC (set)) != COMPARE)
5025 set_mode = GET_MODE (SET_DEST (set));
/* Dispatch on SET_MODE: each CC mode is acceptable only for certain
   requested modes (elided switch cases).  */
5029 if (req_mode != CCNOmode
5030 && (req_mode != CCmode
5031 || XEXP (SET_SRC (set), 1) != const0_rtx))
5035 if (req_mode == CCGCmode)
5039 if (req_mode == CCGOCmode || req_mode == CCNOmode)
5043 if (req_mode == CCZmode)
5053 return (GET_MODE (SET_SRC (set)) == set_mode);
5056 /* Generate insn patterns to do an integer compare of OPERANDS. */
/* Emit an integer compare of OP0 and OP1 into the flags register and
   return the CODE test rtx the flag consumer (bcc/scc/cmov) should use.  */
5059 ix86_expand_int_compare (code, op0, op1)
5063 enum machine_mode cmpmode;
/* Pick the least constrained CC mode that still captures CODE.  */
5066 cmpmode = SELECT_CC_MODE (code, op0, op1);
5067 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
5069 /* This is very simple, but making the interface the same as in the
5070 FP case makes the rest of the code easier. */
5071 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
5072 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
5074 /* Return the test that should be put into the flags user, i.e.
5075 the bcc, scc, or cmov instruction. */
5076 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
5079 /* Figure out whether to use ordered or unordered fp comparisons.
5080 Return the appropriate mode to use. */
/* Return the CC mode to use for a floating point comparison: the
   non-trapping (unordered) CCFPUmode under -mieee-fp, CCFPmode otherwise.  */
5083 ix86_fp_compare_mode (code)
5084 enum rtx_code code ATTRIBUTE_UNUSED;
5086 /* ??? In order to make all comparisons reversible, we do all comparisons
5087 non-trapping when compiling for IEEE. Once gcc is able to distinguish
5088 all forms trapping and nontrapping comparisons, we can make inequality
5089 comparisons trapping again, since it results in better code when using
5090 FCOM based compares. */
5091 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the CC mode for comparing OP0 against OP1 with CODE: the least
   constrained mode whose flag subset suffices for the comparison.  */
5095 ix86_cc_mode (code, op0, op1)
5099 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5100 return ix86_fp_compare_mode (code);
5103 /* Only zero flag is needed. */
5105 case NE: /* ZF!=0 */
5107 /* Codes needing carry flag. */
5108 case GEU: /* CF=0 */
5109 case GTU: /* CF=0 & ZF=0 */
5110 case LTU: /* CF=1 */
5111 case LEU: /* CF=1 | ZF=1 */
5113 /* Codes possibly doable only with sign flag when
5114 comparing against zero. */
5115 case GE: /* SF=OF or SF=0 */
5116 case LT: /* SF<>OF or SF=1 */
5117 if (op1 == const0_rtx)
5120 /* For other cases Carry flag is not required. */
5122 /* Codes doable only with sign flag when comparing
5123 against zero, but we miss jump instruction for it
5124 so we need to use relational tests against overflow
5125 that thus needs to be zero. */
5126 case GT: /* ZF=0 & SF=OF */
5127 case LE: /* ZF=1 | SF<>OF */
5128 if (op1 == const0_rtx)
5137 /* Return true if we should use an FCOMI instruction for this fp comparison. */
/* TRUE iff fcomi is (one of) the cheapest ways to do this fp comparison,
   for CODE either as-is or with its operands swapped.  */
5140 ix86_use_fcomi_compare (code)
5141 enum rtx_code code ATTRIBUTE_UNUSED;
5143 enum rtx_code swapped_code = swap_condition (code);
5144 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
5145 || (ix86_fp_comparison_cost (swapped_code)
5146 == ix86_fp_comparison_fcomi_cost (swapped_code)));
5149 /* Swap, force into registers, or otherwise massage the two operands
5150 to a fp comparison. The operands are updated in place; the new
5151 comparison code is returned. */
5153 static enum rtx_code
5154 ix86_prepare_fp_compare_args (code, pop0, pop1)
5158 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
5159 rtx op0 = *pop0, op1 = *pop1;
5160 enum machine_mode op_mode = GET_MODE (op0);
5161 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
5163 /* All of the unordered compare instructions only work on registers.
5164 The same is true of the XFmode compare instructions. The same is
5165 true of the fcomi compare instructions. */
5168 && (fpcmp_mode == CCFPUmode
5169 || op_mode == XFmode
5170 || op_mode == TFmode
5171 || ix86_use_fcomi_compare (code)))
5173 op0 = force_reg (op_mode, op0);
5174 op1 = force_reg (op_mode, op1);
5178 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
5179 things around if they appear profitable, otherwise force op0
/* Swap when op0 is a non-standard constant or an awkward MEM; the
   comparison code is swapped accordingly to preserve semantics.  */
5182 if (standard_80387_constant_p (op0) == 0
5183 || (GET_CODE (op0) == MEM
5184 && ! (standard_80387_constant_p (op1) == 0
5185 || GET_CODE (op1) == MEM)))
5188 tmp = op0, op0 = op1, op1 = tmp;
5189 code = swap_condition (code);
5192 if (GET_CODE (op0) != REG)
5193 op0 = force_reg (op_mode, op0);
5195 if (CONSTANT_P (op1))
/* fldz/fld1-style constants can live in a register; anything else
   must be spilled to the constant pool.  */
5197 if (standard_80387_constant_p (op1))
5198 op1 = force_reg (op_mode, op1);
5200 op1 = validize_mem (force_const_mem (op_mode, op1));
5204 /* Try to rearrange the comparison to make it cheaper. */
5205 if (ix86_fp_comparison_cost (code)
5206 > ix86_fp_comparison_cost (swap_condition (code))
5207 && (GET_CODE (op0) == REG || !reload_completed))
5210 tmp = op0, op0 = op1, op1 = tmp;
5211 code = swap_condition (code);
5212 if (GET_CODE (op0) != REG)
5213 op0 = force_reg (op_mode, op0);
5221 /* Convert comparison codes we use to represent FP comparison to integer
5222 code that will result in proper branch. Return UNKNOWN if no such code
5224 static enum rtx_code
5225 ix86_fp_compare_code_to_integer (code)
5255 /* Split comparison code CODE into comparisons we can do using branch
5256 instructions. BYPASS_CODE is comparison code for branch that will
5257 branch around FIRST_CODE and SECOND_CODE. If one of the branches
5258 is not required, its value is set to NIL.
5259 We never require more than two branches. */
5261 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
5262 enum rtx_code code, *bypass_code, *first_code, *second_code;
5268 /* The fcomi comparison sets flags as follows:
/* Codes directly expressible with a single branch on CF/ZF/PF.  */
5278 case GT: /* GTU - CF=0 & ZF=0 */
5279 case GE: /* GEU - CF=0 */
5280 case ORDERED: /* PF=0 */
5281 case UNORDERED: /* PF=1 */
5282 case UNEQ: /* EQ - ZF=1 */
5283 case UNLT: /* LTU - CF=1 */
5284 case UNLE: /* LEU - CF=1 | ZF=1 */
5285 case LTGT: /* EQ - ZF=0 */
/* Trapping codes: an unordered result sets CF and ZF, so either
   branch around on UNORDERED first, or add a second UNORDERED test.  */
5287 case LT: /* LTU - CF=1 - fails on unordered */
5289 *bypass_code = UNORDERED;
5291 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
5293 *bypass_code = UNORDERED;
5295 case EQ: /* EQ - ZF=1 - fails on unordered */
5297 *bypass_code = UNORDERED;
5299 case NE: /* NE - ZF=0 - fails on unordered */
5301 *second_code = UNORDERED;
5303 case UNGE: /* GEU - CF=0 - fails on unordered */
5305 *second_code = UNORDERED;
5307 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
5309 *second_code = UNORDERED;
/* Without -mieee-fp the extra ordered/unordered guard is dropped.  */
5314 if (!TARGET_IEEE_FP)
5321 /* Return cost of comparison done fcom + arithmetics operations on AX.
5322 All following functions use the number of instructions as a cost metric.
5323 In future this should be tweaked to compute bytes for optimize_size and
5324 take into account performance of various instructions on various CPUs. */
5326 ix86_fp_comparison_arithmetics_cost (code)
/* Without IEEE semantics the fnstsw/test sequence is short and
   constant-cost (elided return).  */
5329 if (!TARGET_IEEE_FP)
5331 /* The cost of code output by ix86_expand_fp_compare. */
5359 /* Return cost of comparison done using fcomi operation.
5360 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5362 ix86_fp_comparison_fcomi_cost (code)
5365 enum rtx_code bypass_code, first_code, second_code;
5366 /* Return arbitrarily high cost when instruction is not supported - this
5367 prevents gcc from using it. */
/* Base cost 2 (fcomi + branch), plus 1 for each extra branch needed.  */
5370 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5371 return (bypass_code != NIL || second_code != NIL) + 2;
5374 /* Return cost of comparison done using sahf operation.
5375 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5377 ix86_fp_comparison_sahf_cost (code)
5380 enum rtx_code bypass_code, first_code, second_code;
5381 /* Return arbitrarily high cost when instruction is not preferred - this
5382 avoids gcc from using it. */
5383 if (!TARGET_USE_SAHF && !optimize_size)
/* Base cost 3 (fnstsw + sahf + branch), plus 1 per extra branch.  */
5385 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5386 return (bypass_code != NIL || second_code != NIL) + 3;
5389 /* Compute cost of the comparison done using any method.
5390 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5392 ix86_fp_comparison_cost (code)
5395 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
5398 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
5399 sahf_cost = ix86_fp_comparison_sahf_cost (code);
/* Take the minimum of the three candidate strategies, starting from
   the fcom + arithmetics sequence. */
5401 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
5402 if (min > sahf_cost)
5404 if (min > fcomi_cost)
5409 /* Generate insn patterns to do a floating point compare of OPERANDS.
   Returns the rtx for the test the flags user (bcc/scc/cmov) should use.
   SECOND_TEST and BYPASS_TEST, when non-NULL, receive extra tests that
   must be combined with the main result (or NULL_RTX when not needed). */
5412 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
5414 rtx op0, op1, scratch;
5418 enum machine_mode fpcmp_mode, intcmp_mode;
5420 int cost = ix86_fp_comparison_cost (code);
5421 enum rtx_code bypass_code, first_code, second_code;
5423 fpcmp_mode = ix86_fp_compare_mode (code);
5424 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
5427 *second_test = NULL_RTX;
5429 *bypass_test = NULL_RTX;
5431 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5433 /* Do fcomi/sahf based test when profitable. */
5434 if ((bypass_code == NIL || bypass_test)
5435 && (second_code == NIL || second_test)
5436 && ix86_fp_comparison_arithmetics_cost (code) > cost)
/* fcomi path: compare directly into the flags register. */
5440 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5441 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* sahf path: fnstsw into a HImode scratch, then sahf to load AH into flags. */
5447 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5448 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
5450 scratch = gen_reg_rtx (HImode);
5451 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
5452 emit_insn (gen_x86_sahf_1 (scratch))
5455 /* The FP codes work out to act like unsigned. */
5456 intcmp_mode = fpcmp_mode;
5458 if (bypass_code != NIL)
5459 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
5460 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5462 if (second_code != NIL)
5463 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
5464 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5469 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
5470 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5471 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
5473 scratch = gen_reg_rtx (HImode);
5474 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
5476 /* In the unordered case, we have to check C2 for NaN's, which
5477 doesn't happen to work out to anything nice combination-wise.
5478 So do some bit twiddling on the value we've got in AH to come
5479 up with an appropriate set of condition codes.
   (0x45 masks the C0, C2 and C3 condition bits of the x87 status
   word as they appear in AH after fnstsw.) */
5481 intcmp_mode = CCNOmode;
5486 if (code == GT || !TARGET_IEEE_FP)
5488 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
5493 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5494 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
5495 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
5496 intcmp_mode = CCmode;
5502 if (code == LT && TARGET_IEEE_FP)
5504 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5505 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
5506 intcmp_mode = CCmode;
5511 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
5517 if (code == GE || !TARGET_IEEE_FP)
5519 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
5524 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5525 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
5532 if (code == LE && TARGET_IEEE_FP)
5534 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5535 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
5536 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
5537 intcmp_mode = CCmode;
5542 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
5548 if (code == EQ && TARGET_IEEE_FP)
5550 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5551 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
5552 intcmp_mode = CCmode;
5557 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
5564 if (code == NE && TARGET_IEEE_FP)
5566 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5567 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
5573 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
5579 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
5583 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
5592 /* Return the test that should be put into the flags user, i.e.
5593 the bcc, scc, or cmov instruction. */
5594 return gen_rtx_fmt_ee (code, VOIDmode,
5595 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand a comparison of the global ix86_compare_op0/ix86_compare_op1,
   dispatching to the floating point or integer expander by operand mode.
   Returns the test rtx for the flags user; SECOND_TEST/BYPASS_TEST are
   cleared here and may be filled in by the FP expander. */
5600 ix86_expand_compare (code, second_test, bypass_test)
5602 rtx *second_test, *bypass_test;
5605 op0 = ix86_compare_op0;
5606 op1 = ix86_compare_op1;
5609 *second_test = NULL_RTX;
5611 *bypass_test = NULL_RTX;
5613 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5614 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
5615 second_test, bypass_test);
5617 ret = ix86_expand_int_compare (code, op0, op1);
/* Emit a conditional jump to LABEL for comparison CODE on the global
   compare operands, dispatching on the mode of ix86_compare_op0. */
5623 ix86_expand_branch (code, label)
5629 switch (GET_MODE (ix86_compare_op0))
/* Simple case: expand the compare and emit a conditional jump on it. */
5634 tmp = ix86_expand_compare (code, NULL, NULL);
5635 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5636 gen_rtx_LABEL_REF (VOIDmode, label),
5638 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5645 /* Don't expand the comparison early, so that we get better code
5646 when jump or whoever decides to reverse the comparison. */
5651 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
5654 tmp = gen_rtx_fmt_ee (code, VOIDmode,
5655 ix86_compare_op0, ix86_compare_op1);
5656 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5657 gen_rtx_LABEL_REF (VOIDmode, label),
5659 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
/* Build a PARALLEL that also clobbers the FP flag registers, plus a
   HImode scratch when the non-fcomi (fnstsw) form will be used. */
5661 use_fcomi = ix86_use_fcomi_compare (code);
5662 vec = rtvec_alloc (3 + !use_fcomi);
5663 RTVEC_ELT (vec, 0) = tmp;
5665 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
5667 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
5670 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
5672 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
5677 /* Expand DImode branch into multiple compare+branch. */
5679 rtx lo[2], hi[2], label2;
5680 enum rtx_code code1, code2, code3;
/* Canonicalize: put a constant operand second, swapping the condition. */
5682 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
5684 tmp = ix86_compare_op0;
5685 ix86_compare_op0 = ix86_compare_op1;
5686 ix86_compare_op1 = tmp;
5687 code = swap_condition (code);
5689 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
5690 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
5692 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
5693 avoid two branches. This costs one extra insn, so disable when
5694 optimizing for size. */
5696 if ((code == EQ || code == NE)
5698 || hi[1] == const0_rtx || lo[1] == const0_rtx))
5703 if (hi[1] != const0_rtx)
5704 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
5705 NULL_RTX, 0, OPTAB_WIDEN);
5708 if (lo[1] != const0_rtx)
5709 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
5710 NULL_RTX, 0, OPTAB_WIDEN);
5712 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
5713 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse: the 64-bit equality test reduces to a 32-bit test vs zero. */
5715 ix86_compare_op0 = tmp;
5716 ix86_compare_op1 = const0_rtx;
5717 ix86_expand_branch (code, label);
5721 /* Otherwise, if we are doing less-than or greater-or-equal-than,
5722 op1 is a constant and the low word is zero, then we can just
5723 examine the high word. */
5725 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
5728 case LT: case LTU: case GE: case GEU:
5729 ix86_compare_op0 = hi[0];
5730 ix86_compare_op1 = hi[1];
5731 ix86_expand_branch (code, label);
5737 /* Otherwise, we need two or three jumps. */
5739 label2 = gen_label_rtx ();
5742 code2 = swap_condition (code);
5743 code3 = unsigned_condition (code);
5747 case LT: case GT: case LTU: case GTU:
5750 case LE: code1 = LT; code2 = GT; break;
5751 case GE: code1 = GT; code2 = LT; break;
5752 case LEU: code1 = LTU; code2 = GTU; break;
5753 case GEU: code1 = GTU; code2 = LTU; break;
5755 case EQ: code1 = NIL; code2 = NE; break;
5756 case NE: code2 = NIL; break;
5764 * if (hi(a) < hi(b)) goto true;
5765 * if (hi(a) > hi(b)) goto false;
5766 * if (lo(a) < lo(b)) goto true;
5770 ix86_compare_op0 = hi[0];
5771 ix86_compare_op1 = hi[1];
5774 ix86_expand_branch (code1, label);
5776 ix86_expand_branch (code2, label2);
/* Finish with the (unsigned) comparison of the low words. */
5778 ix86_compare_op0 = lo[0];
5779 ix86_compare_op1 = lo[1];
5780 ix86_expand_branch (code3, label);
5783 emit_label (label2);
5792 /* Split branch based on floating point condition. */
5794 ix86_split_fp_branch (condition, op1, op2, target1, target2, tmp)
5795 rtx condition, op1, op2, target1, target2, tmp;
5798 rtx label = NULL_RTX;
5799 enum rtx_code code = GET_CODE (condition);
/* Normalize so that target1 is the taken branch; reverse the condition
   (NaN-safely) when the fall-through target was the "true" arm. */
5801 if (target2 != pc_rtx)
5804 code = reverse_condition_maybe_unordered (code);
5809 condition = ix86_expand_fp_compare (code, op1, op2,
5810 tmp, &second, &bypass);
/* A bypass test means the main condition is invalid for some inputs
   (e.g. unordered); branch around the main jump in that case. */
5811 if (bypass != NULL_RTX)
5813 label = gen_label_rtx ();
5814 emit_jump_insn (gen_rtx_SET
5816 gen_rtx_IF_THEN_ELSE (VOIDmode,
5818 gen_rtx_LABEL_REF (VOIDmode,
5822 /* AMD Athlon and probably other CPUs too have a fast bypass path between
5823 the comparison and the first branch. The second branch takes longer to
5824 execute, so place the worse-predictable branch first if possible. */
5825 if (second != NULL_RTX
5826 && (GET_CODE (second) == UNORDERED || GET_CODE (second) == ORDERED))
5828 rtx tmp = condition;
5832 emit_jump_insn (gen_rtx_SET
5834 gen_rtx_IF_THEN_ELSE (VOIDmode,
5835 condition, target1, target2)));
5836 if (second != NULL_RTX)
5837 emit_jump_insn (gen_rtx_SET
5839 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1, target2)));
5840 if (label != NULL_RTX)
/* Expand a setcc of comparison CODE into DEST.  Returns 1 on success,
   0 (FAIL) when the expansion is not handled (DImode compares). */
5845 ix86_expand_setcc (code, dest)
5849 rtx ret, tmp, tmpreg;
5850 rtx second_test, bypass_test;
5853 if (GET_MODE (ix86_compare_op0) == DImode)
5854 return 0; /* FAIL */
5856 /* Three modes of generation:
5857 0 -- destination does not overlap compare sources:
5858 clear dest first, emit strict_low_part setcc.
5859 1 -- destination does overlap compare sources:
5860 emit subreg setcc, zero extend.
5861 2 -- destination is in QImode:
5867 if (GET_MODE (dest) == QImode)
5869 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
5870 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
5874 emit_move_insn (dest, const0_rtx);
5876 ret = ix86_expand_compare (code, &second_test, &bypass_test);
5877 PUT_MODE (ret, QImode);
5883 tmp = gen_lowpart (QImode, dest);
5885 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
5889 if (!cse_not_expected)
5890 tmp = gen_reg_rtx (QImode);
5892 tmp = gen_lowpart (QImode, dest);
5896 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
/* When the FP compare produced an extra test, materialize it into a
   second QImode flag and combine with and/or as appropriate. */
5897 if (bypass_test || second_test)
5899 rtx test = second_test;
5901 rtx tmp2 = gen_reg_rtx (QImode);
5908 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
5910 PUT_MODE (test, QImode);
5911 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
5914 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
5916 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
/* Widen the QImode flag into DEST; the zero-extend clobbers flags. */
5923 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
5924 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
5925 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
5926 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5930 return 1; /* DONE */
/* Expand an integer conditional move (operands[0] = operands[1] ?
   operands[2] : operands[3]).  Returns 1 (DONE) when insns were emitted,
   0 (FAIL) when the caller must fall back to other code. */
5934 ix86_expand_int_movcc (operands)
5937 enum rtx_code code = GET_CODE (operands[1]), compare_code;
5938 rtx compare_seq, compare_op;
5939 rtx second_test, bypass_test;
5941 /* When the compare code is not LTU or GEU, we can not use the sbbl case.
5942 In case comparison is done with immediate, we can convert it to LTU or
5943 GEU by altering the integer. */
5945 if ((code == LEU || code == GTU)
5946 && GET_CODE (ix86_compare_op1) == CONST_INT
5947 && GET_MODE (operands[0]) != HImode
5948 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
5949 && GET_CODE (operands[2]) == CONST_INT
5950 && GET_CODE (operands[3]) == CONST_INT)
5956 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
5960 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
5961 compare_seq = gen_sequence ();
5964 compare_code = GET_CODE (compare_op);
5966 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
5967 HImode insns, we'd be swallowed in word prefix ops. */
5969 if (GET_MODE (operands[0]) != HImode
5970 && GET_CODE (operands[2]) == CONST_INT
5971 && GET_CODE (operands[3]) == CONST_INT)
5973 rtx out = operands[0];
5974 HOST_WIDE_INT ct = INTVAL (operands[2]);
5975 HOST_WIDE_INT cf = INTVAL (operands[3]);
/* sbb-based sequences: only valid for carry-flag conditions. */
5978 if ((compare_code == LTU || compare_code == GEU)
5979 && !second_test && !bypass_test)
5982 /* Detect overlap between destination and compare sources. */
5985 /* To simplify rest of code, restrict to the GEU case. */
5986 if (compare_code == LTU)
5991 compare_code = reverse_condition (compare_code);
5992 code = reverse_condition (code);
5996 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
5997 || reg_overlap_mentioned_p (out, ix86_compare_op1))
5998 tmp = gen_reg_rtx (SImode);
/* Materialize 0 / -1 from the carry flag (sbb reg,reg). */
6000 emit_insn (compare_seq);
6001 emit_insn (gen_x86_movsicc_0_m1 (tmp));
6013 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
6024 emit_insn (gen_iorsi3 (out, out, GEN_INT (ct)));
6026 else if (diff == -1 && ct)
6036 emit_insn (gen_one_cmplsi2 (tmp, tmp));
6038 emit_insn (gen_addsi3 (out, out, GEN_INT (cf)));
6045 * andl cf - ct, dest
6050 emit_insn (gen_andsi3 (out, out, GEN_INT (cf - ct)));
6052 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
6056 emit_move_insn (out, tmp);
6058 return 1; /* DONE */
6065 tmp = ct, ct = cf, cf = tmp;
6067 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
6069 /* We may be reversing unordered compare to normal compare, that
6070 is not valid in general (we may convert non-trapping condition
6071 to trapping one), however on i386 we currently emit all
6072 comparisons unordered. */
6073 compare_code = reverse_condition_maybe_unordered (compare_code);
6074 code = reverse_condition_maybe_unordered (code);
6078 compare_code = reverse_condition (compare_code);
6079 code = reverse_condition (code);
/* diffs matching x86 addressing-mode scales can be done with setcc+lea. */
6082 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
6083 || diff == 3 || diff == 5 || diff == 9)
6089 * lea cf(dest*(ct-cf)),dest
6093 * This also catches the degenerate setcc-only case.
6099 out = emit_store_flag (out, code, ix86_compare_op0,
6100 ix86_compare_op1, VOIDmode, 0, 1);
6107 tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
6111 tmp = gen_rtx_PLUS (SImode, tmp, out);
6117 tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));
6123 emit_move_insn (out, tmp);
6128 clob = gen_rtx_REG (CCmode, FLAGS_REG);
6129 clob = gen_rtx_CLOBBER (VOIDmode, clob);
6131 tmp = gen_rtx_SET (VOIDmode, out, tmp);
6132 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6136 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
6138 if (out != operands[0])
6139 emit_move_insn (operands[0], out);
6141 return 1; /* DONE */
6145 * General case: Jumpful:
6146 * xorl dest,dest cmpl op1, op2
6147 * cmpl op1, op2 movl ct, dest
6149 * decl dest movl cf, dest
6150 * andl (cf-ct),dest 1:
6155 * This is reasonably steep, but branch mispredict costs are
6156 * high on modern cpus, so consider failing only if optimizing
6159 * %%% Parameterize branch_cost on the tuning architecture, then
6160 * use that. The 80386 couldn't care less about mispredicts.
6163 if (!optimize_size && !TARGET_CMOVE)
6169 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
6171 /* We may be reversing unordered compare to normal compare,
6172 that is not valid in general (we may convert non-trapping
6173 condition to trapping one), however on i386 we currently
6174 emit all comparisons unordered. */
6175 compare_code = reverse_condition_maybe_unordered (compare_code);
6176 code = reverse_condition_maybe_unordered (code);
6180 compare_code = reverse_condition (compare_code);
6181 code = reverse_condition (code);
/* setcc; decrement to 0/-1; mask with (cf-ct); then add ct. */
6185 out = emit_store_flag (out, code, ix86_compare_op0,
6186 ix86_compare_op1, VOIDmode, 0, 1);
6188 emit_insn (gen_addsi3 (out, out, constm1_rtx));
6189 emit_insn (gen_andsi3 (out, out, GEN_INT (cf-ct)));
6191 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
6192 if (out != operands[0])
6193 emit_move_insn (operands[0], out);
6195 return 1; /* DONE */
6201 /* Try a few things more with specific constants and a variable. */
6204 rtx var, orig_out, out, tmp;
6207 return 0; /* FAIL */
6209 /* If one of the two operands is an interesting constant, load a
6210 constant with the above and mask it in with a logical operation. */
6212 if (GET_CODE (operands[2]) == CONST_INT)
6215 if (INTVAL (operands[2]) == 0)
6216 operands[3] = constm1_rtx, op = and_optab;
6217 else if (INTVAL (operands[2]) == -1)
6218 operands[3] = const0_rtx, op = ior_optab;
6220 return 0; /* FAIL */
6222 else if (GET_CODE (operands[3]) == CONST_INT)
6225 if (INTVAL (operands[3]) == 0)
6226 operands[2] = constm1_rtx, op = and_optab;
6227 else if (INTVAL (operands[3]) == -1)
6228 operands[2] = const0_rtx, op = ior_optab;
6230 return 0; /* FAIL */
6233 return 0; /* FAIL */
6235 orig_out = operands[0];
6236 tmp = gen_reg_rtx (GET_MODE (orig_out));
6239 /* Recurse to get the constant loaded. */
6240 if (ix86_expand_int_movcc (operands) == 0)
6241 return 0; /* FAIL */
6243 /* Mask in the interesting variable. */
6244 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
6246 if (out != orig_out)
6247 emit_move_insn (orig_out, out);
6249 return 1; /* DONE */
6253 * For comparison with above,
/* cmov path: force both arms into registers/memory as required. */
6263 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
6264 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
6265 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
6266 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
6268 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
6270 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
6271 emit_move_insn (tmp, operands[3]);
6274 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
6276 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
6277 emit_move_insn (tmp, operands[2]);
6281 emit_insn (compare_seq);
6282 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6283 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6284 compare_op, operands[2],
6287 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6288 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6293 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6294 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6299 return 1; /* DONE */
/* Expand a floating point conditional move.  Tries SSE min/max and SSE
   masked cmov forms first, then falls back to fcmov. */
6303 ix86_expand_fp_movcc (operands)
6308 rtx compare_op, second_test, bypass_test;
6310 /* For SF/DFmode conditional moves based on comparisons
6311 in same mode, we may want to use SSE min/max instructions. */
6312 if (((TARGET_SSE && GET_MODE (operands[0]) == SFmode)
6313 || (TARGET_SSE2 && GET_MODE (operands[0]) == DFmode))
6314 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
6315 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
6317 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
6318 /* We may be called from the post-reload splitter. */
6319 && (!REG_P (operands[0])
6320 || SSE_REG_P (operands[0])
6321 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
6323 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
6324 code = GET_CODE (operands[1]);
6326 /* See if we have (cross) match between comparison operands and
6327 conditional move operands. */
6328 if (rtx_equal_p (operands[2], op1))
6333 code = reverse_condition_maybe_unordered (code);
6335 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
6337 /* Check for min operation. */
6340 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
6341 if (memory_operand (op0, VOIDmode))
6342 op0 = force_reg (GET_MODE (operands[0]), op0);
6343 if (GET_MODE (operands[0]) == SFmode)
6344 emit_insn (gen_minsf3 (operands[0], op0, op1));
6346 emit_insn (gen_mindf3 (operands[0], op0, op1));
6349 /* Check for max operation. */
6352 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
6353 if (memory_operand (op0, VOIDmode))
6354 op0 = force_reg (GET_MODE (operands[0]), op0);
6355 if (GET_MODE (operands[0]) == SFmode)
6356 emit_insn (gen_maxsf3 (operands[0], op0, op1));
6358 emit_insn (gen_maxdf3 (operands[0], op0, op1));
6362 /* Manage condition to be sse_comparison_operator. In case we are
6363 in non-ieee mode, try to canonicalize the destination operand
6364 to be first in the comparison - this helps reload to avoid extra
6366 if (!sse_comparison_operator (operands[1], VOIDmode)
6367 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
6369 rtx tmp = ix86_compare_op0;
6370 ix86_compare_op0 = ix86_compare_op1;
6371 ix86_compare_op1 = tmp;
6372 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
6373 VOIDmode, ix86_compare_op0,
6376 /* Similarly try to manage result to be first operand of conditional
6377 move. We also don't support the NE comparison on SSE, so try to
6379 if (rtx_equal_p (operands[0], operands[3])
6380 || GET_CODE (operands[1]) == NE)
6382 rtx tmp = operands[2];
6383 operands[2] = operands[3];
6385 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
6386 (GET_CODE (operands[1])),
6387 VOIDmode, ix86_compare_op0,
6390 if (GET_MODE (operands[0]) == SFmode)
6391 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
6392 operands[2], operands[3],
6393 ix86_compare_op0, ix86_compare_op1));
6395 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
6396 operands[2], operands[3],
6397 ix86_compare_op0, ix86_compare_op1));
6401 /* The floating point conditional move instructions don't directly
6402 support conditions resulting from a signed integer comparison. */
6404 code = GET_CODE (operands[1]);
6405 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
6407 /* The floating point conditional move instructions don't directly
6408 support signed integer comparisons. */
6410 if (!fcmov_comparison_operator (compare_op, VOIDmode))
6412 if (second_test != NULL || bypass_test != NULL)
/* Reduce an unsupported condition to a QImode flag compared to zero. */
6414 tmp = gen_reg_rtx (QImode);
6415 ix86_expand_setcc (code, tmp);
6417 ix86_compare_op0 = tmp;
6418 ix86_compare_op1 = const0_rtx;
6419 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
6421 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
6423 tmp = gen_reg_rtx (GET_MODE (operands[0]));
6424 emit_move_insn (tmp, operands[3]);
6427 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
6429 tmp = gen_reg_rtx (GET_MODE (operands[0]));
6430 emit_move_insn (tmp, operands[2]);
6434 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6435 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6440 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6441 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6446 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6447 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6455 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
6456 works for floating point parameters and non-offsettable memories.
6457 For pushes, it returns just stack offsets; the values will be saved
6458 in the right order. Maximally three parts are generated. */
6461 ix86_split_to_parts (operand, parts, mode)
6464 enum machine_mode mode;
/* TFmode occupies three SImode parts; other modes are size/4 parts. */
6466 int size = mode == TFmode ? 3 : GET_MODE_SIZE (mode) / 4;
6468 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
6470 if (size < 2 || size > 3)
6473 /* Optimize constant pool reference to immediates. This is used by fp moves,
6474 that force all constants to memory to allow combining. */
6476 if (GET_CODE (operand) == MEM
6477 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
6478 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
6479 operand = get_pool_constant (XEXP (operand, 0));
6481 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
6483 /* The only non-offsettable memories we handle are pushes. */
6484 if (! push_operand (operand, VOIDmode))
6487 PUT_MODE (operand, SImode);
6488 parts[0] = parts[1] = parts[2] = operand;
6493 split_di (&operand, 1, &parts[0], &parts[1]);
6496 if (REG_P (operand))
6498 if (!reload_completed)
/* Hard register: name the consecutive SImode pieces directly. */
6500 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
6501 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
6503 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
6505 else if (offsettable_memref_p (operand))
6507 PUT_MODE (operand, SImode);
6509 parts[1] = adj_offsettable_operand (operand, 4);
6511 parts[2] = adj_offsettable_operand (operand, 8);
6513 else if (GET_CODE (operand) == CONST_DOUBLE)
/* FP constant: convert to the target's word images. */
6518 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
6523 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
6524 parts[2] = GEN_INT (l[2]);
6527 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
6532 parts[1] = GEN_INT (l[1]);
6533 parts[0] = GEN_INT (l[0]);
6543 /* Emit insns to perform a move or push of DI, DF, and XF values.
6544 Return false when normal moves are needed; true when all required
6545 insns have been emitted. Operands 2-4 contain the input values
6546 in the correct order; operands 5-7 contain the output values. */
6549 ix86_split_long_move (operands1)
6558 /* Make our own copy to avoid clobbering the operands. */
6559 operands[0] = copy_rtx (operands1[0]);
6560 operands[1] = copy_rtx (operands1[1]);
6562 /* The only non-offsettable memory we handle is push. */
6563 if (push_operand (operands[0], VOIDmode))
6565 else if (GET_CODE (operands[0]) == MEM
6566 && ! offsettable_memref_p (operands[0]))
6569 size = ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
6570 ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));
6572 /* When emitting push, take care for source operands on the stack. */
6573 if (push && GET_CODE (operands[1]) == MEM
6574 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
6577 part[1][1] = part[1][2];
6578 part[1][0] = part[1][1];
6581 /* We need to do copy in the right order in case an address register
6582 of the source overlaps the destination. */
6583 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
6585 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
6587 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
6590 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
6593 /* Collision in the middle part can be handled by reordering. */
6594 if (collisions == 1 && size == 3
6595 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
6598 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
6599 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
6602 /* If there are more collisions, we can't handle it by reordering.
6603 Do an lea to the last part and use only one colliding move. */
6604 else if (collisions > 1)
6607 emit_insn (gen_rtx_SET (VOIDmode, part[0][size - 1],
6608 XEXP (part[1][0], 0)));
6609 part[1][0] = change_address (part[1][0], SImode, part[0][size - 1]);
6610 part[1][1] = adj_offsettable_operand (part[1][0], 4);
6612 part[1][2] = adj_offsettable_operand (part[1][0], 8);
6620 /* We use only first 12 bytes of TFmode value, but for pushing we
6621 are required to adjust stack as if we were pushing real 16byte
6623 if (GET_MODE (operands1[0]) == TFmode)
6624 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
/* Push parts from highest to lowest so they land in memory order. */
6626 emit_insn (gen_push (part[1][2]));
6628 emit_insn (gen_push (part[1][1]));
6629 emit_insn (gen_push (part[1][0]));
6633 /* Choose correct order to not overwrite the source before it is copied. */
6634 if ((REG_P (part[0][0])
6635 && REG_P (part[1][1])
6636 && (REGNO (part[0][0]) == REGNO (part[1][1])
6638 && REGNO (part[0][0]) == REGNO (part[1][2]))))
6640 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Reversed order: emit the high part first. */
6644 operands1[2] = part[0][2];
6645 operands1[3] = part[0][1];
6646 operands1[4] = part[0][0];
6647 operands1[5] = part[1][2];
6648 operands1[6] = part[1][1];
6649 operands1[7] = part[1][0];
6653 operands1[2] = part[0][1];
6654 operands1[3] = part[0][0];
6655 operands1[5] = part[1][1];
6656 operands1[6] = part[1][0];
/* Normal order: low part first. */
6663 operands1[2] = part[0][0];
6664 operands1[3] = part[0][1];
6665 operands1[4] = part[0][2];
6666 operands1[5] = part[1][0];
6667 operands1[6] = part[1][1];
6668 operands1[7] = part[1][2];
6672 operands1[2] = part[0][0];
6673 operands1[3] = part[0][1];
6674 operands1[5] = part[1][0];
6675 operands1[6] = part[1][1];
/* Split a DImode left shift into SImode operations.  SCRATCH may be
   used (when non-NULL after reload) for the variable-count adjustment. */
6683 ix86_split_ashldi (operands, scratch)
6684 rtx *operands, scratch;
6686 rtx low[2], high[2];
/* Constant count: emit the exact two-insn (or shorter) sequence. */
6689 if (GET_CODE (operands[2]) == CONST_INT)
6691 split_di (operands, 2, low, high);
6692 count = INTVAL (operands[2]) & 63;
/* Counts >= 32: low word shifts entirely into the high word. */
6696 emit_move_insn (high[0], low[1]);
6697 emit_move_insn (low[0], const0_rtx);
6700 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
6704 if (!rtx_equal_p (operands[0], operands[1]))
6705 emit_move_insn (operands[0], operands[1]);
6706 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
6707 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
/* Variable count: shld + shl, then fix up for counts >= 32. */
6712 if (!rtx_equal_p (operands[0], operands[1]))
6713 emit_move_insn (operands[0], operands[1]);
6715 split_di (operands, 1, low, high);
6717 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
6718 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
6720 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
6722 if (! no_new_pseudos)
6723 scratch = force_reg (SImode, const0_rtx);
6725 emit_move_insn (scratch, const0_rtx);
6727 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
6731 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a DImode arithmetic right shift into SImode operations. */
6736 ix86_split_ashrdi (operands, scratch)
6737 rtx *operands, scratch;
6739 rtx low[2], high[2];
6742 if (GET_CODE (operands[2]) == CONST_INT)
6744 split_di (operands, 2, low, high);
6745 count = INTVAL (operands[2]) & 63;
/* Counts >= 32: high word moves to low, high becomes the sign word. */
6749 emit_move_insn (low[0], high[1]);
6751 if (! reload_completed)
6752 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
6755 emit_move_insn (high[0], low[0]);
6756 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
6760 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
6764 if (!rtx_equal_p (operands[0], operands[1]))
6765 emit_move_insn (operands[0], operands[1]);
6766 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
6767 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd + sar, then fix up for counts >= 32 using the
   sign word in SCRATCH. */
6772 if (!rtx_equal_p (operands[0], operands[1]))
6773 emit_move_insn (operands[0], operands[1]);
6775 split_di (operands, 1, low, high);
6777 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
6778 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
6780 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
6782 if (! no_new_pseudos)
6783 scratch = gen_reg_rtx (SImode);
6784 emit_move_insn (scratch, high[0]);
6785 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
6786 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
6790 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a DImode logical right shift into SImode operations. */
6795 ix86_split_lshrdi (operands, scratch)
6796 rtx *operands, scratch;
6798 rtx low[2], high[2];
6801 if (GET_CODE (operands[2]) == CONST_INT)
6803 split_di (operands, 2, low, high);
6804 count = INTVAL (operands[2]) & 63;
/* Counts >= 32: high word moves to low, high word becomes zero. */
6808 emit_move_insn (low[0], high[1]);
6809 emit_move_insn (high[0], const0_rtx);
6812 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
6816 if (!rtx_equal_p (operands[0], operands[1]))
6817 emit_move_insn (operands[0], operands[1]);
6818 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
6819 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd + shr, then fix up for counts >= 32. */
6824 if (!rtx_equal_p (operands[0], operands[1]))
6825 emit_move_insn (operands[0], operands[1]);
6827 split_di (operands, 1, low, high);
6829 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
6830 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
6832 /* Heh. By reversing the arguments, we can reuse this pattern. */
6833 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
6835 if (! no_new_pseudos)
6836 scratch = force_reg (SImode, const0_rtx);
6838 emit_move_insn (scratch, const0_rtx);
6840 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
6844 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
6848 /* Expand the appropriate insns for doing strlen if not just doing
6851 out = result, initialized with the start address
6852 align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
6854 not aligned, otherwise undefined
6856 This is just the body. It needs the initialisations mentioned above and
6857 some address computing at the end. These things are done in i386.md. */
/* Emit the body of an unrolled strlen: align the pointer byte-by-byte,
   then scan a word at a time using the "(x - 0x01010101) & ~x & 0x80808080"
   zero-byte trick.  OUT holds the current address (and the result),
   ALIGN_RTX the known alignment, SCRATCH a scratch register preloaded
   with the start address when the alignment is unknown.  */
ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
     rtx out, align_rtx, scratch;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();

  rtx tmpreg = gen_reg_rtx (SImode);

  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
      /* Is there a known alignment and is it not 2? */
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   SImode, 1, 0, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
				   SImode, 1, 0, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
				   SImode, 1, 0, align_3_label);
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if is aligned to 4 - byte.  */
	  align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   SImode, 1, 0, align_4_label);

      mem = gen_rtx_MEM (QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, 0, end_0_label);

      /* Increment the address.  */
      emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
				   QImode, 1, 0, end_0_label);

	  emit_insn (gen_addsi3 (out, out, const1_rtx));

	  emit_label (align_3_label);

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, 0, end_0_label);

      emit_insn (gen_addsi3 (out, out, const1_rtx));

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = gen_rtx_MEM (SImode, out);
  emit_move_insn (scratch, mem);
  emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0,
			   SImode, 1, 0, align_4_label);

      /* Branch-free fixup: use conditional moves to locate the zero
	 byte within the word.  */
      rtx reg = gen_reg_rtx (SImode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,

      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg,
			      gen_rtx_PLUS (SImode, out, GEN_INT (2))));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,

      /* No cmove available: use an explicit conditional jump instead.  */
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      emit_insn (gen_addsi3 (out, out, GEN_INT (2)));

      emit_label (end_2_label);

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));

  emit_label (end_0_label);
7015 /* Clear stack slot assignments remembered from previous functions.
7016 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocate a zeroed machine_function record for function P.
   Called once per function from INIT_EXPANDERS.  */
ix86_init_machine_status (p)
  p->machine = (struct machine_function *)
    xcalloc (1, sizeof (struct machine_function));
7027 /* Mark machine specific bits of P for GC. */
/* Mark machine specific bits of P for GC: walk every (mode, slot)
   entry in the per-function stack_locals cache and mark its rtx.  */
ix86_mark_machine_status (p)
  struct machine_function *machine = p->machine;
  enum machine_mode mode;

  for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
       mode = (enum machine_mode) ((int) mode + 1))
    for (n = 0; n < MAX_386_STACK_LOCALS; n++)
      ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
7046 ix86_free_machine_status (p)
7053 /* Return a MEM corresponding to a stack slot with mode MODE.
7054 Allocate a new slot if necessary.
7056 The RTL for a function can have several slots available: N is
7057 which slot to use. */
assign_386_stack_local (mode, n)
     enum machine_mode mode;
  /* Reject out-of-range slot numbers.  */
  if (n < 0 || n >= MAX_386_STACK_LOCALS)

  /* Allocate the slot lazily and cache it for reuse within this
     function.  */
  if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
    ix86_stack_locals[(int) mode][n]
      = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  return ix86_stack_locals[(int) mode][n];
7074 /* Calculate the length of the memory address in the instruction
7075 encoding. Does not include the one-byte modrm, opcode, or prefix. */
memory_address_length (addr)
  struct ix86_address parts;
  rtx base, index, disp;

  /* Autoincrement forms (push/pop addressing) add no extra bytes.  */
  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC)

  if (! ix86_decompose_address (addr, &parts))

  index = parts.index;

  /* Register Indirect.  */
  if (base && !index && !disp)
      /* Special cases: ebp and esp need the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
	  || addr == arg_pointer_rtx
	  || addr == frame_pointer_rtx
	  || addr == hard_frame_pointer_rtx)

  /* Direct Addressing.  */
  else if (disp && !base && !index)

      /* Find the length of the displacement constant.  */
	  /* A displacement fitting the 'K' constraint (signed 8-bit)
	     takes only one byte.  */
	  if (GET_CODE (disp) == CONST_INT
	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))

      /* An index requires the two-byte modrm form.  */
7132 /* Compute default value for "length_immediate" attribute. When SHORTFORM is set
7133 expect that insn have 8bit immediate alternative. */
ix86_attr_length_immediate_default (insn, shortform)
  extract_insn_cached (insn);
  /* Scan backwards for a constant operand; its encoded width decides
     the attribute value.  */
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
	/* With a short-form alternative available, a signed 8-bit
	   immediate ('K' constraint) encodes in one byte.  */
	    && GET_CODE (recog_data.operand[i]) == CONST_INT
	    && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))

	/* Otherwise the immediate width follows the insn's mode.  */
	switch (get_attr_mode (insn))
	    fatal_insn ("Unknown insn mode", insn);
7171 /* Compute default value for "length_address" attribute. */
ix86_attr_length_address_default (insn)
  extract_insn_cached (insn);
  /* The first MEM operand found (scanning backwards) determines the
     address encoding length.  */
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (GET_CODE (recog_data.operand[i]) == MEM)
	return memory_address_length (XEXP (recog_data.operand[i], 0));
7187 /* Return the maximum number of instructions a cpu can issue. */
7194 case PROCESSOR_PENTIUM:
7198 case PROCESSOR_PENTIUMPRO:
7199 case PROCESSOR_PENTIUM4:
7200 case PROCESSOR_ATHLON:
7208 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
7209 by DEP_INSN and nothing set by DEP_INSN. */
7212 ix86_flags_dependant (insn, dep_insn, insn_type)
7214 enum attr_type insn_type;
7218 /* Simplify the test for uninteresting insns. */
7219 if (insn_type != TYPE_SETCC
7220 && insn_type != TYPE_ICMOV
7221 && insn_type != TYPE_FCMOV
7222 && insn_type != TYPE_IBR)
7225 if ((set = single_set (dep_insn)) != 0)
7227 set = SET_DEST (set);
7230 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
7231 && XVECLEN (PATTERN (dep_insn), 0) == 2
7232 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
7233 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
7235 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
7236 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
7241 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
7244 /* This test is true if the dependant insn reads the flags but
7245 not any other potentially set register. */
7246 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
7249 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
7255 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
7256 address with operands set by DEP_INSN. */
ix86_agi_dependant (insn, dep_insn, insn_type)
     enum attr_type insn_type;
  /* For LEA the computed address is the SET_SRC of the pattern itself,
     not a MEM operand.  */
  if (insn_type == TYPE_LEA)
      addr = PATTERN (insn);
      if (GET_CODE (addr) == SET)
      else if (GET_CODE (addr) == PARALLEL
	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
	addr = XVECEXP (addr, 0, 0);
      addr = SET_SRC (addr);
      /* Otherwise look for a MEM operand and use its address.  */
      extract_insn_cached (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
	if (GET_CODE (recog_data.operand[i]) == MEM)
	    addr = XEXP (recog_data.operand[i], 0);

  /* An AGI dependency exists when DEP_INSN writes a register used in
     INSN's address computation.  */
  return modified_in_p (addr, dep_insn);
7295 ix86_adjust_cost (insn, link, dep_insn, cost)
7296 rtx insn, link, dep_insn;
7299 enum attr_type insn_type, dep_insn_type;
7300 enum attr_memory memory;
7302 int dep_insn_code_number;
7304 /* Anti and output depenancies have zero cost on all CPUs. */
7305 if (REG_NOTE_KIND (link) != 0)
7308 dep_insn_code_number = recog_memoized (dep_insn);
7310 /* If we can't recognize the insns, we can't really do anything. */
7311 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
7314 insn_type = get_attr_type (insn);
7315 dep_insn_type = get_attr_type (dep_insn);
7317 /* Prologue and epilogue allocators can have a false dependency on ebp.
7318 This results in one cycle extra stall on Pentium prologue scheduling,
7319 so handle this important case manually. */
7320 if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
7321 && dep_insn_type == TYPE_ALU
7322 && !reg_mentioned_p (stack_pointer_rtx, insn))
7327 case PROCESSOR_PENTIUM:
7328 /* Address Generation Interlock adds a cycle of latency. */
7329 if (ix86_agi_dependant (insn, dep_insn, insn_type))
7332 /* ??? Compares pair with jump/setcc. */
7333 if (ix86_flags_dependant (insn, dep_insn, insn_type))
7336 /* Floating point stores require value to be ready one cycle ealier. */
7337 if (insn_type == TYPE_FMOV
7338 && get_attr_memory (insn) == MEMORY_STORE
7339 && !ix86_agi_dependant (insn, dep_insn, insn_type))
7343 case PROCESSOR_PENTIUMPRO:
7344 /* Since we can't represent delayed latencies of load+operation,
7345 increase the cost here for non-imov insns. */
7346 if (dep_insn_type != TYPE_IMOV
7347 && dep_insn_type != TYPE_FMOV
7348 && ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
7349 || memory == MEMORY_BOTH))
7352 /* INT->FP conversion is expensive. */
7353 if (get_attr_fp_int_src (dep_insn))
7356 /* There is one cycle extra latency between an FP op and a store. */
7357 if (insn_type == TYPE_FMOV
7358 && (set = single_set (dep_insn)) != NULL_RTX
7359 && (set2 = single_set (insn)) != NULL_RTX
7360 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
7361 && GET_CODE (SET_DEST (set2)) == MEM)
7366 /* The esp dependency is resolved before the instruction is really
7368 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
7369 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
7372 /* Since we can't represent delayed latencies of load+operation,
7373 increase the cost here for non-imov insns. */
7374 if ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
7375 || memory == MEMORY_BOTH)
7376 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
7378 /* INT->FP conversion is expensive. */
7379 if (get_attr_fp_int_src (dep_insn))
7383 case PROCESSOR_ATHLON:
7384 if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
7385 || memory == MEMORY_BOTH)
7387 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
7402 struct ppro_sched_data
7405 int issued_this_cycle;
/* Return the length attribute of INSN if it is recognizable.  */
ix86_safe_length (insn)
  if (recog_memoized (insn) >= 0)
    return get_attr_length(insn);
/* NOTE(review): despite the name, this returns the full length
   attribute for a recognizable INSN, not just a prefix length --
   looks suspicious; confirm against the i386.md length attributes.  */
ix86_safe_length_prefix (insn)
  if (recog_memoized (insn) >= 0)
    return get_attr_length(insn);
/* Return the memory attribute of INSN, or MEMORY_UNKNOWN when the
   insn is not recognizable.  */
static enum attr_memory
ix86_safe_memory (insn)
  if (recog_memoized (insn) >= 0)
    return get_attr_memory(insn);

  return MEMORY_UNKNOWN;
/* Return the Pentium pairability attribute of INSN, defaulting to
   non-pairable (PENT_PAIR_NP) when the insn is not recognizable.  */
static enum attr_pent_pair
ix86_safe_pent_pair (insn)
  if (recog_memoized (insn) >= 0)
    return get_attr_pent_pair(insn);

  return PENT_PAIR_NP;
/* Return the PPro uop count class of INSN, defaulting conservatively
   to PPRO_UOPS_MANY when the insn is not recognizable.  */
static enum attr_ppro_uops
ix86_safe_ppro_uops (insn)
  if (recog_memoized (insn) >= 0)
    return get_attr_ppro_uops (insn);

  return PPRO_UOPS_MANY;
/* Write the insn UIDs of the current PPro decode packet to DUMP
   (only when slot 0 is occupied).  */
ix86_dump_ppro_packet (dump)
  if (ix86_sched_data.ppro.decode[0])
      fprintf (dump, "PPRO packet: %d",
	       INSN_UID (ix86_sched_data.ppro.decode[0]));
      if (ix86_sched_data.ppro.decode[1])
	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
      if (ix86_sched_data.ppro.decode[2])
	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
7475 /* We're beginning a new block. Initialize data structures as necessary. */
ix86_sched_init (dump, sched_verbose)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
  /* Reset all per-block scheduling state.  */
  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
7485 /* Shift INSN to SLOT, and shift everything else down. */
ix86_reorder_insn (insnp, slot)
      /* Ripple each insn one position down until INSNP's original
	 contents arrive at SLOT.  */
      insnp[0] = insnp[1];
    while (++insnp != slot);
7501 /* Find an instruction with given pairability and minimal amount of cycles
7502 lost by the fact that the CPU waits for both pipelines to finish before
7503 reading next instructions. Also take care that both instructions together
7504 can not exceed 7 bytes. */
ix86_pent_find_pair (e_ready, ready, type, first)
     enum attr_pent_pair type;
  int mincycles, cycles;
  enum attr_pent_pair tmp;
  enum attr_memory memory;
  rtx *insnp, *bestinsnp = NULL;

  /* The two paired insns may not exceed 7 bytes; give up if FIRST
     alone is already too long.  */
  if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))

  memory = ix86_safe_memory (first);
  cycles = result_ready_cost (first);
  mincycles = INT_MAX;

  /* Scan the ready list for the candidate of the requested pairing
     TYPE whose latency best matches FIRST's.  */
  for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
    if ((tmp = ix86_safe_pent_pair (*insnp)) == type
	&& ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
	enum attr_memory second_memory;
	int secondcycles, currentcycles;

	second_memory = ix86_safe_memory (*insnp);
	secondcycles = result_ready_cost (*insnp);
	currentcycles = abs (cycles - secondcycles);

	if (secondcycles >= 1 && cycles >= 1)
	    /* Two read/modify/write instructions together takes two
	       cycles longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)

	    /* Read modify/write instruction followed by read/modify
	       takes one cycle longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
		&& tmp != PENT_PAIR_UV
		&& ix86_safe_pent_pair (first) != PENT_PAIR_UV)

	if (currentcycles < mincycles)
	  bestinsnp = insnp, mincycles = currentcycles;
7557 /* Subroutines of ix86_sched_reorder. */
ix86_sched_reorder_pentium (ready, e_ready)
  enum attr_pent_pair pair1, pair2;

  /* This wouldn't be necessary if Haifa knew that static insn ordering
     is important to which pipe an insn is issued to.  So we have to make
     some minor rearrangements.  */

  pair1 = ix86_safe_pent_pair (*e_ready);

  /* If the first insn is non-pairable, let it be.  */
  if (pair1 == PENT_PAIR_NP)

  pair2 = PENT_PAIR_NP;

  /* If the first insn is UV or PV pairable, search for a PU
     insn to pair with it.  */
  if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_PU, *e_ready);
	pair2 = PENT_PAIR_PU;

  /* If the first insn is PU or UV pairable, search for a PV
     insn to pair with it.  */
  if (pair2 == PENT_PAIR_NP
      && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_PV, *e_ready);
	pair2 = PENT_PAIR_PV;

  /* If the first insn is pairable, search for a UV
     insn to pair with it.  */
  if (pair2 == PENT_PAIR_NP)
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_UV, *e_ready);
	pair2 = PENT_PAIR_UV;

  if (pair2 == PENT_PAIR_NP)

  /* Found something!  Decide if we need to swap the order.  */
  if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
      || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
	  && ix86_safe_memory (*e_ready) == MEMORY_BOTH
	  && ix86_safe_memory (*insnp) == MEMORY_LOAD))
    ix86_reorder_insn (insnp, e_ready);
    ix86_reorder_insn (insnp, e_ready - 1);
ix86_sched_reorder_ppro (ready, e_ready)
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
      if (cur_uops == PPRO_UOPS_MANY)

      /* Otherwise, search for a 2-4 uop insn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
	  cur_uops = ix86_safe_ppro_uops (*--insnp);

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
	ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
	if (ready >= e_ready)

	cur_uops = ix86_safe_ppro_uops (*insnp);
	while (cur_uops != PPRO_UOPS_ONE)
	    cur_uops = ix86_safe_ppro_uops (*--insnp);

	/* Found one.  Move it to the head of the queue and issue it.  */
	if (cur_uops == PPRO_UOPS_ONE)
	    ix86_reorder_insn (insnp, e_ready);
	    decode[i] = *e_ready--;
	    issued_this_cycle++;

	/* ??? Didn't find one.  Ideally, here we would do a lazy split
	   of 2-uop insns, issue one and queue the other.  */

  /* Always report at least one insn issued.  */
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
/* We are about to begin issuing insns for this clock cycle.
7710 Override the default sort algorithm to better slot instructions. */
ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     int clock_var ATTRIBUTE_UNUSED;
  rtx *e_ready = ready + n_ready - 1;

  /* Dispatch on the target CPU's pipeline model.  */
    case PROCESSOR_PENTIUM:
      ix86_sched_reorder_pentium (ready, e_ready);

    case PROCESSOR_PENTIUMPRO:
      ix86_sched_reorder_ppro (ready, e_ready);

  return ix86_issue_rate ();
7742 /* We are about to issue INSN. Return the number of insns left on the
7743 ready queue that can be issued this cycle. */
ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	/* A complex insn occupies the whole decode group.  */
	if (uops == PPRO_UOPS_MANY)
	    ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	/* A 2-4 uop insn must go to decoder 0; flush the packet.  */
	else if (uops == PPRO_UOPS_FEW)
	    ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    /* A single-uop insn fills the first free decoder slot.  */
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		  ix86_sched_data.ppro.decode[i] = insn;
	    ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;

      return --ix86_sched_data.ppro.issued_this_cycle;
7805 /* Walk through INSNS and look for MEM references whose address is DSTREG or
7806 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
     rtx dstref, srcref, dstreg, srcreg;
  /* Walk the whole insn list and fix up memory attributes in each
     pattern.  */
  for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
      ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
7822 /* Subroutine of above to actually do the updating by recursively walking
ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
     rtx dstref, srcref, dstreg, srcreg;
  enum rtx_code code = GET_CODE (x);
  const char *format_ptr = GET_RTX_FORMAT (code);

  /* A MEM addressed exactly by DSTREG/SRCREG inherits the attributes
     of the corresponding original reference.  */
  if (code == MEM && XEXP (x, 0) == dstreg)
    MEM_COPY_ATTRIBUTES (x, dstref);
  else if (code == MEM && XEXP (x, 0) == srcreg)
    MEM_COPY_ATTRIBUTES (x, srcref);

  /* Recurse over every sub-expression of X.  */
  for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
      if (*format_ptr == 'e')
	ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
      else if (*format_ptr == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
7851 /* Compute the alignment given to a constant that is being placed in memory.
7852 EXP is the constant and ALIGN is the alignment that the object would
7854 The value of this function is used instead of that alignment to align
ix86_constant_alignment (exp, align)
  /* DFmode constants get 64-bit alignment, 128-bit modes get 128.  */
  if (TREE_CODE (exp) == REAL_CST)
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
  /* Long string constants (>= 31 chars) are aligned more widely.  */
  else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
7876 /* Compute the alignment for a static variable.
7877 TYPE is the data type, and ALIGN is the alignment that
7878 the object would ordinarily have. The value of this function is used
7879 instead of that alignment to align the object. */
ix86_data_alignment (type, align)
  /* Aggregates of at least 256 bits get 256-bit alignment.  */
  if (AGGREGATE_TYPE_P (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)

  /* Arrays align by their element mode.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
  else if (TREE_CODE (type) == COMPLEX_TYPE)
      if (TYPE_MODE (type) == DCmode && align < 64)
      if (TYPE_MODE (type) == XCmode && align < 128)
  /* Records and unions align by the mode of their first field.  */
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
      if (TYPE_MODE (type) == DFmode && align < 64)
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
7930 /* Compute the alignment for a local variable.
7931 TYPE is the data type, and ALIGN is the alignment that
7932 the object would ordinarily have. The value of this macro is used
7933 instead of that alignment to align the object. */
ix86_local_alignment (type, align)
  /* Arrays align by their element mode.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
  else if (TREE_CODE (type) == COMPLEX_TYPE)
      if (TYPE_MODE (type) == DCmode && align < 64)
      if (TYPE_MODE (type) == XCmode && align < 128)
  /* Records and unions align by the mode of their first field.  */
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)

      if (TYPE_MODE (type) == DFmode && align < 64)
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
/* Convenience wrapper for registering one machine-dependent builtin.  */
#define def_builtin(NAME, TYPE, CODE) \
  builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL_PTR)
/* Describes one builtin: the insn pattern it expands to, its builtin
   enumeration code, and the comparison code (if any) it implements.  */
struct builtin_description
  enum insn_code icode;
  enum ix86_builtins code;
  enum rtx_code comparison;
/* SSE comiss/ucomiss builtins.  GT/GE entries reuse the LT/LE
   comparison codes with the trailing flag set -- presumably requesting
   an operand swap; confirm against ix86_expand_sse_comi.  */
static struct builtin_description bdesc_comi[] =
  { CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
8003 static struct builtin_description bdesc_2arg[] =
8006 { CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
8007 { CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
8008 { CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
8009 { CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
8010 { CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
8011 { CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
8012 { CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
8013 { CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
8015 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
8016 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
8017 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
8018 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
8019 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
8020 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
8021 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
8022 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
8023 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
8024 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
8025 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
8026 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
8027 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
8028 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
8029 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
8030 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
8031 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
8032 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
8033 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
8034 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
8035 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
8036 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
8037 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
8038 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
8040 { CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
8041 { CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
8042 { CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
8043 { CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
8045 { CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
8046 { CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
8047 { CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
8048 { CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
8050 { CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
8051 { CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
8052 { CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
8053 { CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
8054 { CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
8057 { CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
8058 { CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
8059 { CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
8060 { CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
8061 { CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
8062 { CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
8064 { CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
8065 { CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
8066 { CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
8067 { CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
8068 { CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
8069 { CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
8070 { CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
8071 { CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
8073 { CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
8074 { CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
8075 { CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
8077 { CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
8078 { CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
8079 { CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
8080 { CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
8082 { CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
8083 { CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
8085 { CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
8086 { CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
8087 { CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
8088 { CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
8089 { CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
8090 { CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
8092 { CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
8093 { CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
8094 { CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
8095 { CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
8097 { CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
8098 { CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
8099 { CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
8100 { CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
8101 { CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
8102 { CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
8105 { CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
8106 { CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
8107 { CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
8109 { CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
8110 { CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
8112 { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
8113 { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
8114 { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
8115 { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
8116 { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
8117 { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
8119 { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
8120 { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
8121 { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
8122 { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
8123 { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
8124 { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
8126 { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
8127 { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
8128 { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
8129 { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
8131 { CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
8132 { CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
8136 static struct builtin_description bdesc_1arg[] =
8138 { CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
8139 { CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
8141 { CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
8142 { CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
8143 { CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
8145 { CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
8146 { CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
8147 { CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
8148 { CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
8152 /* Expand all the target specific builtins. This is not called if TARGET_MMX
8153 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
8156 ix86_init_builtins ()
8158 struct builtin_description * d;
8160 tree endlink = void_list_node;
8162 tree pchar_type_node = build_pointer_type (char_type_node);
8163 tree pfloat_type_node = build_pointer_type (float_type_node);
8164 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
8165 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
8168 tree int_ftype_v4sf_v4sf
8169 = build_function_type (integer_type_node,
8170 tree_cons (NULL_TREE, V4SF_type_node,
8171 tree_cons (NULL_TREE,
8174 tree v4si_ftype_v4sf_v4sf
8175 = build_function_type (V4SI_type_node,
8176 tree_cons (NULL_TREE, V4SF_type_node,
8177 tree_cons (NULL_TREE,
8180 /* MMX/SSE/integer conversions. */
8181 tree int_ftype_v4sf_int
8182 = build_function_type (integer_type_node,
8183 tree_cons (NULL_TREE, V4SF_type_node,
8184 tree_cons (NULL_TREE,
8188 = build_function_type (integer_type_node,
8189 tree_cons (NULL_TREE, V4SF_type_node,
8192 = build_function_type (integer_type_node,
8193 tree_cons (NULL_TREE, V8QI_type_node,
8196 = build_function_type (integer_type_node,
8197 tree_cons (NULL_TREE, V2SI_type_node,
8200 = build_function_type (V2SI_type_node,
8201 tree_cons (NULL_TREE, integer_type_node,
8203 tree v4sf_ftype_v4sf_int
8204 = build_function_type (integer_type_node,
8205 tree_cons (NULL_TREE, V4SF_type_node,
8206 tree_cons (NULL_TREE, integer_type_node,
8208 tree v4sf_ftype_v4sf_v2si
8209 = build_function_type (V4SF_type_node,
8210 tree_cons (NULL_TREE, V4SF_type_node,
8211 tree_cons (NULL_TREE, V2SI_type_node,
8213 tree int_ftype_v4hi_int
8214 = build_function_type (integer_type_node,
8215 tree_cons (NULL_TREE, V4HI_type_node,
8216 tree_cons (NULL_TREE, integer_type_node,
8218 tree v4hi_ftype_v4hi_int_int
8219 = build_function_type (V4HI_type_node,
8220 tree_cons (NULL_TREE, V4HI_type_node,
8221 tree_cons (NULL_TREE, integer_type_node,
8222 tree_cons (NULL_TREE,
8225 /* Miscellaneous. */
8226 tree v8qi_ftype_v4hi_v4hi
8227 = build_function_type (V8QI_type_node,
8228 tree_cons (NULL_TREE, V4HI_type_node,
8229 tree_cons (NULL_TREE, V4HI_type_node,
8231 tree v4hi_ftype_v2si_v2si
8232 = build_function_type (V4HI_type_node,
8233 tree_cons (NULL_TREE, V2SI_type_node,
8234 tree_cons (NULL_TREE, V2SI_type_node,
8236 tree v4sf_ftype_v4sf_v4sf_int
8237 = build_function_type (V4SF_type_node,
8238 tree_cons (NULL_TREE, V4SF_type_node,
8239 tree_cons (NULL_TREE, V4SF_type_node,
8240 tree_cons (NULL_TREE,
8243 tree v4hi_ftype_v8qi_v8qi
8244 = build_function_type (V4HI_type_node,
8245 tree_cons (NULL_TREE, V8QI_type_node,
8246 tree_cons (NULL_TREE, V8QI_type_node,
8248 tree v2si_ftype_v4hi_v4hi
8249 = build_function_type (V2SI_type_node,
8250 tree_cons (NULL_TREE, V4HI_type_node,
8251 tree_cons (NULL_TREE, V4HI_type_node,
8253 tree v4hi_ftype_v4hi_int
8254 = build_function_type (V4HI_type_node,
8255 tree_cons (NULL_TREE, V4HI_type_node,
8256 tree_cons (NULL_TREE, integer_type_node,
8258 tree di_ftype_di_int
8259 = build_function_type (long_long_unsigned_type_node,
8260 tree_cons (NULL_TREE, long_long_unsigned_type_node,
8261 tree_cons (NULL_TREE, integer_type_node,
8263 tree v8qi_ftype_v8qi_di
8264 = build_function_type (V8QI_type_node,
8265 tree_cons (NULL_TREE, V8QI_type_node,
8266 tree_cons (NULL_TREE,
8267 long_long_integer_type_node,
8269 tree v4hi_ftype_v4hi_di
8270 = build_function_type (V4HI_type_node,
8271 tree_cons (NULL_TREE, V4HI_type_node,
8272 tree_cons (NULL_TREE,
8273 long_long_integer_type_node,
8275 tree v2si_ftype_v2si_di
8276 = build_function_type (V2SI_type_node,
8277 tree_cons (NULL_TREE, V2SI_type_node,
8278 tree_cons (NULL_TREE,
8279 long_long_integer_type_node,
8281 tree void_ftype_void
8282 = build_function_type (void_type_node, endlink);
8283 tree void_ftype_pchar_int
8284 = build_function_type (void_type_node,
8285 tree_cons (NULL_TREE, pchar_type_node,
8286 tree_cons (NULL_TREE, integer_type_node,
8288 tree void_ftype_unsigned
8289 = build_function_type (void_type_node,
8290 tree_cons (NULL_TREE, unsigned_type_node,
8292 tree unsigned_ftype_void
8293 = build_function_type (unsigned_type_node, endlink);
8295 = build_function_type (long_long_unsigned_type_node, endlink);
8297 = build_function_type (intTI_type_node, endlink);
8298 tree v2si_ftype_v4sf
8299 = build_function_type (V2SI_type_node,
8300 tree_cons (NULL_TREE, V4SF_type_node,
8303 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
8304 tree_cons (NULL_TREE, V8QI_type_node,
8305 tree_cons (NULL_TREE,
8308 tree void_ftype_v8qi_v8qi_pchar
8309 = build_function_type (void_type_node, maskmovq_args);
8310 tree v4sf_ftype_pfloat
8311 = build_function_type (V4SF_type_node,
8312 tree_cons (NULL_TREE, pfloat_type_node,
8314 tree v4sf_ftype_float
8315 = build_function_type (V4SF_type_node,
8316 tree_cons (NULL_TREE, float_type_node,
8318 tree v4sf_ftype_float_float_float_float
8319 = build_function_type (V4SF_type_node,
8320 tree_cons (NULL_TREE, float_type_node,
8321 tree_cons (NULL_TREE, float_type_node,
8322 tree_cons (NULL_TREE,
8324 tree_cons (NULL_TREE,
8327 /* @@@ the type is bogus */
8328 tree v4sf_ftype_v4sf_pv2si
8329 = build_function_type (V4SF_type_node,
8330 tree_cons (NULL_TREE, V4SF_type_node,
8331 tree_cons (NULL_TREE, pv2si_type_node,
8333 tree v4sf_ftype_pv2si_v4sf
8334 = build_function_type (V4SF_type_node,
8335 tree_cons (NULL_TREE, V4SF_type_node,
8336 tree_cons (NULL_TREE, pv2si_type_node,
8338 tree void_ftype_pfloat_v4sf
8339 = build_function_type (void_type_node,
8340 tree_cons (NULL_TREE, pfloat_type_node,
8341 tree_cons (NULL_TREE, V4SF_type_node,
8343 tree void_ftype_pdi_di
8344 = build_function_type (void_type_node,
8345 tree_cons (NULL_TREE, pdi_type_node,
8346 tree_cons (NULL_TREE,
8347 long_long_unsigned_type_node,
8349 /* Normal vector unops. */
8350 tree v4sf_ftype_v4sf
8351 = build_function_type (V4SF_type_node,
8352 tree_cons (NULL_TREE, V4SF_type_node,
8355 /* Normal vector binops. */
8356 tree v4sf_ftype_v4sf_v4sf
8357 = build_function_type (V4SF_type_node,
8358 tree_cons (NULL_TREE, V4SF_type_node,
8359 tree_cons (NULL_TREE, V4SF_type_node,
8361 tree v8qi_ftype_v8qi_v8qi
8362 = build_function_type (V8QI_type_node,
8363 tree_cons (NULL_TREE, V8QI_type_node,
8364 tree_cons (NULL_TREE, V8QI_type_node,
8366 tree v4hi_ftype_v4hi_v4hi
8367 = build_function_type (V4HI_type_node,
8368 tree_cons (NULL_TREE, V4HI_type_node,
8369 tree_cons (NULL_TREE, V4HI_type_node,
8371 tree v2si_ftype_v2si_v2si
8372 = build_function_type (V2SI_type_node,
8373 tree_cons (NULL_TREE, V2SI_type_node,
8374 tree_cons (NULL_TREE, V2SI_type_node,
8377 = build_function_type (intTI_type_node,
8378 tree_cons (NULL_TREE, intTI_type_node,
8379 tree_cons (NULL_TREE, intTI_type_node,
8382 = build_function_type (long_long_unsigned_type_node,
8383 tree_cons (NULL_TREE, long_long_unsigned_type_node,
8384 tree_cons (NULL_TREE,
8385 long_long_unsigned_type_node,
8388 /* Add all builtins that are more or less simple operations on two
8390 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
8392 /* Use one of the operands; the target can have a different mode for
8393 mask-generating compares. */
8394 enum machine_mode mode;
8399 mode = insn_data[d->icode].operand[1].mode;
8401 if (! TARGET_SSE && ! VALID_MMX_REG_MODE (mode))
8407 type = v4sf_ftype_v4sf_v4sf;
8410 type = v8qi_ftype_v8qi_v8qi;
8413 type = v4hi_ftype_v4hi_v4hi;
8416 type = v2si_ftype_v2si_v2si;
8419 type = ti_ftype_ti_ti;
8422 type = di_ftype_di_di;
8429 /* Override for comparisons. */
8430 if (d->icode == CODE_FOR_maskcmpv4sf3
8431 || d->icode == CODE_FOR_maskncmpv4sf3
8432 || d->icode == CODE_FOR_vmmaskcmpv4sf3
8433 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
8434 type = v4si_ftype_v4sf_v4sf;
8436 def_builtin (d->name, type, d->code);
8439 /* Add the remaining MMX insns with somewhat more complicated types. */
8440 def_builtin ("__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
8441 def_builtin ("__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
8442 def_builtin ("__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
8443 def_builtin ("__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
8444 def_builtin ("__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
8445 def_builtin ("__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
8446 def_builtin ("__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
8447 def_builtin ("__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
8448 def_builtin ("__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
8450 def_builtin ("__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
8451 def_builtin ("__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
8452 def_builtin ("__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
8454 def_builtin ("__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
8455 def_builtin ("__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
8457 def_builtin ("__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
8458 def_builtin ("__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
8460 /* Everything beyond this point is SSE only. */
8464 /* comi/ucomi insns. */
8465 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
8466 def_builtin (d->name, int_ftype_v4sf_v4sf, d->code);
8468 def_builtin ("__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
8469 def_builtin ("__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
8470 def_builtin ("__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
8472 def_builtin ("__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
8473 def_builtin ("__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
8474 def_builtin ("__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
8475 def_builtin ("__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
8476 def_builtin ("__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
8477 def_builtin ("__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
8479 def_builtin ("__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
8480 def_builtin ("__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
8482 def_builtin ("__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
8484 def_builtin ("__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
8485 def_builtin ("__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
8486 def_builtin ("__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
8487 def_builtin ("__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
8488 def_builtin ("__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
8489 def_builtin ("__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
8491 def_builtin ("__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
8492 def_builtin ("__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
8493 def_builtin ("__builtin_ia32_storehps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
8494 def_builtin ("__builtin_ia32_storelps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
8496 def_builtin ("__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
8497 def_builtin ("__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
8498 def_builtin ("__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
8499 def_builtin ("__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
8501 def_builtin ("__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
8502 def_builtin ("__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
8504 def_builtin ("__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
8506 def_builtin ("__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
8507 def_builtin ("__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
8508 def_builtin ("__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
8509 def_builtin ("__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
8510 def_builtin ("__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
8511 def_builtin ("__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
8513 def_builtin ("__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
8515 /* Composite intrinsics. */
8516 def_builtin ("__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
8517 def_builtin ("__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
8518 def_builtin ("__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
8519 def_builtin ("__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
8520 def_builtin ("__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
8521 def_builtin ("__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
8522 def_builtin ("__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
8525 /* Errors in the source file can cause expand_expr to return const0_rtx
8526 where we expect a vector. To avoid crashing, use one of the vector
8527 clear instructions. */
8529 safe_vector_operand (x, mode)
8531 enum machine_mode mode;
8533 if (x != const0_rtx)
8535 x = gen_reg_rtx (mode);
8537 if (VALID_MMX_REG_MODE (mode))
8538 emit_insn (gen_mmx_clrdi (mode == DImode ? x
8539 : gen_rtx_SUBREG (DImode, x, 0)));
8541 emit_insn (gen_sse_clrti (mode == TImode ? x
8542 : gen_rtx_SUBREG (TImode, x, 0)));
8546 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
8549 ix86_expand_binop_builtin (icode, arglist, target)
8550 enum insn_code icode;
8555 tree arg0 = TREE_VALUE (arglist);
8556 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8557 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8558 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8559 enum machine_mode tmode = insn_data[icode].operand[0].mode;
8560 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
8561 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
8563 if (VECTOR_MODE_P (mode0))
8564 op0 = safe_vector_operand (op0, mode0);
8565 if (VECTOR_MODE_P (mode1))
8566 op1 = safe_vector_operand (op1, mode1);
8569 || GET_MODE (target) != tmode
8570 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8571 target = gen_reg_rtx (tmode);
8573 /* In case the insn wants input operands in modes different from
8574 the result, abort. */
8575 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
8578 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8579 op0 = copy_to_mode_reg (mode0, op0);
8580 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8581 op1 = copy_to_mode_reg (mode1, op1);
8583 pat = GEN_FCN (icode) (target, op0, op1);
8590 /* Subroutine of ix86_expand_builtin to take care of stores. */
8593 ix86_expand_store_builtin (icode, arglist, shuffle)
8594 enum insn_code icode;
8599 tree arg0 = TREE_VALUE (arglist);
8600 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8601 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8602 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8603 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
8604 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
8606 if (VECTOR_MODE_P (mode1))
8607 op1 = safe_vector_operand (op1, mode1);
8609 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8610 if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
8611 op1 = copy_to_mode_reg (mode1, op1);
8613 emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
8614 pat = GEN_FCN (icode) (op0, op1);
8620 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
8623 ix86_expand_unop_builtin (icode, arglist, target, do_load)
8624 enum insn_code icode;
8630 tree arg0 = TREE_VALUE (arglist);
8631 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8632 enum machine_mode tmode = insn_data[icode].operand[0].mode;
8633 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
8636 || GET_MODE (target) != tmode
8637 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8638 target = gen_reg_rtx (tmode);
8640 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8643 if (VECTOR_MODE_P (mode0))
8644 op0 = safe_vector_operand (op0, mode0);
8646 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8647 op0 = copy_to_mode_reg (mode0, op0);
8650 pat = GEN_FCN (icode) (target, op0);
8657 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
8658 sqrtss, rsqrtss, rcpss. */
8661 ix86_expand_unop1_builtin (icode, arglist, target)
8662 enum insn_code icode;
8667 tree arg0 = TREE_VALUE (arglist);
8668 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8669 enum machine_mode tmode = insn_data[icode].operand[0].mode;
8670 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
8673 || GET_MODE (target) != tmode
8674 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8675 target = gen_reg_rtx (tmode);
8677 if (VECTOR_MODE_P (mode0))
8678 op0 = safe_vector_operand (op0, mode0);
8680 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8681 op0 = copy_to_mode_reg (mode0, op0);
8683 pat = GEN_FCN (icode) (target, op0, op0);
8690 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
8693 ix86_expand_sse_compare (d, arglist, target)
8694 struct builtin_description *d;
8699 tree arg0 = TREE_VALUE (arglist);
8700 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8701 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8702 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8704 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
8705 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
8706 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
8707 enum rtx_code comparison = d->comparison;
8709 if (VECTOR_MODE_P (mode0))
8710 op0 = safe_vector_operand (op0, mode0);
8711 if (VECTOR_MODE_P (mode1))
8712 op1 = safe_vector_operand (op1, mode1);
8714 /* Swap operands if we have a comparison that isn't available in
8718 target = gen_reg_rtx (tmode);
8719 emit_move_insn (target, op1);
8722 comparison = swap_condition (comparison);
8725 || GET_MODE (target) != tmode
8726 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
8727 target = gen_reg_rtx (tmode);
8729 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
8730 op0 = copy_to_mode_reg (mode0, op0);
8731 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
8732 op1 = copy_to_mode_reg (mode1, op1);
8734 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
8735 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
8742 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
8745 ix86_expand_sse_comi (d, arglist, target)
8746 struct builtin_description *d;
8751 tree arg0 = TREE_VALUE (arglist);
8752 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8753 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8754 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8756 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
8757 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
8758 enum rtx_code comparison = d->comparison;
8760 if (VECTOR_MODE_P (mode0))
8761 op0 = safe_vector_operand (op0, mode0);
8762 if (VECTOR_MODE_P (mode1))
8763 op1 = safe_vector_operand (op1, mode1);
8765 /* Swap operands if we have a comparison that isn't available in
8772 comparison = swap_condition (comparison);
8775 target = gen_reg_rtx (SImode);
8776 emit_move_insn (target, const0_rtx);
8777 target = gen_rtx_SUBREG (QImode, target, 0);
8779 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
8780 op0 = copy_to_mode_reg (mode0, op0);
8781 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
8782 op1 = copy_to_mode_reg (mode1, op1);
8784 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
8785 pat = GEN_FCN (d->icode) (op0, op1, op2);
8789 emit_insn (gen_setcc_2 (target, op2));
8794 /* Expand an expression EXP that calls a built-in function,
8795 with result going to TARGET if that's convenient
8796 (and in mode MODE if that's convenient).
8797 SUBTARGET may be used as the target for computing one of EXP's operands.
8798 IGNORE is nonzero if the value is to be ignored. */
8801 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
8804 rtx subtarget ATTRIBUTE_UNUSED;
8805 enum machine_mode mode ATTRIBUTE_UNUSED;
8806 int ignore ATTRIBUTE_UNUSED;
8808 struct builtin_description *d;
8810 enum insn_code icode;
8811 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8812 tree arglist = TREE_OPERAND (exp, 1);
8813 tree arg0, arg1, arg2, arg3;
8814 rtx op0, op1, op2, pat;
8815 enum machine_mode tmode, mode0, mode1, mode2;
8816 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8820 case IX86_BUILTIN_EMMS:
8821 emit_insn (gen_emms ());
8824 case IX86_BUILTIN_SFENCE:
8825 emit_insn (gen_sfence ());
8828 case IX86_BUILTIN_M_FROM_INT:
8829 target = gen_reg_rtx (DImode);
8830 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8831 emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
8834 case IX86_BUILTIN_M_TO_INT:
8835 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8836 op0 = copy_to_mode_reg (DImode, op0);
8837 target = gen_reg_rtx (SImode);
8838 emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
8841 case IX86_BUILTIN_PEXTRW:
8842 icode = CODE_FOR_mmx_pextrw;
8843 arg0 = TREE_VALUE (arglist);
8844 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8845 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8846 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8847 tmode = insn_data[icode].operand[0].mode;
8848 mode0 = insn_data[icode].operand[1].mode;
8849 mode1 = insn_data[icode].operand[2].mode;
8851 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8852 op0 = copy_to_mode_reg (mode0, op0);
8853 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8855 /* @@@ better error message */
8856 error ("selector must be an immediate");
8860 || GET_MODE (target) != tmode
8861 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8862 target = gen_reg_rtx (tmode);
8863 pat = GEN_FCN (icode) (target, op0, op1);
8869 case IX86_BUILTIN_PINSRW:
8870 icode = CODE_FOR_mmx_pinsrw;
8871 arg0 = TREE_VALUE (arglist);
8872 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8873 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8874 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8875 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8876 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8877 tmode = insn_data[icode].operand[0].mode;
8878 mode0 = insn_data[icode].operand[1].mode;
8879 mode1 = insn_data[icode].operand[2].mode;
8880 mode2 = insn_data[icode].operand[3].mode;
8882 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8883 op0 = copy_to_mode_reg (mode0, op0);
8884 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8885 op1 = copy_to_mode_reg (mode1, op1);
8886 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
8888 /* @@@ better error message */
8889 error ("selector must be an immediate");
8893 || GET_MODE (target) != tmode
8894 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8895 target = gen_reg_rtx (tmode);
8896 pat = GEN_FCN (icode) (target, op0, op1, op2);
8902 case IX86_BUILTIN_MASKMOVQ:
8903 icode = CODE_FOR_mmx_maskmovq;
8904 /* Note the arg order is different from the operand order. */
8905 arg1 = TREE_VALUE (arglist);
8906 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
8907 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8908 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8909 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8910 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8911 mode0 = insn_data[icode].operand[0].mode;
8912 mode1 = insn_data[icode].operand[1].mode;
8913 mode2 = insn_data[icode].operand[2].mode;
8915 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8916 op0 = copy_to_mode_reg (mode0, op0);
8917 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
8918 op1 = copy_to_mode_reg (mode1, op1);
8919 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
8920 op2 = copy_to_mode_reg (mode2, op2);
8921 pat = GEN_FCN (icode) (op0, op1, op2);
8927 case IX86_BUILTIN_SQRTSS:
8928 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
8929 case IX86_BUILTIN_RSQRTSS:
8930 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
8931 case IX86_BUILTIN_RCPSS:
8932 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
8934 case IX86_BUILTIN_LOADAPS:
8935 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
8937 case IX86_BUILTIN_LOADUPS:
8938 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
8940 case IX86_BUILTIN_STOREAPS:
8941 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
8942 case IX86_BUILTIN_STOREUPS:
8943 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);
8945 case IX86_BUILTIN_LOADSS:
8946 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
8948 case IX86_BUILTIN_STORESS:
8949 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
8951 case IX86_BUILTIN_LOADHPS:
8952 case IX86_BUILTIN_LOADLPS:
8953 icode = (fcode == IX86_BUILTIN_LOADHPS
8954 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
8955 arg0 = TREE_VALUE (arglist);
8956 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8957 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8958 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8959 tmode = insn_data[icode].operand[0].mode;
8960 mode0 = insn_data[icode].operand[1].mode;
8961 mode1 = insn_data[icode].operand[2].mode;
8963 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8964 op0 = copy_to_mode_reg (mode0, op0);
8965 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
8967 || GET_MODE (target) != tmode
8968 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8969 target = gen_reg_rtx (tmode);
8970 pat = GEN_FCN (icode) (target, op0, op1);
8976 case IX86_BUILTIN_STOREHPS:
8977 case IX86_BUILTIN_STORELPS:
8978 icode = (fcode == IX86_BUILTIN_STOREHPS
8979 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
8980 arg0 = TREE_VALUE (arglist);
8981 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8982 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8983 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8984 mode0 = insn_data[icode].operand[1].mode;
8985 mode1 = insn_data[icode].operand[2].mode;
8987 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8988 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8989 op1 = copy_to_mode_reg (mode1, op1);
8991 pat = GEN_FCN (icode) (op0, op0, op1);
8997 case IX86_BUILTIN_MOVNTPS:
8998 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
8999 case IX86_BUILTIN_MOVNTQ:
9000 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);
9002 case IX86_BUILTIN_LDMXCSR:
9003 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
9004 target = assign_386_stack_local (SImode, 0);
9005 emit_move_insn (target, op0);
9006 emit_insn (gen_ldmxcsr (target));
9009 case IX86_BUILTIN_STMXCSR:
9010 target = assign_386_stack_local (SImode, 0);
9011 emit_insn (gen_stmxcsr (target));
9012 return copy_to_mode_reg (SImode, target);
9014 case IX86_BUILTIN_PREFETCH:
9015 icode = CODE_FOR_prefetch;
9016 arg0 = TREE_VALUE (arglist);
9017 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9018 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9019 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9020 mode0 = insn_data[icode].operand[0].mode;
9021 mode1 = insn_data[icode].operand[1].mode;
9023 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
9025 /* @@@ better error message */
9026 error ("selector must be an immediate");
9030 op0 = copy_to_mode_reg (Pmode, op0);
9031 pat = GEN_FCN (icode) (op0, op1);
9037 case IX86_BUILTIN_SHUFPS:
9038 icode = CODE_FOR_sse_shufps;
9039 arg0 = TREE_VALUE (arglist);
9040 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9041 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
9042 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9043 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9044 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
9045 tmode = insn_data[icode].operand[0].mode;
9046 mode0 = insn_data[icode].operand[1].mode;
9047 mode1 = insn_data[icode].operand[2].mode;
9048 mode2 = insn_data[icode].operand[3].mode;
9050 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
9051 op0 = copy_to_mode_reg (mode0, op0);
9052 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
9053 op1 = copy_to_mode_reg (mode1, op1);
9054 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
9056 /* @@@ better error message */
9057 error ("mask must be an immediate");
9061 || GET_MODE (target) != tmode
9062 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9063 target = gen_reg_rtx (tmode);
9064 pat = GEN_FCN (icode) (target, op0, op1, op2);
9070 case IX86_BUILTIN_PSHUFW:
9071 icode = CODE_FOR_mmx_pshufw;
9072 arg0 = TREE_VALUE (arglist);
9073 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9074 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9075 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9076 tmode = insn_data[icode].operand[0].mode;
9077 mode0 = insn_data[icode].operand[2].mode;
9078 mode1 = insn_data[icode].operand[3].mode;
9080 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
9081 op0 = copy_to_mode_reg (mode0, op0);
9082 if (! (*insn_data[icode].operand[3].predicate) (op1, mode1))
9084 /* @@@ better error message */
9085 error ("mask must be an immediate");
9089 || GET_MODE (target) != tmode
9090 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9091 target = gen_reg_rtx (tmode);
9092 pat = GEN_FCN (icode) (target, target, op0, op1);
9098 /* Composite intrinsics. */
9099 case IX86_BUILTIN_SETPS1:
9100 target = assign_386_stack_local (SFmode, 0);
9101 arg0 = TREE_VALUE (arglist);
9102 emit_move_insn (change_address (target, SFmode, XEXP (target, 0)),
9103 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
9104 op0 = gen_reg_rtx (V4SFmode);
9105 emit_insn (gen_sse_loadss (op0, change_address (target, V4SFmode,
9106 XEXP (target, 0))));
9107 emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
9110 case IX86_BUILTIN_SETPS:
9111 target = assign_386_stack_local (V4SFmode, 0);
9112 op0 = change_address (target, SFmode, XEXP (target, 0));
9113 arg0 = TREE_VALUE (arglist);
9114 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9115 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
9116 arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
9117 emit_move_insn (op0,
9118 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
9119 emit_move_insn (adj_offsettable_operand (op0, 4),
9120 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
9121 emit_move_insn (adj_offsettable_operand (op0, 8),
9122 expand_expr (arg2, NULL_RTX, VOIDmode, 0));
9123 emit_move_insn (adj_offsettable_operand (op0, 12),
9124 expand_expr (arg3, NULL_RTX, VOIDmode, 0));
9125 op0 = gen_reg_rtx (V4SFmode);
9126 emit_insn (gen_sse_movaps (op0, target));
9129 case IX86_BUILTIN_CLRPS:
9130 target = gen_reg_rtx (TImode);
9131 emit_insn (gen_sse_clrti (target));
9134 case IX86_BUILTIN_LOADRPS:
9135 target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
9136 gen_reg_rtx (V4SFmode), 1);
9137 emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
9140 case IX86_BUILTIN_LOADPS1:
9141 target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
9142 gen_reg_rtx (V4SFmode), 1);
9143 emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
9146 case IX86_BUILTIN_STOREPS1:
9147 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
9148 case IX86_BUILTIN_STORERPS:
9149 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);
9151 case IX86_BUILTIN_MMX_ZERO:
9152 target = gen_reg_rtx (DImode);
9153 emit_insn (gen_mmx_clrdi (target));
9160 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
9161 if (d->code == fcode)
9163 /* Compares are treated specially. */
9164 if (d->icode == CODE_FOR_maskcmpv4sf3
9165 || d->icode == CODE_FOR_vmmaskcmpv4sf3
9166 || d->icode == CODE_FOR_maskncmpv4sf3
9167 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
9168 return ix86_expand_sse_compare (d, arglist, target);
9170 return ix86_expand_binop_builtin (d->icode, arglist, target);
9173 for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
9174 if (d->code == fcode)
9175 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
9177 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
9178 if (d->code == fcode)
9179 return ix86_expand_sse_comi (d, arglist, target);
9181 /* @@@ Should really do something sensible here. */
9185 /* Store OPERAND to the memory after reload is completed. This means
9186 that we can't easilly use assign_stack_local. */
/* NOTE(review): this excerpt is missing interior lines (the return type,
   the body opening, the failure path for !reload_completed, the switch on
   MODE selecting the cases below, the emit_insn wrappers and the SET source
   operands).  Comments describe only what is visible; confirm the omitted
   details against the complete file.  */
9188 ix86_force_to_memory (mode, operand)
9189 enum machine_mode mode;
/* Contract (per header comment above): valid only after reload has
   completed — presumably the missing line aborts otherwise.  */
9192 if (!reload_completed)
/* Apparent DImode case: split the 64-bit operand into two SImode halves,
   then push each half through a PRE_DEC stack-pointer MEM (i.e. two
   push-like stores).  The emit_insn calls wrapping these SETs are among
   the missing lines.  */
9199 split_di (&operand, 1, operands, operands+1);
9201 gen_rtx_SET (VOIDmode,
9202 gen_rtx_MEM (SImode,
9203 gen_rtx_PRE_DEC (Pmode,
9204 stack_pointer_rtx)),
9207 gen_rtx_SET (VOIDmode,
9208 gen_rtx_MEM (SImode,
9209 gen_rtx_PRE_DEC (Pmode,
9210 stack_pointer_rtx)),
/* Apparent HImode (and narrower?) case: widen to SImode when partial
   register stalls are not a concern, so the store uses a full register.  */
9215 /* It is better to store HImodes as SImodes. */
9216 if (!TARGET_PARTIAL_REG_STALL)
9217 operand = gen_lowpart (SImode, operand);
/* Single push of the (possibly widened) operand via PRE_DEC of the
   stack pointer.  NOTE(review): PRE_DEC is built with SImode here but
   with Pmode in the DImode case above — looks inconsistent; verify
   against the full source.  */
9221 gen_rtx_SET (VOIDmode,
9222 gen_rtx_MEM (GET_MODE (operand),
9223 gen_rtx_PRE_DEC (SImode,
9224 stack_pointer_rtx)),
/* Result: a MEM in the requested MODE addressing the slot just pushed
   at the (now lowered) stack pointer.  Caller must release it with
   ix86_free_from_memory.  */
9230 return gen_rtx_MEM (mode, stack_pointer_rtx);
9233 /* Free operand from the memory. */
/* NOTE(review): companion to ix86_force_to_memory — deallocates the stack
   slot that function pushed, by bumping the stack pointer back up.  The
   excerpt is missing the return type, body opening, the DImode byte count,
   the final HImode/else operands of the conditional, and the closing of
   the emit_insn call; confirm against the complete file.  */
9235 ix86_free_from_memory (mode)
9236 enum machine_mode mode;
9238 /* Use LEA to deallocate stack space. In peephole2 it will be converted
9239 to pop or add instruction if registers are available. */
/* sp = sp + size-of-slot; the size is chosen by MODE (DImode larger,
   HImode treated specially under TARGET_PARTIAL_REG_STALL because
   ix86_force_to_memory may have widened it to SImode).  */
9240 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9241 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9242 GEN_INT (mode == DImode
9244 : mode == HImode && TARGET_PARTIAL_REG_STALL
9249 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
9250 QImode must go into class Q_REGS.
9251 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
9252 movdf to do mem-to-mem moves through integer regs. */
/* NOTE(review): the excerpt is missing the return type, the body opening,
   and the return statements of several branches (presumably NO_REGS for
   the reject cases and Q_REGS / CLASS for the tail).  Comments below
   describe only the visible conditions.  */
9254 ix86_preferred_reload_class (x, class)
9256 enum reg_class class;
/* Non-integral (real) CONST_DOUBLE constants get special treatment.  */
9258 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
9260 /* SSE can't load any constant directly yet. */
9261 if (SSE_CLASS_P (class))
9263 /* Floats can load 0 and 1. */
9264 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
9266 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
9267 if (MAYBE_SSE_CLASS_P (class))
9268 return (reg_class_subset_p (class, GENERAL_REGS)
9269 ? GENERAL_REGS : FLOAT_REGS);
9273 /* General regs can load everything. */
9274 if (reg_class_subset_p (class, GENERAL_REGS))
9275 return GENERAL_REGS;
9276 /* In case we haven't resolved FLOAT or SSE yet, give up. */
9277 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
/* MMX registers cannot be loaded from constants directly either.  */
9280 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
/* QImode values must go to a class that is a subset of Q_REGS (per the
   header comment); the taken-branch return line is missing here.  */
9282 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
9287 /* If we are copying between general and FP registers, we need a memory
9288 location. The same is true for SSE and MMX registers.
9290 The macro can't work reliably when one of the CLASSES is class containing
9291 registers from multiple units (SSE, MMX, integer). We avoid this by never
9292 combining those units in single alternative in the machine description.
9293 Ensure that this constraint holds to avoid unexpected surprises.
9295 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
9296 enforce these sanity checks. */
/* NOTE(review): excerpt is missing the return type, the body opening, the
   declaration line for STRICT, and the lines between the sanity check and
   the final return (presumably the abort-or-return-true under STRICT).  */
9298 ix86_secondary_memory_needed (class1, class2, mode, strict)
9299 enum reg_class class1, class2;
9300 enum machine_mode mode;
/* Sanity check: each class must be purely one unit — a class that "maybe"
   contains FP/SSE/MMX registers must actually be an FP/SSE/MMX class.  */
9303 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
9304 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
9305 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
9306 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
9307 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
9308 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
/* Memory is needed when the move crosses the x87 boundary, or crosses the
   SSE or MMX boundary in any mode other than SImode (SImode can move
   directly between those units and the integer registers).  */
9315 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
9316 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
9317 && (mode) != SImode)
9318 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
9319 && (mode) != SImode));
9321 /* Return the cost of moving data from a register in class CLASS1 to
9322 one in class CLASS2.
9324 It is not required that the cost always equal 2 when FROM is the same as TO;
9325 on some machines it is expensive to move between registers if they are not
9326 general registers. */
/* NOTE(review): excerpt is missing the return type, body opening, the
   return for the memory-size-mismatch case (the "cost of 20" mentioned in
   the comment), and the trailing default return for plain integer moves.  */
9328 ix86_register_move_cost (mode, class1, class2)
9329 enum machine_mode mode;
9330 enum reg_class class1, class2;
9332 /* In case we require secondary memory, compute cost of the store followed
9333 by load. In case of copying from general_purpose_register we may emit
9334 multiple stores followed by single load causing memory size mismatch
9335 stall. Count this as arbitarily high cost of 20. */
9336 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
/* More source registers than destination registers implies the multi-store
   single-load pattern described above; its return line is missing here.  */
9338 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
/* Otherwise the round trip costs a store plus a load.  */
9340 return (MEMORY_MOVE_COST (mode, class1, 0)
9341 + MEMORY_MOVE_COST (mode, class2, 1));
9343 /* Moves between SSE/MMX and integer unit are expensive.
9344 ??? We should make this cost CPU specific. */
9345 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
9346 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
9347 return ix86_cost->mmxsse_to_integer;
/* Intra-unit costs come from the per-processor cost table.  */
9348 if (MAYBE_FLOAT_CLASS_P (class1))
9349 return ix86_cost->fp_move;
9350 if (MAYBE_SSE_CLASS_P (class1))
9351 return ix86_cost->sse_move;
9352 if (MAYBE_MMX_CLASS_P (class1))
9353 return ix86_cost->mmx_move;
9357 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* NOTE(review): excerpt is missing the return type, the declaration line
   for REGNO, the body opening, and several return lines (the value
   returned for CC/RANDOM/PARTIAL_INT modes, for non-int/non-fp modes, and
   the final return after the QImode check).  Comments below describe only
   the visible tests.  */
9359 ix86_hard_regno_mode_ok (regno, mode)
9361 enum machine_mode mode;
9363 /* Flags and only flags can only hold CCmode values. */
9364 if (CC_REGNO_P (regno))
9365 return GET_MODE_CLASS (mode) == MODE_CC;
/* Conversely, no other register may hold CC, RANDOM, or PARTIAL_INT
   modes; the (presumably zero) return line is missing here.  */
9366 if (GET_MODE_CLASS (mode) == MODE_CC
9367 || GET_MODE_CLASS (mode) == MODE_RANDOM
9368 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
/* x87, SSE and MMX registers each accept only their own mode sets.  */
9370 if (FP_REGNO_P (regno))
9371 return VALID_FP_MODE_P (mode);
9372 if (SSE_REGNO_P (regno))
9373 return VALID_SSE_REG_MODE (mode);
9374 if (MMX_REGNO_P (regno))
9375 return VALID_MMX_REG_MODE (mode);
9376 /* We handle both integer and floats in the general purpose registers.
9377 In future we should be able to handle vector modes as well. */
9378 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
9380 /* Take care for QImode values - they can be in non-QI regs, but then
9381 they do cause partial register stalls. */
/* Registers 0-3 (eax/ebx/ecx/edx) have addressable low bytes; in 64-bit
   mode every register does, so QImode is unrestricted there.  */
9382 if (regno < 4 || mode != QImode || TARGET_64BIT)
/* Otherwise allow QImode in a non-QI register only during/after reload,
   or when partial register stalls are not a concern for the target.  */
9384 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
9387 /* Return the cost of moving data of mode M between a
9388 register and memory. A value of 2 is the default; this cost is
9389 relative to those in `REGISTER_MOVE_COST'.
9391 If moving between registers and memory is more expensive than
9392 between two registers, you should define this macro to express the
9395 Model also increased moving costs of QImode registers in non
9399 ix86_memory_move_cost (mode, class, in)
9400 enum machine_mode mode;
9401 enum reg_class class;
9404 if (FLOAT_CLASS_P (class))
9422 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
9424 if (SSE_CLASS_P (class))
9427 switch (GET_MODE_SIZE (mode))
9441 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
9443 if (MMX_CLASS_P (class))
9446 switch (GET_MODE_SIZE (mode))
9457 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
9459 switch (GET_MODE_SIZE (mode))
9463 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
9464 : ix86_cost->movzbl_load);
9466 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
9467 : ix86_cost->int_store[0] + 4);
9470 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
9472 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
9475 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
9476 * (int) GET_MODE_SIZE (mode) / 4);