1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
3 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
28 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "basic-block.h"
44 #include "target-def.h"
46 #ifndef CHECK_STACK_LIMIT
47 #define CHECK_STACK_LIMIT -1
50 /* Processor costs (relative to an add) */
52 struct processor_costs size_cost = { /* costs for tunning for size */
53 2, /* cost of an add instruction */
54 3, /* cost of a lea instruction */
55 2, /* variable shift costs */
56 3, /* constant shift costs */
57 3, /* cost of starting a multiply */
58 0, /* cost of multiply per each bit set */
59 3, /* cost of a divide/mod */
60 3, /* cost of movsx */
61 3, /* cost of movzx */
64 2, /* cost for loading QImode using movzbl */
65 {2, 2, 2}, /* cost of loading integer registers
66 in QImode, HImode and SImode.
67 Relative to reg-reg move (2). */
68 {2, 2, 2}, /* cost of storing integer registers */
69 2, /* cost of reg,reg fld/fst */
70 {2, 2, 2}, /* cost of loading fp registers
71 in SFmode, DFmode and XFmode */
72 {2, 2, 2}, /* cost of loading integer registers */
73 3, /* cost of moving MMX register */
74 {3, 3}, /* cost of loading MMX registers
75 in SImode and DImode */
76 {3, 3}, /* cost of storing MMX registers
77 in SImode and DImode */
78 3, /* cost of moving SSE register */
79 {3, 3, 3}, /* cost of loading SSE registers
80 in SImode, DImode and TImode */
81 {3, 3, 3}, /* cost of storing SSE registers
82 in SImode, DImode and TImode */
83 3, /* MMX or SSE register to integer */
85 /* Processor costs (relative to an add) */
87 struct processor_costs i386_cost = { /* 386 specific costs */
88 1, /* cost of an add instruction */
89 1, /* cost of a lea instruction */
90 3, /* variable shift costs */
91 2, /* constant shift costs */
92 6, /* cost of starting a multiply */
93 1, /* cost of multiply per each bit set */
94 23, /* cost of a divide/mod */
95 3, /* cost of movsx */
96 2, /* cost of movzx */
97 15, /* "large" insn */
99 4, /* cost for loading QImode using movzbl */
100 {2, 4, 2}, /* cost of loading integer registers
101 in QImode, HImode and SImode.
102 Relative to reg-reg move (2). */
103 {2, 4, 2}, /* cost of storing integer registers */
104 2, /* cost of reg,reg fld/fst */
105 {8, 8, 8}, /* cost of loading fp registers
106 in SFmode, DFmode and XFmode */
107 {8, 8, 8}, /* cost of loading integer registers */
108 2, /* cost of moving MMX register */
109 {4, 8}, /* cost of loading MMX registers
110 in SImode and DImode */
111 {4, 8}, /* cost of storing MMX registers
112 in SImode and DImode */
113 2, /* cost of moving SSE register */
114 {4, 8, 16}, /* cost of loading SSE registers
115 in SImode, DImode and TImode */
116 {4, 8, 16}, /* cost of storing SSE registers
117 in SImode, DImode and TImode */
118 3, /* MMX or SSE register to integer */
122 struct processor_costs i486_cost = { /* 486 specific costs */
123 1, /* cost of an add instruction */
124 1, /* cost of a lea instruction */
125 3, /* variable shift costs */
126 2, /* constant shift costs */
127 12, /* cost of starting a multiply */
128 1, /* cost of multiply per each bit set */
129 40, /* cost of a divide/mod */
130 3, /* cost of movsx */
131 2, /* cost of movzx */
132 15, /* "large" insn */
134 4, /* cost for loading QImode using movzbl */
135 {2, 4, 2}, /* cost of loading integer registers
136 in QImode, HImode and SImode.
137 Relative to reg-reg move (2). */
138 {2, 4, 2}, /* cost of storing integer registers */
139 2, /* cost of reg,reg fld/fst */
140 {8, 8, 8}, /* cost of loading fp registers
141 in SFmode, DFmode and XFmode */
142 {8, 8, 8}, /* cost of loading integer registers */
143 2, /* cost of moving MMX register */
144 {4, 8}, /* cost of loading MMX registers
145 in SImode and DImode */
146 {4, 8}, /* cost of storing MMX registers
147 in SImode and DImode */
148 2, /* cost of moving SSE register */
149 {4, 8, 16}, /* cost of loading SSE registers
150 in SImode, DImode and TImode */
151 {4, 8, 16}, /* cost of storing SSE registers
152 in SImode, DImode and TImode */
153 3 /* MMX or SSE register to integer */
157 struct processor_costs pentium_cost = {
158 1, /* cost of an add instruction */
159 1, /* cost of a lea instruction */
160 4, /* variable shift costs */
161 1, /* constant shift costs */
162 11, /* cost of starting a multiply */
163 0, /* cost of multiply per each bit set */
164 25, /* cost of a divide/mod */
165 3, /* cost of movsx */
166 2, /* cost of movzx */
167 8, /* "large" insn */
169 6, /* cost for loading QImode using movzbl */
170 {2, 4, 2}, /* cost of loading integer registers
171 in QImode, HImode and SImode.
172 Relative to reg-reg move (2). */
173 {2, 4, 2}, /* cost of storing integer registers */
174 2, /* cost of reg,reg fld/fst */
175 {2, 2, 6}, /* cost of loading fp registers
176 in SFmode, DFmode and XFmode */
177 {4, 4, 6}, /* cost of loading integer registers */
178 8, /* cost of moving MMX register */
179 {8, 8}, /* cost of loading MMX registers
180 in SImode and DImode */
181 {8, 8}, /* cost of storing MMX registers
182 in SImode and DImode */
183 2, /* cost of moving SSE register */
184 {4, 8, 16}, /* cost of loading SSE registers
185 in SImode, DImode and TImode */
186 {4, 8, 16}, /* cost of storing SSE registers
187 in SImode, DImode and TImode */
188 3 /* MMX or SSE register to integer */
192 struct processor_costs pentiumpro_cost = {
193 1, /* cost of an add instruction */
194 1, /* cost of a lea instruction */
195 1, /* variable shift costs */
196 1, /* constant shift costs */
197 4, /* cost of starting a multiply */
198 0, /* cost of multiply per each bit set */
199 17, /* cost of a divide/mod */
200 1, /* cost of movsx */
201 1, /* cost of movzx */
202 8, /* "large" insn */
204 2, /* cost for loading QImode using movzbl */
205 {4, 4, 4}, /* cost of loading integer registers
206 in QImode, HImode and SImode.
207 Relative to reg-reg move (2). */
208 {2, 2, 2}, /* cost of storing integer registers */
209 2, /* cost of reg,reg fld/fst */
210 {2, 2, 6}, /* cost of loading fp registers
211 in SFmode, DFmode and XFmode */
212 {4, 4, 6}, /* cost of loading integer registers */
213 2, /* cost of moving MMX register */
214 {2, 2}, /* cost of loading MMX registers
215 in SImode and DImode */
216 {2, 2}, /* cost of storing MMX registers
217 in SImode and DImode */
218 2, /* cost of moving SSE register */
219 {2, 2, 8}, /* cost of loading SSE registers
220 in SImode, DImode and TImode */
221 {2, 2, 8}, /* cost of storing SSE registers
222 in SImode, DImode and TImode */
223 3 /* MMX or SSE register to integer */
227 struct processor_costs k6_cost = {
228 1, /* cost of an add instruction */
229 2, /* cost of a lea instruction */
230 1, /* variable shift costs */
231 1, /* constant shift costs */
232 3, /* cost of starting a multiply */
233 0, /* cost of multiply per each bit set */
234 18, /* cost of a divide/mod */
235 2, /* cost of movsx */
236 2, /* cost of movzx */
237 8, /* "large" insn */
239 3, /* cost for loading QImode using movzbl */
240 {4, 5, 4}, /* cost of loading integer registers
241 in QImode, HImode and SImode.
242 Relative to reg-reg move (2). */
243 {2, 3, 2}, /* cost of storing integer registers */
244 4, /* cost of reg,reg fld/fst */
245 {6, 6, 6}, /* cost of loading fp registers
246 in SFmode, DFmode and XFmode */
247 {4, 4, 4}, /* cost of loading integer registers */
248 2, /* cost of moving MMX register */
249 {2, 2}, /* cost of loading MMX registers
250 in SImode and DImode */
251 {2, 2}, /* cost of storing MMX registers
252 in SImode and DImode */
253 2, /* cost of moving SSE register */
254 {2, 2, 8}, /* cost of loading SSE registers
255 in SImode, DImode and TImode */
256 {2, 2, 8}, /* cost of storing SSE registers
257 in SImode, DImode and TImode */
258 6 /* MMX or SSE register to integer */
262 struct processor_costs athlon_cost = {
263 1, /* cost of an add instruction */
264 2, /* cost of a lea instruction */
265 1, /* variable shift costs */
266 1, /* constant shift costs */
267 5, /* cost of starting a multiply */
268 0, /* cost of multiply per each bit set */
269 42, /* cost of a divide/mod */
270 1, /* cost of movsx */
271 1, /* cost of movzx */
272 8, /* "large" insn */
274 4, /* cost for loading QImode using movzbl */
275 {4, 5, 4}, /* cost of loading integer registers
276 in QImode, HImode and SImode.
277 Relative to reg-reg move (2). */
278 {2, 3, 2}, /* cost of storing integer registers */
279 4, /* cost of reg,reg fld/fst */
280 {6, 6, 20}, /* cost of loading fp registers
281 in SFmode, DFmode and XFmode */
282 {4, 4, 16}, /* cost of loading integer registers */
283 2, /* cost of moving MMX register */
284 {2, 2}, /* cost of loading MMX registers
285 in SImode and DImode */
286 {2, 2}, /* cost of storing MMX registers
287 in SImode and DImode */
288 2, /* cost of moving SSE register */
289 {2, 2, 8}, /* cost of loading SSE registers
290 in SImode, DImode and TImode */
291 {2, 2, 8}, /* cost of storing SSE registers
292 in SImode, DImode and TImode */
293 6 /* MMX or SSE register to integer */
297 struct processor_costs pentium4_cost = {
298 1, /* cost of an add instruction */
299 1, /* cost of a lea instruction */
300 8, /* variable shift costs */
301 8, /* constant shift costs */
302 30, /* cost of starting a multiply */
303 0, /* cost of multiply per each bit set */
304 112, /* cost of a divide/mod */
305 1, /* cost of movsx */
306 1, /* cost of movzx */
307 16, /* "large" insn */
309 2, /* cost for loading QImode using movzbl */
310 {4, 5, 4}, /* cost of loading integer registers
311 in QImode, HImode and SImode.
312 Relative to reg-reg move (2). */
313 {2, 3, 2}, /* cost of storing integer registers */
314 2, /* cost of reg,reg fld/fst */
315 {2, 2, 6}, /* cost of loading fp registers
316 in SFmode, DFmode and XFmode */
317 {4, 4, 6}, /* cost of loading integer registers */
318 2, /* cost of moving MMX register */
319 {2, 2}, /* cost of loading MMX registers
320 in SImode and DImode */
321 {2, 2}, /* cost of storing MMX registers
322 in SImode and DImode */
323 12, /* cost of moving SSE register */
324 {12, 12, 12}, /* cost of loading SSE registers
325 in SImode, DImode and TImode */
326 {2, 2, 8}, /* cost of storing SSE registers
327 in SImode, DImode and TImode */
328 10, /* MMX or SSE register to integer */
331 const struct processor_costs *ix86_cost = &pentium_cost;
333 /* Processor feature/optimization bitmasks. */
334 #define m_386 (1<<PROCESSOR_I386)
335 #define m_486 (1<<PROCESSOR_I486)
336 #define m_PENT (1<<PROCESSOR_PENTIUM)
337 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
338 #define m_K6 (1<<PROCESSOR_K6)
339 #define m_ATHLON (1<<PROCESSOR_ATHLON)
340 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
342 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
343 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
344 const int x86_zero_extend_with_and = m_486 | m_PENT;
345 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
346 const int x86_double_with_add = ~m_386;
347 const int x86_use_bit_test = m_386;
348 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
349 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
350 const int x86_3dnow_a = m_ATHLON;
351 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
352 const int x86_branch_hints = m_PENT4;
353 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
354 const int x86_partial_reg_stall = m_PPRO;
355 const int x86_use_loop = m_K6;
356 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
357 const int x86_use_mov0 = m_K6;
358 const int x86_use_cltd = ~(m_PENT | m_K6);
359 const int x86_read_modify_write = ~m_PENT;
360 const int x86_read_modify = ~(m_PENT | m_PPRO);
361 const int x86_split_long_moves = m_PPRO;
362 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
363 const int x86_single_stringop = m_386 | m_PENT4;
364 const int x86_qimode_math = ~(0);
365 const int x86_promote_qi_regs = 0;
366 const int x86_himode_math = ~(m_PPRO);
367 const int x86_promote_hi_regs = m_PPRO;
368 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
369 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
370 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
371 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
372 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
373 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
374 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
375 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
376 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
377 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
379 /* In case the avreage insn count for single function invocation is
380 lower than this constant, emit fast (but longer) prologue and
382 #define FAST_PROLOGUE_INSN_COUNT 30
383 /* Set by prologue expander and used by epilogue expander to determine
385 static int use_fast_prologue_epilogue;
387 #define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
389 static const char *const hi_reg_name[] = HI_REGISTER_NAMES; /* names for 16 bit regs */
390 static const char *const qi_reg_name[] = QI_REGISTER_NAMES; /* names for 8 bit regs (low) */
391 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; /* names for 8 bit regs (high) */
393 /* Array of the smallest class containing reg number REGNO, indexed by
394 REGNO. Used by REGNO_REG_CLASS in i386.h. */
396 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
399 AREG, DREG, CREG, BREG,
401 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
403 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
404 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
407 /* flags, fpsr, dirflag, frame */
408 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
409 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
411 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
413 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
414 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
415 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
419 /* The "default" register map used in 32bit mode. */
421 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
423 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
424 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
425 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
426 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
427 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
428 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
429 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
432 static int x86_64_int_parameter_registers[6] = {5 /*RDI*/, 4 /*RSI*/,
433 1 /*RDX*/, 2 /*RCX*/,
434 FIRST_REX_INT_REG /*R8 */,
435 FIRST_REX_INT_REG + 1 /*R9 */};
436 static int x86_64_int_return_registers[4] = {0 /*RAX*/, 1 /*RDI*/, 5, 4};
438 /* The "default" register map used in 64bit mode. */
439 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
441 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
442 33, 34, 35, 36, 37, 38, 39, 40 /* fp regs */
443 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
444 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
445 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
446 8,9,10,11,12,13,14,15, /* extended integer registers */
447 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
450 /* Define the register numbers to be used in Dwarf debugging information.
451 The SVR4 reference port C compiler uses the following register numbers
452 in its Dwarf output code:
453 0 for %eax (gcc regno = 0)
454 1 for %ecx (gcc regno = 2)
455 2 for %edx (gcc regno = 1)
456 3 for %ebx (gcc regno = 3)
457 4 for %esp (gcc regno = 7)
458 5 for %ebp (gcc regno = 6)
459 6 for %esi (gcc regno = 4)
460 7 for %edi (gcc regno = 5)
461 The following three DWARF register numbers are never generated by
462 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
463 believes these numbers have these meanings.
464 8 for %eip (no gcc equivalent)
465 9 for %eflags (gcc regno = 17)
466 10 for %trapno (no gcc equivalent)
467 It is not at all clear how we should number the FP stack registers
468 for the x86 architecture. If the version of SDB on x86/svr4 were
469 a bit less brain dead with respect to floating-point then we would
470 have a precedent to follow with respect to DWARF register numbers
471 for x86 FP registers, but the SDB on x86/svr4 is so completely
472 broken with respect to FP registers that it is hardly worth thinking
473 of it as something to strive for compatibility with.
474 The version of x86/svr4 SDB I have at the moment does (partially)
475 seem to believe that DWARF register number 11 is associated with
476 the x86 register %st(0), but that's about all. Higher DWARF
477 register numbers don't seem to be associated with anything in
478 particular, and even for DWARF regno 11, SDB only seems to under-
479 stand that it should say that a variable lives in %st(0) (when
480 asked via an `=' command) if we said it was in DWARF regno 11,
481 but SDB still prints garbage when asked for the value of the
482 variable in question (via a `/' command).
483 (Also note that the labels SDB prints for various FP stack regs
484 when doing an `x' command are all wrong.)
485 Note that these problems generally don't affect the native SVR4
486 C compiler because it doesn't allow the use of -O with -g and
487 because when it is *not* optimizing, it allocates a memory
488 location for each floating-point variable, and the memory
489 location is what gets described in the DWARF AT_location
490 attribute for the variable in question.
491 Regardless of the severe mental illness of the x86/svr4 SDB, we
492 do something sensible here and we use the following DWARF
493 register numbers. Note that these are all stack-top-relative
495 11 for %st(0) (gcc regno = 8)
496 12 for %st(1) (gcc regno = 9)
497 13 for %st(2) (gcc regno = 10)
498 14 for %st(3) (gcc regno = 11)
499 15 for %st(4) (gcc regno = 12)
500 16 for %st(5) (gcc regno = 13)
501 17 for %st(6) (gcc regno = 14)
502 18 for %st(7) (gcc regno = 15)
504 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
506 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
507 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
508 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
509 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
510 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
511 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded integer registers */
512 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded SSE registers */
515 /* Test and compare insns in i386.md store the information needed to
516 generate branch and scc insns here. */
518 struct rtx_def *ix86_compare_op0 = NULL_RTX;
519 struct rtx_def *ix86_compare_op1 = NULL_RTX;
521 #define MAX_386_STACK_LOCALS 3
522 /* Size of the register save area. */
523 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
525 /* Define the structure for the machine field in struct function. */
526 struct machine_function
528 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
529 int save_varrargs_registers;
530 int accesses_prev_frame;
533 #define ix86_stack_locals (cfun->machine->stack_locals)
534 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
536 /* Structure describing stack frame layout.
537 Stack grows downward:
543 saved frame pointer if frame_pointer_needed
544 <- HARD_FRAME_POINTER
550 > to_allocate <- FRAME_POINTER
562 int outgoing_arguments_size;
565 HOST_WIDE_INT to_allocate;
566 /* The offsets relative to ARG_POINTER. */
567 HOST_WIDE_INT frame_pointer_offset;
568 HOST_WIDE_INT hard_frame_pointer_offset;
569 HOST_WIDE_INT stack_pointer_offset;
572 /* Code model option as passed by user. */
573 const char *ix86_cmodel_string;
575 enum cmodel ix86_cmodel;
577 /* which cpu are we scheduling for */
578 enum processor_type ix86_cpu;
580 /* which instruction set architecture to use. */
583 /* Strings to hold which cpu and instruction set architecture to use. */
584 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
585 const char *ix86_arch_string; /* for -march=<xxx> */
587 /* # of registers to use to pass arguments. */
588 const char *ix86_regparm_string;
590 /* ix86_regparm_string as a number */
593 /* Alignment to use for loops and jumps: */
595 /* Power of two alignment for loops. */
596 const char *ix86_align_loops_string;
598 /* Power of two alignment for non-loop jumps. */
599 const char *ix86_align_jumps_string;
601 /* Power of two alignment for stack boundary in bytes. */
602 const char *ix86_preferred_stack_boundary_string;
604 /* Preferred alignment for stack boundary in bits. */
605 int ix86_preferred_stack_boundary;
607 /* Values 1-5: see jump.c */
608 int ix86_branch_cost;
609 const char *ix86_branch_cost_string;
611 /* Power of two alignment for functions. */
612 const char *ix86_align_funcs_string;
614 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
615 static char internal_label_prefix[16];
616 static int internal_label_prefix_len;
618 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
619 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
620 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
622 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
623 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
625 static rtx gen_push PARAMS ((rtx));
626 static int memory_address_length PARAMS ((rtx addr));
627 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
628 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
629 static int ix86_safe_length PARAMS ((rtx));
630 static enum attr_memory ix86_safe_memory PARAMS ((rtx));
631 static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
632 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
633 static void ix86_dump_ppro_packet PARAMS ((FILE *));
634 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
635 static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
637 static void ix86_init_machine_status PARAMS ((struct function *));
638 static void ix86_mark_machine_status PARAMS ((struct function *));
639 static void ix86_free_machine_status PARAMS ((struct function *));
640 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
641 static int ix86_safe_length_prefix PARAMS ((rtx));
642 static int ix86_nsaved_regs PARAMS((void));
643 static void ix86_emit_save_regs PARAMS((void));
644 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
645 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
646 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
647 static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
648 static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
649 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
650 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
651 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
652 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
653 static int ix86_issue_rate PARAMS ((void));
654 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
655 static void ix86_sched_init PARAMS ((FILE *, int, int));
656 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
657 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
661 rtx base, index, disp;
665 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
667 struct builtin_description;
668 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
670 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
672 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
673 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
674 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
675 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
676 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
677 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
678 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
682 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
684 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
685 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
686 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
687 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
688 static int ix86_save_reg PARAMS ((int, int));
689 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
690 static int ix86_comp_type_attributes PARAMS ((tree, tree));
691 const struct attribute_spec ix86_attribute_table[];
692 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
693 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
695 #ifdef DO_GLOBAL_CTORS_BODY
696 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
699 /* Register class used for passing given 64bit part of the argument.
700 These represent classes as documented by the PS ABI, with the exception
701 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
702 use SF or DFmode move instead of DImode to avoid reformating penalties.
704 Similary we play games with INTEGERSI_CLASS to use cheaper SImode moves
705 whenever possible (upper half does contain padding).
707 enum x86_64_reg_class
710 X86_64_INTEGER_CLASS,
711 X86_64_INTEGERSI_CLASS,
720 const char * const x86_64_reg_class_name[] =
721 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
723 #define MAX_CLASSES 4
724 static int classify_argument PARAMS ((enum machine_mode, tree,
725 enum x86_64_reg_class [MAX_CLASSES],
727 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
729 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
731 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
732 enum x86_64_reg_class));
734 /* Initialize the GCC target structure. */
735 #undef TARGET_ATTRIBUTE_TABLE
736 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
737 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
738 # undef TARGET_MERGE_DECL_ATTRIBUTES
739 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
742 #undef TARGET_COMP_TYPE_ATTRIBUTES
743 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
745 #undef TARGET_INIT_BUILTINS
746 #define TARGET_INIT_BUILTINS ix86_init_builtins
748 #undef TARGET_EXPAND_BUILTIN
749 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
751 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
752 static void ix86_osf_output_function_prologue PARAMS ((FILE *,
754 # undef TARGET_ASM_FUNCTION_PROLOGUE
755 # define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
758 #undef TARGET_ASM_OPEN_PAREN
759 #define TARGET_ASM_OPEN_PAREN ""
760 #undef TARGET_ASM_CLOSE_PAREN
761 #define TARGET_ASM_CLOSE_PAREN ""
763 #undef TARGET_SCHED_ADJUST_COST
764 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
765 #undef TARGET_SCHED_ISSUE_RATE
766 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
767 #undef TARGET_SCHED_VARIABLE_ISSUE
768 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
769 #undef TARGET_SCHED_INIT
770 #define TARGET_SCHED_INIT ix86_sched_init
771 #undef TARGET_SCHED_REORDER
772 #define TARGET_SCHED_REORDER ix86_sched_reorder
774 struct gcc_target targetm = TARGET_INITIALIZER;
776 /* Sometimes certain combinations of command options do not make
777 sense on a particular target machine. You can define a macro
778 `OVERRIDE_OPTIONS' to take account of this. This macro, if
779 defined, is executed once just after all the command options have
782 Don't use this macro to turn on various extra optimizations for
783 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
789 /* Comes from final.c -- no real reason to change it. */
790 #define MAX_CODE_ALIGN 16
794 const struct processor_costs *cost; /* Processor costs */
795 const int target_enable; /* Target flags to enable. */
796 const int target_disable; /* Target flags to disable. */
797 const int align_loop; /* Default alignments. */
798 const int align_jump;
799 const int align_func;
800 const int branch_cost;
802 const processor_target_table[PROCESSOR_max] =
804 {&i386_cost, 0, 0, 2, 2, 2, 1},
805 {&i486_cost, 0, 0, 4, 4, 4, 1},
806 {&pentium_cost, 0, 0, -4, -4, -4, 1},
807 {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
808 {&k6_cost, 0, 0, -5, -5, 4, 1},
809 {&athlon_cost, 0, 0, 4, -4, 4, 1},
810 {&pentium4_cost, 0, 0, 2, 2, 2, 1}
815 const char *const name; /* processor name or nickname. */
816 const enum processor_type processor;
818 const processor_alias_table[] =
820 {"i386", PROCESSOR_I386},
821 {"i486", PROCESSOR_I486},
822 {"i586", PROCESSOR_PENTIUM},
823 {"pentium", PROCESSOR_PENTIUM},
824 {"i686", PROCESSOR_PENTIUMPRO},
825 {"pentiumpro", PROCESSOR_PENTIUMPRO},
826 {"k6", PROCESSOR_K6},
827 {"athlon", PROCESSOR_ATHLON},
828 {"pentium4", PROCESSOR_PENTIUM4},
831 int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
833 #ifdef SUBTARGET_OVERRIDE_OPTIONS
834 SUBTARGET_OVERRIDE_OPTIONS;
837 ix86_arch = PROCESSOR_I386;
838 ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
840 if (ix86_cmodel_string != 0)
842 if (!strcmp (ix86_cmodel_string, "small"))
843 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
845 sorry ("Code model %s not supported in PIC mode", ix86_cmodel_string);
846 else if (!strcmp (ix86_cmodel_string, "32"))
848 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
849 ix86_cmodel = CM_KERNEL;
850 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
851 ix86_cmodel = CM_MEDIUM;
852 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
853 ix86_cmodel = CM_LARGE;
855 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
861 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
863 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
864 error ("Code model `%s' not supported in the %s bit mode.",
865 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
866 if (ix86_cmodel == CM_LARGE)
867 sorry ("Code model `large' not supported yet.");
868 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
869 sorry ("%i-bit mode not compiled in.",
870 (target_flags & MASK_64BIT) ? 64 : 32);
872 if (ix86_arch_string != 0)
874 for (i = 0; i < pta_size; i++)
875 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
877 ix86_arch = processor_alias_table[i].processor;
878 /* Default cpu tuning to the architecture. */
879 ix86_cpu = ix86_arch;
884 error ("bad value (%s) for -march= switch", ix86_arch_string);
887 if (ix86_cpu_string != 0)
889 for (i = 0; i < pta_size; i++)
890 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
892 ix86_cpu = processor_alias_table[i].processor;
896 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
900 ix86_cost = &size_cost;
902 ix86_cost = processor_target_table[ix86_cpu].cost;
903 target_flags |= processor_target_table[ix86_cpu].target_enable;
904 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
906 /* Arrange to set up i386_stack_locals for all functions. */
907 init_machine_status = ix86_init_machine_status;
908 mark_machine_status = ix86_mark_machine_status;
909 free_machine_status = ix86_free_machine_status;
911 /* Validate -mregparm= value. */
912 if (ix86_regparm_string)
914 i = atoi (ix86_regparm_string);
915 if (i < 0 || i > REGPARM_MAX)
916 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
922 ix86_regparm = REGPARM_MAX;
924 /* If the user has provided any of the -malign-* options,
925 warn and use that value only if -falign-* is not set.
926 Remove this code in GCC 3.2 or later. */
927 if (ix86_align_loops_string)
929 warning ("-malign-loops is obsolete, use -falign-loops");
930 if (align_loops == 0)
932 i = atoi (ix86_align_loops_string);
933 if (i < 0 || i > MAX_CODE_ALIGN)
934 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
936 align_loops = 1 << i;
940 if (ix86_align_jumps_string)
942 warning ("-malign-jumps is obsolete, use -falign-jumps");
943 if (align_jumps == 0)
945 i = atoi (ix86_align_jumps_string);
946 if (i < 0 || i > MAX_CODE_ALIGN)
947 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
949 align_jumps = 1 << i;
953 if (ix86_align_funcs_string)
955 warning ("-malign-functions is obsolete, use -falign-functions");
956 if (align_functions == 0)
958 i = atoi (ix86_align_funcs_string);
959 if (i < 0 || i > MAX_CODE_ALIGN)
960 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
962 align_functions = 1 << i;
966 /* Default align_* from the processor table. */
967 #define abs(n) (n < 0 ? -n : n)
968 if (align_loops == 0)
969 align_loops = 1 << abs (processor_target_table[ix86_cpu].align_loop);
970 if (align_jumps == 0)
971 align_jumps = 1 << abs (processor_target_table[ix86_cpu].align_jump);
972 if (align_functions == 0)
973 align_functions = 1 << abs (processor_target_table[ix86_cpu].align_func);
975 /* Validate -mpreferred-stack-boundary= value, or provide default.
976 The default of 128 bits is for Pentium III's SSE __m128, but we
977 don't want additional code to keep the stack aligned when
978 optimizing for code size. */
979 ix86_preferred_stack_boundary = (optimize_size
980 ? TARGET_64BIT ? 64 : 32
982 if (ix86_preferred_stack_boundary_string)
984 i = atoi (ix86_preferred_stack_boundary_string);
985 if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
986 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
987 TARGET_64BIT ? 3 : 2);
989 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
992 /* Validate -mbranch-cost= value, or provide default. */
993 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
994 if (ix86_branch_cost_string)
996 i = atoi (ix86_branch_cost_string);
998 error ("-mbranch-cost=%d is not between 0 and 5", i);
1000 ix86_branch_cost = i;
1003 /* Keep nonleaf frame pointers. */
1004 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1005 flag_omit_frame_pointer = 1;
1007 /* If we're doing fast math, we don't care about comparison order
1008 wrt NaNs. This lets us use a shorter comparison sequence. */
1009 if (flag_unsafe_math_optimizations)
1010 target_flags &= ~MASK_IEEE_FP;
1014 if (TARGET_ALIGN_DOUBLE)
1015 error ("-malign-double makes no sense in the 64bit mode.");
1017 error ("-mrtd calling convention not supported in the 64bit mode.");
1018 /* Enable by default the SSE and MMX builtins. */
1019 target_flags |= MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE;
1022 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1025 target_flags |= MASK_MMX;
1027 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1030 target_flags |= MASK_MMX;
1031 /* If we are targetting the Athlon architecture, enable the 3Dnow/MMX
1032 extensions it adds. */
1033 if (x86_3dnow_a & (1 << ix86_arch))
1034 target_flags |= MASK_3DNOW_A;
1036 if ((x86_accumulate_outgoing_args & CPUMASK)
1037 && !(target_flags & MASK_NO_ACCUMULATE_OUTGOING_ARGS)
1039 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1041 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1044 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1045 p = strchr (internal_label_prefix, 'X');
1046 internal_label_prefix_len = p - internal_label_prefix;
1052 optimization_options (level, size)
1054 int size ATTRIBUTE_UNUSED;
1056 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1057 make the problem with not enough registers even worse. */
1058 #ifdef INSN_SCHEDULING
1060 flag_schedule_insns = 0;
1062 if (TARGET_64BIT && optimize >= 1)
1063 flag_omit_frame_pointer = 1;
1066 flag_pcc_struct_return = 0;
1067 flag_asynchronous_unwind_tables = 1;
1071 /* Table of valid machine attributes. */
1072 const struct attribute_spec ix86_attribute_table[] =
1074 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1075 /* Stdcall attribute says callee is responsible for popping arguments
1076 if they are not variable. */
1077 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1078 /* Cdecl attribute says the callee is a normal C declaration */
1079 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1080 /* Regparm attribute specifies how many integer arguments are to be
1081 passed in registers. */
1082 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1083 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1084 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1085 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1086 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1088 { NULL, 0, 0, false, false, false, NULL }
1091 /* Handle a "cdecl" or "stdcall" attribute;
1092 arguments as in struct attribute_spec.handler. */
1094 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1097 tree args ATTRIBUTE_UNUSED;
1098 int flags ATTRIBUTE_UNUSED;
1101 if (TREE_CODE (*node) != FUNCTION_TYPE
1102 && TREE_CODE (*node) != METHOD_TYPE
1103 && TREE_CODE (*node) != FIELD_DECL
1104 && TREE_CODE (*node) != TYPE_DECL)
1106 warning ("`%s' attribute only applies to functions",
1107 IDENTIFIER_POINTER (name));
1108 *no_add_attrs = true;
1113 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1114 *no_add_attrs = true;
1120 /* Handle a "regparm" attribute;
1121 arguments as in struct attribute_spec.handler. */
1123 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1127 int flags ATTRIBUTE_UNUSED;
1130 if (TREE_CODE (*node) != FUNCTION_TYPE
1131 && TREE_CODE (*node) != METHOD_TYPE
1132 && TREE_CODE (*node) != FIELD_DECL
1133 && TREE_CODE (*node) != TYPE_DECL)
1135 warning ("`%s' attribute only applies to functions",
1136 IDENTIFIER_POINTER (name));
1137 *no_add_attrs = true;
1143 cst = TREE_VALUE (args);
1144 if (TREE_CODE (cst) != INTEGER_CST)
1146 warning ("`%s' attribute requires an integer constant argument",
1147 IDENTIFIER_POINTER (name));
1148 *no_add_attrs = true;
1150 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1152 warning ("argument to `%s' attribute larger than %d",
1153 IDENTIFIER_POINTER (name), REGPARM_MAX);
1154 *no_add_attrs = true;
1161 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
1163 /* Generate the assembly code for function entry. FILE is a stdio
1164 stream to output the code to. SIZE is an int: how many units of
1165 temporary storage to allocate.
1167 Refer to the array `regs_ever_live' to determine which registers to
1168 save; `regs_ever_live[I]' is nonzero if register number I is ever
1169 used in the function. This function is responsible for knowing
1170 which registers should not be saved even if used.
1172 We override it here to allow for the new profiling code to go before
1173 the prologue and the old mcount code to go after the prologue (and
1174 after %ebx has been set up for ELF shared library support). */
1177 ix86_osf_output_function_prologue (file, size)
1182 char *lprefix = LPREFIX;
1183 int labelno = profile_label_no;
1187 if (TARGET_UNDERSCORES)
1190 if (profile_flag && OSF_PROFILE_BEFORE_PROLOGUE)
1192 if (!flag_pic && !HALF_PIC_P ())
1194 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1195 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
1198 else if (HALF_PIC_P ())
1202 HALF_PIC_EXTERNAL ("_mcount_ptr");
1203 symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode,
1206 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1207 fprintf (file, "\tmovl %s%s,%%eax\n", prefix,
1209 fprintf (file, "\tcall *(%%eax)\n");
1214 static int call_no = 0;
1216 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1217 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1218 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1219 lprefix, call_no++);
1220 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1222 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1224 fprintf (file, "\tcall *(%%eax)\n");
1230 if (profile_flag && OSF_PROFILE_BEFORE_PROLOGUE)
1234 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1235 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
1240 static int call_no = 0;
1242 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1243 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1244 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1245 lprefix, call_no++);
1246 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1248 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1250 fprintf (file, "\tcall *(%%eax)\n");
1253 #endif /* !OSF_OS */
1255 function_prologue (file, size);
1258 #endif /* OSF_OS || TARGET_OSF1ELF */
1260 /* Return 0 if the attributes for two types are incompatible, 1 if they
1261 are compatible, and 2 if they are nearly compatible (which causes a
1262 warning to be generated). */
1265 ix86_comp_type_attributes (type1, type2)
1269 /* Check for mismatch of non-default calling convention. */
1270 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1272 if (TREE_CODE (type1) != FUNCTION_TYPE)
1275 /* Check for mismatched return types (cdecl vs stdcall). */
1276 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1277 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1282 /* Value is the number of bytes of arguments automatically
1283 popped when returning from a subroutine call.
1284 FUNDECL is the declaration node of the function (as a tree),
1285 FUNTYPE is the data type of the function (as a tree),
1286 or for a library call it is an identifier node for the subroutine name.
1287 SIZE is the number of bytes of arguments passed on the stack.
1289 On the 80386, the RTD insn may be used to pop them if the number
1290 of args is fixed, but if the number is variable then the caller
1291 must pop them all. RTD can't be used for library calls now
1292 because the library is compiled with the Unix compiler.
1293 Use of RTD is a selectable option, since it is incompatible with
1294 standard Unix calling sequences. If the option is not selected,
1295 the caller must always pop the args.
1297 The attribute stdcall is equivalent to RTD on a per module basis. */
1300 ix86_return_pops_args (fundecl, funtype, size)
1305 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1307 /* Cdecl functions override -mrtd, and never pop the stack. */
1308 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1310 /* Stdcall functions will pop the stack if not variable args. */
1311 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1315 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1316 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1317 == void_type_node)))
1321 /* Lose any fake structure return argument. */
1322 if (aggregate_value_p (TREE_TYPE (funtype))
1324 return GET_MODE_SIZE (Pmode);
1329 /* Argument support functions. */
1331 /* Return true when register may be used to pass function parameters. */
1333 ix86_function_arg_regno_p (regno)
1338 return regno < REGPARM_MAX || (TARGET_SSE && SSE_REGNO_P (regno));
1339 if (SSE_REGNO_P (regno) && TARGET_SSE)
1341 /* RAX is used as hidden argument to va_arg functions. */
1344 for (i = 0; i < REGPARM_MAX; i++)
1345 if (regno == x86_64_int_parameter_registers[i])
1350 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1351 for a call to a function whose data type is FNTYPE.
1352 For a library call, FNTYPE is 0. */
1355 init_cumulative_args (cum, fntype, libname)
1356 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1357 tree fntype; /* tree ptr for function decl */
1358 rtx libname; /* SYMBOL_REF of library name or 0 */
1360 static CUMULATIVE_ARGS zero_cum;
1361 tree param, next_param;
1363 if (TARGET_DEBUG_ARG)
1365 fprintf (stderr, "\ninit_cumulative_args (");
1367 fprintf (stderr, "fntype code = %s, ret code = %s",
1368 tree_code_name[(int) TREE_CODE (fntype)],
1369 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1371 fprintf (stderr, "no fntype");
1374 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1379 /* Set up the number of registers to use for passing arguments. */
1380 cum->nregs = ix86_regparm;
1381 cum->sse_nregs = SSE_REGPARM_MAX;
1382 if (fntype && !TARGET_64BIT)
1384 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1387 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1389 cum->maybe_vaarg = false;
1391 /* Determine if this function has variable arguments. This is
1392 indicated by the last argument being 'void_type_mode' if there
1393 are no variable arguments. If there are variable arguments, then
1394 we won't pass anything in registers */
1398 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1399 param != 0; param = next_param)
1401 next_param = TREE_CHAIN (param);
1402 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1406 cum->maybe_vaarg = true;
1410 if ((!fntype && !libname)
1411 || (fntype && !TYPE_ARG_TYPES (fntype)))
1412 cum->maybe_vaarg = 1;
1414 if (TARGET_DEBUG_ARG)
1415 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1420 /* x86-64 register passing impleemntation. See x86-64 ABI for details. Goal
1421 of this code is to classify each 8bytes of incoming argument by the register
1422 class and assign registers accordingly. */
1424 /* Return the union class of CLASS1 and CLASS2.
1425 See the x86-64 PS ABI for details. */
1427 static enum x86_64_reg_class
1428 merge_classes (class1, class2)
1429 enum x86_64_reg_class class1, class2;
1431 /* Rule #1: If both classes are equal, this is the resulting class. */
1432 if (class1 == class2)
1435 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1437 if (class1 == X86_64_NO_CLASS)
1439 if (class2 == X86_64_NO_CLASS)
1442 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1443 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1444 return X86_64_MEMORY_CLASS;
1446 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1447 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1448 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1449 return X86_64_INTEGERSI_CLASS;
1450 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1451 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1452 return X86_64_INTEGER_CLASS;
1454 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1455 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1456 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1457 return X86_64_MEMORY_CLASS;
1459 /* Rule #6: Otherwise class SSE is used. */
1460 return X86_64_SSE_CLASS;
1463 /* Classify the argument of type TYPE and mode MODE.
1464 CLASSES will be filled by the register class used to pass each word
1465 of the operand. The number of words is returned. In case the parameter
1466 should be passed in memory, 0 is returned. As a special case for zero
1467 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1469 BIT_OFFSET is used internally for handling records and specifies offset
1470 of the offset in bits modulo 256 to avoid overflow cases.
1472 See the x86-64 PS ABI for details.
1476 classify_argument (mode, type, classes, bit_offset)
1477 enum machine_mode mode;
1479 enum x86_64_reg_class classes[MAX_CLASSES];
1483 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1484 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1486 if (type && AGGREGATE_TYPE_P (type))
1490 enum x86_64_reg_class subclasses[MAX_CLASSES];
1492 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1496 for (i = 0; i < words; i++)
1497 classes[i] = X86_64_NO_CLASS;
1499 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1500 signalize memory class, so handle it as special case. */
1503 classes[0] = X86_64_NO_CLASS;
1507 /* Classify each field of record and merge classes. */
1508 if (TREE_CODE (type) == RECORD_TYPE)
1510 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1512 if (TREE_CODE (field) == FIELD_DECL)
1516 /* Bitfields are always classified as integer. Handle them
1517 early, since later code would consider them to be
1518 misaligned integers. */
1519 if (DECL_BIT_FIELD (field))
1521 for (i = int_bit_position (field) / 8 / 8;
1522 i < (int_bit_position (field)
1523 + tree_low_cst (DECL_SIZE (field), 0)
1526 merge_classes (X86_64_INTEGER_CLASS,
1531 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1532 TREE_TYPE (field), subclasses,
1533 (int_bit_position (field)
1534 + bit_offset) % 256);
1537 for (i = 0; i < num; i++)
1540 (int_bit_position (field) + bit_offset) / 8 / 8;
1542 merge_classes (subclasses[i], classes[i + pos]);
1548 /* Arrays are handled as small records. */
1549 else if (TREE_CODE (type) == ARRAY_TYPE)
1552 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1553 TREE_TYPE (type), subclasses, bit_offset);
1557 /* The partial classes are now full classes. */
1558 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1559 subclasses[0] = X86_64_SSE_CLASS;
1560 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1561 subclasses[0] = X86_64_INTEGER_CLASS;
1563 for (i = 0; i < words; i++)
1564 classes[i] = subclasses[i % num];
1566 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1567 else if (TREE_CODE (type) == UNION_TYPE)
1569 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1571 if (TREE_CODE (field) == FIELD_DECL)
1574 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1575 TREE_TYPE (field), subclasses,
1579 for (i = 0; i < num; i++)
1580 classes[i] = merge_classes (subclasses[i], classes[i]);
1587 /* Final merger cleanup. */
1588 for (i = 0; i < words; i++)
1590 /* If one class is MEMORY, everything should be passed in
1592 if (classes[i] == X86_64_MEMORY_CLASS)
1595 /* The X86_64_SSEUP_CLASS should be always preceeded by
1596 X86_64_SSE_CLASS. */
1597 if (classes[i] == X86_64_SSEUP_CLASS
1598 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1599 classes[i] = X86_64_SSE_CLASS;
1601 /* X86_64_X87UP_CLASS should be preceeded by X86_64_X87_CLASS. */
1602 if (classes[i] == X86_64_X87UP_CLASS
1603 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1604 classes[i] = X86_64_SSE_CLASS;
1609 /* Compute alignment needed. We align all types to natural boundaries with
1610 exception of XFmode that is aligned to 64bits. */
1611 if (mode != VOIDmode && mode != BLKmode)
1613 int mode_alignment = GET_MODE_BITSIZE (mode);
1616 mode_alignment = 128;
1617 else if (mode == XCmode)
1618 mode_alignment = 256;
1619 /* Missalignmed fields are always returned in memory. */
1620 if (bit_offset % mode_alignment)
1624 /* Classification of atomic types. */
1634 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1635 classes[0] = X86_64_INTEGERSI_CLASS;
1637 classes[0] = X86_64_INTEGER_CLASS;
1641 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1644 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1645 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1648 if (!(bit_offset % 64))
1649 classes[0] = X86_64_SSESF_CLASS;
1651 classes[0] = X86_64_SSE_CLASS;
1654 classes[0] = X86_64_SSEDF_CLASS;
1657 classes[0] = X86_64_X87_CLASS;
1658 classes[1] = X86_64_X87UP_CLASS;
1661 classes[0] = X86_64_X87_CLASS;
1662 classes[1] = X86_64_X87UP_CLASS;
1663 classes[2] = X86_64_X87_CLASS;
1664 classes[3] = X86_64_X87UP_CLASS;
1667 classes[0] = X86_64_SSEDF_CLASS;
1668 classes[1] = X86_64_SSEDF_CLASS;
1671 classes[0] = X86_64_SSE_CLASS;
1680 /* Examine the argument and return set number of register required in each
1681 class. Return 0 ifif parameter should be passed in memory. */
1683 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1684 enum machine_mode mode;
1686 int *int_nregs, *sse_nregs;
1689 enum x86_64_reg_class class[MAX_CLASSES];
1690 int n = classify_argument (mode, type, class, 0);
1696 for (n--; n >= 0; n--)
1699 case X86_64_INTEGER_CLASS:
1700 case X86_64_INTEGERSI_CLASS:
1703 case X86_64_SSE_CLASS:
1704 case X86_64_SSESF_CLASS:
1705 case X86_64_SSEDF_CLASS:
1708 case X86_64_NO_CLASS:
1709 case X86_64_SSEUP_CLASS:
1711 case X86_64_X87_CLASS:
1712 case X86_64_X87UP_CLASS:
1716 case X86_64_MEMORY_CLASS:
1721 /* Construct container for the argument used by GCC interface. See
1722 FUNCTION_ARG for the detailed description. */
1724 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1725 enum machine_mode mode;
1728 int nintregs, nsseregs;
1729 int *intreg, sse_regno;
1731 enum machine_mode tmpmode;
1733 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1734 enum x86_64_reg_class class[MAX_CLASSES];
1738 int needed_sseregs, needed_intregs;
1739 rtx exp[MAX_CLASSES];
1742 n = classify_argument (mode, type, class, 0);
1743 if (TARGET_DEBUG_ARG)
1746 fprintf (stderr, "Memory class\n");
1749 fprintf (stderr, "Classes:");
1750 for (i = 0; i < n; i++)
1752 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1754 fprintf (stderr, "\n");
1759 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1761 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1764 /* First construct simple cases. Avoid SCmode, since we want to use
1765 single register to pass this type. */
1766 if (n == 1 && mode != SCmode)
1769 case X86_64_INTEGER_CLASS:
1770 case X86_64_INTEGERSI_CLASS:
1771 return gen_rtx_REG (mode, intreg[0]);
1772 case X86_64_SSE_CLASS:
1773 case X86_64_SSESF_CLASS:
1774 case X86_64_SSEDF_CLASS:
1775 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1776 case X86_64_X87_CLASS:
1777 return gen_rtx_REG (mode, FIRST_STACK_REG);
1778 case X86_64_NO_CLASS:
1779 /* Zero sized array, struct or class. */
1784 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1785 return gen_rtx_REG (TImode, SSE_REGNO (sse_regno));
1787 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1788 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1789 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1790 && class[1] == X86_64_INTEGER_CLASS
1791 && (mode == CDImode || mode == TImode)
1792 && intreg[0] + 1 == intreg[1])
1793 return gen_rtx_REG (mode, intreg[0]);
1795 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1796 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1797 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1799 /* Otherwise figure out the entries of the PARALLEL. */
1800 for (i = 0; i < n; i++)
1804 case X86_64_NO_CLASS:
1806 case X86_64_INTEGER_CLASS:
1807 case X86_64_INTEGERSI_CLASS:
1808 /* Merge TImodes on aligned occassions here too. */
1809 if (i * 8 + 8 > bytes)
1810 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
1811 else if (class[i] == X86_64_INTEGERSI_CLASS)
1815 /* We've requested 24 bytes we don't have mode for. Use DImode. */
1816 if (tmpmode == BLKmode)
1818 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1819 gen_rtx_REG (tmpmode, *intreg),
1823 case X86_64_SSESF_CLASS:
1824 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1825 gen_rtx_REG (SFmode,
1826 SSE_REGNO (sse_regno)),
1830 case X86_64_SSEDF_CLASS:
1831 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1832 gen_rtx_REG (DFmode,
1833 SSE_REGNO (sse_regno)),
1837 case X86_64_SSE_CLASS:
1838 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
1839 tmpmode = TImode, i++;
1842 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1843 gen_rtx_REG (tmpmode,
1844 SSE_REGNO (sse_regno)),
1852 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
1853 for (i = 0; i < nexps; i++)
1854 XVECEXP (ret, 0, i) = exp [i];
1858 /* Update the data in CUM to advance over an argument
1859 of mode MODE and data type TYPE.
1860 (TYPE is null for libcalls where that information may not be available.) */
1863 function_arg_advance (cum, mode, type, named)
1864 CUMULATIVE_ARGS *cum; /* current arg information */
1865 enum machine_mode mode; /* current arg mode */
1866 tree type; /* type of the argument or 0 if lib support */
1867 int named; /* whether or not the argument was named */
1870 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1871 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1873 if (TARGET_DEBUG_ARG)
1875 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
1876 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
1879 int int_nregs, sse_nregs;
1880 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
1881 cum->words += words;
1882 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
1884 cum->nregs -= int_nregs;
1885 cum->sse_nregs -= sse_nregs;
1886 cum->regno += int_nregs;
1887 cum->sse_regno += sse_nregs;
1890 cum->words += words;
1894 if (TARGET_SSE && mode == TImode)
1896 cum->sse_words += words;
1897 cum->sse_nregs -= 1;
1898 cum->sse_regno += 1;
1899 if (cum->sse_nregs <= 0)
1907 cum->words += words;
1908 cum->nregs -= words;
1909 cum->regno += words;
1911 if (cum->nregs <= 0)
1921 /* Define where to put the arguments to a function.
1922 Value is zero to push the argument on the stack,
1923 or a hard register in which to store the argument.
1925 MODE is the argument's machine mode.
1926 TYPE is the data type of the argument (as a tree).
1927 This is null for libcalls where that information may
1929 CUM is a variable of type CUMULATIVE_ARGS which gives info about
1930 the preceding args and about the function being called.
1931 NAMED is nonzero if this argument is a named parameter
1932 (otherwise it is an extra parameter matching an ellipsis). */
1935 function_arg (cum, mode, type, named)
1936 CUMULATIVE_ARGS *cum; /* current arg information */
1937 enum machine_mode mode; /* current arg mode */
1938 tree type; /* type of the argument or 0 if lib support */
1939 int named; /* != 0 for normal args, == 0 for ... args */
1943 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1944 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1946 /* Handle an hidden AL argument containing number of registers for varargs
1947 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
1949 if (mode == VOIDmode)
1952 return GEN_INT (cum->maybe_vaarg
1953 ? (cum->sse_nregs < 0
1961 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
1962 &x86_64_int_parameter_registers [cum->regno],
1967 /* For now, pass fp/complex values on the stack. */
1976 if (words <= cum->nregs)
1977 ret = gen_rtx_REG (mode, cum->regno);
1981 ret = gen_rtx_REG (mode, cum->sse_regno);
1985 if (TARGET_DEBUG_ARG)
1988 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
1989 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
1992 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
1994 fprintf (stderr, ", stack");
1996 fprintf (stderr, " )\n");
2002 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2006 ix86_function_arg_boundary (mode, type)
2007 enum machine_mode mode;
2012 return PARM_BOUNDARY;
2014 align = TYPE_ALIGN (type);
2016 align = GET_MODE_ALIGNMENT (mode);
2017 if (align < PARM_BOUNDARY)
2018 align = PARM_BOUNDARY;
2024 /* Return true if N is a possible register number of function value. */
2026 ix86_function_value_regno_p (regno)
2031 return ((regno) == 0
2032 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2033 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2035 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2036 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2037 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2040 /* Define how to find the value returned by a function.
2041 VALTYPE is the data type of the value (as a tree).
2042 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2043 otherwise, FUNC is 0. */
2045 ix86_function_value (valtype)
2050 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2051 REGPARM_MAX, SSE_REGPARM_MAX,
2052 x86_64_int_return_registers, 0);
2053 /* For zero sized structures, construct_continer return NULL, but we need
2054 to keep rest of compiler happy by returning meaningfull value. */
2056 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2060 return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
2063 /* Return false ifif type is returned in memory. */
2065 ix86_return_in_memory (type)
2068 int needed_intregs, needed_sseregs;
2071 return !examine_argument (TYPE_MODE (type), type, 1,
2072 &needed_intregs, &needed_sseregs);
2076 if (TYPE_MODE (type) == BLKmode
2077 || (VECTOR_MODE_P (TYPE_MODE (type))
2078 && int_size_in_bytes (type) == 8)
2079 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2080 && TYPE_MODE (type) != TFmode
2081 && !VECTOR_MODE_P (TYPE_MODE (type))))
2087 /* Define how to find the value returned by a library function
2088 assuming the value has mode MODE. */
2090 ix86_libcall_value (mode)
2091 enum machine_mode mode;
2101 return gen_rtx_REG (mode, FIRST_SSE_REG);
2104 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2106 return gen_rtx_REG (mode, 0);
2110 return gen_rtx_REG (mode, VALUE_REGNO (mode));
2113 /* Create the va_list data type. */
2116 ix86_build_va_list ()
2118 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2120 /* For i386 we use plain pointer to argument area. */
2122 return build_pointer_type (char_type_node);
2124 record = make_lang_type (RECORD_TYPE);
2125 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2127 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2128 unsigned_type_node);
2129 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2130 unsigned_type_node);
2131 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2133 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2136 DECL_FIELD_CONTEXT (f_gpr) = record;
2137 DECL_FIELD_CONTEXT (f_fpr) = record;
2138 DECL_FIELD_CONTEXT (f_ovf) = record;
2139 DECL_FIELD_CONTEXT (f_sav) = record;
2141 TREE_CHAIN (record) = type_decl;
2142 TYPE_NAME (record) = type_decl;
2143 TYPE_FIELDS (record) = f_gpr;
2144 TREE_CHAIN (f_gpr) = f_fpr;
2145 TREE_CHAIN (f_fpr) = f_ovf;
2146 TREE_CHAIN (f_ovf) = f_sav;
2148 layout_type (record);
2150 /* The correct type is an array type of one element. */
2151 return build_array_type (record, build_index_type (size_zero_node));
2154 /* Perform any needed actions needed for a function that is receiving a
2155 variable number of arguments.
2159 MODE and TYPE are the mode and type of the current parameter.
2161 PRETEND_SIZE is a variable that should be set to the amount of stack
2162 that must be pushed by the prolog to pretend that our caller pushed
2165 Normally, this macro will push all remaining incoming registers on the
2166 stack and set PRETEND_SIZE to the length of the registers pushed. */
2169 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2170 CUMULATIVE_ARGS *cum;
2171 enum machine_mode mode;
2173 int *pretend_size ATTRIBUTE_UNUSED;
2177 CUMULATIVE_ARGS next_cum;
2178 rtx save_area = NULL_RTX, mem;
2191 /* Indicate to allocate space on the stack for varargs save area. */
2192 ix86_save_varrargs_registers = 1;
2194 fntype = TREE_TYPE (current_function_decl);
2195 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2196 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2197 != void_type_node));
2199 /* For varargs, we do not want to skip the dummy va_dcl argument.
2200 For stdargs, we do want to skip the last named argument. */
2203 function_arg_advance (&next_cum, mode, type, 1);
2206 save_area = frame_pointer_rtx;
2208 set = get_varargs_alias_set ();
2210 for (i = next_cum.regno; i < ix86_regparm; i++)
2212 mem = gen_rtx_MEM (Pmode,
2213 plus_constant (save_area, i * UNITS_PER_WORD));
2214 set_mem_alias_set (mem, set);
2215 emit_move_insn (mem, gen_rtx_REG (Pmode,
2216 x86_64_int_parameter_registers[i]));
2219 if (next_cum.sse_nregs)
2221 /* Now emit code to save SSE registers. The AX parameter contains number
2222 of SSE parameter regsiters used to call this function. We use
2223 sse_prologue_save insn template that produces computed jump across
2224 SSE saves. We need some preparation work to get this working. */
2226 label = gen_label_rtx ();
2227 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2229 /* Compute address to jump to :
2230 label - 5*eax + nnamed_sse_arguments*5 */
2231 tmp_reg = gen_reg_rtx (Pmode);
2232 nsse_reg = gen_reg_rtx (Pmode);
2233 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2234 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2235 gen_rtx_MULT (Pmode, nsse_reg,
2237 if (next_cum.sse_regno)
2240 gen_rtx_CONST (DImode,
2241 gen_rtx_PLUS (DImode,
2243 GEN_INT (next_cum.sse_regno * 4))));
2245 emit_move_insn (nsse_reg, label_ref);
2246 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2248 /* Compute address of memory block we save into. We always use pointer
2249 pointing 127 bytes after first byte to store - this is needed to keep
2250 instruction size limited by 4 bytes. */
2251 tmp_reg = gen_reg_rtx (Pmode);
2252 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2253 plus_constant (save_area,
2254 8 * REGPARM_MAX + 127)));
2255 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2256 set_mem_alias_set (mem, set);
2257 set_mem_align (mem, BITS_PER_WORD);
2259 /* And finally do the dirty job! */
2260 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2261 GEN_INT (next_cum.sse_regno), label));
2266 /* Implement va_start. */
2269 ix86_va_start (stdarg_p, valist, nextarg)
2274 HOST_WIDE_INT words, n_gpr, n_fpr;
2275 tree f_gpr, f_fpr, f_ovf, f_sav;
2276 tree gpr, fpr, ovf, sav, t;
2278 /* Only 64bit target needs something special. */
2281 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
2285 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2286 f_fpr = TREE_CHAIN (f_gpr);
2287 f_ovf = TREE_CHAIN (f_fpr);
2288 f_sav = TREE_CHAIN (f_ovf);
2290 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2291 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2292 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2293 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2294 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2296 /* Count number of gp and fp argument registers used. */
2297 words = current_function_args_info.words;
2298 n_gpr = current_function_args_info.regno;
2299 n_fpr = current_function_args_info.sse_regno;
2301 if (TARGET_DEBUG_ARG)
2302 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2303 (int)words, (int)n_gpr, (int)n_fpr);
2305 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2306 build_int_2 (n_gpr * 8, 0));
2307 TREE_SIDE_EFFECTS (t) = 1;
2308 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2310 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2311 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2312 TREE_SIDE_EFFECTS (t) = 1;
2313 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2315 /* Find the overflow area. */
2316 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2318 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2319 build_int_2 (words * UNITS_PER_WORD, 0));
2320 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2321 TREE_SIDE_EFFECTS (t) = 1;
2322 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2324 /* Find the register save area.
2325 Prologue of the function save it right above stack frame. */
2326 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2327 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2328 TREE_SIDE_EFFECTS (t) = 1;
2329 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2332 /* Implement va_arg. */
2334 ix86_va_arg (valist, type)
2337 static int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2338 tree f_gpr, f_fpr, f_ovf, f_sav;
2339 tree gpr, fpr, ovf, sav, t;
2341 rtx lab_false, lab_over = NULL_RTX;
2345 /* Only 64bit target needs something special. */
2348 return std_expand_builtin_va_arg (valist, type);
2351 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2352 f_fpr = TREE_CHAIN (f_gpr);
2353 f_ovf = TREE_CHAIN (f_fpr);
2354 f_sav = TREE_CHAIN (f_ovf);
2356 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2357 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2358 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2359 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2360 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2362 size = int_size_in_bytes (type);
2363 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2365 container = construct_container (TYPE_MODE (type), type, 0,
2366 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2368 * Pull the value out of the saved registers ...
2371 addr_rtx = gen_reg_rtx (Pmode);
2375 rtx int_addr_rtx, sse_addr_rtx;
2376 int needed_intregs, needed_sseregs;
2379 lab_over = gen_label_rtx ();
2380 lab_false = gen_label_rtx ();
2382 examine_argument (TYPE_MODE (type), type, 0,
2383 &needed_intregs, &needed_sseregs);
2386 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2387 || TYPE_ALIGN (type) > 128);
2389 /* In case we are passing structure, verify that it is consetuctive block
2390 on the register save area. If not we need to do moves. */
2391 if (!need_temp && !REG_P (container))
2393 /* Verify that all registers are strictly consetuctive */
2394 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2398 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2400 rtx slot = XVECEXP (container, 0, i);
2401 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int)i
2402 || INTVAL (XEXP (slot, 1)) != i * 16)
2410 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2412 rtx slot = XVECEXP (container, 0, i);
2413 if (REGNO (XEXP (slot, 0)) != (unsigned int)i
2414 || INTVAL (XEXP (slot, 1)) != i * 8)
2421 int_addr_rtx = addr_rtx;
2422 sse_addr_rtx = addr_rtx;
2426 int_addr_rtx = gen_reg_rtx (Pmode);
2427 sse_addr_rtx = gen_reg_rtx (Pmode);
2429 /* First ensure that we fit completely in registers. */
2432 emit_cmp_and_jump_insns (expand_expr
2433 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2434 GEN_INT ((REGPARM_MAX - needed_intregs +
2435 1) * 8), GE, const1_rtx, SImode,
2440 emit_cmp_and_jump_insns (expand_expr
2441 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2442 GEN_INT ((SSE_REGPARM_MAX -
2443 needed_sseregs + 1) * 16 +
2444 REGPARM_MAX * 8), GE, const1_rtx,
2445 SImode, 1, 1, lab_false);
2448 /* Compute index to start of area used for integer regs. */
2451 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2452 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2453 if (r != int_addr_rtx)
2454 emit_move_insn (int_addr_rtx, r);
2458 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2459 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2460 if (r != sse_addr_rtx)
2461 emit_move_insn (sse_addr_rtx, r);
2468 /* Never use the memory itself, as it has the alias set. */
2469 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2470 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2471 set_mem_alias_set (mem, get_varargs_alias_set ());
2472 set_mem_align (mem, BITS_PER_UNIT);
2474 for (i = 0; i < XVECLEN (container, 0); i++)
2476 rtx slot = XVECEXP (container, 0, i);
2477 rtx reg = XEXP (slot, 0);
2478 enum machine_mode mode = GET_MODE (reg);
2484 if (SSE_REGNO_P (REGNO (reg)))
2486 src_addr = sse_addr_rtx;
2487 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2491 src_addr = int_addr_rtx;
2492 src_offset = REGNO (reg) * 8;
2494 src_mem = gen_rtx_MEM (mode, src_addr);
2495 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2496 src_mem = adjust_address (src_mem, mode, src_offset);
2497 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2498 emit_move_insn (dest_mem, src_mem);
2505 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2506 build_int_2 (needed_intregs * 8, 0));
2507 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2508 TREE_SIDE_EFFECTS (t) = 1;
2509 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2514 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2515 build_int_2 (needed_sseregs * 16, 0));
2516 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2517 TREE_SIDE_EFFECTS (t) = 1;
2518 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2521 emit_jump_insn (gen_jump (lab_over));
2523 emit_label (lab_false);
2526 /* ... otherwise out of the overflow area. */
2528 /* Care for on-stack alignment if needed. */
2529 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2533 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2534 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2535 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2539 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2541 emit_move_insn (addr_rtx, r);
2544 build (PLUS_EXPR, TREE_TYPE (t), t,
2545 build_int_2 (rsize * UNITS_PER_WORD, 0));
2546 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2547 TREE_SIDE_EFFECTS (t) = 1;
2548 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2551 emit_label (lab_over);
2556 /* Return nonzero if OP is general operand representable on x86_64. */
2559 x86_64_general_operand (op, mode)
2561 enum machine_mode mode;
2564 return general_operand (op, mode);
2565 if (nonimmediate_operand (op, mode))
2567 return x86_64_sign_extended_value (op);
2570 /* Return nonzero if OP is general operand representable on x86_64
2571 as eighter sign extended or zero extended constant. */
2574 x86_64_szext_general_operand (op, mode)
2576 enum machine_mode mode;
2579 return general_operand (op, mode);
2580 if (nonimmediate_operand (op, mode))
2582 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2585 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2588 x86_64_nonmemory_operand (op, mode)
2590 enum machine_mode mode;
2593 return nonmemory_operand (op, mode);
2594 if (register_operand (op, mode))
2596 return x86_64_sign_extended_value (op);
2599 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2602 x86_64_movabs_operand (op, mode)
2604 enum machine_mode mode;
2606 if (!TARGET_64BIT || !flag_pic)
2607 return nonmemory_operand (op, mode);
2608 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2610 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2615 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2618 x86_64_szext_nonmemory_operand (op, mode)
2620 enum machine_mode mode;
2623 return nonmemory_operand (op, mode);
2624 if (register_operand (op, mode))
2626 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2629 /* Return nonzero if OP is immediate operand representable on x86_64. */
2632 x86_64_immediate_operand (op, mode)
2634 enum machine_mode mode;
2637 return immediate_operand (op, mode);
2638 return x86_64_sign_extended_value (op);
2641 /* Return nonzero if OP is immediate operand representable on x86_64. */
2644 x86_64_zext_immediate_operand (op, mode)
2646 enum machine_mode mode ATTRIBUTE_UNUSED;
2648 return x86_64_zero_extended_value (op);
2651 /* Return nonzero if OP is (const_int 1), else return zero. */
2654 const_int_1_operand (op, mode)
2656 enum machine_mode mode ATTRIBUTE_UNUSED;
2658 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2661 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2662 reference and a constant. */
2665 symbolic_operand (op, mode)
2667 enum machine_mode mode ATTRIBUTE_UNUSED;
2669 switch (GET_CODE (op))
2677 if (GET_CODE (op) == SYMBOL_REF
2678 || GET_CODE (op) == LABEL_REF
2679 || (GET_CODE (op) == UNSPEC
2680 && (XINT (op, 1) == 6
2681 || XINT (op, 1) == 7
2682 || XINT (op, 1) == 15)))
2684 if (GET_CODE (op) != PLUS
2685 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2689 if (GET_CODE (op) == SYMBOL_REF
2690 || GET_CODE (op) == LABEL_REF)
2692 /* Only @GOTOFF gets offsets. */
2693 if (GET_CODE (op) != UNSPEC
2694 || XINT (op, 1) != 7)
2697 op = XVECEXP (op, 0, 0);
2698 if (GET_CODE (op) == SYMBOL_REF
2699 || GET_CODE (op) == LABEL_REF)
2708 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2711 pic_symbolic_operand (op, mode)
2713 enum machine_mode mode ATTRIBUTE_UNUSED;
2715 if (GET_CODE (op) != CONST)
2720 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2725 if (GET_CODE (op) == UNSPEC)
2727 if (GET_CODE (op) != PLUS
2728 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2731 if (GET_CODE (op) == UNSPEC)
2737 /* Return true if OP is a symbolic operand that resolves locally. */
2740 local_symbolic_operand (op, mode)
2742 enum machine_mode mode ATTRIBUTE_UNUSED;
2744 if (GET_CODE (op) == LABEL_REF)
2747 if (GET_CODE (op) == CONST
2748 && GET_CODE (XEXP (op, 0)) == PLUS
2749 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2750 op = XEXP (XEXP (op, 0), 0);
2752 if (GET_CODE (op) != SYMBOL_REF)
2755 /* These we've been told are local by varasm and encode_section_info
2757 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2760 /* There is, however, a not insubstantial body of code in the rest of
2761 the compiler that assumes it can just stick the results of
2762 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2763 /* ??? This is a hack. Should update the body of the compiler to
2764 always create a DECL an invoke ENCODE_SECTION_INFO. */
2765 if (strncmp (XSTR (op, 0), internal_label_prefix,
2766 internal_label_prefix_len) == 0)
2772 /* Test for a valid operand for a call instruction. Don't allow the
2773 arg pointer register or virtual regs since they may decay into
2774 reg + const, which the patterns can't handle. */
2777 call_insn_operand (op, mode)
2779 enum machine_mode mode ATTRIBUTE_UNUSED;
2781 /* Disallow indirect through a virtual register. This leads to
2782 compiler aborts when trying to eliminate them. */
2783 if (GET_CODE (op) == REG
2784 && (op == arg_pointer_rtx
2785 || op == frame_pointer_rtx
2786 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
2787 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
2790 /* Disallow `call 1234'. Due to varying assembler lameness this
2791 gets either rejected or translated to `call .+1234'. */
2792 if (GET_CODE (op) == CONST_INT)
2795 /* Explicitly allow SYMBOL_REF even if pic. */
2796 if (GET_CODE (op) == SYMBOL_REF)
2799 /* Half-pic doesn't allow anything but registers and constants.
2800 We've just taken care of the later. */
2802 return register_operand (op, Pmode);
2804 /* Otherwise we can allow any general_operand in the address. */
2805 return general_operand (op, Pmode);
2809 constant_call_address_operand (op, mode)
2811 enum machine_mode mode ATTRIBUTE_UNUSED;
2813 if (GET_CODE (op) == CONST
2814 && GET_CODE (XEXP (op, 0)) == PLUS
2815 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2816 op = XEXP (XEXP (op, 0), 0);
2817 return GET_CODE (op) == SYMBOL_REF;
2820 /* Match exactly zero and one. */
2823 const0_operand (op, mode)
2825 enum machine_mode mode;
2827 return op == CONST0_RTX (mode);
2831 const1_operand (op, mode)
2833 enum machine_mode mode ATTRIBUTE_UNUSED;
2835 return op == const1_rtx;
2838 /* Match 2, 4, or 8. Used for leal multiplicands. */
2841 const248_operand (op, mode)
2843 enum machine_mode mode ATTRIBUTE_UNUSED;
2845 return (GET_CODE (op) == CONST_INT
2846 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
2849 /* True if this is a constant appropriate for an increment or decremenmt. */
2852 incdec_operand (op, mode)
2854 enum machine_mode mode ATTRIBUTE_UNUSED;
2856 /* On Pentium4, the inc and dec operations causes extra dependancy on flag
2857 registers, since carry flag is not set. */
2858 if (TARGET_PENTIUM4 && !optimize_size)
2860 return op == const1_rtx || op == constm1_rtx;
2863 /* Return nonzero if OP is acceptable as operand of DImode shift
2867 shiftdi_operand (op, mode)
2869 enum machine_mode mode ATTRIBUTE_UNUSED;
2872 return nonimmediate_operand (op, mode);
2874 return register_operand (op, mode);
2877 /* Return false if this is the stack pointer, or any other fake
2878 register eliminable to the stack pointer. Otherwise, this is
2881 This is used to prevent esp from being used as an index reg.
2882 Which would only happen in pathological cases. */
2885 reg_no_sp_operand (op, mode)
2887 enum machine_mode mode;
2890 if (GET_CODE (t) == SUBREG)
2892 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
2895 return register_operand (op, mode);
2899 mmx_reg_operand (op, mode)
2901 enum machine_mode mode ATTRIBUTE_UNUSED;
2903 return MMX_REG_P (op);
2906 /* Return false if this is any eliminable register. Otherwise
2910 general_no_elim_operand (op, mode)
2912 enum machine_mode mode;
2915 if (GET_CODE (t) == SUBREG)
2917 if (t == arg_pointer_rtx || t == frame_pointer_rtx
2918 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
2919 || t == virtual_stack_dynamic_rtx)
2922 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
2923 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
2926 return general_operand (op, mode);
2929 /* Return false if this is any eliminable register. Otherwise
2930 register_operand or const_int. */
2933 nonmemory_no_elim_operand (op, mode)
2935 enum machine_mode mode;
2938 if (GET_CODE (t) == SUBREG)
2940 if (t == arg_pointer_rtx || t == frame_pointer_rtx
2941 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
2942 || t == virtual_stack_dynamic_rtx)
2945 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
2948 /* Return true if op is a Q_REGS class register. */
2951 q_regs_operand (op, mode)
2953 enum machine_mode mode;
2955 if (mode != VOIDmode && GET_MODE (op) != mode)
2957 if (GET_CODE (op) == SUBREG)
2958 op = SUBREG_REG (op);
2959 return QI_REG_P (op);
2962 /* Return true if op is a NON_Q_REGS class register. */
2965 non_q_regs_operand (op, mode)
2967 enum machine_mode mode;
2969 if (mode != VOIDmode && GET_MODE (op) != mode)
2971 if (GET_CODE (op) == SUBREG)
2972 op = SUBREG_REG (op);
2973 return NON_QI_REG_P (op);
2976 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
2979 sse_comparison_operator (op, mode)
2981 enum machine_mode mode ATTRIBUTE_UNUSED;
2983 enum rtx_code code = GET_CODE (op);
2986 /* Operations supported directly. */
2996 /* These are equivalent to ones above in non-IEEE comparisons. */
3003 return !TARGET_IEEE_FP;
3008 /* Return 1 if OP is a valid comparison operator in valid mode. */
3010 ix86_comparison_operator (op, mode)
3012 enum machine_mode mode;
3014 enum machine_mode inmode;
3015 enum rtx_code code = GET_CODE (op);
3016 if (mode != VOIDmode && GET_MODE (op) != mode)
3018 if (GET_RTX_CLASS (code) != '<')
3020 inmode = GET_MODE (XEXP (op, 0));
3022 if (inmode == CCFPmode || inmode == CCFPUmode)
3024 enum rtx_code second_code, bypass_code;
3025 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3026 return (bypass_code == NIL && second_code == NIL);
3033 if (inmode == CCmode || inmode == CCGCmode
3034 || inmode == CCGOCmode || inmode == CCNOmode)
3037 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3038 if (inmode == CCmode)
3042 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3050 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3053 fcmov_comparison_operator (op, mode)
3055 enum machine_mode mode;
3057 enum machine_mode inmode;
3058 enum rtx_code code = GET_CODE (op);
3059 if (mode != VOIDmode && GET_MODE (op) != mode)
3061 if (GET_RTX_CLASS (code) != '<')
3063 inmode = GET_MODE (XEXP (op, 0));
3064 if (inmode == CCFPmode || inmode == CCFPUmode)
3066 enum rtx_code second_code, bypass_code;
3067 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3068 if (bypass_code != NIL || second_code != NIL)
3070 code = ix86_fp_compare_code_to_integer (code);
3072 /* i387 supports just limited amount of conditional codes. */
3075 case LTU: case GTU: case LEU: case GEU:
3076 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3079 case ORDERED: case UNORDERED:
3087 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3090 promotable_binary_operator (op, mode)
3092 enum machine_mode mode ATTRIBUTE_UNUSED;
3094 switch (GET_CODE (op))
3097 /* Modern CPUs have same latency for HImode and SImode multiply,
3098 but 386 and 486 do HImode multiply faster. */
3099 return ix86_cpu > PROCESSOR_I486;
3111 /* Nearly general operand, but accept any const_double, since we wish
3112 to be able to drop them into memory rather than have them get pulled
3116 cmp_fp_expander_operand (op, mode)
3118 enum machine_mode mode;
3120 if (mode != VOIDmode && mode != GET_MODE (op))
3122 if (GET_CODE (op) == CONST_DOUBLE)
3124 return general_operand (op, mode);
3127 /* Match an SI or HImode register for a zero_extract. */
3130 ext_register_operand (op, mode)
3132 enum machine_mode mode ATTRIBUTE_UNUSED;
3135 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3136 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3139 if (!register_operand (op, VOIDmode))
3142 /* Be curefull to accept only registers having upper parts. */
3143 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3144 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3147 /* Return 1 if this is a valid binary floating-point operation.
3148 OP is the expression matched, and MODE is its mode. */
3151 binary_fp_operator (op, mode)
3153 enum machine_mode mode;
3155 if (mode != VOIDmode && mode != GET_MODE (op))
3158 switch (GET_CODE (op))
3164 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3172 mult_operator(op, mode)
3174 enum machine_mode mode ATTRIBUTE_UNUSED;
3176 return GET_CODE (op) == MULT;
3180 div_operator(op, mode)
3182 enum machine_mode mode ATTRIBUTE_UNUSED;
3184 return GET_CODE (op) == DIV;
3188 arith_or_logical_operator (op, mode)
3190 enum machine_mode mode;
3192 return ((mode == VOIDmode || GET_MODE (op) == mode)
3193 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3194 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3197 /* Returns 1 if OP is memory operand with a displacement. */
3200 memory_displacement_operand (op, mode)
3202 enum machine_mode mode;
3204 struct ix86_address parts;
3206 if (! memory_operand (op, mode))
3209 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3212 return parts.disp != NULL_RTX;
3215 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3216 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3218 ??? It seems likely that this will only work because cmpsi is an
3219 expander, and no actual insns use this. */
3222 cmpsi_operand (op, mode)
3224 enum machine_mode mode;
3226 if (nonimmediate_operand (op, mode))
3229 if (GET_CODE (op) == AND
3230 && GET_MODE (op) == SImode
3231 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3232 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3233 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3234 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3235 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3236 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3242 /* Returns 1 if OP is memory operand that can not be represented by the
3246 long_memory_operand (op, mode)
3248 enum machine_mode mode;
3250 if (! memory_operand (op, mode))
3253 return memory_address_length (op) != 0;
3256 /* Return nonzero if the rtx is known aligned. */
3259 aligned_operand (op, mode)
3261 enum machine_mode mode;
3263 struct ix86_address parts;
3265 if (!general_operand (op, mode))
3268 /* Registers and immediate operands are always "aligned". */
3269 if (GET_CODE (op) != MEM)
3272 /* Don't even try to do any aligned optimizations with volatiles. */
3273 if (MEM_VOLATILE_P (op))
3278 /* Pushes and pops are only valid on the stack pointer. */
3279 if (GET_CODE (op) == PRE_DEC
3280 || GET_CODE (op) == POST_INC)
3283 /* Decode the address. */
3284 if (! ix86_decompose_address (op, &parts))
3287 /* Look for some component that isn't known to be aligned. */
3291 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3296 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3301 if (GET_CODE (parts.disp) != CONST_INT
3302 || (INTVAL (parts.disp) & 3) != 0)
3306 /* Didn't find one -- this must be an aligned address. */
3310 /* Return true if the constant is something that can be loaded with
3311 a special instruction. Only handle 0.0 and 1.0; others are less
3315 standard_80387_constant_p (x)
3318 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3320 /* Note that on the 80387, other constants, such as pi, that we should support
3321 too. On some machines, these are much slower to load as standard constant,
3322 than to load from doubles in memory. */
3323 if (x == CONST0_RTX (GET_MODE (x)))
3325 if (x == CONST1_RTX (GET_MODE (x)))
3330 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3333 standard_sse_constant_p (x)
3336 if (GET_CODE (x) != CONST_DOUBLE)
3338 return (x == CONST0_RTX (GET_MODE (x)));
3341 /* Returns 1 if OP contains a symbol reference */
3344 symbolic_reference_mentioned_p (op)
3347 register const char *fmt;
3350 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3353 fmt = GET_RTX_FORMAT (GET_CODE (op));
3354 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3360 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3361 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3365 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3372 /* Return 1 if it is appropriate to emit `ret' instructions in the
3373 body of a function. Do this only if the epilogue is simple, needing a
3374 couple of insns. Prior to reloading, we can't tell how many registers
3375 must be saved, so return 0 then. Return 0 if there is no frame
3376 marker to de-allocate.
3378 If NON_SAVING_SETJMP is defined and true, then it is not possible
3379 for the epilogue to be simple, so return 0. This is a special case
3380 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3381 until final, but jump_optimize may need to know sooner if a
3385 ix86_can_use_return_insn_p ()
3387 struct ix86_frame frame;
3389 #ifdef NON_SAVING_SETJMP
3390 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3393 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
3394 if (profile_block_flag == 2)
3398 if (! reload_completed || frame_pointer_needed)
3401 /* Don't allow more than 32 pop, since that's all we can do
3402 with one instruction. */
3403 if (current_function_pops_args
3404 && current_function_args_size >= 32768)
3407 ix86_compute_frame_layout (&frame);
3408 return frame.to_allocate == 0 && frame.nregs == 0;
3411 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3413 x86_64_sign_extended_value (value)
3416 switch (GET_CODE (value))
3418 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3419 to be at least 32 and this all acceptable constants are
3420 represented as CONST_INT. */
3422 if (HOST_BITS_PER_WIDE_INT == 32)
3426 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3427 return trunc_int_for_mode (val, SImode) == val;
3431 /* For certain code models, the symbolic references are known to fit. */
3433 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3435 /* For certain code models, the code is near as well. */
3437 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3439 /* We also may accept the offsetted memory references in certain special
3442 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3443 && XVECLEN (XEXP (value, 0), 0) == 1
3444 && XINT (XEXP (value, 0), 1) == 15)
3446 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3448 rtx op1 = XEXP (XEXP (value, 0), 0);
3449 rtx op2 = XEXP (XEXP (value, 0), 1);
3450 HOST_WIDE_INT offset;
3452 if (ix86_cmodel == CM_LARGE)
3454 if (GET_CODE (op2) != CONST_INT)
3456 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3457 switch (GET_CODE (op1))
3460 /* For CM_SMALL assume that latest object is 1MB before
3461 end of 31bits boundary. We may also accept pretty
3462 large negative constants knowing that all objects are
3463 in the positive half of address space. */
3464 if (ix86_cmodel == CM_SMALL
3465 && offset < 1024*1024*1024
3466 && trunc_int_for_mode (offset, SImode) == offset)
3468 /* For CM_KERNEL we know that all object resist in the
3469 negative half of 32bits address space. We may not
3470 accept negative offsets, since they may be just off
3471 and we may accept pretty large possitive ones. */
3472 if (ix86_cmodel == CM_KERNEL
3474 && trunc_int_for_mode (offset, SImode) == offset)
3478 /* These conditions are similar to SYMBOL_REF ones, just the
3479 constraints for code models differ. */
3480 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3481 && offset < 1024*1024*1024
3482 && trunc_int_for_mode (offset, SImode) == offset)
3484 if (ix86_cmodel == CM_KERNEL
3486 && trunc_int_for_mode (offset, SImode) == offset)
3499 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3501 x86_64_zero_extended_value (value)
3504 switch (GET_CODE (value))
3507 if (HOST_BITS_PER_WIDE_INT == 32)
3508 return (GET_MODE (value) == VOIDmode
3509 && !CONST_DOUBLE_HIGH (value));
3513 if (HOST_BITS_PER_WIDE_INT == 32)
3514 return INTVAL (value) >= 0;
3516 return !(INTVAL (value) & ~(HOST_WIDE_INT)0xffffffff);
3519 /* For certain code models, the symbolic references are known to fit. */
3521 return ix86_cmodel == CM_SMALL;
3523 /* For certain code models, the code is near as well. */
3525 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3527 /* We also may accept the offsetted memory references in certain special
3530 if (GET_CODE (XEXP (value, 0)) == PLUS)
3532 rtx op1 = XEXP (XEXP (value, 0), 0);
3533 rtx op2 = XEXP (XEXP (value, 0), 1);
3535 if (ix86_cmodel == CM_LARGE)
3537 switch (GET_CODE (op1))
3541 /* For small code model we may accept pretty large possitive
3542 offsets, since one bit is available for free. Negative
3543 offsets are limited by the size of NULL pointer area
3544 specified by the ABI. */
3545 if (ix86_cmodel == CM_SMALL
3546 && GET_CODE (op2) == CONST_INT
3547 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3548 && (trunc_int_for_mode (INTVAL (op2), SImode)
3551 /* ??? For the kernel, we may accept adjustment of
3552 -0x10000000, since we know that it will just convert
3553 negative address space to possitive, but perhaps this
3554 is not worthwhile. */
3557 /* These conditions are similar to SYMBOL_REF ones, just the
3558 constraints for code models differ. */
3559 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3560 && GET_CODE (op2) == CONST_INT
3561 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3562 && (trunc_int_for_mode (INTVAL (op2), SImode)
3576 /* Value should be nonzero if functions must have frame pointers.
3577 Zero means the frame pointer need not be set up (and parms may
3578 be accessed via the stack pointer) in functions that seem suitable. */
3581 ix86_frame_pointer_required ()
3583 /* If we accessed previous frames, then the generated code expects
3584 to be able to access the saved ebp value in our frame. */
3585 if (cfun->machine->accesses_prev_frame)
3588 /* Several x86 os'es need a frame pointer for other reasons,
3589 usually pertaining to setjmp. */
3590 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3593 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3594 the frame pointer by default. Turn it back on now if we've not
3595 got a leaf function. */
3596 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
3602 /* Record that the current function accesses previous call frames. */
3605 ix86_setup_frame_addresses ()
3607 cfun->machine->accesses_prev_frame = 1;
3610 static char pic_label_name[32];
3612 /* This function generates code for -fpic that loads %ebx with
3613 the return address of the caller and then returns. */
3616 ix86_asm_file_end (file)
3621 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
3624 /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related
3625 to updating relocations to a section being discarded such that this
3626 doesn't work. Ought to detect this at configure time. */
3628 /* The trick here is to create a linkonce section containing the
3629 pic label thunk, but to refer to it with an internal label.
3630 Because the label is internal, we don't have inter-dso name
3631 binding issues on hosts that don't support ".hidden".
3633 In order to use these macros, however, we must create a fake
3635 if (targetm.have_named_sections)
3637 tree decl = build_decl (FUNCTION_DECL,
3638 get_identifier ("i686.get_pc_thunk"),
3640 DECL_ONE_ONLY (decl) = 1;
3641 UNIQUE_SECTION (decl, 0);
3642 named_section (decl, NULL);
3649 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
3650 internal (non-global) label that's being emitted, it didn't make
3651 sense to have .type information for local labels. This caused
3652 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
3653 me debug info for a label that you're declaring non-global?) this
3654 was changed to call ASM_OUTPUT_LABEL() instead. */
3656 ASM_OUTPUT_LABEL (file, pic_label_name);
3658 xops[0] = pic_offset_table_rtx;
3659 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3660 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3661 output_asm_insn ("ret", xops);
3665 load_pic_register ()
3672 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3674 if (TARGET_DEEP_BRANCH_PREDICTION)
3676 if (! pic_label_name[0])
3677 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
3678 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
3682 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
3685 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
3687 if (! TARGET_DEEP_BRANCH_PREDICTION)
3688 emit_insn (gen_popsi1 (pic_offset_table_rtx));
3690 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
3693 /* Generate an "push" pattern for input ARG. */
3699 return gen_rtx_SET (VOIDmode,
3701 gen_rtx_PRE_DEC (Pmode,
3702 stack_pointer_rtx)),
3706 /* Return 1 if we need to save REGNO. */
3708 ix86_save_reg (regno, maybe_eh_return)
3710 int maybe_eh_return;
3714 && regno == PIC_OFFSET_TABLE_REGNUM
3715 && (current_function_uses_pic_offset_table
3716 || current_function_uses_const_pool
3717 || current_function_calls_eh_return))
3720 if (current_function_calls_eh_return && maybe_eh_return)
3725 unsigned test = EH_RETURN_DATA_REGNO(i);
3726 if (test == INVALID_REGNUM)
3728 if (test == (unsigned) regno)
3733 return (regs_ever_live[regno]
3734 && !call_used_regs[regno]
3735 && !fixed_regs[regno]
3736 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
3739 /* Return number of registers to be saved on the stack. */
3747 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
3748 if (ix86_save_reg (regno, true))
3753 /* Return the offset between two registers, one to be eliminated, and the other
3754 its replacement, at the start of a routine. */
3757 ix86_initial_elimination_offset (from, to)
3761 struct ix86_frame frame;
3762 ix86_compute_frame_layout (&frame);
3764 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3765 return frame.hard_frame_pointer_offset;
3766 else if (from == FRAME_POINTER_REGNUM
3767 && to == HARD_FRAME_POINTER_REGNUM)
3768 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
3771 if (to != STACK_POINTER_REGNUM)
3773 else if (from == ARG_POINTER_REGNUM)
3774 return frame.stack_pointer_offset;
3775 else if (from != FRAME_POINTER_REGNUM)
3778 return frame.stack_pointer_offset - frame.frame_pointer_offset;
3782 /* Fill structure ix86_frame about frame of currently computed function. */
3785 ix86_compute_frame_layout (frame)
3786 struct ix86_frame *frame;
3788 HOST_WIDE_INT total_size;
3789 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
3791 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
3792 HOST_WIDE_INT size = get_frame_size ();
3794 frame->nregs = ix86_nsaved_regs ();
3797 /* Skip return value and save base pointer. */
3798 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
3800 frame->hard_frame_pointer_offset = offset;
3802 /* Do some sanity checking of stack_alignment_needed and
3803 preferred_alignment, since i386 port is the only using those features
3804 that may break easily. */
3806 if (size && !stack_alignment_needed)
3808 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
3810 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3812 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3815 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
3816 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
3818 /* Register save area */
3819 offset += frame->nregs * UNITS_PER_WORD;
3822 if (ix86_save_varrargs_registers)
3824 offset += X86_64_VARARGS_SIZE;
3825 frame->va_arg_size = X86_64_VARARGS_SIZE;
3828 frame->va_arg_size = 0;
3830 /* Align start of frame for local function. */
3831 frame->padding1 = ((offset + stack_alignment_needed - 1)
3832 & -stack_alignment_needed) - offset;
3834 offset += frame->padding1;
3836 /* Frame pointer points here. */
3837 frame->frame_pointer_offset = offset;
3841 /* Add outgoing arguments area. */
3842 if (ACCUMULATE_OUTGOING_ARGS)
3844 offset += current_function_outgoing_args_size;
3845 frame->outgoing_arguments_size = current_function_outgoing_args_size;
3848 frame->outgoing_arguments_size = 0;
3850 /* Align stack boundary. */
3851 frame->padding2 = ((offset + preferred_alignment - 1)
3852 & -preferred_alignment) - offset;
3854 offset += frame->padding2;
3856 /* We've reached end of stack frame. */
3857 frame->stack_pointer_offset = offset;
3859 /* Size prologue needs to allocate. */
3860 frame->to_allocate =
3861 (size + frame->padding1 + frame->padding2
3862 + frame->outgoing_arguments_size + frame->va_arg_size);
3864 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
3865 && current_function_is_leaf)
3867 frame->red_zone_size = frame->to_allocate;
3868 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
3869 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
3872 frame->red_zone_size = 0;
3873 frame->to_allocate -= frame->red_zone_size;
3874 frame->stack_pointer_offset -= frame->red_zone_size;
3876 fprintf (stderr, "nregs: %i\n", frame->nregs);
3877 fprintf (stderr, "size: %i\n", size);
3878 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
3879 fprintf (stderr, "padding1: %i\n", frame->padding1);
3880 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
3881 fprintf (stderr, "padding2: %i\n", frame->padding2);
3882 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
3883 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
3884 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
3885 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
3886 frame->hard_frame_pointer_offset);
3887 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
3891 /* Emit code to save registers in the prologue. */
3894 ix86_emit_save_regs ()
3899 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
3900 if (ix86_save_reg (regno, true))
3902 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
3903 RTX_FRAME_RELATED_P (insn) = 1;
3907 /* Emit code to save registers using MOV insns. First register
3908 is restored from POINTER + OFFSET. */
3910 ix86_emit_save_regs_using_mov (pointer, offset)
3912 HOST_WIDE_INT offset;
3917 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3918 if (ix86_save_reg (regno, true))
3920 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
3922 gen_rtx_REG (Pmode, regno));
3923 RTX_FRAME_RELATED_P (insn) = 1;
3924 offset += UNITS_PER_WORD;
3928 /* Expand the prologue into a bunch of separate insns. */
3931 ix86_expand_prologue ()
3934 int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
3935 || current_function_uses_const_pool)
3937 struct ix86_frame frame;
3939 HOST_WIDE_INT allocate;
3943 use_fast_prologue_epilogue
3944 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
3945 if (TARGET_PROLOGUE_USING_MOVE)
3946 use_mov = use_fast_prologue_epilogue;
3948 ix86_compute_frame_layout (&frame);
3950 /* Note: AT&T enter does NOT have reversed args. Enter is probably
3951 slower on all targets. Also sdb doesn't like it. */
3953 if (frame_pointer_needed)
3955 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
3956 RTX_FRAME_RELATED_P (insn) = 1;
3958 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3959 RTX_FRAME_RELATED_P (insn) = 1;
3962 allocate = frame.to_allocate;
3963 /* In case we are dealing only with single register and empty frame,
3964 push is equivalent of the mov+add sequence. */
3965 if (allocate == 0 && frame.nregs <= 1)
3969 ix86_emit_save_regs ();
3971 allocate += frame.nregs * UNITS_PER_WORD;
3975 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
3977 insn = emit_insn (gen_pro_epilogue_adjust_stack
3978 (stack_pointer_rtx, stack_pointer_rtx,
3979 GEN_INT (-allocate)));
3980 RTX_FRAME_RELATED_P (insn) = 1;
3984 /* ??? Is this only valid for Win32? */
3991 arg0 = gen_rtx_REG (SImode, 0);
3992 emit_move_insn (arg0, GEN_INT (allocate));
3994 sym = gen_rtx_MEM (FUNCTION_MODE,
3995 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
3996 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
3998 CALL_INSN_FUNCTION_USAGE (insn)
3999 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4000 CALL_INSN_FUNCTION_USAGE (insn));
4004 if (!frame_pointer_needed || !frame.to_allocate)
4005 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4007 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4008 -frame.nregs * UNITS_PER_WORD);
4011 #ifdef SUBTARGET_PROLOGUE
4016 load_pic_register ();
4018 /* If we are profiling, make sure no instructions are scheduled before
4019 the call to mcount. However, if -fpic, the above call will have
4021 if ((profile_flag || profile_block_flag) && ! pic_reg_used)
4022 emit_insn (gen_blockage ());
4025 /* Emit code to restore saved registers using MOV insns. First register
4026 is restored from POINTER + OFFSET. */
4028 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4031 int maybe_eh_return;
4035 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4036 if (ix86_save_reg (regno, maybe_eh_return))
4038 emit_move_insn (gen_rtx_REG (Pmode, regno),
4039 adjust_address (gen_rtx_MEM (Pmode, pointer),
4041 offset += UNITS_PER_WORD;
4045 /* Restore function stack, frame, and registers. */
4048 ix86_expand_epilogue (style)
4052 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4053 struct ix86_frame frame;
4054 HOST_WIDE_INT offset;
4056 ix86_compute_frame_layout (&frame);
4058 /* Calculate start of saved registers relative to ebp. Special care
4059 must be taken for the normal return case of a function using
4060 eh_return: the eax and edx registers are marked as saved, but not
4061 restored along this path. */
4062 offset = frame.nregs;
4063 if (current_function_calls_eh_return && style != 2)
4065 offset *= -UNITS_PER_WORD;
4067 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
4068 if (profile_block_flag == 2)
4070 FUNCTION_BLOCK_PROFILER_EXIT;
4074 /* If we're only restoring one register and sp is not valid then
4075 using a move instruction to restore the register since it's
4076 less work than reloading sp and popping the register.
4078 The default code result in stack adjustment using add/lea instruction,
4079 while this code results in LEAVE instruction (or discrete equivalent),
4080 so it is profitable in some other cases as well. Especially when there
4081 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4082 and there is exactly one register to pop. This heruistic may need some
4083 tuning in future. */
4084 if ((!sp_valid && frame.nregs <= 1)
4085 || (TARGET_EPILOGUE_USING_MOVE
4086 && use_fast_prologue_epilogue
4087 && (frame.nregs > 1 || frame.to_allocate))
4088 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4089 || (frame_pointer_needed && TARGET_USE_LEAVE
4090 && use_fast_prologue_epilogue && frame.nregs == 1)
4091 || current_function_calls_eh_return)
4093 /* Restore registers. We can use ebp or esp to address the memory
4094 locations. If both are available, default to ebp, since offsets
4095 are known to be small. Only exception is esp pointing directly to the
4096 end of block of saved registers, where we may simplify addressing
4099 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4100 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4101 frame.to_allocate, style == 2);
4103 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4104 offset, style == 2);
4106 /* eh_return epilogues need %ecx added to the stack pointer. */
4109 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4111 if (frame_pointer_needed)
4113 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4114 tmp = plus_constant (tmp, UNITS_PER_WORD);
4115 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4117 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4118 emit_move_insn (hard_frame_pointer_rtx, tmp);
4120 emit_insn (gen_pro_epilogue_adjust_stack
4121 (stack_pointer_rtx, sa, const0_rtx));
4125 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4126 tmp = plus_constant (tmp, (frame.to_allocate
4127 + frame.nregs * UNITS_PER_WORD));
4128 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4131 else if (!frame_pointer_needed)
4132 emit_insn (gen_pro_epilogue_adjust_stack
4133 (stack_pointer_rtx, stack_pointer_rtx,
4134 GEN_INT (frame.to_allocate
4135 + frame.nregs * UNITS_PER_WORD)));
4136 /* If not an i386, mov & pop is faster than "leave". */
4137 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4138 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4141 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4142 hard_frame_pointer_rtx,
4145 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4147 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4152 /* First step is to deallocate the stack frame so that we can
4153 pop the registers. */
4156 if (!frame_pointer_needed)
4158 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4159 hard_frame_pointer_rtx,
4162 else if (frame.to_allocate)
4163 emit_insn (gen_pro_epilogue_adjust_stack
4164 (stack_pointer_rtx, stack_pointer_rtx,
4165 GEN_INT (frame.to_allocate)));
4167 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4168 if (ix86_save_reg (regno, false))
4171 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4173 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4175 if (frame_pointer_needed)
4177 /* Leave results in shorter depdendancy chains on CPUs that are
4178 able to grok it fast. */
4179 if (TARGET_USE_LEAVE)
4180 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4181 else if (TARGET_64BIT)
4182 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4184 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4188 /* Sibcall epilogues don't want a return instruction. */
4192 if (current_function_pops_args && current_function_args_size)
4194 rtx popc = GEN_INT (current_function_pops_args);
4196 /* i386 can only pop 64K bytes. If asked to pop more, pop
4197 return address, do explicit add, and jump indirectly to the
4200 if (current_function_pops_args >= 65536)
4202 rtx ecx = gen_rtx_REG (SImode, 2);
4204 /* There are is no "pascal" calling convention in 64bit ABI. */
4208 emit_insn (gen_popsi1 (ecx));
4209 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4210 emit_jump_insn (gen_return_indirect_internal (ecx));
4213 emit_jump_insn (gen_return_pop_internal (popc));
4216 emit_jump_insn (gen_return_internal ());
4219 /* Extract the parts of an RTL expression that is a valid memory address
4220 for an instruction. Return false if the structure of the address is
4224 ix86_decompose_address (addr, out)
4226 struct ix86_address *out;
4228 rtx base = NULL_RTX;
4229 rtx index = NULL_RTX;
4230 rtx disp = NULL_RTX;
4231 HOST_WIDE_INT scale = 1;
4232 rtx scale_rtx = NULL_RTX;
4234 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4236 else if (GET_CODE (addr) == PLUS)
4238 rtx op0 = XEXP (addr, 0);
4239 rtx op1 = XEXP (addr, 1);
4240 enum rtx_code code0 = GET_CODE (op0);
4241 enum rtx_code code1 = GET_CODE (op1);
4243 if (code0 == REG || code0 == SUBREG)
4245 if (code1 == REG || code1 == SUBREG)
4246 index = op0, base = op1; /* index + base */
4248 base = op0, disp = op1; /* base + displacement */
4250 else if (code0 == MULT)
4252 index = XEXP (op0, 0);
4253 scale_rtx = XEXP (op0, 1);
4254 if (code1 == REG || code1 == SUBREG)
4255 base = op1; /* index*scale + base */
4257 disp = op1; /* index*scale + disp */
4259 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4261 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4262 scale_rtx = XEXP (XEXP (op0, 0), 1);
4263 base = XEXP (op0, 1);
4266 else if (code0 == PLUS)
4268 index = XEXP (op0, 0); /* index + base + disp */
4269 base = XEXP (op0, 1);
4275 else if (GET_CODE (addr) == MULT)
4277 index = XEXP (addr, 0); /* index*scale */
4278 scale_rtx = XEXP (addr, 1);
4280 else if (GET_CODE (addr) == ASHIFT)
4284 /* We're called for lea too, which implements ashift on occasion. */
4285 index = XEXP (addr, 0);
4286 tmp = XEXP (addr, 1);
4287 if (GET_CODE (tmp) != CONST_INT)
4289 scale = INTVAL (tmp);
4290 if ((unsigned HOST_WIDE_INT) scale > 3)
4295 disp = addr; /* displacement */
4297 /* Extract the integral value of scale. */
4300 if (GET_CODE (scale_rtx) != CONST_INT)
4302 scale = INTVAL (scale_rtx);
4305 /* Allow arg pointer and stack pointer as index if there is not scaling */
4306 if (base && index && scale == 1
4307 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4308 || index == stack_pointer_rtx))
4315 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4316 if ((base == hard_frame_pointer_rtx
4317 || base == frame_pointer_rtx
4318 || base == arg_pointer_rtx) && !disp)
4321 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4322 Avoid this by transforming to [%esi+0]. */
4323 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4324 && base && !index && !disp
4326 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4329 /* Special case: encode reg+reg instead of reg*2. */
4330 if (!base && index && scale && scale == 2)
4331 base = index, scale = 1;
4333 /* Special case: scaling cannot be encoded without base or displacement. */
4334 if (!base && !disp && index && scale != 1)
4345 /* Return cost of the memory address x.
4346 For i386, it is better to use a complex address than let gcc copy
4347 the address into a reg and make a new pseudo. But not if the address
4348 requires to two regs - that would mean more pseudos with longer
4351 ix86_address_cost (x)
4354 struct ix86_address parts;
4357 if (!ix86_decompose_address (x, &parts))
4360 /* More complex memory references are better. */
4361 if (parts.disp && parts.disp != const0_rtx)
4364 /* Attempt to minimize number of registers in the address. */
4366 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4368 && (!REG_P (parts.index)
4369 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4373 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4375 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4376 && parts.base != parts.index)
4379 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
4380 since it's predecode logic can't detect the length of instructions
4381 and it degenerates to vector decoded. Increase cost of such
4382 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4383 to split such addresses or even refuse such addresses at all.
4385 Following addressing modes are affected:
4390 The first and last case may be avoidable by explicitly coding the zero in
4391 memory address, but I don't have AMD-K6 machine handy to check this
4395 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4396 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4397 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4403 /* If X is a machine specific address (i.e. a symbol or label being
4404 referenced as a displacement from the GOT implemented using an
4405 UNSPEC), then return the base term. Otherwise return X. */
4408 ix86_find_base_term (x)
4415 if (GET_CODE (x) != CONST)
4418 if (GET_CODE (term) == PLUS
4419 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4420 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4421 term = XEXP (term, 0);
4422 if (GET_CODE (term) != UNSPEC
4423 || XVECLEN (term, 0) != 1
4424 || XINT (term, 1) != 15)
4427 term = XVECEXP (term, 0, 0);
4429 if (GET_CODE (term) != SYMBOL_REF
4430 && GET_CODE (term) != LABEL_REF)
4436 if (GET_CODE (x) != PLUS
4437 || XEXP (x, 0) != pic_offset_table_rtx
4438 || GET_CODE (XEXP (x, 1)) != CONST)
4441 term = XEXP (XEXP (x, 1), 0);
4443 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4444 term = XEXP (term, 0);
4446 if (GET_CODE (term) != UNSPEC
4447 || XVECLEN (term, 0) != 1
4448 || XINT (term, 1) != 7)
4451 term = XVECEXP (term, 0, 0);
4453 if (GET_CODE (term) != SYMBOL_REF
4454 && GET_CODE (term) != LABEL_REF)
4460 /* Determine if a given CONST RTX is a valid memory displacement
4464 legitimate_pic_address_disp_p (disp)
4467 /* In 64bit mode we can allow direct addresses of symbols and labels
4468 when they are not dynamic symbols. */
4472 if (GET_CODE (disp) == CONST)
4474 /* ??? Handle PIC code models */
4475 if (GET_CODE (x) == PLUS
4476 && (GET_CODE (XEXP (x, 1)) == CONST_INT
4477 && ix86_cmodel == CM_SMALL_PIC
4478 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4479 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4481 if (local_symbolic_operand (x, Pmode))
4484 if (GET_CODE (disp) != CONST)
4486 disp = XEXP (disp, 0);
4490 /* We are unsafe to allow PLUS expressions. This limit allowed distance
4491 of GOT tables. We should not need these anyway. */
4492 if (GET_CODE (disp) != UNSPEC
4493 || XVECLEN (disp, 0) != 1
4494 || XINT (disp, 1) != 15)
4497 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4498 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4503 if (GET_CODE (disp) == PLUS)
4505 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4507 disp = XEXP (disp, 0);
4510 if (GET_CODE (disp) != UNSPEC
4511 || XVECLEN (disp, 0) != 1)
4514 /* Must be @GOT or @GOTOFF. */
4515 switch (XINT (disp, 1))
4518 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
4520 case 7: /* @GOTOFF */
4521 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4527 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
4528 memory address for an instruction. The MODE argument is the machine mode
4529 for the MEM expression that wants to use this address.
4531 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
4532 convert common non-canonical forms to canonical form so that they will
4536 legitimate_address_p (mode, addr, strict)
4537 enum machine_mode mode;
4541 struct ix86_address parts;
4542 rtx base, index, disp;
4543 HOST_WIDE_INT scale;
4544 const char *reason = NULL;
4545 rtx reason_rtx = NULL_RTX;
4547 if (TARGET_DEBUG_ADDR)
4550 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
4551 GET_MODE_NAME (mode), strict);
4555 if (! ix86_decompose_address (addr, &parts))
4557 reason = "decomposition failed";
4562 index = parts.index;
4564 scale = parts.scale;
4566 /* Validate base register.
4568 Don't allow SUBREG's here, it can lead to spill failures when the base
4569 is one word out of a two word structure, which is represented internally
4576 if (GET_CODE (base) != REG)
4578 reason = "base is not a register";
4582 if (GET_MODE (base) != Pmode)
4584 reason = "base is not in Pmode";
4588 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
4589 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
4591 reason = "base is not valid";
4596 /* Validate index register.
4598 Don't allow SUBREG's here, it can lead to spill failures when the index
4599 is one word out of a two word structure, which is represented internally
4606 if (GET_CODE (index) != REG)
4608 reason = "index is not a register";
4612 if (GET_MODE (index) != Pmode)
4614 reason = "index is not in Pmode";
4618 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
4619 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
4621 reason = "index is not valid";
4626 /* Validate scale factor. */
4629 reason_rtx = GEN_INT (scale);
4632 reason = "scale without index";
4636 if (scale != 2 && scale != 4 && scale != 8)
4638 reason = "scale is not a valid multiplier";
4643 /* Validate displacement. */
4648 if (!CONSTANT_ADDRESS_P (disp))
4650 reason = "displacement is not constant";
4656 if (!x86_64_sign_extended_value (disp))
4658 reason = "displacement is out of range";
4664 if (GET_CODE (disp) == CONST_DOUBLE)
4666 reason = "displacement is a const_double";
4671 if (flag_pic && SYMBOLIC_CONST (disp))
4673 if (TARGET_64BIT && (index || base))
4675 reason = "non-constant pic memory reference";
4678 if (! legitimate_pic_address_disp_p (disp))
4680 reason = "displacement is an invalid pic construct";
4684 /* This code used to verify that a symbolic pic displacement
4685 includes the pic_offset_table_rtx register.
4687 While this is good idea, unfortunately these constructs may
4688 be created by "adds using lea" optimization for incorrect
4697 This code is nonsensical, but results in addressing
4698 GOT table with pic_offset_table_rtx base. We can't
4699 just refuse it easily, since it gets matched by
4700 "addsi3" pattern, that later gets split to lea in the
4701 case output register differs from input. While this
4702 can be handled by separate addsi pattern for this case
4703 that never results in lea, this seems to be easier and
4704 correct fix for crash to disable this test. */
4706 else if (HALF_PIC_P ())
4708 if (! HALF_PIC_ADDRESS_P (disp)
4709 || (base != NULL_RTX || index != NULL_RTX))
4711 reason = "displacement is an invalid half-pic reference";
4717 /* Everything looks valid. */
4718 if (TARGET_DEBUG_ADDR)
4719 fprintf (stderr, "Success.\n");
4723 if (TARGET_DEBUG_ADDR)
4725 fprintf (stderr, "Error: %s\n", reason);
4726 debug_rtx (reason_rtx);
4731 /* Return an unique alias set for the GOT. */
4733 static HOST_WIDE_INT
4734 ix86_GOT_alias_set ()
4736 static HOST_WIDE_INT set = -1;
4738 set = new_alias_set ();
4742 /* Return a legitimate reference for ORIG (an address) using the
4743 register REG. If REG is 0, a new pseudo is generated.
4745 There are two types of references that must be handled:
4747 1. Global data references must load the address from the GOT, via
4748 the PIC reg. An insn is emitted to do this load, and the reg is
4751 2. Static data references, constant pool addresses, and code labels
4752 compute the address as an offset from the GOT, whose base is in
4753 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
4754 differentiate them from global data objects. The returned
4755 address is the PIC reg + an unspec constant.
4757 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
4758 reg also appears in the address. */
4761 legitimize_pic_address (orig, reg)
4769 if (local_symbolic_operand (addr, Pmode))
4771 /* In 64bit mode we can address such objects directly. */
4776 /* This symbol may be referenced via a displacement from the PIC
4777 base address (@GOTOFF). */
4779 current_function_uses_pic_offset_table = 1;
4780 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
4781 new = gen_rtx_CONST (Pmode, new);
4782 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4786 emit_move_insn (reg, new);
4791 else if (GET_CODE (addr) == SYMBOL_REF)
4795 current_function_uses_pic_offset_table = 1;
4796 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 15);
4797 new = gen_rtx_CONST (Pmode, new);
4798 new = gen_rtx_MEM (Pmode, new);
4799 RTX_UNCHANGING_P (new) = 1;
4800 set_mem_alias_set (new, ix86_GOT_alias_set ());
4803 reg = gen_reg_rtx (Pmode);
4804 /* Use directly gen_movsi, otherwise the address is loaded
4805 into register for CSE. We don't want to CSE this addresses,
4806 instead we CSE addresses from the GOT table, so skip this. */
4807 emit_insn (gen_movsi (reg, new));
4812 /* This symbol must be referenced via a load from the
4813 Global Offset Table (@GOT). */
4815 current_function_uses_pic_offset_table = 1;
4816 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
4817 new = gen_rtx_CONST (Pmode, new);
4818 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4819 new = gen_rtx_MEM (Pmode, new);
4820 RTX_UNCHANGING_P (new) = 1;
4821 set_mem_alias_set (new, ix86_GOT_alias_set ());
4824 reg = gen_reg_rtx (Pmode);
4825 emit_move_insn (reg, new);
4831 if (GET_CODE (addr) == CONST)
4833 addr = XEXP (addr, 0);
4834 if (GET_CODE (addr) == UNSPEC)
4836 /* Check that the unspec is one of the ones we generate? */
4838 else if (GET_CODE (addr) != PLUS)
4841 if (GET_CODE (addr) == PLUS)
4843 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
4845 /* Check first to see if this is a constant offset from a @GOTOFF
4846 symbol reference. */
4847 if (local_symbolic_operand (op0, Pmode)
4848 && GET_CODE (op1) == CONST_INT)
4852 current_function_uses_pic_offset_table = 1;
4853 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
4854 new = gen_rtx_PLUS (Pmode, new, op1);
4855 new = gen_rtx_CONST (Pmode, new);
4856 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4860 emit_move_insn (reg, new);
4866 /* ??? We need to limit offsets here. */
4871 base = legitimize_pic_address (XEXP (addr, 0), reg);
4872 new = legitimize_pic_address (XEXP (addr, 1),
4873 base == reg ? NULL_RTX : reg);
4875 if (GET_CODE (new) == CONST_INT)
4876 new = plus_constant (base, INTVAL (new));
4879 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
4881 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
4882 new = XEXP (new, 1);
4884 new = gen_rtx_PLUS (Pmode, base, new);
4892 /* Try machine-dependent ways of modifying an illegitimate address
4893 to be legitimate. If we find one, return the new, valid address.
4894 This macro is used in only one place: `memory_address' in explow.c.
4896 OLDX is the address as it was before break_out_memory_refs was called.
4897 In some cases it is useful to look at this to decide what needs to be done.
4899 MODE and WIN are passed so that this macro can use
4900 GO_IF_LEGITIMATE_ADDRESS.
4902 It is always safe for this macro to do nothing. It exists to recognize
4903 opportunities to optimize the output.
4905 For the 80386, we handle X+REG by loading X into a register R and
4906 using R+REG. R will go in a general reg and indexing will be used.
4907 However, if REG is a broken-out memory address or multiplication,
4908 nothing needs to be done because REG can certainly go in a general reg.
4910 When -fpic is used, special handling is needed for symbolic references.
4911 See comments by legitimize_pic_address in i386.c for details. */
4914 legitimize_address (x, oldx, mode)
4916 register rtx oldx ATTRIBUTE_UNUSED;
4917 enum machine_mode mode;
4922 if (TARGET_DEBUG_ADDR)
4924 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
4925 GET_MODE_NAME (mode));
4929 if (flag_pic && SYMBOLIC_CONST (x))
4930 return legitimize_pic_address (x, 0);
4932 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
4933 if (GET_CODE (x) == ASHIFT
4934 && GET_CODE (XEXP (x, 1)) == CONST_INT
4935 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
4938 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
4939 GEN_INT (1 << log));
4942 if (GET_CODE (x) == PLUS)
4944 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
4946 if (GET_CODE (XEXP (x, 0)) == ASHIFT
4947 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4948 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
4951 XEXP (x, 0) = gen_rtx_MULT (Pmode,
4952 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
4953 GEN_INT (1 << log));
4956 if (GET_CODE (XEXP (x, 1)) == ASHIFT
4957 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
4958 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
4961 XEXP (x, 1) = gen_rtx_MULT (Pmode,
4962 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
4963 GEN_INT (1 << log));
4966 /* Put multiply first if it isn't already. */
4967 if (GET_CODE (XEXP (x, 1)) == MULT)
4969 rtx tmp = XEXP (x, 0);
4970 XEXP (x, 0) = XEXP (x, 1);
4975 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
4976 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
4977 created by virtual register instantiation, register elimination, and
4978 similar optimizations. */
4979 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
4982 x = gen_rtx_PLUS (Pmode,
4983 gen_rtx_PLUS (Pmode, XEXP (x, 0),
4984 XEXP (XEXP (x, 1), 0)),
4985 XEXP (XEXP (x, 1), 1));
4989 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
4990 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
4991 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
4992 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4993 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
4994 && CONSTANT_P (XEXP (x, 1)))
4997 rtx other = NULL_RTX;
4999 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5001 constant = XEXP (x, 1);
5002 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5004 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5006 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5007 other = XEXP (x, 1);
5015 x = gen_rtx_PLUS (Pmode,
5016 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5017 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5018 plus_constant (other, INTVAL (constant)));
5022 if (changed && legitimate_address_p (mode, x, FALSE))
5025 if (GET_CODE (XEXP (x, 0)) == MULT)
5028 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5031 if (GET_CODE (XEXP (x, 1)) == MULT)
5034 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5038 && GET_CODE (XEXP (x, 1)) == REG
5039 && GET_CODE (XEXP (x, 0)) == REG)
5042 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5045 x = legitimize_pic_address (x, 0);
5048 if (changed && legitimate_address_p (mode, x, FALSE))
5051 if (GET_CODE (XEXP (x, 0)) == REG)
5053 register rtx temp = gen_reg_rtx (Pmode);
5054 register rtx val = force_operand (XEXP (x, 1), temp);
5056 emit_move_insn (temp, val);
5062 else if (GET_CODE (XEXP (x, 1)) == REG)
5064 register rtx temp = gen_reg_rtx (Pmode);
5065 register rtx val = force_operand (XEXP (x, 0), temp);
5067 emit_move_insn (temp, val);
5077 /* Print an integer constant expression in assembler syntax. Addition
5078 and subtraction are the only arithmetic that may appear in these
5079 expressions. FILE is the stdio stream to write to, X is the rtx, and
5080 CODE is the operand print code from the output string. */
5083 output_pic_addr_const (file, x, code)
5090 switch (GET_CODE (x))
5100 assemble_name (file, XSTR (x, 0));
5101 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
5102 fputs ("@PLT", file);
5109 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5110 assemble_name (asm_out_file, buf);
5114 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5118 /* This used to output parentheses around the expression,
5119 but that does not work on the 386 (either ATT or BSD assembler). */
5120 output_pic_addr_const (file, XEXP (x, 0), code);
5124 if (GET_MODE (x) == VOIDmode)
5126 /* We can use %d if the number is <32 bits and positive. */
5127 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5128 fprintf (file, "0x%lx%08lx",
5129 (unsigned long) CONST_DOUBLE_HIGH (x),
5130 (unsigned long) CONST_DOUBLE_LOW (x));
5132 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5135 /* We can't handle floating point constants;
5136 PRINT_OPERAND must handle them. */
5137 output_operand_lossage ("floating constant misused");
5141 /* Some assemblers need integer constants to appear first. */
5142 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5144 output_pic_addr_const (file, XEXP (x, 0), code);
5146 output_pic_addr_const (file, XEXP (x, 1), code);
5148 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5150 output_pic_addr_const (file, XEXP (x, 1), code);
5152 output_pic_addr_const (file, XEXP (x, 0), code);
5159 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
5160 output_pic_addr_const (file, XEXP (x, 0), code);
5162 output_pic_addr_const (file, XEXP (x, 1), code);
5163 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
5167 if (XVECLEN (x, 0) != 1)
5169 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5170 switch (XINT (x, 1))
5173 fputs ("@GOT", file);
5176 fputs ("@GOTOFF", file);
5179 fputs ("@PLT", file);
5182 fputs ("@GOTPCREL(%RIP)", file);
5185 output_operand_lossage ("invalid UNSPEC as operand");
5191 output_operand_lossage ("invalid expression as operand");
5195 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5196 We need to handle our special PIC relocations. */
5199 i386_dwarf_output_addr_const (file, x)
5204 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : INT_ASM_OP);
5208 fprintf (file, "%s", INT_ASM_OP);
5211 output_pic_addr_const (file, x, '\0');
5213 output_addr_const (file, x);
5217 /* In the name of slightly smaller debug output, and to cater to
5218 general assembler losage, recognize PIC+GOTOFF and turn it back
5219 into a direct symbol reference. */
5222 i386_simplify_dwarf_addr (orig_x)
5229 if (GET_CODE (x) != CONST
5230 || GET_CODE (XEXP (x, 0)) != UNSPEC
5231 || XINT (XEXP (x, 0), 1) != 15)
5233 return XVECEXP (XEXP (x, 0), 0, 0);
5236 if (GET_CODE (x) != PLUS
5237 || GET_CODE (XEXP (x, 0)) != REG
5238 || GET_CODE (XEXP (x, 1)) != CONST)
5241 x = XEXP (XEXP (x, 1), 0);
5242 if (GET_CODE (x) == UNSPEC
5243 && (XINT (x, 1) == 6
5244 || XINT (x, 1) == 7))
5245 return XVECEXP (x, 0, 0);
5247 if (GET_CODE (x) == PLUS
5248 && GET_CODE (XEXP (x, 0)) == UNSPEC
5249 && GET_CODE (XEXP (x, 1)) == CONST_INT
5250 && (XINT (XEXP (x, 0), 1) == 6
5251 || XINT (XEXP (x, 0), 1) == 7))
5252 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
5258 put_condition_code (code, mode, reverse, fp, file)
5260 enum machine_mode mode;
5266 if (mode == CCFPmode || mode == CCFPUmode)
5268 enum rtx_code second_code, bypass_code;
5269 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
5270 if (bypass_code != NIL || second_code != NIL)
5272 code = ix86_fp_compare_code_to_integer (code);
5276 code = reverse_condition (code);
5287 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
5292 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
5293 Those same assemblers have the same but opposite losage on cmov. */
5296 suffix = fp ? "nbe" : "a";
5299 if (mode == CCNOmode || mode == CCGOCmode)
5301 else if (mode == CCmode || mode == CCGCmode)
5312 if (mode == CCNOmode || mode == CCGOCmode)
5314 else if (mode == CCmode || mode == CCGCmode)
5323 suffix = fp ? "nb" : "ae";
5326 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
5336 suffix = fp ? "u" : "p";
5339 suffix = fp ? "nu" : "np";
5344 fputs (suffix, file);
5348 print_reg (x, code, file)
5353 if (REGNO (x) == ARG_POINTER_REGNUM
5354 || REGNO (x) == FRAME_POINTER_REGNUM
5355 || REGNO (x) == FLAGS_REG
5356 || REGNO (x) == FPSR_REG)
5359 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
5362 if (code == 'w' || MMX_REG_P (x))
5364 else if (code == 'b')
5366 else if (code == 'k')
5368 else if (code == 'q')
5370 else if (code == 'y')
5372 else if (code == 'h')
5375 code = GET_MODE_SIZE (GET_MODE (x));
5377 /* Irritatingly, AMD extended registers use different naming convention
5378 from the normal registers. */
5379 if (REX_INT_REG_P (x))
5386 error ("Extended registers have no high halves\n");
5389 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
5392 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
5395 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
5398 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
5401 error ("Unsupported operand size for extended register.\n");
5409 if (STACK_TOP_P (x))
5411 fputs ("st(0)", file);
5418 if (! ANY_FP_REG_P (x))
5419 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
5423 fputs (hi_reg_name[REGNO (x)], file);
5426 fputs (qi_reg_name[REGNO (x)], file);
5429 fputs (qi_high_reg_name[REGNO (x)], file);
5437 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
5438 C -- print opcode suffix for set/cmov insn.
5439 c -- like C, but print reversed condition
5440 F,f -- likewise, but for floating-point.
5441 R -- print the prefix for register names.
5442 z -- print the opcode suffix for the size of the current operand.
5443 * -- print a star (in certain assembler syntax)
5444 A -- print an absolute memory reference.
5445 w -- print the operand as if it's a "word" (HImode) even if it isn't.
5446 s -- print a shift double count, followed by the assemblers argument
5448 b -- print the QImode name of the register for the indicated operand.
5449 %b0 would print %al if operands[0] is reg 0.
5450 w -- likewise, print the HImode name of the register.
5451 k -- likewise, print the SImode name of the register.
5452 q -- likewise, print the DImode name of the register.
5453 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
5454 y -- print "st(0)" instead of "st" as a register.
5455 D -- print condition for SSE cmp instruction.
5456 P -- if PIC, print an @PLT suffix.
5457 X -- don't print any sort of PIC '@' suffix for a symbol.
5461 print_operand (file, x, code)
5471 if (ASSEMBLER_DIALECT == 0)
5476 if (ASSEMBLER_DIALECT == 0)
5478 else if (ASSEMBLER_DIALECT == 1)
5480 /* Intel syntax. For absolute addresses, registers should not
5481 be surrounded by braces. */
5482 if (GET_CODE (x) != REG)
5485 PRINT_OPERAND (file, x, 0);
5491 PRINT_OPERAND (file, x, 0);
5496 if (ASSEMBLER_DIALECT == 0)
5501 if (ASSEMBLER_DIALECT == 0)
5506 if (ASSEMBLER_DIALECT == 0)
5511 if (ASSEMBLER_DIALECT == 0)
5516 if (ASSEMBLER_DIALECT == 0)
5521 if (ASSEMBLER_DIALECT == 0)
5526 /* 387 opcodes don't get size suffixes if the operands are
5529 if (STACK_REG_P (x))
5532 /* this is the size of op from size of operand */
5533 switch (GET_MODE_SIZE (GET_MODE (x)))
5536 #ifdef HAVE_GAS_FILDS_FISTS
5542 if (GET_MODE (x) == SFmode)
5557 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5559 #ifdef GAS_MNEMONICS
5585 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
5587 PRINT_OPERAND (file, x, 0);
5593 /* Little bit of braindamage here. The SSE compare instructions
5594 does use completely different names for the comparisons that the
5595 fp conditional moves. */
5596 switch (GET_CODE (x))
5611 fputs ("unord", file);
5615 fputs ("neq", file);
5619 fputs ("nlt", file);
5623 fputs ("nle", file);
5626 fputs ("ord", file);
5634 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
5637 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
5640 /* Like above, but reverse condition */
5642 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
5645 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
5651 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
5654 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5657 int pred_val = INTVAL (XEXP (x, 0));
5659 if (pred_val < REG_BR_PROB_BASE * 45 / 100
5660 || pred_val > REG_BR_PROB_BASE * 55 / 100)
5662 int taken = pred_val > REG_BR_PROB_BASE / 2;
5663 int cputaken = final_forward_branch_p (current_output_insn) == 0;
5665 /* Emit hints only in the case default branch prediction
5666 heruistics would fail. */
5667 if (taken != cputaken)
5669 /* We use 3e (DS) prefix for taken branches and
5670 2e (CS) prefix for not taken branches. */
5672 fputs ("ds ; ", file);
5674 fputs ("cs ; ", file);
5683 sprintf (str, "invalid operand code `%c'", code);
5684 output_operand_lossage (str);
5689 if (GET_CODE (x) == REG)
5691 PRINT_REG (x, code, file);
5694 else if (GET_CODE (x) == MEM)
5696 /* No `byte ptr' prefix for call instructions. */
5697 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
5700 switch (GET_MODE_SIZE (GET_MODE (x)))
5702 case 1: size = "BYTE"; break;
5703 case 2: size = "WORD"; break;
5704 case 4: size = "DWORD"; break;
5705 case 8: size = "QWORD"; break;
5706 case 12: size = "XWORD"; break;
5707 case 16: size = "XMMWORD"; break;
5712 /* Check for explicit size override (codes 'b', 'w' and 'k') */
5715 else if (code == 'w')
5717 else if (code == 'k')
5721 fputs (" PTR ", file);
5725 if (flag_pic && CONSTANT_ADDRESS_P (x))
5726 output_pic_addr_const (file, x, code);
5727 /* Avoid (%rip) for call operands. */
5728 else if (CONSTANT_ADDRESS_P (x) && code =='P'
5729 && GET_CODE (x) != CONST_INT)
5730 output_addr_const (file, x);
5735 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
5740 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5741 REAL_VALUE_TO_TARGET_SINGLE (r, l);
5743 if (ASSEMBLER_DIALECT == 0)
5745 fprintf (file, "0x%lx", l);
5748 /* These float cases don't actually occur as immediate operands. */
5749 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
5754 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5755 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
5756 fprintf (file, "%s", dstr);
5759 else if (GET_CODE (x) == CONST_DOUBLE
5760 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
5765 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5766 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
5767 fprintf (file, "%s", dstr);
5773 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
5775 if (ASSEMBLER_DIALECT == 0)
5778 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
5779 || GET_CODE (x) == LABEL_REF)
5781 if (ASSEMBLER_DIALECT == 0)
5784 fputs ("OFFSET FLAT:", file);
5787 if (GET_CODE (x) == CONST_INT)
5788 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5790 output_pic_addr_const (file, x, code);
5792 output_addr_const (file, x);
5796 /* Print a memory operand whose address is ADDR. */
5799 print_operand_address (file, addr)
5803 struct ix86_address parts;
5804 rtx base, index, disp;
5807 if (! ix86_decompose_address (addr, &parts))
5811 index = parts.index;
5813 scale = parts.scale;
5815 if (!base && !index)
5817 /* Displacement only requires special attention. */
5819 if (GET_CODE (disp) == CONST_INT)
5821 if (ASSEMBLER_DIALECT != 0)
5823 if (USER_LABEL_PREFIX[0] == 0)
5825 fputs ("ds:", file);
5827 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
5830 output_pic_addr_const (file, addr, 0);
5832 output_addr_const (file, addr);
5834 /* Use one byte shorter RIP relative addressing for 64bit mode. */
5835 if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
5836 fputs ("(%rip)", file);
5840 if (ASSEMBLER_DIALECT == 0)
5845 output_pic_addr_const (file, disp, 0);
5846 else if (GET_CODE (disp) == LABEL_REF)
5847 output_asm_label (disp);
5849 output_addr_const (file, disp);
5854 PRINT_REG (base, 0, file);
5858 PRINT_REG (index, 0, file);
5860 fprintf (file, ",%d", scale);
5866 rtx offset = NULL_RTX;
5870 /* Pull out the offset of a symbol; print any symbol itself. */
5871 if (GET_CODE (disp) == CONST
5872 && GET_CODE (XEXP (disp, 0)) == PLUS
5873 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
5875 offset = XEXP (XEXP (disp, 0), 1);
5876 disp = gen_rtx_CONST (VOIDmode,
5877 XEXP (XEXP (disp, 0), 0));
5881 output_pic_addr_const (file, disp, 0);
5882 else if (GET_CODE (disp) == LABEL_REF)
5883 output_asm_label (disp);
5884 else if (GET_CODE (disp) == CONST_INT)
5887 output_addr_const (file, disp);
5893 PRINT_REG (base, 0, file);
5896 if (INTVAL (offset) >= 0)
5898 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
5902 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
5909 PRINT_REG (index, 0, file);
5911 fprintf (file, "*%d", scale);
5918 /* Split one or more DImode RTL references into pairs of SImode
5919 references. The RTL can be REG, offsettable MEM, integer constant, or
5920 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
5921 split and "num" is its length. lo_half and hi_half are output arrays
5922 that parallel "operands". */
5925 split_di (operands, num, lo_half, hi_half)
5928 rtx lo_half[], hi_half[];
5932 rtx op = operands[num];
5934 /* simplify_subreg refuse to split volatile memory addresses,
5935 but we still have to handle it. */
5936 if (GET_CODE (op) == MEM)
5938 lo_half[num] = adjust_address (op, SImode, 0);
5939 hi_half[num] = adjust_address (op, SImode, 4);
5943 lo_half[num] = simplify_gen_subreg (SImode, op,
5944 GET_MODE (op) == VOIDmode
5945 ? DImode : GET_MODE (op), 0);
5946 hi_half[num] = simplify_gen_subreg (SImode, op,
5947 GET_MODE (op) == VOIDmode
5948 ? DImode : GET_MODE (op), 4);
5952 /* Split one or more TImode RTL references into pairs of SImode
5953 references. The RTL can be REG, offsettable MEM, integer constant, or
5954 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
5955 split and "num" is its length. lo_half and hi_half are output arrays
5956 that parallel "operands". */
5959 split_ti (operands, num, lo_half, hi_half)
5962 rtx lo_half[], hi_half[];
5966 rtx op = operands[num];
5968 /* simplify_subreg refuse to split volatile memory addresses, but we
5969 still have to handle it. */
5970 if (GET_CODE (op) == MEM)
5972 lo_half[num] = adjust_address (op, DImode, 0);
5973 hi_half[num] = adjust_address (op, DImode, 8);
5977 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
5978 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
5983 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
5984 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
5985 is the expression of the binary operation. The output may either be
5986 emitted here, or returned to the caller, like all output_* functions.
5988 There is no guarantee that the operands are the same mode, as they
5989 might be within FLOAT or FLOAT_EXTEND expressions. */
5991 #ifndef SYSV386_COMPAT
5992 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
5993 wants to fix the assemblers because that causes incompatibility
5994 with gcc. No-one wants to fix gcc because that causes
5995 incompatibility with assemblers... You can use the option of
5996 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
5997 #define SYSV386_COMPAT 1
6001 output_387_binary_op (insn, operands)
6005 static char buf[30];
6008 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
6010 #ifdef ENABLE_CHECKING
6011 /* Even if we do not want to check the inputs, this documents input
6012 constraints. Which helps in understanding the following code. */
6013 if (STACK_REG_P (operands[0])
6014 && ((REG_P (operands[1])
6015 && REGNO (operands[0]) == REGNO (operands[1])
6016 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6017 || (REG_P (operands[2])
6018 && REGNO (operands[0]) == REGNO (operands[2])
6019 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6020 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6026 switch (GET_CODE (operands[3]))
6029 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6030 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6038 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6039 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6047 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6048 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6056 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6057 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6071 if (GET_MODE (operands[0]) == SFmode)
6072 strcat (buf, "ss\t{%2, %0|%0, %2}");
6074 strcat (buf, "sd\t{%2, %0|%0, %2}");
6079 switch (GET_CODE (operands[3]))
6083 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6085 rtx temp = operands[2];
6086 operands[2] = operands[1];
6090 /* know operands[0] == operands[1]. */
6092 if (GET_CODE (operands[2]) == MEM)
6098 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6100 if (STACK_TOP_P (operands[0]))
6101 /* How is it that we are storing to a dead operand[2]?
6102 Well, presumably operands[1] is dead too. We can't
6103 store the result to st(0) as st(0) gets popped on this
6104 instruction. Instead store to operands[2] (which I
6105 think has to be st(1)). st(1) will be popped later.
6106 gcc <= 2.8.1 didn't have this check and generated
6107 assembly code that the Unixware assembler rejected. */
6108 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6110 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6114 if (STACK_TOP_P (operands[0]))
6115 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6117 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6122 if (GET_CODE (operands[1]) == MEM)
6128 if (GET_CODE (operands[2]) == MEM)
6134 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6137 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6138 derived assemblers, confusingly reverse the direction of
6139 the operation for fsub{r} and fdiv{r} when the
6140 destination register is not st(0). The Intel assembler
6141 doesn't have this brain damage. Read !SYSV386_COMPAT to
6142 figure out what the hardware really does. */
6143 if (STACK_TOP_P (operands[0]))
6144 p = "{p\t%0, %2|rp\t%2, %0}";
6146 p = "{rp\t%2, %0|p\t%0, %2}";
6148 if (STACK_TOP_P (operands[0]))
6149 /* As above for fmul/fadd, we can't store to st(0). */
6150 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6152 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6157 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6160 if (STACK_TOP_P (operands[0]))
6161 p = "{rp\t%0, %1|p\t%1, %0}";
6163 p = "{p\t%1, %0|rp\t%0, %1}";
6165 if (STACK_TOP_P (operands[0]))
6166 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
6168 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
6173 if (STACK_TOP_P (operands[0]))
6175 if (STACK_TOP_P (operands[1]))
6176 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6178 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
6181 else if (STACK_TOP_P (operands[1]))
6184 p = "{\t%1, %0|r\t%0, %1}";
6186 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
6192 p = "{r\t%2, %0|\t%0, %2}";
6194 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6207 /* Output code to initialize control word copies used by
6208 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
6209 is set to control word rounding downwards. */
6211 emit_i387_cw_initialization (normal, round_down)
6212 rtx normal, round_down;
6214 rtx reg = gen_reg_rtx (HImode);
6216 emit_insn (gen_x86_fnstcw_1 (normal));
6217 emit_move_insn (reg, normal);
6218 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
6220 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
6222 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
6223 emit_move_insn (round_down, reg);
6226 /* Output code for INSN to convert a float to a signed int. OPERANDS
6227 are the insn operands. The output may be [HSD]Imode and the input
6228 operand may be [SDX]Fmode. */
6231 output_fix_trunc (insn, operands)
6235 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6236 int dimode_p = GET_MODE (operands[0]) == DImode;
6238 /* Jump through a hoop or two for DImode, since the hardware has no
6239 non-popping instruction. We used to do this a different way, but
6240 that was somewhat fragile and broke with post-reload splitters. */
6241 if (dimode_p && !stack_top_dies)
6242 output_asm_insn ("fld\t%y1", operands);
6244 if (!STACK_TOP_P (operands[1]))
6247 if (GET_CODE (operands[0]) != MEM)
6250 output_asm_insn ("fldcw\t%3", operands);
6251 if (stack_top_dies || dimode_p)
6252 output_asm_insn ("fistp%z0\t%0", operands);
6254 output_asm_insn ("fist%z0\t%0", operands);
6255 output_asm_insn ("fldcw\t%2", operands);
6260 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
6261 should be used and 2 when fnstsw should be used. UNORDERED_P is true
6262 when fucom should be used. */
6265 output_fp_compare (insn, operands, eflags_p, unordered_p)
6268 int eflags_p, unordered_p;
6271 rtx cmp_op0 = operands[0];
6272 rtx cmp_op1 = operands[1];
6273 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
6278 cmp_op1 = operands[2];
6282 if (GET_MODE (operands[0]) == SFmode)
6284 return "ucomiss\t{%1, %0|%0, %1}";
6286 return "comiss\t{%1, %0|%0, %y}";
6289 return "ucomisd\t{%1, %0|%0, %1}";
6291 return "comisd\t{%1, %0|%0, %y}";
6294 if (! STACK_TOP_P (cmp_op0))
6297 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6299 if (STACK_REG_P (cmp_op1)
6301 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
6302 && REGNO (cmp_op1) != FIRST_STACK_REG)
6304 /* If both the top of the 387 stack dies, and the other operand
6305 is also a stack register that dies, then this must be a
6306 `fcompp' float compare */
6310 /* There is no double popping fcomi variant. Fortunately,
6311 eflags is immune from the fstp's cc clobbering. */
6313 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
6315 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
6323 return "fucompp\n\tfnstsw\t%0";
6325 return "fcompp\n\tfnstsw\t%0";
6338 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
6340 static const char * const alt[24] =
6352 "fcomi\t{%y1, %0|%0, %y1}",
6353 "fcomip\t{%y1, %0|%0, %y1}",
6354 "fucomi\t{%y1, %0|%0, %y1}",
6355 "fucomip\t{%y1, %0|%0, %y1}",
6362 "fcom%z2\t%y2\n\tfnstsw\t%0",
6363 "fcomp%z2\t%y2\n\tfnstsw\t%0",
6364 "fucom%z2\t%y2\n\tfnstsw\t%0",
6365 "fucomp%z2\t%y2\n\tfnstsw\t%0",
6367 "ficom%z2\t%y2\n\tfnstsw\t%0",
6368 "ficomp%z2\t%y2\n\tfnstsw\t%0",
6376 mask = eflags_p << 3;
6377 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
6378 mask |= unordered_p << 1;
6379 mask |= stack_top_dies;
6391 /* Output assembler code to FILE to initialize basic-block profiling.
6393 If profile_block_flag == 2
6395 Output code to call the subroutine `__bb_init_trace_func'
6396 and pass two parameters to it. The first parameter is
6397 the address of a block allocated in the object module.
6398 The second parameter is the number of the first basic block
6401 The name of the block is a local symbol made with this statement:
6403 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
6405 Of course, since you are writing the definition of
6406 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
6407 can take a short cut in the definition of this macro and use the
6408 name that you know will result.
6410 The number of the first basic block of the function is
6411 passed to the macro in BLOCK_OR_LABEL.
6413 If described in a virtual assembler language the code to be
6417 parameter2 <- BLOCK_OR_LABEL
6418 call __bb_init_trace_func
6420 else if profile_block_flag != 0
6422 Output code to call the subroutine `__bb_init_func'
6423 and pass one single parameter to it, which is the same
6424 as the first parameter to `__bb_init_trace_func'.
6426 The first word of this parameter is a flag which will be nonzero if
6427 the object module has already been initialized. So test this word
6428 first, and do not call `__bb_init_func' if the flag is nonzero.
6429 Note: When profile_block_flag == 2 the test need not be done
6430 but `__bb_init_trace_func' *must* be called.
6432 BLOCK_OR_LABEL may be used to generate a label number as a
6433 branch destination in case `__bb_init_func' will not be called.
6435 If described in a virtual assembler language the code to be
6446 ix86_output_function_block_profiler (file, block_or_label)
6450 static int num_func = 0;
6452 char block_table[80], false_label[80];
6454 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
6456 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
6457 xops[5] = stack_pointer_rtx;
6458 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
6460 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
6462 switch (profile_block_flag)
6465 xops[2] = GEN_INT (block_or_label);
6466 xops[3] = gen_rtx_MEM (Pmode,
6467 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
6468 xops[6] = GEN_INT (8);
6470 output_asm_insn ("push{l}\t%2", xops);
6472 output_asm_insn ("push{l}\t%1", xops);
6475 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
6476 output_asm_insn ("push{l}\t%7", xops);
6478 output_asm_insn ("call\t%P3", xops);
6479 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
6483 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
6485 xops[0] = const0_rtx;
6486 xops[2] = gen_rtx_MEM (Pmode,
6487 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
6488 xops[3] = gen_rtx_MEM (Pmode,
6489 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
6490 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
6491 xops[6] = GEN_INT (4);
6493 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
6495 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
6496 output_asm_insn ("jne\t%2", xops);
6499 output_asm_insn ("push{l}\t%1", xops);
6502 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops);
6503 output_asm_insn ("push{l}\t%7", xops);
6505 output_asm_insn ("call\t%P3", xops);
6506 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
6507 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
6513 /* Output assembler code to FILE to increment a counter associated
6514 with basic block number BLOCKNO.
6516 If profile_block_flag == 2
6518 Output code to initialize the global structure `__bb' and
6519 call the function `__bb_trace_func' which will increment the
6522 `__bb' consists of two words. In the first word the number
6523 of the basic block has to be stored. In the second word
6524 the address of a block allocated in the object module
6527 The basic block number is given by BLOCKNO.
6529 The address of the block is given by the label created with
6531 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
6533 by FUNCTION_BLOCK_PROFILER.
6535 Of course, since you are writing the definition of
6536 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
6537 can take a short cut in the definition of this macro and use the
6538 name that you know will result.
6540 If described in a virtual assembler language the code to be
6543 move BLOCKNO -> (__bb)
6544 move LPBX0 -> (__bb+4)
6545 call __bb_trace_func
6547 Note that function `__bb_trace_func' must not change the
6548 machine state, especially the flag register. To grant
6549 this, you must output code to save and restore registers
6550 either in this macro or in the macros MACHINE_STATE_SAVE
6551 and MACHINE_STATE_RESTORE. The last two macros will be
6552 used in the function `__bb_trace_func', so you must make
6553 sure that the function prologue does not change any
6554 register prior to saving it with MACHINE_STATE_SAVE.
6556 else if profile_block_flag != 0
6558 Output code to increment the counter directly.
6559 Basic blocks are numbered separately from zero within each
6560 compiled object module. The count associated with block number
6561 BLOCKNO is at index BLOCKNO in an array of words; the name of
6562 this array is a local symbol made with this statement:
6564 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
6566 Of course, since you are writing the definition of
6567 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
6568 can take a short cut in the definition of this macro and use the
6569 name that you know will result.
6571 If described in a virtual assembler language the code to be
6574 inc (LPBX2+4*BLOCKNO)
6578 ix86_output_block_profiler (file, blockno)
6579 FILE *file ATTRIBUTE_UNUSED;
6582 rtx xops[8], cnt_rtx;
6584 char *block_table = counts;
6586 switch (profile_block_flag)
6589 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
6591 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
6592 xops[2] = GEN_INT (blockno);
6593 xops[3] = gen_rtx_MEM (Pmode,
6594 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
6595 xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
6596 xops[5] = plus_constant (xops[4], 4);
6597 xops[0] = gen_rtx_MEM (SImode, xops[4]);
6598 xops[6] = gen_rtx_MEM (SImode, xops[5]);
6600 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
6602 output_asm_insn ("pushf", xops);
6603 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
6606 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
6607 output_asm_insn ("push{l}\t%7", xops);
6608 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
6609 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
6610 output_asm_insn ("pop{l}\t%7", xops);
6613 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
6614 output_asm_insn ("call\t%P3", xops);
6615 output_asm_insn ("popf", xops);
6620 ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
6621 cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
6622 SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
6625 cnt_rtx = plus_constant (cnt_rtx, blockno*4);
6628 cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
6630 xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
6631 output_asm_insn ("inc{l}\t%0", xops);
6638 ix86_output_addr_vec_elt (file, value)
6642 const char *directive = ASM_LONG;
6647 directive = ASM_QUAD;
6653 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
6657 ix86_output_addr_diff_elt (file, value, rel)
6662 fprintf (file, "%s%s%d-.+4+(.-%s%d)\n",
6663 ASM_LONG, LPREFIX, value, LPREFIX, rel);
6664 else if (HAVE_AS_GOTOFF_IN_DATA)
6665 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
6667 asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
6668 ASM_LONG, LPREFIX, value);
6671 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
6675 ix86_expand_clear (dest)
6680 /* We play register width games, which are only valid after reload. */
6681 if (!reload_completed)
6684 /* Avoid HImode and its attendant prefix byte. */
6685 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
6686 dest = gen_rtx_REG (SImode, REGNO (dest));
6688 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
6690 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
6691 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
6693 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
6694 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6701 ix86_expand_move (mode, operands)
6702 enum machine_mode mode;
6705 int strict = (reload_in_progress || reload_completed);
6708 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
6710 /* Emit insns to move operands[1] into operands[0]. */
6712 if (GET_CODE (operands[0]) == MEM)
6713 operands[1] = force_reg (Pmode, operands[1]);
6716 rtx temp = operands[0];
6717 if (GET_CODE (temp) != REG)
6718 temp = gen_reg_rtx (Pmode);
6719 temp = legitimize_pic_address (operands[1], temp);
6720 if (temp == operands[0])
6727 if (GET_CODE (operands[0]) == MEM
6728 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
6729 || !push_operand (operands[0], mode))
6730 && GET_CODE (operands[1]) == MEM)
6731 operands[1] = force_reg (mode, operands[1]);
6733 if (push_operand (operands[0], mode)
6734 && ! general_no_elim_operand (operands[1], mode))
6735 operands[1] = copy_to_mode_reg (mode, operands[1]);
6737 /* Force large constants in 64bit compilation into register
6738 to get them CSEed. */
6739 if (TARGET_64BIT && mode == DImode
6740 && immediate_operand (operands[1], mode)
6741 && !x86_64_zero_extended_value (operands[1])
6742 && !register_operand (operands[0], mode)
6743 && optimize && !reload_completed && !reload_in_progress)
6744 operands[1] = copy_to_mode_reg (mode, operands[1]);
6746 if (FLOAT_MODE_P (mode))
6748 /* If we are loading a floating point constant to a register,
6749 force the value to memory now, since we'll get better code
6750 out the back end. */
6754 else if (GET_CODE (operands[1]) == CONST_DOUBLE
6755 && register_operand (operands[0], mode))
6756 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
6760 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
6765 /* Attempt to expand a binary operator. Make the expansion closer to the
6766 actual machine, then just general_operand, which will allow 3 separate
6767 memory references (one output, two input) in a single insn. */
6770 ix86_expand_binary_operator (code, mode, operands)
6772 enum machine_mode mode;
6775 int matching_memory;
6776 rtx src1, src2, dst, op, clob;
6782 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
6783 if (GET_RTX_CLASS (code) == 'c'
6784 && (rtx_equal_p (dst, src2)
6785 || immediate_operand (src1, mode)))
6792 /* If the destination is memory, and we do not have matching source
6793 operands, do things in registers. */
6794 matching_memory = 0;
6795 if (GET_CODE (dst) == MEM)
6797 if (rtx_equal_p (dst, src1))
6798 matching_memory = 1;
6799 else if (GET_RTX_CLASS (code) == 'c'
6800 && rtx_equal_p (dst, src2))
6801 matching_memory = 2;
6803 dst = gen_reg_rtx (mode);
6806 /* Both source operands cannot be in memory. */
6807 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
6809 if (matching_memory != 2)
6810 src2 = force_reg (mode, src2);
6812 src1 = force_reg (mode, src1);
6815 /* If the operation is not commutable, source 1 cannot be a constant
6816 or non-matching memory. */
6817 if ((CONSTANT_P (src1)
6818 || (!matching_memory && GET_CODE (src1) == MEM))
6819 && GET_RTX_CLASS (code) != 'c')
6820 src1 = force_reg (mode, src1);
6822 /* If optimizing, copy to regs to improve CSE */
6823 if (optimize && ! no_new_pseudos)
6825 if (GET_CODE (dst) == MEM)
6826 dst = gen_reg_rtx (mode);
6827 if (GET_CODE (src1) == MEM)
6828 src1 = force_reg (mode, src1);
6829 if (GET_CODE (src2) == MEM)
6830 src2 = force_reg (mode, src2);
6833 /* Emit the instruction. */
6835 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
6836 if (reload_in_progress)
6838 /* Reload doesn't know about the flags register, and doesn't know that
6839 it doesn't want to clobber it. We can only do this with PLUS. */
6846 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6847 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6850 /* Fix up the destination if needed. */
6851 if (dst != operands[0])
6852 emit_move_insn (operands[0], dst);
6855 /* Return TRUE or FALSE depending on whether the binary operator meets the
6856 appropriate constraints. */
6859 ix86_binary_operator_ok (code, mode, operands)
6861 enum machine_mode mode ATTRIBUTE_UNUSED;
6864 /* Both source operands cannot be in memory. */
6865 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
6867 /* If the operation is not commutable, source 1 cannot be a constant. */
6868 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
6870 /* If the destination is memory, we must have a matching source operand. */
6871 if (GET_CODE (operands[0]) == MEM
6872 && ! (rtx_equal_p (operands[0], operands[1])
6873 || (GET_RTX_CLASS (code) == 'c'
6874 && rtx_equal_p (operands[0], operands[2]))))
6876 /* If the operation is not commutable and the source 1 is memory, we must
6877 have a matching destionation. */
6878 if (GET_CODE (operands[1]) == MEM
6879 && GET_RTX_CLASS (code) != 'c'
6880 && ! rtx_equal_p (operands[0], operands[1]))
6885 /* Attempt to expand a unary operator. Make the expansion closer to the
6886 actual machine, then just general_operand, which will allow 2 separate
6887 memory references (one output, one input) in a single insn. */
6890 ix86_expand_unary_operator (code, mode, operands)
6892 enum machine_mode mode;
6895 int matching_memory;
6896 rtx src, dst, op, clob;
6901 /* If the destination is memory, and we do not have matching source
6902 operands, do things in registers. */
6903 matching_memory = 0;
6904 if (GET_CODE (dst) == MEM)
6906 if (rtx_equal_p (dst, src))
6907 matching_memory = 1;
6909 dst = gen_reg_rtx (mode);
6912 /* When source operand is memory, destination must match. */
6913 if (!matching_memory && GET_CODE (src) == MEM)
6914 src = force_reg (mode, src);
6916 /* If optimizing, copy to regs to improve CSE */
6917 if (optimize && ! no_new_pseudos)
6919 if (GET_CODE (dst) == MEM)
6920 dst = gen_reg_rtx (mode);
6921 if (GET_CODE (src) == MEM)
6922 src = force_reg (mode, src);
6925 /* Emit the instruction. */
6927 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
6928 if (reload_in_progress || code == NOT)
6930 /* Reload doesn't know about the flags register, and doesn't know that
6931 it doesn't want to clobber it. */
6938 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6939 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6942 /* Fix up the destination if needed. */
6943 if (dst != operands[0])
6944 emit_move_insn (operands[0], dst);
6947 /* Return TRUE or FALSE depending on whether the unary operator meets the
6948 appropriate constraints. */
6951 ix86_unary_operator_ok (code, mode, operands)
6952 enum rtx_code code ATTRIBUTE_UNUSED;
6953 enum machine_mode mode ATTRIBUTE_UNUSED;
6954 rtx operands[2] ATTRIBUTE_UNUSED;
6956 /* If one of operands is memory, source and destination must match. */
6957 if ((GET_CODE (operands[0]) == MEM
6958 || GET_CODE (operands[1]) == MEM)
6959 && ! rtx_equal_p (operands[0], operands[1]))
6964 /* Return TRUE or FALSE depending on whether the first SET in INSN
6965 has source and destination with matching CC modes, and that the
6966 CC mode is at least as constrained as REQ_MODE. */
6969 ix86_match_ccmode (insn, req_mode)
6971 enum machine_mode req_mode;
6974 enum machine_mode set_mode;
6976 set = PATTERN (insn);
6977 if (GET_CODE (set) == PARALLEL)
6978 set = XVECEXP (set, 0, 0);
6979 if (GET_CODE (set) != SET)
6981 if (GET_CODE (SET_SRC (set)) != COMPARE)
6984 set_mode = GET_MODE (SET_DEST (set));
6988 if (req_mode != CCNOmode
6989 && (req_mode != CCmode
6990 || XEXP (SET_SRC (set), 1) != const0_rtx))
6994 if (req_mode == CCGCmode)
6998 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7002 if (req_mode == CCZmode)
7012 return (GET_MODE (SET_SRC (set)) == set_mode);
7015 /* Generate insn patterns to do an integer compare of OPERANDS. */
7018 ix86_expand_int_compare (code, op0, op1)
7022 enum machine_mode cmpmode;
7025 cmpmode = SELECT_CC_MODE (code, op0, op1);
7026 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7028 /* This is very simple, but making the interface the same as in the
7029 FP case makes the rest of the code easier. */
7030 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7031 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7033 /* Return the test that should be put into the flags user, i.e.
7034 the bcc, scc, or cmov instruction. */
7035 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7038 /* Figure out whether to use ordered or unordered fp comparisons.
7039 Return the appropriate mode to use. */
7042 ix86_fp_compare_mode (code)
7043 enum rtx_code code ATTRIBUTE_UNUSED;
7045 /* ??? In order to make all comparisons reversible, we do all comparisons
7046 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7047 all forms trapping and nontrapping comparisons, we can make inequality
7048 comparisons trapping again, since it results in better code when using
7049 FCOM based compares. */
7050 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7054 ix86_cc_mode (code, op0, op1)
7058 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7059 return ix86_fp_compare_mode (code);
7062 /* Only zero flag is needed. */
7064 case NE: /* ZF!=0 */
7066 /* Codes needing carry flag. */
7067 case GEU: /* CF=0 */
7068 case GTU: /* CF=0 & ZF=0 */
7069 case LTU: /* CF=1 */
7070 case LEU: /* CF=1 | ZF=1 */
7072 /* Codes possibly doable only with sign flag when
7073 comparing against zero. */
7074 case GE: /* SF=OF or SF=0 */
7075 case LT: /* SF<>OF or SF=1 */
7076 if (op1 == const0_rtx)
7079 /* For other cases Carry flag is not required. */
7081 /* Codes doable only with sign flag when comparing
7082 against zero, but we miss jump instruction for it
7083 so we need to use relational tests agains overflow
7084 that thus needs to be zero. */
7085 case GT: /* ZF=0 & SF=OF */
7086 case LE: /* ZF=1 | SF<>OF */
7087 if (op1 == const0_rtx)
7096 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7099 ix86_use_fcomi_compare (code)
7100 enum rtx_code code ATTRIBUTE_UNUSED;
7102 enum rtx_code swapped_code = swap_condition (code);
7103 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7104 || (ix86_fp_comparison_cost (swapped_code)
7105 == ix86_fp_comparison_fcomi_cost (swapped_code)));
7108 /* Swap, force into registers, or otherwise massage the two operands
7109 to a fp comparison. The operands are updated in place; the new
7110 comparsion code is returned. */
7112 static enum rtx_code
7113 ix86_prepare_fp_compare_args (code, pop0, pop1)
7117 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7118 rtx op0 = *pop0, op1 = *pop1;
7119 enum machine_mode op_mode = GET_MODE (op0);
7120 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7122 /* All of the unordered compare instructions only work on registers.
7123 The same is true of the XFmode compare instructions. The same is
7124 true of the fcomi compare instructions. */
7127 && (fpcmp_mode == CCFPUmode
7128 || op_mode == XFmode
7129 || op_mode == TFmode
7130 || ix86_use_fcomi_compare (code)))
7132 op0 = force_reg (op_mode, op0);
7133 op1 = force_reg (op_mode, op1);
7137 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7138 things around if they appear profitable, otherwise force op0
7141 if (standard_80387_constant_p (op0) == 0
7142 || (GET_CODE (op0) == MEM
7143 && ! (standard_80387_constant_p (op1) == 0
7144 || GET_CODE (op1) == MEM)))
7147 tmp = op0, op0 = op1, op1 = tmp;
7148 code = swap_condition (code);
7151 if (GET_CODE (op0) != REG)
7152 op0 = force_reg (op_mode, op0);
7154 if (CONSTANT_P (op1))
7156 if (standard_80387_constant_p (op1))
7157 op1 = force_reg (op_mode, op1);
7159 op1 = validize_mem (force_const_mem (op_mode, op1));
7163 /* Try to rearrange the comparison to make it cheaper. */
7164 if (ix86_fp_comparison_cost (code)
7165 > ix86_fp_comparison_cost (swap_condition (code))
7166 && (GET_CODE (op0) == REG || !reload_completed))
7169 tmp = op0, op0 = op1, op1 = tmp;
7170 code = swap_condition (code);
7171 if (GET_CODE (op0) != REG)
7172 op0 = force_reg (op_mode, op0);
7180 /* Convert comparison codes we use to represent FP comparison to integer
7181 code that will result in proper branch. Return UNKNOWN if no such code
7183 static enum rtx_code
7184 ix86_fp_compare_code_to_integer (code)
7214 /* Split comparison code CODE into comparisons we can do using branch
7215 instructions. BYPASS_CODE is comparison code for branch that will
7216 branch around FIRST_CODE and SECOND_CODE. If some of branches
7217 is not required, set value to NIL.
7218 We never require more than two branches. */
7220 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
7221 enum rtx_code code, *bypass_code, *first_code, *second_code;
7227 /* The fcomi comparison sets flags as follows:
7237 case GT: /* GTU - CF=0 & ZF=0 */
7238 case GE: /* GEU - CF=0 */
7239 case ORDERED: /* PF=0 */
7240 case UNORDERED: /* PF=1 */
7241 case UNEQ: /* EQ - ZF=1 */
7242 case UNLT: /* LTU - CF=1 */
7243 case UNLE: /* LEU - CF=1 | ZF=1 */
7244 case LTGT: /* EQ - ZF=0 */
7246 case LT: /* LTU - CF=1 - fails on unordered */
7248 *bypass_code = UNORDERED;
7250 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
7252 *bypass_code = UNORDERED;
7254 case EQ: /* EQ - ZF=1 - fails on unordered */
7256 *bypass_code = UNORDERED;
7258 case NE: /* NE - ZF=0 - fails on unordered */
7260 *second_code = UNORDERED;
7262 case UNGE: /* GEU - CF=0 - fails on unordered */
7264 *second_code = UNORDERED;
7266 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
7268 *second_code = UNORDERED;
7273 if (!TARGET_IEEE_FP)
7280 /* Return cost of comparison done fcom + arithmetics operations on AX.
7281 All following functions do use number of instructions as an cost metrics.
7282 In future this should be tweaked to compute bytes for optimize_size and
7283 take into account performance of various instructions on various CPUs. */
7285 ix86_fp_comparison_arithmetics_cost (code)
7288 if (!TARGET_IEEE_FP)
7290 /* The cost of code output by ix86_expand_fp_compare. */
7318 /* Return cost of comparison done using fcomi operation.
7319 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7321 ix86_fp_comparison_fcomi_cost (code)
7324 enum rtx_code bypass_code, first_code, second_code;
7325 /* Return arbitarily high cost when instruction is not supported - this
7326 prevents gcc from using it. */
7329 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7330 return (bypass_code != NIL || second_code != NIL) + 2;
7333 /* Return cost of comparison done using sahf operation.
7334 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7336 ix86_fp_comparison_sahf_cost (code)
7339 enum rtx_code bypass_code, first_code, second_code;
7340 /* Return arbitarily high cost when instruction is not preferred - this
7341 avoids gcc from using it. */
7342 if (!TARGET_USE_SAHF && !optimize_size)
7344 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7345 return (bypass_code != NIL || second_code != NIL) + 3;
7348 /* Compute cost of the comparison done using any method.
7349 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7351 ix86_fp_comparison_cost (code)
7354 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
7357 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
7358 sahf_cost = ix86_fp_comparison_sahf_cost (code);
7360 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
7361 if (min > sahf_cost)
7363 if (min > fcomi_cost)
7368 /* Generate insn patterns to do a floating point compare of OPERANDS. */
7371 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
7373 rtx op0, op1, scratch;
7377 enum machine_mode fpcmp_mode, intcmp_mode;
7379 int cost = ix86_fp_comparison_cost (code);
7380 enum rtx_code bypass_code, first_code, second_code;
7382 fpcmp_mode = ix86_fp_compare_mode (code);
7383 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
7386 *second_test = NULL_RTX;
7388 *bypass_test = NULL_RTX;
7390 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7392 /* Do fcomi/sahf based test when profitable. */
7393 if ((bypass_code == NIL || bypass_test)
7394 && (second_code == NIL || second_test)
7395 && ix86_fp_comparison_arithmetics_cost (code) > cost)
7399 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7400 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
7406 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7407 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7409 scratch = gen_reg_rtx (HImode);
7410 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7411 emit_insn (gen_x86_sahf_1 (scratch));
7414 /* The FP codes work out to act like unsigned. */
7415 intcmp_mode = fpcmp_mode;
7417 if (bypass_code != NIL)
7418 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
7419 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7421 if (second_code != NIL)
7422 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
7423 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7428 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
7429 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7430 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7432 scratch = gen_reg_rtx (HImode);
7433 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7435 /* In the unordered case, we have to check C2 for NaN's, which
7436 doesn't happen to work out to anything nice combination-wise.
7437 So do some bit twiddling on the value we've got in AH to come
7438 up with an appropriate set of condition codes. */
7440 intcmp_mode = CCNOmode;
7445 if (code == GT || !TARGET_IEEE_FP)
7447 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7452 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7453 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7454 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
7455 intcmp_mode = CCmode;
7461 if (code == LT && TARGET_IEEE_FP)
7463 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7464 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
7465 intcmp_mode = CCmode;
7470 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
7476 if (code == GE || !TARGET_IEEE_FP)
7478 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
7483 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7484 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7491 if (code == LE && TARGET_IEEE_FP)
7493 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7494 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7495 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7496 intcmp_mode = CCmode;
7501 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7507 if (code == EQ && TARGET_IEEE_FP)
7509 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7510 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7511 intcmp_mode = CCmode;
7516 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7523 if (code == NE && TARGET_IEEE_FP)
7525 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7526 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7532 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7538 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7542 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7551 /* Return the test that should be put into the flags user, i.e.
7552 the bcc, scc, or cmov instruction. */
7553 return gen_rtx_fmt_ee (code, VOIDmode,
7554 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7559 ix86_expand_compare (code, second_test, bypass_test)
7561 rtx *second_test, *bypass_test;
7564 op0 = ix86_compare_op0;
7565 op1 = ix86_compare_op1;
7568 *second_test = NULL_RTX;
7570 *bypass_test = NULL_RTX;
7572 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7573 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
7574 second_test, bypass_test);
7576 ret = ix86_expand_int_compare (code, op0, op1);
7581 /* Return true if the CODE will result in nontrivial jump sequence. */
7583 ix86_fp_jump_nontrivial_p (code)
7586 enum rtx_code bypass_code, first_code, second_code;
7589 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7590 return bypass_code != NIL || second_code != NIL;
7594 ix86_expand_branch (code, label)
7600 switch (GET_MODE (ix86_compare_op0))
7606 tmp = ix86_expand_compare (code, NULL, NULL);
7607 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7608 gen_rtx_LABEL_REF (VOIDmode, label),
7610 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
7620 enum rtx_code bypass_code, first_code, second_code;
7622 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
7625 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7627 /* Check whether we will use the natural sequence with one jump. If
7628 so, we can expand jump early. Otherwise delay expansion by
7629 creating compound insn to not confuse optimizers. */
7630 if (bypass_code == NIL && second_code == NIL
7633 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
7634 gen_rtx_LABEL_REF (VOIDmode, label),
7639 tmp = gen_rtx_fmt_ee (code, VOIDmode,
7640 ix86_compare_op0, ix86_compare_op1);
7641 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7642 gen_rtx_LABEL_REF (VOIDmode, label),
7644 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
7646 use_fcomi = ix86_use_fcomi_compare (code);
7647 vec = rtvec_alloc (3 + !use_fcomi);
7648 RTVEC_ELT (vec, 0) = tmp;
7650 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
7652 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
7655 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
7657 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
7665 /* Expand DImode branch into multiple compare+branch. */
7667 rtx lo[2], hi[2], label2;
7668 enum rtx_code code1, code2, code3;
7670 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
7672 tmp = ix86_compare_op0;
7673 ix86_compare_op0 = ix86_compare_op1;
7674 ix86_compare_op1 = tmp;
7675 code = swap_condition (code);
7677 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
7678 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
7680 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
7681 avoid two branches. This costs one extra insn, so disable when
7682 optimizing for size. */
7684 if ((code == EQ || code == NE)
7686 || hi[1] == const0_rtx || lo[1] == const0_rtx))
7691 if (hi[1] != const0_rtx)
7692 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
7693 NULL_RTX, 0, OPTAB_WIDEN);
7696 if (lo[1] != const0_rtx)
7697 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
7698 NULL_RTX, 0, OPTAB_WIDEN);
7700 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
7701 NULL_RTX, 0, OPTAB_WIDEN);
7703 ix86_compare_op0 = tmp;
7704 ix86_compare_op1 = const0_rtx;
7705 ix86_expand_branch (code, label);
7709 /* Otherwise, if we are doing less-than or greater-or-equal-than,
7710 op1 is a constant and the low word is zero, then we can just
7711 examine the high word. */
7713 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
7716 case LT: case LTU: case GE: case GEU:
7717 ix86_compare_op0 = hi[0];
7718 ix86_compare_op1 = hi[1];
7719 ix86_expand_branch (code, label);
7725 /* Otherwise, we need two or three jumps. */
7727 label2 = gen_label_rtx ();
7730 code2 = swap_condition (code);
7731 code3 = unsigned_condition (code);
7735 case LT: case GT: case LTU: case GTU:
7738 case LE: code1 = LT; code2 = GT; break;
7739 case GE: code1 = GT; code2 = LT; break;
7740 case LEU: code1 = LTU; code2 = GTU; break;
7741 case GEU: code1 = GTU; code2 = LTU; break;
7743 case EQ: code1 = NIL; code2 = NE; break;
7744 case NE: code2 = NIL; break;
7752 * if (hi(a) < hi(b)) goto true;
7753 * if (hi(a) > hi(b)) goto false;
7754 * if (lo(a) < lo(b)) goto true;
7758 ix86_compare_op0 = hi[0];
7759 ix86_compare_op1 = hi[1];
7762 ix86_expand_branch (code1, label);
7764 ix86_expand_branch (code2, label2);
7766 ix86_compare_op0 = lo[0];
7767 ix86_compare_op1 = lo[1];
7768 ix86_expand_branch (code3, label);
7771 emit_label (label2);
7780 /* Split branch based on floating point condition. */
7782 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
7784 rtx op1, op2, target1, target2, tmp;
7787 rtx label = NULL_RTX;
7789 int bypass_probability = -1, second_probability = -1, probability = -1;
7792 if (target2 != pc_rtx)
7795 code = reverse_condition_maybe_unordered (code);
7800 condition = ix86_expand_fp_compare (code, op1, op2,
7801 tmp, &second, &bypass);
7803 if (split_branch_probability >= 0)
7805 /* Distribute the probabilities across the jumps.
7806 Assume the BYPASS and SECOND to be always test
7808 probability = split_branch_probability;
7810 /* Value of 1 is low enought to make no need for probability
7811 to be updated. Later we may run some experiments and see
7812 if unordered values are more frequent in practice. */
7814 bypass_probability = 1;
7816 second_probability = 1;
7818 if (bypass != NULL_RTX)
7820 label = gen_label_rtx ();
7821 i = emit_jump_insn (gen_rtx_SET
7823 gen_rtx_IF_THEN_ELSE (VOIDmode,
7825 gen_rtx_LABEL_REF (VOIDmode,
7828 if (bypass_probability >= 0)
7830 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7831 GEN_INT (bypass_probability),
7834 i = emit_jump_insn (gen_rtx_SET
7836 gen_rtx_IF_THEN_ELSE (VOIDmode,
7837 condition, target1, target2)));
7838 if (probability >= 0)
7840 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7841 GEN_INT (probability),
7843 if (second != NULL_RTX)
7845 i = emit_jump_insn (gen_rtx_SET
7847 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
7849 if (second_probability >= 0)
7851 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7852 GEN_INT (second_probability),
7855 if (label != NULL_RTX)
7860 ix86_expand_setcc (code, dest)
7864 rtx ret, tmp, tmpreg;
7865 rtx second_test, bypass_test;
7867 if (GET_MODE (ix86_compare_op0) == DImode
7869 return 0; /* FAIL */
7871 if (GET_MODE (dest) != QImode)
7874 ret = ix86_expand_compare (code, &second_test, &bypass_test);
7875 PUT_MODE (ret, QImode);
7880 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
7881 if (bypass_test || second_test)
7883 rtx test = second_test;
7885 rtx tmp2 = gen_reg_rtx (QImode);
7892 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
7894 PUT_MODE (test, QImode);
7895 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
7898 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
7900 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
7903 return 1; /* DONE */
7907 ix86_expand_int_movcc (operands)
7910 enum rtx_code code = GET_CODE (operands[1]), compare_code;
7911 rtx compare_seq, compare_op;
7912 rtx second_test, bypass_test;
7913 enum machine_mode mode = GET_MODE (operands[0]);
7915 /* When the compare code is not LTU or GEU, we can not use sbbl case.
7916 In case comparsion is done with immediate, we can convert it to LTU or
7917 GEU by altering the integer. */
7919 if ((code == LEU || code == GTU)
7920 && GET_CODE (ix86_compare_op1) == CONST_INT
7922 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
7923 && GET_CODE (operands[2]) == CONST_INT
7924 && GET_CODE (operands[3]) == CONST_INT)
7930 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
7934 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
7935 compare_seq = gen_sequence ();
7938 compare_code = GET_CODE (compare_op);
7940 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
7941 HImode insns, we'd be swallowed in word prefix ops. */
7944 && (mode != DImode || TARGET_64BIT)
7945 && GET_CODE (operands[2]) == CONST_INT
7946 && GET_CODE (operands[3]) == CONST_INT)
7948 rtx out = operands[0];
7949 HOST_WIDE_INT ct = INTVAL (operands[2]);
7950 HOST_WIDE_INT cf = INTVAL (operands[3]);
7953 if ((compare_code == LTU || compare_code == GEU)
7954 && !second_test && !bypass_test)
7957 /* Detect overlap between destination and compare sources. */
7960 /* To simplify rest of code, restrict to the GEU case. */
7961 if (compare_code == LTU)
7966 compare_code = reverse_condition (compare_code);
7967 code = reverse_condition (code);
7971 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
7972 || reg_overlap_mentioned_p (out, ix86_compare_op1))
7973 tmp = gen_reg_rtx (mode);
7975 emit_insn (compare_seq);
7977 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
7979 emit_insn (gen_x86_movsicc_0_m1 (tmp));
7991 tmp = expand_simple_binop (mode, PLUS,
7993 tmp, 1, OPTAB_DIRECT);
8004 tmp = expand_simple_binop (mode, IOR,
8006 tmp, 1, OPTAB_DIRECT);
8008 else if (diff == -1 && ct)
8018 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8020 tmp = expand_simple_binop (mode, PLUS,
8022 tmp, 1, OPTAB_DIRECT);
8029 * andl cf - ct, dest
8034 tmp = expand_simple_binop (mode, AND,
8036 GEN_INT (trunc_int_for_mode
8038 tmp, 1, OPTAB_DIRECT);
8040 tmp = expand_simple_binop (mode, PLUS,
8042 tmp, 1, OPTAB_DIRECT);
8046 emit_move_insn (out, tmp);
8048 return 1; /* DONE */
8055 tmp = ct, ct = cf, cf = tmp;
8057 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8059 /* We may be reversing unordered compare to normal compare, that
8060 is not valid in general (we may convert non-trapping condition
8061 to trapping one), however on i386 we currently emit all
8062 comparisons unordered. */
8063 compare_code = reverse_condition_maybe_unordered (compare_code);
8064 code = reverse_condition_maybe_unordered (code);
8068 compare_code = reverse_condition (compare_code);
8069 code = reverse_condition (code);
8072 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
8073 || diff == 3 || diff == 5 || diff == 9)
8074 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
8080 * lea cf(dest*(ct-cf)),dest
8084 * This also catches the degenerate setcc-only case.
8090 out = emit_store_flag (out, code, ix86_compare_op0,
8091 ix86_compare_op1, VOIDmode, 0, 1);
8094 /* On x86_64 the lea instruction operates on Pmode, so we need to get arithmetics
8095 done in proper mode to match. */
8102 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
8106 tmp = gen_rtx_PLUS (mode, tmp, out1);
8112 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
8116 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
8122 clob = gen_rtx_REG (CCmode, FLAGS_REG);
8123 clob = gen_rtx_CLOBBER (VOIDmode, clob);
8125 tmp = gen_rtx_SET (VOIDmode, out, tmp);
8126 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8130 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8132 if (out != operands[0])
8133 emit_move_insn (operands[0], out);
8135 return 1; /* DONE */
8139 * General case: Jumpful:
8140 * xorl dest,dest cmpl op1, op2
8141 * cmpl op1, op2 movl ct, dest
8143 * decl dest movl cf, dest
8144 * andl (cf-ct),dest 1:
8149 * This is reasonably steep, but branch mispredict costs are
8150 * high on modern cpus, so consider failing only if optimizing
8153 * %%% Parameterize branch_cost on the tuning architecture, then
8154 * use that. The 80386 couldn't care less about mispredicts.
8157 if (!optimize_size && !TARGET_CMOVE)
8163 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8165 /* We may be reversing unordered compare to normal compare,
8166 that is not valid in general (we may convert non-trapping
8167 condition to trapping one), however on i386 we currently
8168 emit all comparisons unordered. */
8169 compare_code = reverse_condition_maybe_unordered (compare_code);
8170 code = reverse_condition_maybe_unordered (code);
8174 compare_code = reverse_condition (compare_code);
8175 code = reverse_condition (code);
8179 out = emit_store_flag (out, code, ix86_compare_op0,
8180 ix86_compare_op1, VOIDmode, 0, 1);
8182 out = expand_simple_binop (mode, PLUS,
8184 out, 1, OPTAB_DIRECT);
8185 out = expand_simple_binop (mode, AND,
8187 GEN_INT (trunc_int_for_mode
8189 out, 1, OPTAB_DIRECT);
8190 out = expand_simple_binop (mode, PLUS,
8192 out, 1, OPTAB_DIRECT);
8193 if (out != operands[0])
8194 emit_move_insn (operands[0], out);
8196 return 1; /* DONE */
8202 /* Try a few things more with specific constants and a variable. */
8205 rtx var, orig_out, out, tmp;
8208 return 0; /* FAIL */
8210 /* If one of the two operands is an interesting constant, load a
8211 constant with the above and mask it in with a logical operation. */
8213 if (GET_CODE (operands[2]) == CONST_INT)
8216 if (INTVAL (operands[2]) == 0)
8217 operands[3] = constm1_rtx, op = and_optab;
8218 else if (INTVAL (operands[2]) == -1)
8219 operands[3] = const0_rtx, op = ior_optab;
8221 return 0; /* FAIL */
8223 else if (GET_CODE (operands[3]) == CONST_INT)
8226 if (INTVAL (operands[3]) == 0)
8227 operands[2] = constm1_rtx, op = and_optab;
8228 else if (INTVAL (operands[3]) == -1)
8229 operands[2] = const0_rtx, op = ior_optab;
8231 return 0; /* FAIL */
8234 return 0; /* FAIL */
8236 orig_out = operands[0];
8237 tmp = gen_reg_rtx (mode);
8240 /* Recurse to get the constant loaded. */
8241 if (ix86_expand_int_movcc (operands) == 0)
8242 return 0; /* FAIL */
8244 /* Mask in the interesting variable. */
8245 out = expand_binop (mode, op, var, tmp, orig_out, 0,
8247 if (out != orig_out)
8248 emit_move_insn (orig_out, out);
8250 return 1; /* DONE */
8254 * For comparison with above,
8264 if (! nonimmediate_operand (operands[2], mode))
8265 operands[2] = force_reg (mode, operands[2]);
8266 if (! nonimmediate_operand (operands[3], mode))
8267 operands[3] = force_reg (mode, operands[3]);
8269 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8271 rtx tmp = gen_reg_rtx (mode);
8272 emit_move_insn (tmp, operands[3]);
8275 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8277 rtx tmp = gen_reg_rtx (mode);
8278 emit_move_insn (tmp, operands[2]);
8281 if (! register_operand (operands[2], VOIDmode)
8282 && ! register_operand (operands[3], VOIDmode))
8283 operands[2] = force_reg (mode, operands[2]);
8285 emit_insn (compare_seq);
8286 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8287 gen_rtx_IF_THEN_ELSE (mode,
8288 compare_op, operands[2],
8291 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8292 gen_rtx_IF_THEN_ELSE (mode,
8297 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8298 gen_rtx_IF_THEN_ELSE (mode,
8303 return 1; /* DONE */
8307 ix86_expand_fp_movcc (operands)
8312 rtx compare_op, second_test, bypass_test;
8314 /* For SF/DFmode conditional moves based on comparisons
8315 in same mode, we may want to use SSE min/max instructions. */
8316 if (((TARGET_SSE && GET_MODE (operands[0]) == SFmode)
8317 || (TARGET_SSE2 && GET_MODE (operands[0]) == DFmode))
8318 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
8319 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
8321 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
8322 /* We may be called from the post-reload splitter. */
8323 && (!REG_P (operands[0])
8324 || SSE_REG_P (operands[0])
8325 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
8327 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
8328 code = GET_CODE (operands[1]);
8330 /* See if we have (cross) match between comparison operands and
8331 conditional move operands. */
8332 if (rtx_equal_p (operands[2], op1))
8337 code = reverse_condition_maybe_unordered (code);
8339 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
8341 /* Check for min operation. */
8344 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8345 if (memory_operand (op0, VOIDmode))
8346 op0 = force_reg (GET_MODE (operands[0]), op0);
8347 if (GET_MODE (operands[0]) == SFmode)
8348 emit_insn (gen_minsf3 (operands[0], op0, op1));
8350 emit_insn (gen_mindf3 (operands[0], op0, op1));
8353 /* Check for max operation. */
8356 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8357 if (memory_operand (op0, VOIDmode))
8358 op0 = force_reg (GET_MODE (operands[0]), op0);
8359 if (GET_MODE (operands[0]) == SFmode)
8360 emit_insn (gen_maxsf3 (operands[0], op0, op1));
8362 emit_insn (gen_maxdf3 (operands[0], op0, op1));
8366 /* Manage condition to be sse_comparison_operator. In case we are
8367 in non-ieee mode, try to canonicalize the destination operand
8368 to be first in the comparison - this helps reload to avoid extra
8370 if (!sse_comparison_operator (operands[1], VOIDmode)
8371 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
8373 rtx tmp = ix86_compare_op0;
8374 ix86_compare_op0 = ix86_compare_op1;
8375 ix86_compare_op1 = tmp;
8376 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
8377 VOIDmode, ix86_compare_op0,
8380 /* Similary try to manage result to be first operand of conditional
8381 move. We also don't support the NE comparison on SSE, so try to
8383 if ((rtx_equal_p (operands[0], operands[3])
8384 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
8385 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
8387 rtx tmp = operands[2];
8388 operands[2] = operands[3];
8390 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
8391 (GET_CODE (operands[1])),
8392 VOIDmode, ix86_compare_op0,
8395 if (GET_MODE (operands[0]) == SFmode)
8396 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
8397 operands[2], operands[3],
8398 ix86_compare_op0, ix86_compare_op1));
8400 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
8401 operands[2], operands[3],
8402 ix86_compare_op0, ix86_compare_op1));
8406 /* The floating point conditional move instructions don't directly
8407 support conditions resulting from a signed integer comparison. */
8409 code = GET_CODE (operands[1]);
8410 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8412 /* The floating point conditional move instructions don't directly
8413 support signed integer comparisons. */
8415 if (!fcmov_comparison_operator (compare_op, VOIDmode))
8417 if (second_test != NULL || bypass_test != NULL)
8419 tmp = gen_reg_rtx (QImode);
8420 ix86_expand_setcc (code, tmp);
8422 ix86_compare_op0 = tmp;
8423 ix86_compare_op1 = const0_rtx;
8424 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8426 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8428 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8429 emit_move_insn (tmp, operands[3]);
8432 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8434 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8435 emit_move_insn (tmp, operands[2]);
8439 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8440 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8445 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8446 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8451 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8452 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8460 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
8461 works for floating pointer parameters and nonoffsetable memories.
8462 For pushes, it returns just stack offsets; the values will be saved
8463 in the right order. Maximally three parts are generated. */
8466 ix86_split_to_parts (operand, parts, mode)
8469 enum machine_mode mode;
8474 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
8476 size = (GET_MODE_SIZE (mode) + 4) / 8;
8478 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
8480 if (size < 2 || size > 3)
8483 /* Optimize constant pool reference to immediates. This is used by fp moves,
8484 that force all constants to memory to allow combining. */
8486 if (GET_CODE (operand) == MEM
8487 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
8488 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
8489 operand = get_pool_constant (XEXP (operand, 0));
8491 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
8493 /* The only non-offsetable memories we handle are pushes. */
8494 if (! push_operand (operand, VOIDmode))
8497 operand = copy_rtx (operand);
8498 PUT_MODE (operand, Pmode);
8499 parts[0] = parts[1] = parts[2] = operand;
8501 else if (!TARGET_64BIT)
8504 split_di (&operand, 1, &parts[0], &parts[1]);
8507 if (REG_P (operand))
8509 if (!reload_completed)
8511 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
8512 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8514 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
8516 else if (offsettable_memref_p (operand))
8518 operand = adjust_address (operand, SImode, 0);
8520 parts[1] = adjust_address (operand, SImode, 4);
8522 parts[2] = adjust_address (operand, SImode, 8);
8524 else if (GET_CODE (operand) == CONST_DOUBLE)
8529 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8534 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8535 parts[2] = GEN_INT (trunc_int_for_mode (l[2], SImode));
8538 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
8543 parts[1] = GEN_INT (trunc_int_for_mode (l[1], SImode));
8544 parts[0] = GEN_INT (trunc_int_for_mode (l[0], SImode));
8553 split_ti (&operand, 1, &parts[0], &parts[1]);
8554 if (mode == XFmode || mode == TFmode)
8556 if (REG_P (operand))
8558 if (!reload_completed)
8560 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
8561 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8563 else if (offsettable_memref_p (operand))
8565 operand = adjust_address (operand, DImode, 0);
8567 parts[1] = adjust_address (operand, SImode, 8);
8569 else if (GET_CODE (operand) == CONST_DOUBLE)
8574 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8575 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8576 /* Do not use shift by 32 to avoid warning on 32bit systems. */
8577 if (HOST_BITS_PER_WIDE_INT >= 64)
8579 = GEN_INT (trunc_int_for_mode
8580 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
8581 + ((((HOST_WIDE_INT)l[1]) << 31) << 1),
8584 parts[0] = immed_double_const (l[0], l[1], DImode);
8585 parts[1] = GEN_INT (trunc_int_for_mode (l[2], SImode));
8595 /* Emit insns to perform a move or push of DI, DF, and XF values.
8596 Return false when normal moves are needed; true when all required
8597 insns have been emitted. Operands 2-4 contain the input values
8598 int the correct order; operands 5-7 contain the output values. */
8601 ix86_split_long_move (operands)
8608 enum machine_mode mode = GET_MODE (operands[0]);
8610 /* The DFmode expanders may ask us to move double.
8611 For 64bit target this is single move. By hiding the fact
8612 here we simplify i386.md splitters. */
8613 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
8615 /* Optimize constant pool reference to immediates. This is used by fp moves,
8616 that force all constants to memory to allow combining. */
8618 if (GET_CODE (operands[1]) == MEM
8619 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8620 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
8621 operands[1] = get_pool_constant (XEXP (operands[1], 0));
8622 if (push_operand (operands[0], VOIDmode))
8624 operands[0] = copy_rtx (operands[0]);
8625 PUT_MODE (operands[0], Pmode);
8628 operands[0] = gen_lowpart (DImode, operands[0]);
8629 operands[1] = gen_lowpart (DImode, operands[1]);
8630 emit_move_insn (operands[0], operands[1]);
8634 /* The only non-offsettable memory we handle is push. */
8635 if (push_operand (operands[0], VOIDmode))
8637 else if (GET_CODE (operands[0]) == MEM
8638 && ! offsettable_memref_p (operands[0]))
8641 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
8642 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
8644 /* When emitting push, take care for source operands on the stack. */
8645 if (push && GET_CODE (operands[1]) == MEM
8646 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
8649 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
8650 XEXP (part[1][2], 0));
8651 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
8652 XEXP (part[1][1], 0));
8655 /* We need to do copy in the right order in case an address register
8656 of the source overlaps the destination. */
8657 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
8659 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
8661 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8664 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
8667 /* Collision in the middle part can be handled by reordering. */
8668 if (collisions == 1 && nparts == 3
8669 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8672 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
8673 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
8676 /* If there are more collisions, we can't handle it by reordering.
8677 Do an lea to the last part and use only one colliding move. */
8678 else if (collisions > 1)
8681 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
8682 XEXP (part[1][0], 0)));
8683 part[1][0] = change_address (part[1][0],
8684 TARGET_64BIT ? DImode : SImode,
8685 part[0][nparts - 1]);
8686 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
8688 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
8698 /* We use only first 12 bytes of TFmode value, but for pushing we
8699 are required to adjust stack as if we were pushing real 16byte
8701 if (mode == TFmode && !TARGET_64BIT)
8702 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
8704 emit_move_insn (part[0][2], part[1][2]);
8709 /* In 64bit mode we don't have 32bit push available. In case this is
8710 register, it is OK - we will just use larger counterpart. We also
8711 retype memory - these comes from attempt to avoid REX prefix on
8712 moving of second half of TFmode value. */
8713 if (GET_MODE (part[1][1]) == SImode)
8715 if (GET_CODE (part[1][1]) == MEM)
8716 part[1][1] = adjust_address (part[1][1], DImode, 0);
8717 else if (REG_P (part[1][1]))
8718 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
8721 if (GET_MODE (part[1][0]) == SImode)
8722 part[1][0] = part[1][1];
8725 emit_move_insn (part[0][1], part[1][1]);
8726 emit_move_insn (part[0][0], part[1][0]);
8730 /* Choose correct order to not overwrite the source before it is copied. */
8731 if ((REG_P (part[0][0])
8732 && REG_P (part[1][1])
8733 && (REGNO (part[0][0]) == REGNO (part[1][1])
8735 && REGNO (part[0][0]) == REGNO (part[1][2]))))
8737 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
8741 operands[2] = part[0][2];
8742 operands[3] = part[0][1];
8743 operands[4] = part[0][0];
8744 operands[5] = part[1][2];
8745 operands[6] = part[1][1];
8746 operands[7] = part[1][0];
8750 operands[2] = part[0][1];
8751 operands[3] = part[0][0];
8752 operands[5] = part[1][1];
8753 operands[6] = part[1][0];
8760 operands[2] = part[0][0];
8761 operands[3] = part[0][1];
8762 operands[4] = part[0][2];
8763 operands[5] = part[1][0];
8764 operands[6] = part[1][1];
8765 operands[7] = part[1][2];
8769 operands[2] = part[0][0];
8770 operands[3] = part[0][1];
8771 operands[5] = part[1][0];
8772 operands[6] = part[1][1];
8775 emit_move_insn (operands[2], operands[5]);
8776 emit_move_insn (operands[3], operands[6]);
8778 emit_move_insn (operands[4], operands[7]);
8784 ix86_split_ashldi (operands, scratch)
8785 rtx *operands, scratch;
8787 rtx low[2], high[2];
8790 if (GET_CODE (operands[2]) == CONST_INT)
8792 split_di (operands, 2, low, high);
8793 count = INTVAL (operands[2]) & 63;
8797 emit_move_insn (high[0], low[1]);
8798 emit_move_insn (low[0], const0_rtx);
8801 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
8805 if (!rtx_equal_p (operands[0], operands[1]))
8806 emit_move_insn (operands[0], operands[1]);
8807 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
8808 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
8813 if (!rtx_equal_p (operands[0], operands[1]))
8814 emit_move_insn (operands[0], operands[1]);
8816 split_di (operands, 1, low, high);
8818 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
8819 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
8821 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8823 if (! no_new_pseudos)
8824 scratch = force_reg (SImode, const0_rtx);
8826 emit_move_insn (scratch, const0_rtx);
8828 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
8832 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
8837 ix86_split_ashrdi (operands, scratch)
8838 rtx *operands, scratch;
8840 rtx low[2], high[2];
8843 if (GET_CODE (operands[2]) == CONST_INT)
8845 split_di (operands, 2, low, high);
8846 count = INTVAL (operands[2]) & 63;
8850 emit_move_insn (low[0], high[1]);
8852 if (! reload_completed)
8853 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
8856 emit_move_insn (high[0], low[0]);
8857 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
8861 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
8865 if (!rtx_equal_p (operands[0], operands[1]))
8866 emit_move_insn (operands[0], operands[1]);
8867 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
8868 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
8873 if (!rtx_equal_p (operands[0], operands[1]))
8874 emit_move_insn (operands[0], operands[1]);
8876 split_di (operands, 1, low, high);
8878 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
8879 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
8881 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8883 if (! no_new_pseudos)
8884 scratch = gen_reg_rtx (SImode);
8885 emit_move_insn (scratch, high[0]);
8886 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
8887 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
8891 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
8896 ix86_split_lshrdi (operands, scratch)
8897 rtx *operands, scratch;
8899 rtx low[2], high[2];
8902 if (GET_CODE (operands[2]) == CONST_INT)
8904 split_di (operands, 2, low, high);
8905 count = INTVAL (operands[2]) & 63;
8909 emit_move_insn (low[0], high[1]);
8910 emit_move_insn (high[0], const0_rtx);
8913 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
8917 if (!rtx_equal_p (operands[0], operands[1]))
8918 emit_move_insn (operands[0], operands[1]);
8919 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
8920 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
8925 if (!rtx_equal_p (operands[0], operands[1]))
8926 emit_move_insn (operands[0], operands[1]);
8928 split_di (operands, 1, low, high);
8930 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
8931 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
8933 /* Heh. By reversing the arguments, we can reuse this pattern. */
8934 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8936 if (! no_new_pseudos)
8937 scratch = force_reg (SImode, const0_rtx);
8939 emit_move_insn (scratch, const0_rtx);
8941 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
8945 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
8949 /* Helper function for the string operations below. Dest VARIABLE whether
8950 it is aligned to VALUE bytes. If true, jump to the label. */
8952 ix86_expand_aligntest (variable, value)
8956 rtx label = gen_label_rtx ();
8957 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
8958 if (GET_MODE (variable) == DImode)
8959 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
8961 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
8962 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
8967 /* Adjust COUNTER by the VALUE. */
8969 ix86_adjust_counter (countreg, value)
8971 HOST_WIDE_INT value;
8973 if (GET_MODE (countreg) == DImode)
8974 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
8976 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
8979 /* Zero extend possibly SImode EXP to Pmode register. */
8981 ix86_zero_extend_to_Pmode (exp)
8985 if (GET_MODE (exp) == VOIDmode)
8986 return force_reg (Pmode, exp);
8987 if (GET_MODE (exp) == Pmode)
8988 return copy_to_mode_reg (Pmode, exp);
8989 r = gen_reg_rtx (Pmode);
8990 emit_insn (gen_zero_extendsidi2 (r, exp));
8994 /* Expand string move (memcpy) operation. Use i386 string operations when
8995 profitable. expand_clrstr contains similar code. */
8997 ix86_expand_movstr (dst, src, count_exp, align_exp)
8998 rtx dst, src, count_exp, align_exp;
9000 rtx srcreg, destreg, countreg;
9001 enum machine_mode counter_mode;
9002 HOST_WIDE_INT align = 0;
9003 unsigned HOST_WIDE_INT count = 0;
9008 if (GET_CODE (align_exp) == CONST_INT)
9009 align = INTVAL (align_exp);
9011 /* This simple hack avoids all inlining code and simplifies code bellow. */
9012 if (!TARGET_ALIGN_STRINGOPS)
9015 if (GET_CODE (count_exp) == CONST_INT)
9016 count = INTVAL (count_exp);
9018 /* Figure out proper mode for counter. For 32bits it is always SImode,
9019 for 64bits use SImode when possible, otherwise DImode.
9020 Set count to number of bytes copied when known at compile time. */
9021 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9022 || x86_64_zero_extended_value (count_exp))
9023 counter_mode = SImode;
9025 counter_mode = DImode;
9027 if (counter_mode != SImode && counter_mode != DImode)
9030 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9031 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9033 emit_insn (gen_cld ());
9035 /* When optimizing for size emit simple rep ; movsb instruction for
9036 counts not divisible by 4. */
9038 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9040 countreg = ix86_zero_extend_to_Pmode (count_exp);
9042 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
9043 destreg, srcreg, countreg));
9045 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
9046 destreg, srcreg, countreg));
9049 /* For constant aligned (or small unaligned) copies use rep movsl
9050 followed by code copying the rest. For PentiumPro ensure 8 byte
9051 alignment to allow rep movsl acceleration. */
9055 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9056 || optimize_size || count < (unsigned int)64))
9058 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9059 if (count & ~(size - 1))
9061 countreg = copy_to_mode_reg (counter_mode,
9062 GEN_INT ((count >> (size == 4 ? 2 : 3))
9063 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9064 countreg = ix86_zero_extend_to_Pmode (countreg);
9068 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
9069 destreg, srcreg, countreg));
9071 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
9072 destreg, srcreg, countreg));
9075 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
9076 destreg, srcreg, countreg));
9078 if (size == 8 && (count & 0x04))
9079 emit_insn (gen_strmovsi (destreg, srcreg));
9081 emit_insn (gen_strmovhi (destreg, srcreg));
9083 emit_insn (gen_strmovqi (destreg, srcreg));
9085 /* The generic code based on the glibc implementation:
9086 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9087 allowing accelerated copying there)
9088 - copy the data using rep movsl
9095 /* In case we don't know anything about the alignment, default to
9096 library version, since it is usually equally fast and result in
9098 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9104 if (TARGET_SINGLE_STRINGOP)
9105 emit_insn (gen_cld ());
9107 countreg2 = gen_reg_rtx (Pmode);
9108 countreg = copy_to_mode_reg (counter_mode, count_exp);
9110 /* We don't use loops to align destination and to copy parts smaller
9111 than 4 bytes, because gcc is able to optimize such code better (in
9112 the case the destination or the count really is aligned, gcc is often
9113 able to predict the branches) and also it is friendlier to the
9114 hardware branch prediction.
9116 Using loops is benefical for generic case, because we can
9117 handle small counts using the loops. Many CPUs (such as Athlon)
9118 have large REP prefix setup costs.
9120 This is quite costy. Maybe we can revisit this decision later or
9121 add some customizability to this code. */
9124 && align < (TARGET_PENTIUMPRO && (count == 0
9125 || count >= (unsigned int)260)
9126 ? 8 : UNITS_PER_WORD))
9128 label = gen_label_rtx ();
9129 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9130 LEU, 0, counter_mode, 1, 0, label);
9134 rtx label = ix86_expand_aligntest (destreg, 1);
9135 emit_insn (gen_strmovqi (destreg, srcreg));
9136 ix86_adjust_counter (countreg, 1);
9138 LABEL_NUSES (label) = 1;
9142 rtx label = ix86_expand_aligntest (destreg, 2);
9143 emit_insn (gen_strmovhi (destreg, srcreg));
9144 ix86_adjust_counter (countreg, 2);
9146 LABEL_NUSES (label) = 1;
9149 && ((TARGET_PENTIUMPRO && (count == 0
9150 || count >= (unsigned int)260))
9153 rtx label = ix86_expand_aligntest (destreg, 4);
9154 emit_insn (gen_strmovsi (destreg, srcreg));
9155 ix86_adjust_counter (countreg, 4);
9157 LABEL_NUSES (label) = 1;
9160 if (!TARGET_SINGLE_STRINGOP)
9161 emit_insn (gen_cld ());
9164 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9166 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
9167 destreg, srcreg, countreg2));
9171 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9172 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
9173 destreg, srcreg, countreg2));
9179 LABEL_NUSES (label) = 1;
9181 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9182 emit_insn (gen_strmovsi (destreg, srcreg));
9183 if ((align <= 4 || count == 0) && TARGET_64BIT)
9185 rtx label = ix86_expand_aligntest (countreg, 4);
9186 emit_insn (gen_strmovsi (destreg, srcreg));
9188 LABEL_NUSES (label) = 1;
9190 if (align > 2 && count != 0 && (count & 2))
9191 emit_insn (gen_strmovhi (destreg, srcreg));
9192 if (align <= 2 || count == 0)
9194 rtx label = ix86_expand_aligntest (countreg, 2);
9195 emit_insn (gen_strmovhi (destreg, srcreg));
9197 LABEL_NUSES (label) = 1;
9199 if (align > 1 && count != 0 && (count & 1))
9200 emit_insn (gen_strmovqi (destreg, srcreg));
9201 if (align <= 1 || count == 0)
9203 rtx label = ix86_expand_aligntest (countreg, 1);
9204 emit_insn (gen_strmovqi (destreg, srcreg));
9206 LABEL_NUSES (label) = 1;
9210 insns = get_insns ();
9213 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
9218 /* Expand string clear operation (bzero). Use i386 string operations when
9219 profitable. expand_movstr contains similar code. */
9221 ix86_expand_clrstr (src, count_exp, align_exp)
9222 rtx src, count_exp, align_exp;
9224 rtx destreg, zeroreg, countreg;
9225 enum machine_mode counter_mode;
9226 HOST_WIDE_INT align = 0;
9227 unsigned HOST_WIDE_INT count = 0;
9229 if (GET_CODE (align_exp) == CONST_INT)
9230 align = INTVAL (align_exp);
9232 /* This simple hack avoids all inlining code and simplifies code bellow. */
9233 if (!TARGET_ALIGN_STRINGOPS)
9236 if (GET_CODE (count_exp) == CONST_INT)
9237 count = INTVAL (count_exp);
9238 /* Figure out proper mode for counter. For 32bits it is always SImode,
9239 for 64bits use SImode when possible, otherwise DImode.
9240 Set count to number of bytes copied when known at compile time. */
9241 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9242 || x86_64_zero_extended_value (count_exp))
9243 counter_mode = SImode;
9245 counter_mode = DImode;
9247 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9249 emit_insn (gen_cld ());
9251 /* When optimizing for size emit simple rep ; movsb instruction for
9252 counts not divisible by 4. */
9254 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9256 countreg = ix86_zero_extend_to_Pmode (count_exp);
9257 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
9259 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
9260 destreg, countreg));
9262 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
9263 destreg, countreg));
9267 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9268 || optimize_size || count < (unsigned int)64))
9270 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9271 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
9272 if (count & ~(size - 1))
9274 countreg = copy_to_mode_reg (counter_mode,
9275 GEN_INT ((count >> (size == 4 ? 2 : 3))
9276 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9277 countreg = ix86_zero_extend_to_Pmode (countreg);
9281 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
9282 destreg, countreg));
9284 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
9285 destreg, countreg));
9288 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
9289 destreg, countreg));
9291 if (size == 8 && (count & 0x04))
9292 emit_insn (gen_strsetsi (destreg,
9293 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9295 emit_insn (gen_strsethi (destreg,
9296 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9298 emit_insn (gen_strsetqi (destreg,
9299 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9306 /* In case we don't know anything about the alignment, default to
9307 library version, since it is usually equally fast and result in
9309 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9312 if (TARGET_SINGLE_STRINGOP)
9313 emit_insn (gen_cld ());
9315 countreg2 = gen_reg_rtx (Pmode);
9316 countreg = copy_to_mode_reg (counter_mode, count_exp);
9317 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
9320 && align < (TARGET_PENTIUMPRO && (count == 0
9321 || count >= (unsigned int)260)
9322 ? 8 : UNITS_PER_WORD))
9324 label = gen_label_rtx ();
9325 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9326 LEU, 0, counter_mode, 1, 0, label);
9330 rtx label = ix86_expand_aligntest (destreg, 1);
9331 emit_insn (gen_strsetqi (destreg,
9332 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9333 ix86_adjust_counter (countreg, 1);
9335 LABEL_NUSES (label) = 1;
9339 rtx label = ix86_expand_aligntest (destreg, 2);
9340 emit_insn (gen_strsethi (destreg,
9341 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9342 ix86_adjust_counter (countreg, 2);
9344 LABEL_NUSES (label) = 1;
9346 if (align <= 4 && TARGET_PENTIUMPRO && (count == 0
9347 || count >= (unsigned int)260))
9349 rtx label = ix86_expand_aligntest (destreg, 4);
9350 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
9351 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
9353 ix86_adjust_counter (countreg, 4);
9355 LABEL_NUSES (label) = 1;
9358 if (!TARGET_SINGLE_STRINGOP)
9359 emit_insn (gen_cld ());
9362 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9364 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
9365 destreg, countreg2));
9369 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9370 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
9371 destreg, countreg2));
9377 LABEL_NUSES (label) = 1;
9379 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9380 emit_insn (gen_strsetsi (destreg,
9381 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9382 if (TARGET_64BIT && (align <= 4 || count == 0))
9384 rtx label = ix86_expand_aligntest (destreg, 2);
9385 emit_insn (gen_strsetsi (destreg,
9386 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9388 LABEL_NUSES (label) = 1;
9390 if (align > 2 && count != 0 && (count & 2))
9391 emit_insn (gen_strsethi (destreg,
9392 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9393 if (align <= 2 || count == 0)
9395 rtx label = ix86_expand_aligntest (destreg, 2);
9396 emit_insn (gen_strsethi (destreg,
9397 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9399 LABEL_NUSES (label) = 1;
9401 if (align > 1 && count != 0 && (count & 1))
9402 emit_insn (gen_strsetqi (destreg,
9403 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9404 if (align <= 1 || count == 0)
9406 rtx label = ix86_expand_aligntest (destreg, 1);
9407 emit_insn (gen_strsetqi (destreg,
9408 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9410 LABEL_NUSES (label) = 1;
9415 /* Expand strlen. */
9417 ix86_expand_strlen (out, src, eoschar, align)
9418 rtx out, src, eoschar, align;
9420 rtx addr, scratch1, scratch2, scratch3, scratch4;
9422 /* The generic case of strlen expander is long. Avoid it's
9423 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
9425 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9426 && !TARGET_INLINE_ALL_STRINGOPS
9428 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
9431 addr = force_reg (Pmode, XEXP (src, 0));
9432 scratch1 = gen_reg_rtx (Pmode);
9434 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9437 /* Well it seems that some optimizer does not combine a call like
9438 foo(strlen(bar), strlen(bar));
9439 when the move and the subtraction is done here. It does calculate
9440 the length just once when these instructions are done inside of
9441 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
9442 often used and I use one fewer register for the lifetime of
9443 output_strlen_unroll() this is better. */
9445 emit_move_insn (out, addr);
9447 ix86_expand_strlensi_unroll_1 (out, align);
9449 /* strlensi_unroll_1 returns the address of the zero at the end of
9450 the string, like memchr(), so compute the length by subtracting
9451 the start address. */
9453 emit_insn (gen_subdi3 (out, out, addr));
9455 emit_insn (gen_subsi3 (out, out, addr));
9459 scratch2 = gen_reg_rtx (Pmode);
9460 scratch3 = gen_reg_rtx (Pmode);
9461 scratch4 = force_reg (Pmode, constm1_rtx);
9463 emit_move_insn (scratch3, addr);
9464 eoschar = force_reg (QImode, eoschar);
9466 emit_insn (gen_cld ());
9469 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
9470 align, scratch4, scratch3));
9471 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
9472 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
9476 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
9477 align, scratch4, scratch3));
9478 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
9479 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
9485 /* Expand the appropriate insns for doing strlen if not just doing
9488 out = result, initialized with the start address
9489 align_rtx = alignment of the address.
9490 scratch = scratch register, initialized with the startaddress when
9491 not aligned, otherwise undefined
9493 This is just the body. It needs the initialisations mentioned above and
9494 some address computing at the end. These things are done in i386.md. */
9497 ix86_expand_strlensi_unroll_1 (out, align_rtx)
9502 rtx align_2_label = NULL_RTX;
9503 rtx align_3_label = NULL_RTX;
9504 rtx align_4_label = gen_label_rtx ();
9505 rtx end_0_label = gen_label_rtx ();
9507 rtx tmpreg = gen_reg_rtx (SImode);
9508 rtx scratch = gen_reg_rtx (SImode);
9511 if (GET_CODE (align_rtx) == CONST_INT)
9512 align = INTVAL (align_rtx);
9514 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
9516 /* Is there a known alignment and is it less than 4? */
9519 rtx scratch1 = gen_reg_rtx (Pmode);
9520 emit_move_insn (scratch1, out);
9521 /* Is there a known alignment and is it not 2? */
9524 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
9525 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
9527 /* Leave just the 3 lower bits. */
9528 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
9529 NULL_RTX, 0, OPTAB_WIDEN);
9531 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9532 Pmode, 1, 0, align_4_label);
9533 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
9534 Pmode, 1, 0, align_2_label);
9535 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
9536 Pmode, 1, 0, align_3_label);
9540 /* Since the alignment is 2, we have to check 2 or 0 bytes;
9541 check if is aligned to 4 - byte. */
9543 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
9544 NULL_RTX, 0, OPTAB_WIDEN);
9546 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9547 Pmode, 1, 0, align_4_label);
9550 mem = gen_rtx_MEM (QImode, out);
9552 /* Now compare the bytes. */
9554 /* Compare the first n unaligned byte on a byte per byte basis. */
9555 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
9556 QImode, 1, 0, end_0_label);
9558 /* Increment the address. */
9560 emit_insn (gen_adddi3 (out, out, const1_rtx));
9562 emit_insn (gen_addsi3 (out, out, const1_rtx));
9564 /* Not needed with an alignment of 2 */
9567 emit_label (align_2_label);
9569 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
9570 QImode, 1, 0, end_0_label);
9573 emit_insn (gen_adddi3 (out, out, const1_rtx));
9575 emit_insn (gen_addsi3 (out, out, const1_rtx));
9577 emit_label (align_3_label);
9580 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
9581 QImode, 1, 0, end_0_label);
9584 emit_insn (gen_adddi3 (out, out, const1_rtx));
9586 emit_insn (gen_addsi3 (out, out, const1_rtx));
9589 /* Generate loop to check 4 bytes at a time. It is not a good idea to
9590 align this loop. It gives only huge programs, but does not help to
9592 emit_label (align_4_label);
9594 mem = gen_rtx_MEM (SImode, out);
9595 emit_move_insn (scratch, mem);
9597 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
9599 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
9601 /* This formula yields a nonzero result iff one of the bytes is zero.
9602 This saves three branches inside loop and many cycles. */
9604 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
9605 emit_insn (gen_one_cmplsi2 (scratch, scratch));
9606 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
9607 emit_insn (gen_andsi3 (tmpreg, tmpreg,
9608 GEN_INT (trunc_int_for_mode
9609 (0x80808080, SImode))));
9610 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0,
9611 SImode, 1, 0, align_4_label);
9615 rtx reg = gen_reg_rtx (SImode);
9616 rtx reg2 = gen_reg_rtx (Pmode);
9617 emit_move_insn (reg, tmpreg);
9618 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
9620 /* If zero is not in the first two bytes, move two bytes forward. */
9621 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9622 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9623 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9624 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
9625 gen_rtx_IF_THEN_ELSE (SImode, tmp,
9628 /* Emit lea manually to avoid clobbering of flags. */
9629 emit_insn (gen_rtx_SET (SImode, reg2,
9630 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
9632 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9633 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9634 emit_insn (gen_rtx_SET (VOIDmode, out,
9635 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
9642 rtx end_2_label = gen_label_rtx ();
9643 /* Is zero in the first two bytes? */
9645 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9646 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9647 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
9648 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9649 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
9651 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9652 JUMP_LABEL (tmp) = end_2_label;
9654 /* Not in the first two. Move two bytes forward. */
9655 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
9657 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
9659 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
9661 emit_label (end_2_label);
9665 /* Avoid branch in fixing the byte. */
9666 tmpreg = gen_lowpart (QImode, tmpreg);
9667 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
9669 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
9671 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
9673 emit_label (end_0_label);
9676 /* Clear stack slot assignments remembered from previous functions.
9677 This is called from INIT_EXPANDERS once before RTL is emitted for each
9681 ix86_init_machine_status (p)
9684 p->machine = (struct machine_function *)
9685 xcalloc (1, sizeof (struct machine_function));
9688 /* Mark machine specific bits of P for GC. */
9690 ix86_mark_machine_status (p)
9693 struct machine_function *machine = p->machine;
9694 enum machine_mode mode;
9700 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
9701 mode = (enum machine_mode) ((int) mode + 1))
9702 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
9703 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
9707 ix86_free_machine_status (p)
9714 /* Return a MEM corresponding to a stack slot with mode MODE.
9715 Allocate a new slot if necessary.
9717 The RTL for a function can have several slots available: N is
9718 which slot to use. */
9721 assign_386_stack_local (mode, n)
9722 enum machine_mode mode;
9725 if (n < 0 || n >= MAX_386_STACK_LOCALS)
9728 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
9729 ix86_stack_locals[(int) mode][n]
9730 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
9732 return ix86_stack_locals[(int) mode][n];
9735 /* Calculate the length of the memory address in the instruction
9736 encoding. Does not include the one-byte modrm, opcode, or prefix. */
9739 memory_address_length (addr)
9742 struct ix86_address parts;
9743 rtx base, index, disp;
9746 if (GET_CODE (addr) == PRE_DEC
9747 || GET_CODE (addr) == POST_INC
9748 || GET_CODE (addr) == PRE_MODIFY
9749 || GET_CODE (addr) == POST_MODIFY)
9752 if (! ix86_decompose_address (addr, &parts))
9756 index = parts.index;
9760 /* Register Indirect. */
9761 if (base && !index && !disp)
9763 /* Special cases: ebp and esp need the two-byte modrm form. */
9764 if (addr == stack_pointer_rtx
9765 || addr == arg_pointer_rtx
9766 || addr == frame_pointer_rtx
9767 || addr == hard_frame_pointer_rtx)
9771 /* Direct Addressing. */
9772 else if (disp && !base && !index)
9777 /* Find the length of the displacement constant. */
9780 if (GET_CODE (disp) == CONST_INT
9781 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
9787 /* An index requires the two-byte modrm form. */
9795 /* Compute default value for "length_immediate" attribute. When SHORTFORM is set
9796 expect that insn have 8bit immediate alternative. */
9798 ix86_attr_length_immediate_default (insn, shortform)
9804 extract_insn_cached (insn);
9805 for (i = recog_data.n_operands - 1; i >= 0; --i)
9806 if (CONSTANT_P (recog_data.operand[i]))
9811 && GET_CODE (recog_data.operand[i]) == CONST_INT
9812 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
9816 switch (get_attr_mode (insn))
9827 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
9832 fatal_insn ("Unknown insn mode", insn);
9838 /* Compute default value for "length_address" attribute. */
9840 ix86_attr_length_address_default (insn)
9844 extract_insn_cached (insn);
9845 for (i = recog_data.n_operands - 1; i >= 0; --i)
9846 if (GET_CODE (recog_data.operand[i]) == MEM)
9848 return memory_address_length (XEXP (recog_data.operand[i], 0));
9854 /* Return the maximum number of instructions a cpu can issue. */
9861 case PROCESSOR_PENTIUM:
9865 case PROCESSOR_PENTIUMPRO:
9866 case PROCESSOR_PENTIUM4:
9867 case PROCESSOR_ATHLON:
9875 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
9876 by DEP_INSN and nothing set by DEP_INSN. */
9879 ix86_flags_dependant (insn, dep_insn, insn_type)
9881 enum attr_type insn_type;
9885 /* Simplify the test for uninteresting insns. */
9886 if (insn_type != TYPE_SETCC
9887 && insn_type != TYPE_ICMOV
9888 && insn_type != TYPE_FCMOV
9889 && insn_type != TYPE_IBR)
9892 if ((set = single_set (dep_insn)) != 0)
9894 set = SET_DEST (set);
9897 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
9898 && XVECLEN (PATTERN (dep_insn), 0) == 2
9899 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
9900 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
9902 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
9903 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
9908 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
9911 /* This test is true if the dependant insn reads the flags but
9912 not any other potentially set register. */
9913 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
9916 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
9922 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
9923 address with operands set by DEP_INSN. */
9926 ix86_agi_dependant (insn, dep_insn, insn_type)
9928 enum attr_type insn_type;
9932 if (insn_type == TYPE_LEA
9935 addr = PATTERN (insn);
9936 if (GET_CODE (addr) == SET)
9938 else if (GET_CODE (addr) == PARALLEL
9939 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
9940 addr = XVECEXP (addr, 0, 0);
9943 addr = SET_SRC (addr);
9948 extract_insn_cached (insn);
9949 for (i = recog_data.n_operands - 1; i >= 0; --i)
9950 if (GET_CODE (recog_data.operand[i]) == MEM)
9952 addr = XEXP (recog_data.operand[i], 0);
9959 return modified_in_p (addr, dep_insn);
9963 ix86_adjust_cost (insn, link, dep_insn, cost)
9964 rtx insn, link, dep_insn;
9967 enum attr_type insn_type, dep_insn_type;
9968 enum attr_memory memory, dep_memory;
9970 int dep_insn_code_number;
9972 /* Anti and output depenancies have zero cost on all CPUs. */
9973 if (REG_NOTE_KIND (link) != 0)
9976 dep_insn_code_number = recog_memoized (dep_insn);
9978 /* If we can't recognize the insns, we can't really do anything. */
9979 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
9982 insn_type = get_attr_type (insn);
9983 dep_insn_type = get_attr_type (dep_insn);
9987 case PROCESSOR_PENTIUM:
9988 /* Address Generation Interlock adds a cycle of latency. */
9989 if (ix86_agi_dependant (insn, dep_insn, insn_type))
9992 /* ??? Compares pair with jump/setcc. */
9993 if (ix86_flags_dependant (insn, dep_insn, insn_type))
9996 /* Floating point stores require value to be ready one cycle ealier. */
9997 if (insn_type == TYPE_FMOV
9998 && get_attr_memory (insn) == MEMORY_STORE
9999 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10003 case PROCESSOR_PENTIUMPRO:
10004 memory = get_attr_memory (insn);
10005 dep_memory = get_attr_memory (dep_insn);
10007 /* Since we can't represent delayed latencies of load+operation,
10008 increase the cost here for non-imov insns. */
10009 if (dep_insn_type != TYPE_IMOV
10010 && dep_insn_type != TYPE_FMOV
10011 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
10014 /* INT->FP conversion is expensive. */
10015 if (get_attr_fp_int_src (dep_insn))
10018 /* There is one cycle extra latency between an FP op and a store. */
10019 if (insn_type == TYPE_FMOV
10020 && (set = single_set (dep_insn)) != NULL_RTX
10021 && (set2 = single_set (insn)) != NULL_RTX
10022 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
10023 && GET_CODE (SET_DEST (set2)) == MEM)
10026 /* Show ability of reorder buffer to hide latency of load by executing
10027 in parallel with previous instruction in case
10028 previous instruction is not needed to compute the address. */
10029 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10030 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10032 /* Claim moves to take one cycle, as core can issue one load
10033 at time and the next load can start cycle later. */
10034 if (dep_insn_type == TYPE_IMOV
10035 || dep_insn_type == TYPE_FMOV)
10043 memory = get_attr_memory (insn);
10044 dep_memory = get_attr_memory (dep_insn);
10045 /* The esp dependency is resolved before the instruction is really
10047 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
10048 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
10051 /* Since we can't represent delayed latencies of load+operation,
10052 increase the cost here for non-imov insns. */
10053 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10054 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
10056 /* INT->FP conversion is expensive. */
10057 if (get_attr_fp_int_src (dep_insn))
10060 /* Show ability of reorder buffer to hide latency of load by executing
10061 in parallel with previous instruction in case
10062 previous instruction is not needed to compute the address. */
10063 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10064 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10066 /* Claim moves to take one cycle, as core can issue one load
10067 at time and the next load can start cycle later. */
10068 if (dep_insn_type == TYPE_IMOV
10069 || dep_insn_type == TYPE_FMOV)
10078 case PROCESSOR_ATHLON:
10079 memory = get_attr_memory (insn);
10080 dep_memory = get_attr_memory (dep_insn);
10082 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10084 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
10089 /* Show ability of reorder buffer to hide latency of load by executing
10090 in parallel with previous instruction in case
10091 previous instruction is not needed to compute the address. */
10092 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10093 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10095 /* Claim moves to take one cycle, as core can issue one load
10096 at time and the next load can start cycle later. */
10097 if (dep_insn_type == TYPE_IMOV
10098 || dep_insn_type == TYPE_FMOV)
10100 else if (cost >= 3)
10115 struct ppro_sched_data
10118 int issued_this_cycle;
10123 ix86_safe_length (insn)
10126 if (recog_memoized (insn) >= 0)
10127 return get_attr_length(insn);
10133 ix86_safe_length_prefix (insn)
10136 if (recog_memoized (insn) >= 0)
10137 return get_attr_length(insn);
10142 static enum attr_memory
10143 ix86_safe_memory (insn)
10146 if (recog_memoized (insn) >= 0)
10147 return get_attr_memory(insn);
10149 return MEMORY_UNKNOWN;
10152 static enum attr_pent_pair
10153 ix86_safe_pent_pair (insn)
10156 if (recog_memoized (insn) >= 0)
10157 return get_attr_pent_pair(insn);
10159 return PENT_PAIR_NP;
10162 static enum attr_ppro_uops
10163 ix86_safe_ppro_uops (insn)
10166 if (recog_memoized (insn) >= 0)
10167 return get_attr_ppro_uops (insn);
10169 return PPRO_UOPS_MANY;
10173 ix86_dump_ppro_packet (dump)
10176 if (ix86_sched_data.ppro.decode[0])
10178 fprintf (dump, "PPRO packet: %d",
10179 INSN_UID (ix86_sched_data.ppro.decode[0]));
10180 if (ix86_sched_data.ppro.decode[1])
10181 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
10182 if (ix86_sched_data.ppro.decode[2])
10183 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
10184 fputc ('\n', dump);
10188 /* We're beginning a new block. Initialize data structures as necessary. */
10191 ix86_sched_init (dump, sched_verbose, veclen)
10192 FILE *dump ATTRIBUTE_UNUSED;
10193 int sched_verbose ATTRIBUTE_UNUSED;
10194 int veclen ATTRIBUTE_UNUSED;
10196 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
10199 /* Shift INSN to SLOT, and shift everything else down. */
10202 ix86_reorder_insn (insnp, slot)
10209 insnp[0] = insnp[1];
10210 while (++insnp != slot);
10215 /* Find an instruction with given pairability and minimal amount of cycles
10216 lost by the fact that the CPU waits for both pipelines to finish before
10217 reading next instructions. Also take care that both instructions together
10218 can not exceed 7 bytes. */
10221 ix86_pent_find_pair (e_ready, ready, type, first)
10224 enum attr_pent_pair type;
10227 int mincycles, cycles;
10228 enum attr_pent_pair tmp;
10229 enum attr_memory memory;
10230 rtx *insnp, *bestinsnp = NULL;
10232 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
10235 memory = ix86_safe_memory (first);
10236 cycles = result_ready_cost (first);
10237 mincycles = INT_MAX;
10239 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
10240 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
10241 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
10243 enum attr_memory second_memory;
10244 int secondcycles, currentcycles;
10246 second_memory = ix86_safe_memory (*insnp);
10247 secondcycles = result_ready_cost (*insnp);
10248 currentcycles = abs (cycles - secondcycles);
10250 if (secondcycles >= 1 && cycles >= 1)
10252 /* Two read/modify/write instructions together takes two
10254 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
10255 currentcycles += 2;
10257 /* Read modify/write instruction followed by read/modify
10258 takes one cycle longer. */
10259 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
10260 && tmp != PENT_PAIR_UV
10261 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
10262 currentcycles += 1;
10264 if (currentcycles < mincycles)
10265 bestinsnp = insnp, mincycles = currentcycles;
10271 /* Subroutines of ix86_sched_reorder. */
10274 ix86_sched_reorder_pentium (ready, e_ready)
10278 enum attr_pent_pair pair1, pair2;
10281 /* This wouldn't be necessary if Haifa knew that static insn ordering
10282 is important to which pipe an insn is issued to. So we have to make
10283 some minor rearrangements. */
10285 pair1 = ix86_safe_pent_pair (*e_ready);
10287 /* If the first insn is non-pairable, let it be. */
10288 if (pair1 == PENT_PAIR_NP)
10291 pair2 = PENT_PAIR_NP;
10294 /* If the first insn is UV or PV pairable, search for a PU
10295 insn to go with. */
10296 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
10298 insnp = ix86_pent_find_pair (e_ready-1, ready,
10299 PENT_PAIR_PU, *e_ready);
10301 pair2 = PENT_PAIR_PU;
10304 /* If the first insn is PU or UV pairable, search for a PV
10305 insn to go with. */
10306 if (pair2 == PENT_PAIR_NP
10307 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
10309 insnp = ix86_pent_find_pair (e_ready-1, ready,
10310 PENT_PAIR_PV, *e_ready);
10312 pair2 = PENT_PAIR_PV;
10315 /* If the first insn is pairable, search for a UV
10316 insn to go with. */
10317 if (pair2 == PENT_PAIR_NP)
10319 insnp = ix86_pent_find_pair (e_ready-1, ready,
10320 PENT_PAIR_UV, *e_ready);
10322 pair2 = PENT_PAIR_UV;
10325 if (pair2 == PENT_PAIR_NP)
10328 /* Found something! Decide if we need to swap the order. */
10329 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
10330 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
10331 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
10332 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
10333 ix86_reorder_insn (insnp, e_ready);
10335 ix86_reorder_insn (insnp, e_ready - 1);
10339 ix86_sched_reorder_ppro (ready, e_ready)
10344 enum attr_ppro_uops cur_uops;
10345 int issued_this_cycle;
10349 /* At this point .ppro.decode contains the state of the three
10350 decoders from last "cycle". That is, those insns that were
10351 actually independent. But here we're scheduling for the
10352 decoder, and we may find things that are decodable in the
10355 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
10356 issued_this_cycle = 0;
10359 cur_uops = ix86_safe_ppro_uops (*insnp);
10361 /* If the decoders are empty, and we've a complex insn at the
10362 head of the priority queue, let it issue without complaint. */
10363 if (decode[0] == NULL)
10365 if (cur_uops == PPRO_UOPS_MANY)
10367 decode[0] = *insnp;
10371 /* Otherwise, search for a 2-4 uop unsn to issue. */
10372 while (cur_uops != PPRO_UOPS_FEW)
10374 if (insnp == ready)
10376 cur_uops = ix86_safe_ppro_uops (*--insnp);
10379 /* If so, move it to the head of the line. */
10380 if (cur_uops == PPRO_UOPS_FEW)
10381 ix86_reorder_insn (insnp, e_ready);
10383 /* Issue the head of the queue. */
10384 issued_this_cycle = 1;
10385 decode[0] = *e_ready--;
10388 /* Look for simple insns to fill in the other two slots. */
10389 for (i = 1; i < 3; ++i)
10390 if (decode[i] == NULL)
10392 if (ready >= e_ready)
10396 cur_uops = ix86_safe_ppro_uops (*insnp);
10397 while (cur_uops != PPRO_UOPS_ONE)
10399 if (insnp == ready)
10401 cur_uops = ix86_safe_ppro_uops (*--insnp);
10404 /* Found one. Move it to the head of the queue and issue it. */
10405 if (cur_uops == PPRO_UOPS_ONE)
10407 ix86_reorder_insn (insnp, e_ready);
10408 decode[i] = *e_ready--;
10409 issued_this_cycle++;
10413 /* ??? Didn't find one. Ideally, here we would do a lazy split
10414 of 2-uop insns, issue one and queue the other. */
10418 if (issued_this_cycle == 0)
10419 issued_this_cycle = 1;
10420 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
10423 /* We are about to being issuing insns for this clock cycle.
10424 Override the default sort algorithm to better slot instructions. */
10426 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
10427 FILE *dump ATTRIBUTE_UNUSED;
10428 int sched_verbose ATTRIBUTE_UNUSED;
10431 int clock_var ATTRIBUTE_UNUSED;
10433 int n_ready = *n_readyp;
10434 rtx *e_ready = ready + n_ready - 1;
10444 case PROCESSOR_PENTIUM:
10445 ix86_sched_reorder_pentium (ready, e_ready);
10448 case PROCESSOR_PENTIUMPRO:
10449 ix86_sched_reorder_ppro (ready, e_ready);
10454 return ix86_issue_rate ();
10457 /* We are about to issue INSN. Return the number of insns left on the
10458 ready queue that can be issued this cycle. */
10461 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
10465 int can_issue_more;
10471 return can_issue_more - 1;
10473 case PROCESSOR_PENTIUMPRO:
10475 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
10477 if (uops == PPRO_UOPS_MANY)
10480 ix86_dump_ppro_packet (dump);
10481 ix86_sched_data.ppro.decode[0] = insn;
10482 ix86_sched_data.ppro.decode[1] = NULL;
10483 ix86_sched_data.ppro.decode[2] = NULL;
10485 ix86_dump_ppro_packet (dump);
10486 ix86_sched_data.ppro.decode[0] = NULL;
10488 else if (uops == PPRO_UOPS_FEW)
10491 ix86_dump_ppro_packet (dump);
10492 ix86_sched_data.ppro.decode[0] = insn;
10493 ix86_sched_data.ppro.decode[1] = NULL;
10494 ix86_sched_data.ppro.decode[2] = NULL;
10498 for (i = 0; i < 3; ++i)
10499 if (ix86_sched_data.ppro.decode[i] == NULL)
10501 ix86_sched_data.ppro.decode[i] = insn;
10509 ix86_dump_ppro_packet (dump);
10510 ix86_sched_data.ppro.decode[0] = NULL;
10511 ix86_sched_data.ppro.decode[1] = NULL;
10512 ix86_sched_data.ppro.decode[2] = NULL;
10516 return --ix86_sched_data.ppro.issued_this_cycle;
10520 /* Walk through INSNS and look for MEM references whose address is DSTREG or
10521 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
10525 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
10527 rtx dstref, srcref, dstreg, srcreg;
10531 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
10533 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
10537 /* Subroutine of above to actually do the updating by recursively walking
10541 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
10543 rtx dstref, srcref, dstreg, srcreg;
10545 enum rtx_code code = GET_CODE (x);
10546 const char *format_ptr = GET_RTX_FORMAT (code);
10549 if (code == MEM && XEXP (x, 0) == dstreg)
10550 MEM_COPY_ATTRIBUTES (x, dstref);
10551 else if (code == MEM && XEXP (x, 0) == srcreg)
10552 MEM_COPY_ATTRIBUTES (x, srcref);
10554 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
10556 if (*format_ptr == 'e')
10557 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
10559 else if (*format_ptr == 'E')
10560 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10561 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
10566 /* Compute the alignment given to a constant that is being placed in memory.
10567 EXP is the constant and ALIGN is the alignment that the object would
10569 The value of this function is used instead of that alignment to align
10573 ix86_constant_alignment (exp, align)
10577 if (TREE_CODE (exp) == REAL_CST)
10579 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
10581 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
10584 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
10591 /* Compute the alignment for a static variable.
10592 TYPE is the data type, and ALIGN is the alignment that
10593 the object would ordinarily have. The value of this function is used
10594 instead of that alignment to align the object. */
10597 ix86_data_alignment (type, align)
10601 if (AGGREGATE_TYPE_P (type)
10602 && TYPE_SIZE (type)
10603 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10604 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
10605 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
10608 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
10609 to 16byte boundary. */
10612 if (AGGREGATE_TYPE_P (type)
10613 && TYPE_SIZE (type)
10614 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10615 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
10616 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10620 if (TREE_CODE (type) == ARRAY_TYPE)
10622 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10624 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10627 else if (TREE_CODE (type) == COMPLEX_TYPE)
10630 if (TYPE_MODE (type) == DCmode && align < 64)
10632 if (TYPE_MODE (type) == XCmode && align < 128)
10635 else if ((TREE_CODE (type) == RECORD_TYPE
10636 || TREE_CODE (type) == UNION_TYPE
10637 || TREE_CODE (type) == QUAL_UNION_TYPE)
10638 && TYPE_FIELDS (type))
10640 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10642 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10645 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10646 || TREE_CODE (type) == INTEGER_TYPE)
10648 if (TYPE_MODE (type) == DFmode && align < 64)
10650 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10657 /* Compute the alignment for a local variable.
10658 TYPE is the data type, and ALIGN is the alignment that
10659 the object would ordinarily have. The value of this macro is used
10660 instead of that alignment to align the object. */
10663 ix86_local_alignment (type, align)
10667 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
10668 to 16byte boundary. */
10671 if (AGGREGATE_TYPE_P (type)
10672 && TYPE_SIZE (type)
10673 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10674 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
10675 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10678 if (TREE_CODE (type) == ARRAY_TYPE)
10680 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10682 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10685 else if (TREE_CODE (type) == COMPLEX_TYPE)
10687 if (TYPE_MODE (type) == DCmode && align < 64)
10689 if (TYPE_MODE (type) == XCmode && align < 128)
10692 else if ((TREE_CODE (type) == RECORD_TYPE
10693 || TREE_CODE (type) == UNION_TYPE
10694 || TREE_CODE (type) == QUAL_UNION_TYPE)
10695 && TYPE_FIELDS (type))
10697 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10699 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10702 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10703 || TREE_CODE (type) == INTEGER_TYPE)
10706 if (TYPE_MODE (type) == DFmode && align < 64)
10708 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10714 /* Emit RTL insns to initialize the variable parts of a trampoline.
10715 FNADDR is an RTX for the address of the function's pure code.
10716 CXT is an RTX for the static chain value for the function. */
10718 x86_initialize_trampoline (tramp, fnaddr, cxt)
10719 rtx tramp, fnaddr, cxt;
10723 /* Compute offset from the end of the jmp to the target function. */
10724 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
10725 plus_constant (tramp, 10),
10726 NULL_RTX, 1, OPTAB_DIRECT);
10727 emit_move_insn (gen_rtx_MEM (QImode, tramp),
10728 GEN_INT (trunc_int_for_mode (0xb9, QImode)));
10729 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
10730 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
10731 GEN_INT (trunc_int_for_mode (0xe9, QImode)));
10732 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
10737 /* Try to load address using shorter movl instead of movabs.
10738 We may want to support movq for kernel mode, but kernel does not use
10739 trampolines at the moment. */
10740 if (x86_64_zero_extended_value (fnaddr))
10742 fnaddr = copy_to_mode_reg (DImode, fnaddr);
10743 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10744 GEN_INT (trunc_int_for_mode (0xbb41, HImode)));
10745 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
10746 gen_lowpart (SImode, fnaddr));
10751 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10752 GEN_INT (trunc_int_for_mode (0xbb49, HImode)));
10753 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10757 /* Load static chain using movabs to r10. */
10758 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10759 GEN_INT (trunc_int_for_mode (0xba49, HImode)));
10760 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10763 /* Jump to the r11 */
10764 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10765 GEN_INT (trunc_int_for_mode (0xff49, HImode)));
10766 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
10767 GEN_INT (trunc_int_for_mode (0xe3, QImode)));
10769 if (offset > TRAMPOLINE_SIZE)
10774 #define def_builtin(MASK, NAME, TYPE, CODE) \
10776 if ((MASK) & target_flags) \
10777 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL); \
10780 struct builtin_description
10782 const unsigned int mask;
10783 const enum insn_code icode;
10784 const char *const name;
10785 const enum ix86_builtins code;
10786 const enum rtx_code comparison;
10787 const unsigned int flag;
10790 static const struct builtin_description bdesc_comi[] =
10792 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
10793 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
10794 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
10795 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
10796 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
10797 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
10798 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
10799 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
10800 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
10801 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
10802 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
10803 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
10806 static const struct builtin_description bdesc_2arg[] =
10809 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
10810 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
10811 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
10812 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
10813 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
10814 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
10815 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
10816 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
10818 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
10819 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
10820 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
10821 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
10822 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
10823 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
10824 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
10825 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
10826 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
10827 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
10828 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
10829 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
10830 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
10831 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
10832 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
10833 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
10834 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
10835 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
10836 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
10837 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
10838 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
10839 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
10840 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
10841 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
10843 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
10844 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
10845 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
10846 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
10848 { MASK_SSE, CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
10849 { MASK_SSE, CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
10850 { MASK_SSE, CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
10851 { MASK_SSE, CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
10853 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
10854 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
10855 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
10856 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
10857 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
10860 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
10861 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
10862 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
10863 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
10864 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
10865 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
10867 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
10868 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
10869 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
10870 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
10871 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
10872 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
10873 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
10874 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
10876 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
10877 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
10878 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
10880 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
10881 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
10882 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
10883 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
10885 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
10886 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
10888 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
10889 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
10890 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
10891 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
10892 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
10893 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
10895 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
10896 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
10897 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
10898 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
10900 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
10901 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
10902 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
10903 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
10904 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
10905 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
10908 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
10909 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
10910 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
10912 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
10913 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
10915 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
10916 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
10917 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
10918 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
10919 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
10920 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
10922 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
10923 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
10924 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
10925 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
10926 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
10927 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
10929 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
10930 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
10931 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
10932 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
10934 { MASK_SSE, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
10935 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
10939 static const struct builtin_description bdesc_1arg[] =
10941 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
10942 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
10944 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
10945 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
10946 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
10948 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
10949 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
10950 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
10951 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
10956 ix86_init_builtins ()
10959 ix86_init_mmx_sse_builtins ();
10962 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
10963 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
10966 ix86_init_mmx_sse_builtins ()
10968 const struct builtin_description * d;
10970 tree endlink = void_list_node;
10972 tree pchar_type_node = build_pointer_type (char_type_node);
10973 tree pfloat_type_node = build_pointer_type (float_type_node);
10974 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
10975 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
10978 tree int_ftype_v4sf_v4sf
10979 = build_function_type (integer_type_node,
10980 tree_cons (NULL_TREE, V4SF_type_node,
10981 tree_cons (NULL_TREE,
10984 tree v4si_ftype_v4sf_v4sf
10985 = build_function_type (V4SI_type_node,
10986 tree_cons (NULL_TREE, V4SF_type_node,
10987 tree_cons (NULL_TREE,
10990 /* MMX/SSE/integer conversions. */
10991 tree int_ftype_v4sf
10992 = build_function_type (integer_type_node,
10993 tree_cons (NULL_TREE, V4SF_type_node,
10995 tree int_ftype_v8qi
10996 = build_function_type (integer_type_node,
10997 tree_cons (NULL_TREE, V8QI_type_node,
10999 tree int_ftype_v2si
11000 = build_function_type (integer_type_node,
11001 tree_cons (NULL_TREE, V2SI_type_node,
11003 tree v2si_ftype_int
11004 = build_function_type (V2SI_type_node,
11005 tree_cons (NULL_TREE, integer_type_node,
11007 tree v4sf_ftype_v4sf_int
11008 = build_function_type (V4SF_type_node,
11009 tree_cons (NULL_TREE, V4SF_type_node,
11010 tree_cons (NULL_TREE, integer_type_node,
11012 tree v4sf_ftype_v4sf_v2si
11013 = build_function_type (V4SF_type_node,
11014 tree_cons (NULL_TREE, V4SF_type_node,
11015 tree_cons (NULL_TREE, V2SI_type_node,
11017 tree int_ftype_v4hi_int
11018 = build_function_type (integer_type_node,
11019 tree_cons (NULL_TREE, V4HI_type_node,
11020 tree_cons (NULL_TREE, integer_type_node,
11022 tree v4hi_ftype_v4hi_int_int
11023 = build_function_type (V4HI_type_node,
11024 tree_cons (NULL_TREE, V4HI_type_node,
11025 tree_cons (NULL_TREE, integer_type_node,
11026 tree_cons (NULL_TREE,
11029 /* Miscellaneous. */
11030 tree v8qi_ftype_v4hi_v4hi
11031 = build_function_type (V8QI_type_node,
11032 tree_cons (NULL_TREE, V4HI_type_node,
11033 tree_cons (NULL_TREE, V4HI_type_node,
11035 tree v4hi_ftype_v2si_v2si
11036 = build_function_type (V4HI_type_node,
11037 tree_cons (NULL_TREE, V2SI_type_node,
11038 tree_cons (NULL_TREE, V2SI_type_node,
11040 tree v4sf_ftype_v4sf_v4sf_int
11041 = build_function_type (V4SF_type_node,
11042 tree_cons (NULL_TREE, V4SF_type_node,
11043 tree_cons (NULL_TREE, V4SF_type_node,
11044 tree_cons (NULL_TREE,
11047 tree v4hi_ftype_v8qi_v8qi
11048 = build_function_type (V4HI_type_node,
11049 tree_cons (NULL_TREE, V8QI_type_node,
11050 tree_cons (NULL_TREE, V8QI_type_node,
11052 tree v2si_ftype_v4hi_v4hi
11053 = build_function_type (V2SI_type_node,
11054 tree_cons (NULL_TREE, V4HI_type_node,
11055 tree_cons (NULL_TREE, V4HI_type_node,
11057 tree v4hi_ftype_v4hi_int
11058 = build_function_type (V4HI_type_node,
11059 tree_cons (NULL_TREE, V4HI_type_node,
11060 tree_cons (NULL_TREE, integer_type_node,
11062 tree v4hi_ftype_v4hi_di
11063 = build_function_type (V4HI_type_node,
11064 tree_cons (NULL_TREE, V4HI_type_node,
11065 tree_cons (NULL_TREE,
11066 long_long_integer_type_node,
11068 tree v2si_ftype_v2si_di
11069 = build_function_type (V2SI_type_node,
11070 tree_cons (NULL_TREE, V2SI_type_node,
11071 tree_cons (NULL_TREE,
11072 long_long_integer_type_node,
11074 tree void_ftype_void
11075 = build_function_type (void_type_node, endlink);
11076 tree void_ftype_pchar_int
11077 = build_function_type (void_type_node,
11078 tree_cons (NULL_TREE, pchar_type_node,
11079 tree_cons (NULL_TREE, integer_type_node,
11081 tree void_ftype_unsigned
11082 = build_function_type (void_type_node,
11083 tree_cons (NULL_TREE, unsigned_type_node,
11085 tree unsigned_ftype_void
11086 = build_function_type (unsigned_type_node, endlink);
11088 = build_function_type (long_long_unsigned_type_node, endlink);
11090 = build_function_type (intTI_type_node, endlink);
11091 tree v2si_ftype_v4sf
11092 = build_function_type (V2SI_type_node,
11093 tree_cons (NULL_TREE, V4SF_type_node,
11095 /* Loads/stores. */
11096 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
11097 tree_cons (NULL_TREE, V8QI_type_node,
11098 tree_cons (NULL_TREE,
11101 tree void_ftype_v8qi_v8qi_pchar
11102 = build_function_type (void_type_node, maskmovq_args);
11103 tree v4sf_ftype_pfloat
11104 = build_function_type (V4SF_type_node,
11105 tree_cons (NULL_TREE, pfloat_type_node,
11107 tree v4sf_ftype_float
11108 = build_function_type (V4SF_type_node,
11109 tree_cons (NULL_TREE, float_type_node,
11111 tree v4sf_ftype_float_float_float_float
11112 = build_function_type (V4SF_type_node,
11113 tree_cons (NULL_TREE, float_type_node,
11114 tree_cons (NULL_TREE, float_type_node,
11115 tree_cons (NULL_TREE,
11117 tree_cons (NULL_TREE,
11120 /* @@@ the type is bogus */
11121 tree v4sf_ftype_v4sf_pv2si
11122 = build_function_type (V4SF_type_node,
11123 tree_cons (NULL_TREE, V4SF_type_node,
11124 tree_cons (NULL_TREE, pv2si_type_node,
11126 tree void_ftype_pv2si_v4sf
11127 = build_function_type (void_type_node,
11128 tree_cons (NULL_TREE, pv2si_type_node,
11129 tree_cons (NULL_TREE, V4SF_type_node,
11131 tree void_ftype_pfloat_v4sf
11132 = build_function_type (void_type_node,
11133 tree_cons (NULL_TREE, pfloat_type_node,
11134 tree_cons (NULL_TREE, V4SF_type_node,
11136 tree void_ftype_pdi_di
11137 = build_function_type (void_type_node,
11138 tree_cons (NULL_TREE, pdi_type_node,
11139 tree_cons (NULL_TREE,
11140 long_long_unsigned_type_node,
11142 /* Normal vector unops. */
11143 tree v4sf_ftype_v4sf
11144 = build_function_type (V4SF_type_node,
11145 tree_cons (NULL_TREE, V4SF_type_node,
11148 /* Normal vector binops. */
11149 tree v4sf_ftype_v4sf_v4sf
11150 = build_function_type (V4SF_type_node,
11151 tree_cons (NULL_TREE, V4SF_type_node,
11152 tree_cons (NULL_TREE, V4SF_type_node,
11154 tree v8qi_ftype_v8qi_v8qi
11155 = build_function_type (V8QI_type_node,
11156 tree_cons (NULL_TREE, V8QI_type_node,
11157 tree_cons (NULL_TREE, V8QI_type_node,
11159 tree v4hi_ftype_v4hi_v4hi
11160 = build_function_type (V4HI_type_node,
11161 tree_cons (NULL_TREE, V4HI_type_node,
11162 tree_cons (NULL_TREE, V4HI_type_node,
11164 tree v2si_ftype_v2si_v2si
11165 = build_function_type (V2SI_type_node,
11166 tree_cons (NULL_TREE, V2SI_type_node,
11167 tree_cons (NULL_TREE, V2SI_type_node,
11169 tree ti_ftype_ti_ti
11170 = build_function_type (intTI_type_node,
11171 tree_cons (NULL_TREE, intTI_type_node,
11172 tree_cons (NULL_TREE, intTI_type_node,
11174 tree di_ftype_di_di
11175 = build_function_type (long_long_unsigned_type_node,
11176 tree_cons (NULL_TREE, long_long_unsigned_type_node,
11177 tree_cons (NULL_TREE,
11178 long_long_unsigned_type_node,
11181 tree v2si_ftype_v2sf
11182 = build_function_type (V2SI_type_node,
11183 tree_cons (NULL_TREE, V2SF_type_node,
11185 tree v2sf_ftype_v2si
11186 = build_function_type (V2SF_type_node,
11187 tree_cons (NULL_TREE, V2SI_type_node,
11189 tree v2si_ftype_v2si
11190 = build_function_type (V2SI_type_node,
11191 tree_cons (NULL_TREE, V2SI_type_node,
11193 tree v2sf_ftype_v2sf
11194 = build_function_type (V2SF_type_node,
11195 tree_cons (NULL_TREE, V2SF_type_node,
11197 tree v2sf_ftype_v2sf_v2sf
11198 = build_function_type (V2SF_type_node,
11199 tree_cons (NULL_TREE, V2SF_type_node,
11200 tree_cons (NULL_TREE,
11203 tree v2si_ftype_v2sf_v2sf
11204 = build_function_type (V2SI_type_node,
11205 tree_cons (NULL_TREE, V2SF_type_node,
11206 tree_cons (NULL_TREE,
11210 tree void_ftype_pchar
11211 = build_function_type (void_type_node,
11212 tree_cons (NULL_TREE, pchar_type_node,
11215 /* Add all builtins that are more or less simple operations on two
11217 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
11219 /* Use one of the operands; the target can have a different mode for
11220 mask-generating compares. */
11221 enum machine_mode mode;
11226 mode = insn_data[d->icode].operand[1].mode;
11231 type = v4sf_ftype_v4sf_v4sf;
11234 type = v8qi_ftype_v8qi_v8qi;
11237 type = v4hi_ftype_v4hi_v4hi;
11240 type = v2si_ftype_v2si_v2si;
11243 type = ti_ftype_ti_ti;
11246 type = di_ftype_di_di;
11253 /* Override for comparisons. */
11254 if (d->icode == CODE_FOR_maskcmpv4sf3
11255 || d->icode == CODE_FOR_maskncmpv4sf3
11256 || d->icode == CODE_FOR_vmmaskcmpv4sf3
11257 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
11258 type = v4si_ftype_v4sf_v4sf;
11260 def_builtin (d->mask, d->name, type, d->code);
11263 /* Add the remaining MMX insns with somewhat more complicated types. */
11264 def_builtin (MASK_MMX, "__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
11265 def_builtin (MASK_MMX, "__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
11266 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
11267 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
11268 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
11269 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
11270 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
11271 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
11272 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
11274 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
11275 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
11276 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
11278 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
11279 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
11281 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
11282 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
11284 /* comi/ucomi insns. */
11285 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
11286 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
11288 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
11289 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
11290 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
11292 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
11293 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
11294 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
11295 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
11296 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
11297 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
11299 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
11300 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
11302 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
11304 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
11305 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
11306 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
11307 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
11308 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
11309 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
11311 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
11312 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
11313 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
11314 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
11316 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
11317 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
11318 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
11319 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
11321 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
11322 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
11324 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
11326 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
11327 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
11328 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
11329 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
11330 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
11331 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
11333 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
11335 /* Original 3DNow! */
11336 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
11337 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
11338 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
11339 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
11340 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
11341 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
11342 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
11343 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
11344 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
11345 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
11346 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
11347 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
11348 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
11349 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
11350 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
11351 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
11352 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
11353 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
11354 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
11355 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
11356 def_builtin (MASK_3DNOW, "__builtin_ia32_prefetch_3dnow", void_ftype_pchar, IX86_BUILTIN_PREFETCH_3DNOW);
11357 def_builtin (MASK_3DNOW, "__builtin_ia32_prefetchw", void_ftype_pchar, IX86_BUILTIN_PREFETCHW);
11359 /* 3DNow! extension as used in the Athlon CPU. */
11360 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
11361 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
11362 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
11363 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
11364 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
11365 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
11367 /* Composite intrinsics. */
11368 def_builtin (MASK_SSE, "__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
11369 def_builtin (MASK_SSE, "__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
11370 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
11371 def_builtin (MASK_SSE, "__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
11372 def_builtin (MASK_SSE, "__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
11373 def_builtin (MASK_SSE, "__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
11374 def_builtin (MASK_SSE, "__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
11377 /* Errors in the source file can cause expand_expr to return const0_rtx
11378 where we expect a vector. To avoid crashing, use one of the vector
11379 clear instructions. */
11381 safe_vector_operand (x, mode)
11383 enum machine_mode mode;
11385 if (x != const0_rtx)
11387 x = gen_reg_rtx (mode);
11389 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
11390 emit_insn (gen_mmx_clrdi (mode == DImode ? x
11391 : gen_rtx_SUBREG (DImode, x, 0)));
11393 emit_insn (gen_sse_clrti (mode == TImode ? x
11394 : gen_rtx_SUBREG (TImode, x, 0)));
11398 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
11401 ix86_expand_binop_builtin (icode, arglist, target)
11402 enum insn_code icode;
11407 tree arg0 = TREE_VALUE (arglist);
11408 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11409 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11410 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11411 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11412 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11413 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
11415 if (VECTOR_MODE_P (mode0))
11416 op0 = safe_vector_operand (op0, mode0);
11417 if (VECTOR_MODE_P (mode1))
11418 op1 = safe_vector_operand (op1, mode1);
11421 || GET_MODE (target) != tmode
11422 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11423 target = gen_reg_rtx (tmode);
11425 /* In case the insn wants input operands in modes different from
11426 the result, abort. */
11427 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
11430 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11431 op0 = copy_to_mode_reg (mode0, op0);
11432 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11433 op1 = copy_to_mode_reg (mode1, op1);
11435 pat = GEN_FCN (icode) (target, op0, op1);
11442 /* Subroutine of ix86_expand_builtin to take care of stores. */
11445 ix86_expand_store_builtin (icode, arglist, shuffle)
11446 enum insn_code icode;
11451 tree arg0 = TREE_VALUE (arglist);
11452 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11453 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11454 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11455 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
11456 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
11458 if (VECTOR_MODE_P (mode1))
11459 op1 = safe_vector_operand (op1, mode1);
11461 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11462 if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11463 op1 = copy_to_mode_reg (mode1, op1);
11465 emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
11466 pat = GEN_FCN (icode) (op0, op1);
11472 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
11475 ix86_expand_unop_builtin (icode, arglist, target, do_load)
11476 enum insn_code icode;
11482 tree arg0 = TREE_VALUE (arglist);
11483 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11484 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11485 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11488 || GET_MODE (target) != tmode
11489 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11490 target = gen_reg_rtx (tmode);
11492 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11495 if (VECTOR_MODE_P (mode0))
11496 op0 = safe_vector_operand (op0, mode0);
11498 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11499 op0 = copy_to_mode_reg (mode0, op0);
11502 pat = GEN_FCN (icode) (target, op0);
11509 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
11510 sqrtss, rsqrtss, rcpss. */
11513 ix86_expand_unop1_builtin (icode, arglist, target)
11514 enum insn_code icode;
11519 tree arg0 = TREE_VALUE (arglist);
11520 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11521 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11522 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11525 || GET_MODE (target) != tmode
11526 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11527 target = gen_reg_rtx (tmode);
11529 if (VECTOR_MODE_P (mode0))
11530 op0 = safe_vector_operand (op0, mode0);
11532 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11533 op0 = copy_to_mode_reg (mode0, op0);
11535 pat = GEN_FCN (icode) (target, op0, op0);
11542 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
11545 ix86_expand_sse_compare (d, arglist, target)
11546 const struct builtin_description *d;
11551 tree arg0 = TREE_VALUE (arglist);
11552 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11553 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11554 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11556 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
11557 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
11558 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
11559 enum rtx_code comparison = d->comparison;
11561 if (VECTOR_MODE_P (mode0))
11562 op0 = safe_vector_operand (op0, mode0);
11563 if (VECTOR_MODE_P (mode1))
11564 op1 = safe_vector_operand (op1, mode1);
11566 /* Swap operands if we have a comparison that isn't available in
11570 rtx tmp = gen_reg_rtx (mode1);
11571 emit_move_insn (tmp, op1);
11577 || GET_MODE (target) != tmode
11578 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
11579 target = gen_reg_rtx (tmode);
11581 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
11582 op0 = copy_to_mode_reg (mode0, op0);
11583 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
11584 op1 = copy_to_mode_reg (mode1, op1);
11586 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
11587 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
11594 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
11597 ix86_expand_sse_comi (d, arglist, target)
11598 const struct builtin_description *d;
11603 tree arg0 = TREE_VALUE (arglist);
11604 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11605 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11606 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11608 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
11609 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
11610 enum rtx_code comparison = d->comparison;
11612 if (VECTOR_MODE_P (mode0))
11613 op0 = safe_vector_operand (op0, mode0);
11614 if (VECTOR_MODE_P (mode1))
11615 op1 = safe_vector_operand (op1, mode1);
11617 /* Swap operands if we have a comparison that isn't available in
11626 target = gen_reg_rtx (SImode);
11627 emit_move_insn (target, const0_rtx);
11628 target = gen_rtx_SUBREG (QImode, target, 0);
11630 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
11631 op0 = copy_to_mode_reg (mode0, op0);
11632 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
11633 op1 = copy_to_mode_reg (mode1, op1);
11635 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
11636 pat = GEN_FCN (d->icode) (op0, op1, op2);
11640 emit_insn (gen_setcc_2 (target, op2));
11645 /* Expand an expression EXP that calls a built-in function,
11646 with result going to TARGET if that's convenient
11647 (and in mode MODE if that's convenient).
11648 SUBTARGET may be used as the target for computing one of EXP's operands.
11649 IGNORE is nonzero if the value is to be ignored. */
11652 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
11655 rtx subtarget ATTRIBUTE_UNUSED;
11656 enum machine_mode mode ATTRIBUTE_UNUSED;
11657 int ignore ATTRIBUTE_UNUSED;
11659 const struct builtin_description *d;
11661 enum insn_code icode;
11662 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
11663 tree arglist = TREE_OPERAND (exp, 1);
11664 tree arg0, arg1, arg2, arg3;
11665 rtx op0, op1, op2, pat;
11666 enum machine_mode tmode, mode0, mode1, mode2;
11667 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11671 case IX86_BUILTIN_EMMS:
11672 emit_insn (gen_emms ());
11675 case IX86_BUILTIN_SFENCE:
11676 emit_insn (gen_sfence ());
11679 case IX86_BUILTIN_M_FROM_INT:
11680 target = gen_reg_rtx (DImode);
11681 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
11682 emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
11685 case IX86_BUILTIN_M_TO_INT:
11686 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
11687 op0 = copy_to_mode_reg (DImode, op0);
11688 target = gen_reg_rtx (SImode);
11689 emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
11692 case IX86_BUILTIN_PEXTRW:
11693 icode = CODE_FOR_mmx_pextrw;
11694 arg0 = TREE_VALUE (arglist);
11695 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11696 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11697 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11698 tmode = insn_data[icode].operand[0].mode;
11699 mode0 = insn_data[icode].operand[1].mode;
11700 mode1 = insn_data[icode].operand[2].mode;
11702 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11703 op0 = copy_to_mode_reg (mode0, op0);
11704 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11706 /* @@@ better error message */
11707 error ("selector must be an immediate");
11711 || GET_MODE (target) != tmode
11712 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11713 target = gen_reg_rtx (tmode);
11714 pat = GEN_FCN (icode) (target, op0, op1);
11720 case IX86_BUILTIN_PINSRW:
11721 icode = CODE_FOR_mmx_pinsrw;
11722 arg0 = TREE_VALUE (arglist);
11723 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11724 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11725 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11726 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11727 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11728 tmode = insn_data[icode].operand[0].mode;
11729 mode0 = insn_data[icode].operand[1].mode;
11730 mode1 = insn_data[icode].operand[2].mode;
11731 mode2 = insn_data[icode].operand[3].mode;
11733 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11734 op0 = copy_to_mode_reg (mode0, op0);
11735 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11736 op1 = copy_to_mode_reg (mode1, op1);
11737 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
11739 /* @@@ better error message */
11740 error ("selector must be an immediate");
11744 || GET_MODE (target) != tmode
11745 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11746 target = gen_reg_rtx (tmode);
11747 pat = GEN_FCN (icode) (target, op0, op1, op2);
11753 case IX86_BUILTIN_MASKMOVQ:
11754 icode = CODE_FOR_mmx_maskmovq;
11755 /* Note the arg order is different from the operand order. */
11756 arg1 = TREE_VALUE (arglist);
11757 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
11758 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11759 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11760 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11761 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11762 mode0 = insn_data[icode].operand[0].mode;
11763 mode1 = insn_data[icode].operand[1].mode;
11764 mode2 = insn_data[icode].operand[2].mode;
11766 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11767 op0 = copy_to_mode_reg (mode0, op0);
11768 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11769 op1 = copy_to_mode_reg (mode1, op1);
11770 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
11771 op2 = copy_to_mode_reg (mode2, op2);
11772 pat = GEN_FCN (icode) (op0, op1, op2);
11778 case IX86_BUILTIN_SQRTSS:
11779 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
11780 case IX86_BUILTIN_RSQRTSS:
11781 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
11782 case IX86_BUILTIN_RCPSS:
11783 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
11785 case IX86_BUILTIN_LOADAPS:
11786 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
11788 case IX86_BUILTIN_LOADUPS:
11789 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
11791 case IX86_BUILTIN_STOREAPS:
11792 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
11793 case IX86_BUILTIN_STOREUPS:
11794 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);
11796 case IX86_BUILTIN_LOADSS:
11797 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
11799 case IX86_BUILTIN_STORESS:
11800 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
11802 case IX86_BUILTIN_LOADHPS:
11803 case IX86_BUILTIN_LOADLPS:
11804 icode = (fcode == IX86_BUILTIN_LOADHPS
11805 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
11806 arg0 = TREE_VALUE (arglist);
11807 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11808 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11809 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11810 tmode = insn_data[icode].operand[0].mode;
11811 mode0 = insn_data[icode].operand[1].mode;
11812 mode1 = insn_data[icode].operand[2].mode;
11814 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11815 op0 = copy_to_mode_reg (mode0, op0);
11816 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
11818 || GET_MODE (target) != tmode
11819 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11820 target = gen_reg_rtx (tmode);
11821 pat = GEN_FCN (icode) (target, op0, op1);
11827 case IX86_BUILTIN_STOREHPS:
11828 case IX86_BUILTIN_STORELPS:
11829 icode = (fcode == IX86_BUILTIN_STOREHPS
11830 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
11831 arg0 = TREE_VALUE (arglist);
11832 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11833 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11834 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11835 mode0 = insn_data[icode].operand[1].mode;
11836 mode1 = insn_data[icode].operand[2].mode;
11838 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11839 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11840 op1 = copy_to_mode_reg (mode1, op1);
11842 pat = GEN_FCN (icode) (op0, op0, op1);
11848 case IX86_BUILTIN_MOVNTPS:
11849 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
11850 case IX86_BUILTIN_MOVNTQ:
11851 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);
11853 case IX86_BUILTIN_LDMXCSR:
11854 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
11855 target = assign_386_stack_local (SImode, 0);
11856 emit_move_insn (target, op0);
11857 emit_insn (gen_ldmxcsr (target));
11860 case IX86_BUILTIN_STMXCSR:
11861 target = assign_386_stack_local (SImode, 0);
11862 emit_insn (gen_stmxcsr (target));
11863 return copy_to_mode_reg (SImode, target);
11865 case IX86_BUILTIN_PREFETCH:
11866 icode = CODE_FOR_prefetch;
11867 arg0 = TREE_VALUE (arglist);
11868 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11869 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11870 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11871 mode0 = insn_data[icode].operand[0].mode;
11872 mode1 = insn_data[icode].operand[1].mode;
11874 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11876 /* @@@ better error message */
11877 error ("selector must be an immediate");
11881 op0 = copy_to_mode_reg (Pmode, op0);
11882 pat = GEN_FCN (icode) (op0, op1);
11888 case IX86_BUILTIN_SHUFPS:
11889 icode = CODE_FOR_sse_shufps;
11890 arg0 = TREE_VALUE (arglist);
11891 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11892 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11893 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11894 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11895 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11896 tmode = insn_data[icode].operand[0].mode;
11897 mode0 = insn_data[icode].operand[1].mode;
11898 mode1 = insn_data[icode].operand[2].mode;
11899 mode2 = insn_data[icode].operand[3].mode;
11901 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11902 op0 = copy_to_mode_reg (mode0, op0);
11903 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11904 op1 = copy_to_mode_reg (mode1, op1);
11905 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
11907 /* @@@ better error message */
11908 error ("mask must be an immediate");
11912 || GET_MODE (target) != tmode
11913 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11914 target = gen_reg_rtx (tmode);
11915 pat = GEN_FCN (icode) (target, op0, op1, op2);
11921 case IX86_BUILTIN_PSHUFW:
11922 icode = CODE_FOR_mmx_pshufw;
11923 arg0 = TREE_VALUE (arglist);
11924 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11925 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11926 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11927 tmode = insn_data[icode].operand[0].mode;
11928 mode0 = insn_data[icode].operand[2].mode;
11929 mode1 = insn_data[icode].operand[3].mode;
11931 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11932 op0 = copy_to_mode_reg (mode0, op0);
11933 if (! (*insn_data[icode].operand[3].predicate) (op1, mode1))
11935 /* @@@ better error message */
11936 error ("mask must be an immediate");
11940 || GET_MODE (target) != tmode
11941 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11942 target = gen_reg_rtx (tmode);
11943 pat = GEN_FCN (icode) (target, target, op0, op1);
11949 case IX86_BUILTIN_FEMMS:
11950 emit_insn (gen_femms ());
11953 case IX86_BUILTIN_PAVGUSB:
11954 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
11956 case IX86_BUILTIN_PF2ID:
11957 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
11959 case IX86_BUILTIN_PFACC:
11960 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
11962 case IX86_BUILTIN_PFADD:
11963 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
11965 case IX86_BUILTIN_PFCMPEQ:
11966 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
11968 case IX86_BUILTIN_PFCMPGE:
11969 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
11971 case IX86_BUILTIN_PFCMPGT:
11972 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
11974 case IX86_BUILTIN_PFMAX:
11975 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
11977 case IX86_BUILTIN_PFMIN:
11978 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
11980 case IX86_BUILTIN_PFMUL:
11981 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
11983 case IX86_BUILTIN_PFRCP:
11984 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
11986 case IX86_BUILTIN_PFRCPIT1:
11987 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
11989 case IX86_BUILTIN_PFRCPIT2:
11990 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
11992 case IX86_BUILTIN_PFRSQIT1:
11993 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
11995 case IX86_BUILTIN_PFRSQRT:
11996 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
11998 case IX86_BUILTIN_PFSUB:
11999 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
12001 case IX86_BUILTIN_PFSUBR:
12002 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
12004 case IX86_BUILTIN_PI2FD:
12005 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
12007 case IX86_BUILTIN_PMULHRW:
12008 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
12010 case IX86_BUILTIN_PREFETCH_3DNOW:
12011 icode = CODE_FOR_prefetch_3dnow;
12012 arg0 = TREE_VALUE (arglist);
12013 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12014 mode0 = insn_data[icode].operand[0].mode;
12015 pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0));
12021 case IX86_BUILTIN_PREFETCHW:
12022 icode = CODE_FOR_prefetchw;
12023 arg0 = TREE_VALUE (arglist);
12024 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12025 mode0 = insn_data[icode].operand[0].mode;
12026 pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0));
12032 case IX86_BUILTIN_PF2IW:
12033 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
12035 case IX86_BUILTIN_PFNACC:
12036 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
12038 case IX86_BUILTIN_PFPNACC:
12039 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
12041 case IX86_BUILTIN_PI2FW:
12042 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
12044 case IX86_BUILTIN_PSWAPDSI:
12045 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
12047 case IX86_BUILTIN_PSWAPDSF:
12048 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
12050 /* Composite intrinsics. */
12051 case IX86_BUILTIN_SETPS1:
12052 target = assign_386_stack_local (SFmode, 0);
12053 arg0 = TREE_VALUE (arglist);
12054 emit_move_insn (adjust_address (target, SFmode, 0),
12055 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
12056 op0 = gen_reg_rtx (V4SFmode);
12057 emit_insn (gen_sse_loadss (op0, adjust_address (target, V4SFmode, 0)));
12058 emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
12061 case IX86_BUILTIN_SETPS:
12062 target = assign_386_stack_local (V4SFmode, 0);
12063 arg0 = TREE_VALUE (arglist);
12064 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12065 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12066 arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
12067 emit_move_insn (adjust_address (target, SFmode, 0),
12068 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
12069 emit_move_insn (adjust_address (target, SFmode, 4),
12070 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
12071 emit_move_insn (adjust_address (target, SFmode, 8),
12072 expand_expr (arg2, NULL_RTX, VOIDmode, 0));
12073 emit_move_insn (adjust_address (target, SFmode, 12),
12074 expand_expr (arg3, NULL_RTX, VOIDmode, 0));
12075 op0 = gen_reg_rtx (V4SFmode);
12076 emit_insn (gen_sse_movaps (op0, target));
12079 case IX86_BUILTIN_CLRPS:
12080 target = gen_reg_rtx (TImode);
12081 emit_insn (gen_sse_clrti (target));
12084 case IX86_BUILTIN_LOADRPS:
12085 target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
12086 gen_reg_rtx (V4SFmode), 1);
12087 emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
12090 case IX86_BUILTIN_LOADPS1:
12091 target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
12092 gen_reg_rtx (V4SFmode), 1);
12093 emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
12096 case IX86_BUILTIN_STOREPS1:
12097 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
12098 case IX86_BUILTIN_STORERPS:
12099 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);
12101 case IX86_BUILTIN_MMX_ZERO:
12102 target = gen_reg_rtx (DImode);
12103 emit_insn (gen_mmx_clrdi (target));
12110 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
12111 if (d->code == fcode)
12113 /* Compares are treated specially. */
12114 if (d->icode == CODE_FOR_maskcmpv4sf3
12115 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12116 || d->icode == CODE_FOR_maskncmpv4sf3
12117 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12118 return ix86_expand_sse_compare (d, arglist, target);
12120 return ix86_expand_binop_builtin (d->icode, arglist, target);
12123 for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
12124 if (d->code == fcode)
12125 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
12127 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
12128 if (d->code == fcode)
12129 return ix86_expand_sse_comi (d, arglist, target);
12131 /* @@@ Should really do something sensible here. */
12135 /* Store OPERAND to the memory after reload is completed. This means
12136 that we can't easily use assign_stack_local. */
12138 ix86_force_to_memory (mode, operand)
12139 enum machine_mode mode;
12143 if (!reload_completed)
12145 if (TARGET_64BIT && TARGET_RED_ZONE)
12147 result = gen_rtx_MEM (mode,
12148 gen_rtx_PLUS (Pmode,
12150 GEN_INT (-RED_ZONE_SIZE)));
12151 emit_move_insn (result, operand);
12153 else if (TARGET_64BIT && !TARGET_RED_ZONE)
12159 operand = gen_lowpart (DImode, operand);
12163 gen_rtx_SET (VOIDmode,
12164 gen_rtx_MEM (DImode,
12165 gen_rtx_PRE_DEC (DImode,
12166 stack_pointer_rtx)),
12172 result = gen_rtx_MEM (mode, stack_pointer_rtx);
12181 split_di (&operand, 1, operands, operands + 1);
12183 gen_rtx_SET (VOIDmode,
12184 gen_rtx_MEM (SImode,
12185 gen_rtx_PRE_DEC (Pmode,
12186 stack_pointer_rtx)),
12189 gen_rtx_SET (VOIDmode,
12190 gen_rtx_MEM (SImode,
12191 gen_rtx_PRE_DEC (Pmode,
12192 stack_pointer_rtx)),
12197 /* It is better to store HImodes as SImodes. */
12198 if (!TARGET_PARTIAL_REG_STALL)
12199 operand = gen_lowpart (SImode, operand);
12203 gen_rtx_SET (VOIDmode,
12204 gen_rtx_MEM (GET_MODE (operand),
12205 gen_rtx_PRE_DEC (SImode,
12206 stack_pointer_rtx)),
12212 result = gen_rtx_MEM (mode, stack_pointer_rtx);
12217 /* Free operand from the memory. */
12219 ix86_free_from_memory (mode)
12220 enum machine_mode mode;
12222 if (!TARGET_64BIT || !TARGET_RED_ZONE)
12226 if (mode == DImode || TARGET_64BIT)
12228 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
12232 /* Use LEA to deallocate stack space. In peephole2 it will be converted
12233 to pop or add instruction if registers are available. */
12234 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12235 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12240 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
12241 QImode must go into class Q_REGS.
12242 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
12243 movdf to do mem-to-mem moves through integer regs. */
12245 ix86_preferred_reload_class (x, class)
12247 enum reg_class class;
12249 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
12251 /* SSE can't load any constant directly yet. */
12252 if (SSE_CLASS_P (class))
12254 /* Floats can load 0 and 1. */
12255 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
12257 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
12258 if (MAYBE_SSE_CLASS_P (class))
12259 return (reg_class_subset_p (class, GENERAL_REGS)
12260 ? GENERAL_REGS : FLOAT_REGS);
12264 /* General regs can load everything. */
12265 if (reg_class_subset_p (class, GENERAL_REGS))
12266 return GENERAL_REGS;
12267 /* In case we haven't resolved FLOAT or SSE yet, give up. */
12268 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
12271 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
12273 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
12278 /* If we are copying between general and FP registers, we need a memory
12279 location. The same is true for SSE and MMX registers.
12281 The macro can't work reliably when one of the CLASSES is class containing
12282 registers from multiple units (SSE, MMX, integer). We avoid this by never
12283 combining those units in single alternative in the machine description.
12284 Ensure that this constraint holds to avoid unexpected surprises.
12286 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
12287 enforce these sanity checks. */
12289 ix86_secondary_memory_needed (class1, class2, mode, strict)
12290 enum reg_class class1, class2;
12291 enum machine_mode mode;
12294 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
12295 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
12296 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
12297 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
12298 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
12299 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
12306 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
12307 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
12308 && (mode) != SImode)
12309 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12310 && (mode) != SImode));
12312 /* Return the cost of moving data from a register in class CLASS1 to
12313 one in class CLASS2.
12315 It is not required that the cost always equal 2 when FROM is the same as TO;
12316 on some machines it is expensive to move between registers if they are not
12317 general registers. */
12319 ix86_register_move_cost (mode, class1, class2)
12320 enum machine_mode mode;
12321 enum reg_class class1, class2;
12323 /* In case we require secondary memory, compute cost of the store followed
12324 by load. In case of copying from general_purpose_register we may emit
12325 multiple stores followed by single load causing memory size mismatch
12326 stall. Count this as arbitarily high cost of 20. */
12327 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
12330 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
12332 return (MEMORY_MOVE_COST (mode, class1, 0)
12333 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
12335 /* Moves between SSE/MMX and integer unit are expensive. */
12336 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12337 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
12338 return ix86_cost->mmxsse_to_integer;
12339 if (MAYBE_FLOAT_CLASS_P (class1))
12340 return ix86_cost->fp_move;
12341 if (MAYBE_SSE_CLASS_P (class1))
12342 return ix86_cost->sse_move;
12343 if (MAYBE_MMX_CLASS_P (class1))
12344 return ix86_cost->mmx_move;
12348 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
12350 ix86_hard_regno_mode_ok (regno, mode)
12352 enum machine_mode mode;
12354 /* Flags and only flags can only hold CCmode values. */
12355 if (CC_REGNO_P (regno))
12356 return GET_MODE_CLASS (mode) == MODE_CC;
12357 if (GET_MODE_CLASS (mode) == MODE_CC
12358 || GET_MODE_CLASS (mode) == MODE_RANDOM
12359 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
12361 if (FP_REGNO_P (regno))
12362 return VALID_FP_MODE_P (mode);
12363 if (SSE_REGNO_P (regno))
12364 return VALID_SSE_REG_MODE (mode);
12365 if (MMX_REGNO_P (regno))
12366 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
12367 /* We handle both integer and floats in the general purpose registers.
12368 In future we should be able to handle vector modes as well. */
12369 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
12371 /* Take care for QImode values - they can be in non-QI regs, but then
12372 they do cause partial register stalls. */
12373 if (regno < 4 || mode != QImode || TARGET_64BIT)
12375 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
12378 /* Return the cost of moving data of mode M between a
12379 register and memory. A value of 2 is the default; this cost is
12380 relative to those in `REGISTER_MOVE_COST'.
12382 If moving between registers and memory is more expensive than
12383 between two registers, you should define this macro to express the
12386 Model also increased moving costs of QImode registers in non
12390 ix86_memory_move_cost (mode, class, in)
12391 enum machine_mode mode;
12392 enum reg_class class;
12395 if (FLOAT_CLASS_P (class))
12413 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
12415 if (SSE_CLASS_P (class))
12418 switch (GET_MODE_SIZE (mode))
12432 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
12434 if (MMX_CLASS_P (class))
12437 switch (GET_MODE_SIZE (mode))
12448 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
12450 switch (GET_MODE_SIZE (mode))
12454 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
12455 : ix86_cost->movzbl_load);
12457 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
12458 : ix86_cost->int_store[0] + 4);
12461 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
12463 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
12464 if (mode == TFmode)
12466 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
12467 * (int) GET_MODE_SIZE (mode) / 4);
12471 #ifdef DO_GLOBAL_CTORS_BODY
12473 ix86_svr3_asm_out_constructor (symbol, priority)
12475 int priority ATTRIBUTE_UNUSED;
12478 fputs ("\tpushl $", asm_out_file);
12479 assemble_name (asm_out_file, XSTR (symbol, 0));
12480 fputc ('\n', asm_out_file);