1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
3 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
28 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "basic-block.h"
44 #include "target-def.h"
46 #ifndef CHECK_STACK_LIMIT
47 #define CHECK_STACK_LIMIT -1
50 /* Processor costs (relative to an add) */
52 struct processor_costs size_cost = { /* costs for tuning for size */
53 2, /* cost of an add instruction */
54 3, /* cost of a lea instruction */
55 2, /* variable shift costs */
56 3, /* constant shift costs */
57 3, /* cost of starting a multiply */
58 0, /* cost of multiply per each bit set */
59 3, /* cost of a divide/mod */
60 3, /* cost of movsx */
61 3, /* cost of movzx */
64 2, /* cost for loading QImode using movzbl */
65 {2, 2, 2}, /* cost of loading integer registers
66 in QImode, HImode and SImode.
67 Relative to reg-reg move (2). */
68 {2, 2, 2}, /* cost of storing integer registers */
69 2, /* cost of reg,reg fld/fst */
70 {2, 2, 2}, /* cost of loading fp registers
71 in SFmode, DFmode and XFmode */
72 {2, 2, 2}, /* cost of storing fp registers */
73 3, /* cost of moving MMX register */
74 {3, 3}, /* cost of loading MMX registers
75 in SImode and DImode */
76 {3, 3}, /* cost of storing MMX registers
77 in SImode and DImode */
78 3, /* cost of moving SSE register */
79 {3, 3, 3}, /* cost of loading SSE registers
80 in SImode, DImode and TImode */
81 {3, 3, 3}, /* cost of storing SSE registers
82 in SImode, DImode and TImode */
83 3, /* MMX or SSE register to integer */
84 0, /* size of prefetch block */
85 0, /* number of parallel prefetches */
87 /* Processor costs (relative to an add) */
89 struct processor_costs i386_cost = { /* 386 specific costs */
90 1, /* cost of an add instruction */
91 1, /* cost of a lea instruction */
92 3, /* variable shift costs */
93 2, /* constant shift costs */
94 6, /* cost of starting a multiply */
95 1, /* cost of multiply per each bit set */
96 23, /* cost of a divide/mod */
97 3, /* cost of movsx */
98 2, /* cost of movzx */
99 15, /* "large" insn */
101 4, /* cost for loading QImode using movzbl */
102 {2, 4, 2}, /* cost of loading integer registers
103 in QImode, HImode and SImode.
104 Relative to reg-reg move (2). */
105 {2, 4, 2}, /* cost of storing integer registers */
106 2, /* cost of reg,reg fld/fst */
107 {8, 8, 8}, /* cost of loading fp registers
108 in SFmode, DFmode and XFmode */
109 {8, 8, 8}, /* cost of loading integer registers */
110 2, /* cost of moving MMX register */
111 {4, 8}, /* cost of loading MMX registers
112 in SImode and DImode */
113 {4, 8}, /* cost of storing MMX registers
114 in SImode and DImode */
115 2, /* cost of moving SSE register */
116 {4, 8, 16}, /* cost of loading SSE registers
117 in SImode, DImode and TImode */
118 {4, 8, 16}, /* cost of storing SSE registers
119 in SImode, DImode and TImode */
120 3, /* MMX or SSE register to integer */
121 0, /* size of prefetch block */
122 0, /* number of parallel prefetches */
126 struct processor_costs i486_cost = { /* 486 specific costs */
127 1, /* cost of an add instruction */
128 1, /* cost of a lea instruction */
129 3, /* variable shift costs */
130 2, /* constant shift costs */
131 12, /* cost of starting a multiply */
132 1, /* cost of multiply per each bit set */
133 40, /* cost of a divide/mod */
134 3, /* cost of movsx */
135 2, /* cost of movzx */
136 15, /* "large" insn */
138 4, /* cost for loading QImode using movzbl */
139 {2, 4, 2}, /* cost of loading integer registers
140 in QImode, HImode and SImode.
141 Relative to reg-reg move (2). */
142 {2, 4, 2}, /* cost of storing integer registers */
143 2, /* cost of reg,reg fld/fst */
144 {8, 8, 8}, /* cost of loading fp registers
145 in SFmode, DFmode and XFmode */
146 {8, 8, 8}, /* cost of loading integer registers */
147 2, /* cost of moving MMX register */
148 {4, 8}, /* cost of loading MMX registers
149 in SImode and DImode */
150 {4, 8}, /* cost of storing MMX registers
151 in SImode and DImode */
152 2, /* cost of moving SSE register */
153 {4, 8, 16}, /* cost of loading SSE registers
154 in SImode, DImode and TImode */
155 {4, 8, 16}, /* cost of storing SSE registers
156 in SImode, DImode and TImode */
157 3, /* MMX or SSE register to integer */
158 0, /* size of prefetch block */
159 0, /* number of parallel prefetches */
163 struct processor_costs pentium_cost = {
164 1, /* cost of an add instruction */
165 1, /* cost of a lea instruction */
166 4, /* variable shift costs */
167 1, /* constant shift costs */
168 11, /* cost of starting a multiply */
169 0, /* cost of multiply per each bit set */
170 25, /* cost of a divide/mod */
171 3, /* cost of movsx */
172 2, /* cost of movzx */
173 8, /* "large" insn */
175 6, /* cost for loading QImode using movzbl */
176 {2, 4, 2}, /* cost of loading integer registers
177 in QImode, HImode and SImode.
178 Relative to reg-reg move (2). */
179 {2, 4, 2}, /* cost of storing integer registers */
180 2, /* cost of reg,reg fld/fst */
181 {2, 2, 6}, /* cost of loading fp registers
182 in SFmode, DFmode and XFmode */
183 {4, 4, 6}, /* cost of loading integer registers */
184 8, /* cost of moving MMX register */
185 {8, 8}, /* cost of loading MMX registers
186 in SImode and DImode */
187 {8, 8}, /* cost of storing MMX registers
188 in SImode and DImode */
189 2, /* cost of moving SSE register */
190 {4, 8, 16}, /* cost of loading SSE registers
191 in SImode, DImode and TImode */
192 {4, 8, 16}, /* cost of storing SSE registers
193 in SImode, DImode and TImode */
194 3, /* MMX or SSE register to integer */
195 0, /* size of prefetch block */
196 0, /* number of parallel prefetches */
200 struct processor_costs pentiumpro_cost = {
201 1, /* cost of an add instruction */
202 1, /* cost of a lea instruction */
203 1, /* variable shift costs */
204 1, /* constant shift costs */
205 4, /* cost of starting a multiply */
206 0, /* cost of multiply per each bit set */
207 17, /* cost of a divide/mod */
208 1, /* cost of movsx */
209 1, /* cost of movzx */
210 8, /* "large" insn */
212 2, /* cost for loading QImode using movzbl */
213 {4, 4, 4}, /* cost of loading integer registers
214 in QImode, HImode and SImode.
215 Relative to reg-reg move (2). */
216 {2, 2, 2}, /* cost of storing integer registers */
217 2, /* cost of reg,reg fld/fst */
218 {2, 2, 6}, /* cost of loading fp registers
219 in SFmode, DFmode and XFmode */
220 {4, 4, 6}, /* cost of loading integer registers */
221 2, /* cost of moving MMX register */
222 {2, 2}, /* cost of loading MMX registers
223 in SImode and DImode */
224 {2, 2}, /* cost of storing MMX registers
225 in SImode and DImode */
226 2, /* cost of moving SSE register */
227 {2, 2, 8}, /* cost of loading SSE registers
228 in SImode, DImode and TImode */
229 {2, 2, 8}, /* cost of storing SSE registers
230 in SImode, DImode and TImode */
231 3, /* MMX or SSE register to integer */
232 32, /* size of prefetch block */
233 6, /* number of parallel prefetches */
237 struct processor_costs k6_cost = {
238 1, /* cost of an add instruction */
239 2, /* cost of a lea instruction */
240 1, /* variable shift costs */
241 1, /* constant shift costs */
242 3, /* cost of starting a multiply */
243 0, /* cost of multiply per each bit set */
244 18, /* cost of a divide/mod */
245 2, /* cost of movsx */
246 2, /* cost of movzx */
247 8, /* "large" insn */
249 3, /* cost for loading QImode using movzbl */
250 {4, 5, 4}, /* cost of loading integer registers
251 in QImode, HImode and SImode.
252 Relative to reg-reg move (2). */
253 {2, 3, 2}, /* cost of storing integer registers */
254 4, /* cost of reg,reg fld/fst */
255 {6, 6, 6}, /* cost of loading fp registers
256 in SFmode, DFmode and XFmode */
257 {4, 4, 4}, /* cost of loading integer registers */
258 2, /* cost of moving MMX register */
259 {2, 2}, /* cost of loading MMX registers
260 in SImode and DImode */
261 {2, 2}, /* cost of storing MMX registers
262 in SImode and DImode */
263 2, /* cost of moving SSE register */
264 {2, 2, 8}, /* cost of loading SSE registers
265 in SImode, DImode and TImode */
266 {2, 2, 8}, /* cost of storing SSE registers
267 in SImode, DImode and TImode */
268 6, /* MMX or SSE register to integer */
269 32, /* size of prefetch block */
270 1, /* number of parallel prefetches */
274 struct processor_costs athlon_cost = {
275 1, /* cost of an add instruction */
276 2, /* cost of a lea instruction */
277 1, /* variable shift costs */
278 1, /* constant shift costs */
279 5, /* cost of starting a multiply */
280 0, /* cost of multiply per each bit set */
281 42, /* cost of a divide/mod */
282 1, /* cost of movsx */
283 1, /* cost of movzx */
284 8, /* "large" insn */
286 4, /* cost for loading QImode using movzbl */
287 {4, 5, 4}, /* cost of loading integer registers
288 in QImode, HImode and SImode.
289 Relative to reg-reg move (2). */
290 {2, 3, 2}, /* cost of storing integer registers */
291 4, /* cost of reg,reg fld/fst */
292 {6, 6, 20}, /* cost of loading fp registers
293 in SFmode, DFmode and XFmode */
294 {4, 4, 16}, /* cost of loading integer registers */
295 2, /* cost of moving MMX register */
296 {2, 2}, /* cost of loading MMX registers
297 in SImode and DImode */
298 {2, 2}, /* cost of storing MMX registers
299 in SImode and DImode */
300 2, /* cost of moving SSE register */
301 {2, 2, 8}, /* cost of loading SSE registers
302 in SImode, DImode and TImode */
303 {2, 2, 8}, /* cost of storing SSE registers
304 in SImode, DImode and TImode */
305 6, /* MMX or SSE register to integer */
306 64, /* size of prefetch block */
307 6, /* number of parallel prefetches */
311 struct processor_costs pentium4_cost = {
312 1, /* cost of an add instruction */
313 1, /* cost of a lea instruction */
314 8, /* variable shift costs */
315 8, /* constant shift costs */
316 30, /* cost of starting a multiply */
317 0, /* cost of multiply per each bit set */
318 112, /* cost of a divide/mod */
319 1, /* cost of movsx */
320 1, /* cost of movzx */
321 16, /* "large" insn */
323 2, /* cost for loading QImode using movzbl */
324 {4, 5, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 3, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of loading integer registers */
332 2, /* cost of moving MMX register */
333 {2, 2}, /* cost of loading MMX registers
334 in SImode and DImode */
335 {2, 2}, /* cost of storing MMX registers
336 in SImode and DImode */
337 12, /* cost of moving SSE register */
338 {12, 12, 12}, /* cost of loading SSE registers
339 in SImode, DImode and TImode */
340 {2, 2, 8}, /* cost of storing SSE registers
341 in SImode, DImode and TImode */
342 10, /* MMX or SSE register to integer */
343 64, /* size of prefetch block */
344 6, /* number of parallel prefetches */
347 const struct processor_costs *ix86_cost = &pentium_cost;
349 /* Processor feature/optimization bitmasks. */
350 #define m_386 (1<<PROCESSOR_I386)
351 #define m_486 (1<<PROCESSOR_I486)
352 #define m_PENT (1<<PROCESSOR_PENTIUM)
353 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
354 #define m_K6 (1<<PROCESSOR_K6)
355 #define m_ATHLON (1<<PROCESSOR_ATHLON)
356 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
358 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
359 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
360 const int x86_zero_extend_with_and = m_486 | m_PENT;
361 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
362 const int x86_double_with_add = ~m_386;
363 const int x86_use_bit_test = m_386;
364 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
365 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
366 const int x86_3dnow_a = m_ATHLON;
367 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
368 const int x86_branch_hints = m_PENT4;
369 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
370 const int x86_partial_reg_stall = m_PPRO;
371 const int x86_use_loop = m_K6;
372 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
373 const int x86_use_mov0 = m_K6;
374 const int x86_use_cltd = ~(m_PENT | m_K6);
375 const int x86_read_modify_write = ~m_PENT;
376 const int x86_read_modify = ~(m_PENT | m_PPRO);
377 const int x86_split_long_moves = m_PPRO;
378 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
379 const int x86_single_stringop = m_386 | m_PENT4;
380 const int x86_qimode_math = ~(0);
381 const int x86_promote_qi_regs = 0;
382 const int x86_himode_math = ~(m_PPRO);
383 const int x86_promote_hi_regs = m_PPRO;
384 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
385 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
386 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
387 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
388 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
389 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
390 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
391 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
392 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
393 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
394 const int x86_decompose_lea = m_PENT4;
396 /* In case the average insn count for single function invocation is
397 lower than this constant, emit fast (but longer) prologue and
399 #define FAST_PROLOGUE_INSN_COUNT 30
400 /* Set by prologue expander and used by epilogue expander to determine
402 static int use_fast_prologue_epilogue;
404 #define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
406 static const char *const hi_reg_name[] = HI_REGISTER_NAMES; /* names for 16 bit regs */
407 static const char *const qi_reg_name[] = QI_REGISTER_NAMES; /* names for 8 bit regs (low) */
408 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; /* names for 8 bit regs (high) */
410 /* Array of the smallest class containing reg number REGNO, indexed by
411 REGNO. Used by REGNO_REG_CLASS in i386.h. */
413 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
416 AREG, DREG, CREG, BREG,
418 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
420 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
421 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
424 /* flags, fpsr, dirflag, frame */
425 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
426 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
428 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
430 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
431 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
432 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
436 /* The "default" register map used in 32bit mode. */
438 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
440 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
441 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
442 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
443 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
444 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
445 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
446 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
449 static int x86_64_int_parameter_registers[6] = {5 /*RDI*/, 4 /*RSI*/,
450 1 /*RDX*/, 2 /*RCX*/,
451 FIRST_REX_INT_REG /*R8 */,
452 FIRST_REX_INT_REG + 1 /*R9 */};
453 static int x86_64_int_return_registers[4] = {0 /*RAX*/, 1 /*RDX*/, 5, 4};
455 /* The "default" register map used in 64bit mode.  Maps GCC register
numbers to DWARF/DBX register numbers.  NOTE(review): a comma was
missing after the fp-regs row; without it `40' and the following
`-1' parse as the single element `40 - 1', shifting every later
entry by one slot. */
456 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
458 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
459 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
460 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
461 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
462 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
463 8,9,10,11,12,13,14,15, /* extended integer registers */
464 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
467 /* Define the register numbers to be used in Dwarf debugging information.
468 The SVR4 reference port C compiler uses the following register numbers
469 in its Dwarf output code:
470 0 for %eax (gcc regno = 0)
471 1 for %ecx (gcc regno = 2)
472 2 for %edx (gcc regno = 1)
473 3 for %ebx (gcc regno = 3)
474 4 for %esp (gcc regno = 7)
475 5 for %ebp (gcc regno = 6)
476 6 for %esi (gcc regno = 4)
477 7 for %edi (gcc regno = 5)
478 The following three DWARF register numbers are never generated by
479 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
480 believes these numbers have these meanings.
481 8 for %eip (no gcc equivalent)
482 9 for %eflags (gcc regno = 17)
483 10 for %trapno (no gcc equivalent)
484 It is not at all clear how we should number the FP stack registers
485 for the x86 architecture. If the version of SDB on x86/svr4 were
486 a bit less brain dead with respect to floating-point then we would
487 have a precedent to follow with respect to DWARF register numbers
488 for x86 FP registers, but the SDB on x86/svr4 is so completely
489 broken with respect to FP registers that it is hardly worth thinking
490 of it as something to strive for compatibility with.
491 The version of x86/svr4 SDB I have at the moment does (partially)
492 seem to believe that DWARF register number 11 is associated with
493 the x86 register %st(0), but that's about all. Higher DWARF
494 register numbers don't seem to be associated with anything in
495 particular, and even for DWARF regno 11, SDB only seems to under-
496 stand that it should say that a variable lives in %st(0) (when
497 asked via an `=' command) if we said it was in DWARF regno 11,
498 but SDB still prints garbage when asked for the value of the
499 variable in question (via a `/' command).
500 (Also note that the labels SDB prints for various FP stack regs
501 when doing an `x' command are all wrong.)
502 Note that these problems generally don't affect the native SVR4
503 C compiler because it doesn't allow the use of -O with -g and
504 because when it is *not* optimizing, it allocates a memory
505 location for each floating-point variable, and the memory
506 location is what gets described in the DWARF AT_location
507 attribute for the variable in question.
508 Regardless of the severe mental illness of the x86/svr4 SDB, we
509 do something sensible here and we use the following DWARF
510 register numbers. Note that these are all stack-top-relative
512 11 for %st(0) (gcc regno = 8)
513 12 for %st(1) (gcc regno = 9)
514 13 for %st(2) (gcc regno = 10)
515 14 for %st(3) (gcc regno = 11)
516 15 for %st(4) (gcc regno = 12)
517 16 for %st(5) (gcc regno = 13)
518 17 for %st(6) (gcc regno = 14)
519 18 for %st(7) (gcc regno = 15)
521 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
523 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
524 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
525 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
526 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
527 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
528 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
529 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
532 /* Test and compare insns in i386.md store the information needed to
533 generate branch and scc insns here. */
535 struct rtx_def *ix86_compare_op0 = NULL_RTX;
536 struct rtx_def *ix86_compare_op1 = NULL_RTX;
538 #define MAX_386_STACK_LOCALS 3
539 /* Size of the register save area. */
540 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
542 /* Define the structure for the machine field in struct function. */
543 struct machine_function
545 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
546 int save_varrargs_registers;
547 int accesses_prev_frame;
550 #define ix86_stack_locals (cfun->machine->stack_locals)
551 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
553 /* Structure describing stack frame layout.
554 Stack grows downward:
560 saved frame pointer if frame_pointer_needed
561 <- HARD_FRAME_POINTER
567 > to_allocate <- FRAME_POINTER
579 int outgoing_arguments_size;
582 HOST_WIDE_INT to_allocate;
583 /* The offsets relative to ARG_POINTER. */
584 HOST_WIDE_INT frame_pointer_offset;
585 HOST_WIDE_INT hard_frame_pointer_offset;
586 HOST_WIDE_INT stack_pointer_offset;
589 /* Code model option as passed by user. */
590 const char *ix86_cmodel_string;
592 enum cmodel ix86_cmodel;
594 /* which cpu are we scheduling for */
595 enum processor_type ix86_cpu;
597 /* which unit we are generating floating point math for */
598 enum fpmath_unit ix86_fpmath;
600 /* which instruction set architecture to use. */
603 /* Strings to hold which cpu and instruction set architecture to use. */
604 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
605 const char *ix86_arch_string; /* for -march=<xxx> */
606 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
608 /* # of registers to use to pass arguments. */
609 const char *ix86_regparm_string;
611 /* true if sse prefetch instruction is not NOOP. */
612 int x86_prefetch_sse;
614 /* ix86_regparm_string as a number */
617 /* Alignment to use for loops and jumps: */
619 /* Power of two alignment for loops. */
620 const char *ix86_align_loops_string;
622 /* Power of two alignment for non-loop jumps. */
623 const char *ix86_align_jumps_string;
625 /* Power of two alignment for stack boundary in bytes. */
626 const char *ix86_preferred_stack_boundary_string;
628 /* Preferred alignment for stack boundary in bits. */
629 int ix86_preferred_stack_boundary;
631 /* Values 1-5: see jump.c */
632 int ix86_branch_cost;
633 const char *ix86_branch_cost_string;
635 /* Power of two alignment for functions. */
636 const char *ix86_align_funcs_string;
638 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
639 static char internal_label_prefix[16];
640 static int internal_label_prefix_len;
642 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
643 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
644 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
646 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
647 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
649 static rtx gen_push PARAMS ((rtx));
650 static int memory_address_length PARAMS ((rtx addr));
651 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
652 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
653 static int ix86_safe_length PARAMS ((rtx));
654 static enum attr_memory ix86_safe_memory PARAMS ((rtx));
655 static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
656 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
657 static void ix86_dump_ppro_packet PARAMS ((FILE *));
658 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
659 static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
661 static void ix86_init_machine_status PARAMS ((struct function *));
662 static void ix86_mark_machine_status PARAMS ((struct function *));
663 static void ix86_free_machine_status PARAMS ((struct function *));
664 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
665 static int ix86_safe_length_prefix PARAMS ((rtx));
666 static int ix86_nsaved_regs PARAMS((void));
667 static void ix86_emit_save_regs PARAMS((void));
668 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
669 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
670 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
671 static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
672 static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
673 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
674 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
675 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
676 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
677 static int ix86_issue_rate PARAMS ((void));
678 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
679 static void ix86_sched_init PARAMS ((FILE *, int, int));
680 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
681 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
685 rtx base, index, disp;
689 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
691 struct builtin_description;
692 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
694 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
696 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
697 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
698 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
699 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
700 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
701 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
702 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
706 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
708 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
709 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
710 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
711 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
712 static int ix86_save_reg PARAMS ((int, int));
713 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
714 static int ix86_comp_type_attributes PARAMS ((tree, tree));
715 const struct attribute_spec ix86_attribute_table[];
716 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
717 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
719 #ifdef DO_GLOBAL_CTORS_BODY
720 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
723 /* Register class used for passing given 64bit part of the argument.
724 These represent classes as documented by the PS ABI, with the exception
725 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
726 use SF or DFmode move instead of DImode to avoid reformatting penalties.
728 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
729 whenever possible (upper half does contain padding).
731 enum x86_64_reg_class
734 X86_64_INTEGER_CLASS,
735 X86_64_INTEGERSI_CLASS,
744 const char * const x86_64_reg_class_name[] =
745 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
747 #define MAX_CLASSES 4
748 static int classify_argument PARAMS ((enum machine_mode, tree,
749 enum x86_64_reg_class [MAX_CLASSES],
751 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
753 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
755 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
756 enum x86_64_reg_class));
758 /* Initialize the GCC target structure. */
759 #undef TARGET_ATTRIBUTE_TABLE
760 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
761 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
762 # undef TARGET_MERGE_DECL_ATTRIBUTES
763 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
766 #undef TARGET_COMP_TYPE_ATTRIBUTES
767 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
769 #undef TARGET_INIT_BUILTINS
770 #define TARGET_INIT_BUILTINS ix86_init_builtins
772 #undef TARGET_EXPAND_BUILTIN
773 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
775 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
776 static void ix86_osf_output_function_prologue PARAMS ((FILE *,
778 # undef TARGET_ASM_FUNCTION_PROLOGUE
779 # define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
782 #undef TARGET_ASM_OPEN_PAREN
783 #define TARGET_ASM_OPEN_PAREN ""
784 #undef TARGET_ASM_CLOSE_PAREN
785 #define TARGET_ASM_CLOSE_PAREN ""
787 #undef TARGET_ASM_ALIGNED_HI_OP
788 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
789 #undef TARGET_ASM_ALIGNED_SI_OP
790 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
792 #undef TARGET_ASM_ALIGNED_DI_OP
793 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
796 #undef TARGET_ASM_UNALIGNED_HI_OP
797 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
798 #undef TARGET_ASM_UNALIGNED_SI_OP
799 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
800 #undef TARGET_ASM_UNALIGNED_DI_OP
801 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
803 #undef TARGET_SCHED_ADJUST_COST
804 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
805 #undef TARGET_SCHED_ISSUE_RATE
806 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
807 #undef TARGET_SCHED_VARIABLE_ISSUE
808 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
809 #undef TARGET_SCHED_INIT
810 #define TARGET_SCHED_INIT ix86_sched_init
811 #undef TARGET_SCHED_REORDER
812 #define TARGET_SCHED_REORDER ix86_sched_reorder
814 struct gcc_target targetm = TARGET_INITIALIZER;
816 /* Sometimes certain combinations of command options do not make
817 sense on a particular target machine. You can define a macro
818 `OVERRIDE_OPTIONS' to take account of this. This macro, if
819 defined, is executed once just after all the command options have
822 Don't use this macro to turn on various extra optimizations for
823 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
829 /* Comes from final.c -- no real reason to change it. */
830 #define MAX_CODE_ALIGN 16
834 const struct processor_costs *cost; /* Processor costs */
835 const int target_enable; /* Target flags to enable. */
836 const int target_disable; /* Target flags to disable. */
837 const int align_loop; /* Default alignments. */
838 const int align_loop_max_skip;
839 const int align_jump;
840 const int align_jump_max_skip;
841 const int align_func;
842 const int branch_cost;
844 const processor_target_table[PROCESSOR_max] =
846 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
847 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
848 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
849 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
850 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
851 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
852 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
855 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
858 const char *const name; /* processor name or nickname. */
859 const enum processor_type processor;
865 PTA_PREFETCH_SSE = 8,
870 const processor_alias_table[] =
872 {"i386", PROCESSOR_I386, 0},
873 {"i486", PROCESSOR_I486, 0},
874 {"i586", PROCESSOR_PENTIUM, 0},
875 {"pentium", PROCESSOR_PENTIUM, 0},
876 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
877 {"i686", PROCESSOR_PENTIUMPRO, 0},
878 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
879 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
880 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
881 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
882 PTA_MMX | PTA_PREFETCH_SSE},
883 {"k6", PROCESSOR_K6, PTA_MMX},
884 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
885 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
886 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
888 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
889 | PTA_3DNOW | PTA_3DNOW_A},
890 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
891 | PTA_3DNOW_A | PTA_SSE},
892 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
893 | PTA_3DNOW_A | PTA_SSE},
894 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
895 | PTA_3DNOW_A | PTA_SSE},
898 int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
900 #ifdef SUBTARGET_OVERRIDE_OPTIONS
901 SUBTARGET_OVERRIDE_OPTIONS;
904 if (!ix86_cpu_string && ix86_arch_string)
905 ix86_cpu_string = ix86_arch_string;
906 if (!ix86_cpu_string)
907 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
908 if (!ix86_arch_string)
909 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
911 if (ix86_cmodel_string != 0)
913 if (!strcmp (ix86_cmodel_string, "small"))
914 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
916 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
917 else if (!strcmp (ix86_cmodel_string, "32"))
919 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
920 ix86_cmodel = CM_KERNEL;
921 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
922 ix86_cmodel = CM_MEDIUM;
923 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
924 ix86_cmodel = CM_LARGE;
926 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
932 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
934 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
935 error ("code model `%s' not supported in the %s bit mode",
936 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
937 if (ix86_cmodel == CM_LARGE)
938 sorry ("code model `large' not supported yet");
939 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
940 sorry ("%i-bit mode not compiled in",
941 (target_flags & MASK_64BIT) ? 64 : 32);
943 for (i = 0; i < pta_size; i++)
944 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
946 ix86_arch = processor_alias_table[i].processor;
947 /* Default cpu tuning to the architecture. */
948 ix86_cpu = ix86_arch;
949 if (processor_alias_table[i].flags & PTA_MMX
950 && !(target_flags & MASK_MMX_SET))
951 target_flags |= MASK_MMX;
952 if (processor_alias_table[i].flags & PTA_3DNOW
953 && !(target_flags & MASK_3DNOW_SET))
954 target_flags |= MASK_3DNOW;
955 if (processor_alias_table[i].flags & PTA_3DNOW_A
956 && !(target_flags & MASK_3DNOW_A_SET))
957 target_flags |= MASK_3DNOW_A;
958 if (processor_alias_table[i].flags & PTA_SSE
959 && !(target_flags & MASK_SSE_SET))
960 target_flags |= MASK_SSE;
961 if (processor_alias_table[i].flags & PTA_SSE2
962 && !(target_flags & MASK_SSE2_SET))
963 target_flags |= MASK_SSE2;
964 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
965 x86_prefetch_sse = true;
970 error ("bad value (%s) for -march= switch", ix86_arch_string);
972 for (i = 0; i < pta_size; i++)
973 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
975 ix86_cpu = processor_alias_table[i].processor;
978 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
979 x86_prefetch_sse = true;
981 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
984 ix86_cost = &size_cost;
986 ix86_cost = processor_target_table[ix86_cpu].cost;
987 target_flags |= processor_target_table[ix86_cpu].target_enable;
988 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
990 /* Arrange to set up i386_stack_locals for all functions. */
991 init_machine_status = ix86_init_machine_status;
992 mark_machine_status = ix86_mark_machine_status;
993 free_machine_status = ix86_free_machine_status;
995 /* Validate -mregparm= value. */
996 if (ix86_regparm_string)
998 i = atoi (ix86_regparm_string);
999 if (i < 0 || i > REGPARM_MAX)
1000 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1006 ix86_regparm = REGPARM_MAX;
1008 /* If the user has provided any of the -malign-* options,
1009 warn and use that value only if -falign-* is not set.
1010 Remove this code in GCC 3.2 or later. */
1011 if (ix86_align_loops_string)
1013 warning ("-malign-loops is obsolete, use -falign-loops");
1014 if (align_loops == 0)
1016 i = atoi (ix86_align_loops_string);
1017 if (i < 0 || i > MAX_CODE_ALIGN)
1018 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1020 align_loops = 1 << i;
1024 if (ix86_align_jumps_string)
1026 warning ("-malign-jumps is obsolete, use -falign-jumps");
1027 if (align_jumps == 0)
1029 i = atoi (ix86_align_jumps_string);
1030 if (i < 0 || i > MAX_CODE_ALIGN)
1031 /* This guard validates -malign-jumps; the diagnostic must name that
	option (the original text was copy-pasted from the -malign-loops case). */
1031 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1033 align_jumps = 1 << i;
1037 if (ix86_align_funcs_string)
1039 warning ("-malign-functions is obsolete, use -falign-functions");
1040 if (align_functions == 0)
1042 i = atoi (ix86_align_funcs_string);
1043 if (i < 0 || i > MAX_CODE_ALIGN)
1044 /* This guard validates -malign-functions; the diagnostic must name that
	option (the original text was copy-pasted from the -malign-loops case). */
1044 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1046 align_functions = 1 << i;
1050 /* Default align_* from the processor table. */
1051 /* Parenthesize the macro argument and each expansion site so that
	abs (a - b) and abs (x) with any operand expression expand correctly;
	the unparenthesized form mis-parses e.g. abs (a - b) as (a - b < 0
	? -a - b : a - b).  NOTE(review): this also shadows the C library's
	abs(); a differently-named macro would be safer -- confirm no
	<stdlib.h> abs() use follows in this file. */
1051 #define abs(n) ((n) < 0 ? -(n) : (n))
1052 if (align_loops == 0)
1054 align_loops = processor_target_table[ix86_cpu].align_loop;
1055 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1057 if (align_jumps == 0)
1059 align_jumps = processor_target_table[ix86_cpu].align_jump;
1060 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1062 if (align_functions == 0)
1064 align_functions = processor_target_table[ix86_cpu].align_func;
1067 /* Validate -mpreferred-stack-boundary= value, or provide default.
1068 The default of 128 bits is for Pentium III's SSE __m128, but we
1069 don't want additional code to keep the stack aligned when
1070 optimizing for code size. */
1071 ix86_preferred_stack_boundary = (optimize_size
1072 ? TARGET_64BIT ? 64 : 32
1074 if (ix86_preferred_stack_boundary_string)
1076 i = atoi (ix86_preferred_stack_boundary_string);
1077 if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
1078 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1079 TARGET_64BIT ? 3 : 2);
1081 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1084 /* Validate -mbranch-cost= value, or provide default. */
1085 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1086 if (ix86_branch_cost_string)
1088 i = atoi (ix86_branch_cost_string);
1090 error ("-mbranch-cost=%d is not between 0 and 5", i);
1092 ix86_branch_cost = i;
1095 /* Keep nonleaf frame pointers. */
1096 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1097 flag_omit_frame_pointer = 1;
1099 /* If we're doing fast math, we don't care about comparison order
1100 wrt NaNs. This lets us use a shorter comparison sequence. */
1101 if (flag_unsafe_math_optimizations)
1102 target_flags &= ~MASK_IEEE_FP;
1106 if (TARGET_ALIGN_DOUBLE)
1107 error ("-malign-double makes no sense in the 64bit mode");
1109 error ("-mrtd calling convention not supported in the 64bit mode");
1110 /* Enable by default the SSE and MMX builtins. */
1111 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1112 ix86_fpmath = FPMATH_SSE;
1115 ix86_fpmath = FPMATH_387;
1117 if (ix86_fpmath_string != 0)
1119 if (! strcmp (ix86_fpmath_string, "387"))
1120 ix86_fpmath = FPMATH_387;
1121 else if (! strcmp (ix86_fpmath_string, "sse"))
1125 warning ("SSE instruction set disabled, using 387 arithmetics");
1126 ix86_fpmath = FPMATH_387;
1129 ix86_fpmath = FPMATH_SSE;
1131 else if (! strcmp (ix86_fpmath_string, "387,sse")
1132 || ! strcmp (ix86_fpmath_string, "sse,387"))
1136 warning ("SSE instruction set disabled, using 387 arithmetics");
1137 ix86_fpmath = FPMATH_387;
1139 else if (!TARGET_80387)
1141 warning ("387 instruction set disabled, using SSE arithmetics");
1142 ix86_fpmath = FPMATH_SSE;
1145 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1148 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1151 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1154 target_flags |= MASK_MMX;
1156 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1159 target_flags |= MASK_MMX;
1160 /* If we are targetting the Athlon architecture, enable the 3Dnow/MMX
1161 extensions it adds. */
1162 if (x86_3dnow_a & (1 << ix86_arch))
1163 target_flags |= MASK_3DNOW_A;
1165 if ((x86_accumulate_outgoing_args & CPUMASK)
1166 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
1168 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1170 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1173 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1174 p = strchr (internal_label_prefix, 'X');
1175 internal_label_prefix_len = p - internal_label_prefix;
1181 optimization_options (level, size)
1183 int size ATTRIBUTE_UNUSED;
1185 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1186 make the problem with not enough registers even worse. */
1187 #ifdef INSN_SCHEDULING
1189 flag_schedule_insns = 0;
1191 if (TARGET_64BIT && optimize >= 1)
1192 flag_omit_frame_pointer = 1;
1195 flag_pcc_struct_return = 0;
1196 flag_asynchronous_unwind_tables = 1;
1200 /* Table of valid machine attributes. */
1201 const struct attribute_spec ix86_attribute_table[] =
1203 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1204 /* Stdcall attribute says callee is responsible for popping arguments
1205 if they are not variable. */
1206 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1207 /* Cdecl attribute says the callee is a normal C declaration */
1208 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1209 /* Regparm attribute specifies how many integer arguments are to be
1210 passed in registers. */
1211 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1212 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1213 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1214 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1215 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1217 { NULL, 0, 0, false, false, false, NULL }
1220 /* Handle a "cdecl" or "stdcall" attribute;
1221 arguments as in struct attribute_spec.handler. */
1223 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1226 tree args ATTRIBUTE_UNUSED;
1227 int flags ATTRIBUTE_UNUSED;
1230 if (TREE_CODE (*node) != FUNCTION_TYPE
1231 && TREE_CODE (*node) != METHOD_TYPE
1232 && TREE_CODE (*node) != FIELD_DECL
1233 && TREE_CODE (*node) != TYPE_DECL)
1235 warning ("`%s' attribute only applies to functions",
1236 IDENTIFIER_POINTER (name));
1237 *no_add_attrs = true;
1242 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1243 *no_add_attrs = true;
1249 /* Handle a "regparm" attribute;
1250 arguments as in struct attribute_spec.handler. */
1252 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1256 int flags ATTRIBUTE_UNUSED;
1259 if (TREE_CODE (*node) != FUNCTION_TYPE
1260 && TREE_CODE (*node) != METHOD_TYPE
1261 && TREE_CODE (*node) != FIELD_DECL
1262 && TREE_CODE (*node) != TYPE_DECL)
1264 warning ("`%s' attribute only applies to functions",
1265 IDENTIFIER_POINTER (name));
1266 *no_add_attrs = true;
1272 cst = TREE_VALUE (args);
1273 if (TREE_CODE (cst) != INTEGER_CST)
1275 warning ("`%s' attribute requires an integer constant argument",
1276 IDENTIFIER_POINTER (name));
1277 *no_add_attrs = true;
1279 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1281 warning ("argument to `%s' attribute larger than %d",
1282 IDENTIFIER_POINTER (name), REGPARM_MAX);
1283 *no_add_attrs = true;
1290 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
1292 /* Generate the assembly code for function entry. FILE is a stdio
1293 stream to output the code to. SIZE is an int: how many units of
1294 temporary storage to allocate.
1296 Refer to the array `regs_ever_live' to determine which registers to
1297 save; `regs_ever_live[I]' is nonzero if register number I is ever
1298 used in the function. This function is responsible for knowing
1299 which registers should not be saved even if used.
1301 We override it here to allow for the new profiling code to go before
1302 the prologue and the old mcount code to go after the prologue (and
1303 after %ebx has been set up for ELF shared library support). */
1306 ix86_osf_output_function_prologue (file, size)
1310 const char *prefix = "";
1311 const char *const lprefix = LPREFIX;
1312 int labelno = profile_label_no;
1316 if (TARGET_UNDERSCORES)
1319 if (profile_flag && OSF_PROFILE_BEFORE_PROLOGUE)
1321 if (!flag_pic && !HALF_PIC_P ())
1323 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1324 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
1327 else if (HALF_PIC_P ())
1331 HALF_PIC_EXTERNAL ("_mcount_ptr");
1332 symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode,
1335 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1336 fprintf (file, "\tmovl %s%s,%%eax\n", prefix,
1338 fprintf (file, "\tcall *(%%eax)\n");
1343 static int call_no = 0;
1345 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1346 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1347 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1348 lprefix, call_no++);
1349 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1351 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1353 fprintf (file, "\tcall *(%%eax)\n");
1359 if (profile_flag && OSF_PROFILE_BEFORE_PROLOGUE)
1363 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1364 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
1369 static int call_no = 0;
1371 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1372 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1373 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1374 lprefix, call_no++);
1375 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1377 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1379 fprintf (file, "\tcall *(%%eax)\n");
1382 #endif /* !OSF_OS */
1384 function_prologue (file, size);
1387 #endif /* OSF_OS || TARGET_OSF1ELF */
1389 /* Return 0 if the attributes for two types are incompatible, 1 if they
1390 are compatible, and 2 if they are nearly compatible (which causes a
1391 warning to be generated). */
1394 ix86_comp_type_attributes (type1, type2)
1398 /* Check for mismatch of non-default calling convention. */
1399 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1401 if (TREE_CODE (type1) != FUNCTION_TYPE)
1404 /* Check for mismatched return types (cdecl vs stdcall). */
1405 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1406 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1411 /* Value is the number of bytes of arguments automatically
1412 popped when returning from a subroutine call.
1413 FUNDECL is the declaration node of the function (as a tree),
1414 FUNTYPE is the data type of the function (as a tree),
1415 or for a library call it is an identifier node for the subroutine name.
1416 SIZE is the number of bytes of arguments passed on the stack.
1418 On the 80386, the RTD insn may be used to pop them if the number
1419 of args is fixed, but if the number is variable then the caller
1420 must pop them all. RTD can't be used for library calls now
1421 because the library is compiled with the Unix compiler.
1422 Use of RTD is a selectable option, since it is incompatible with
1423 standard Unix calling sequences. If the option is not selected,
1424 the caller must always pop the args.
1426 The attribute stdcall is equivalent to RTD on a per module basis. */
1429 ix86_return_pops_args (fundecl, funtype, size)
1434 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1436 /* Cdecl functions override -mrtd, and never pop the stack. */
1437 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1439 /* Stdcall functions will pop the stack if not variable args. */
1440 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1444 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1445 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1446 == void_type_node)))
1450 /* Lose any fake structure return argument. */
1451 if (aggregate_value_p (TREE_TYPE (funtype))
1453 return GET_MODE_SIZE (Pmode);
1458 /* Argument support functions. */
1460 /* Return true when register may be used to pass function parameters. */
1462 ix86_function_arg_regno_p (regno)
1467 return regno < REGPARM_MAX || (TARGET_SSE && SSE_REGNO_P (regno));
1468 if (SSE_REGNO_P (regno) && TARGET_SSE)
1470 /* RAX is used as hidden argument to va_arg functions. */
1473 for (i = 0; i < REGPARM_MAX; i++)
1474 if (regno == x86_64_int_parameter_registers[i])
1479 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1480 for a call to a function whose data type is FNTYPE.
1481 For a library call, FNTYPE is 0. */
1484 init_cumulative_args (cum, fntype, libname)
1485 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1486 tree fntype; /* tree ptr for function decl */
1487 rtx libname; /* SYMBOL_REF of library name or 0 */
1489 static CUMULATIVE_ARGS zero_cum;
1490 tree param, next_param;
1492 if (TARGET_DEBUG_ARG)
1494 fprintf (stderr, "\ninit_cumulative_args (");
1496 fprintf (stderr, "fntype code = %s, ret code = %s",
1497 tree_code_name[(int) TREE_CODE (fntype)],
1498 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1500 fprintf (stderr, "no fntype");
1503 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1508 /* Set up the number of registers to use for passing arguments. */
1509 cum->nregs = ix86_regparm;
1510 cum->sse_nregs = SSE_REGPARM_MAX;
1511 if (fntype && !TARGET_64BIT)
1513 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1516 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1518 cum->maybe_vaarg = false;
1520 /* Determine if this function has variable arguments. This is
1521 indicated by the last argument being 'void_type_mode' if there
1522 are no variable arguments. If there are variable arguments, then
1523 we won't pass anything in registers */
1527 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1528 param != 0; param = next_param)
1530 next_param = TREE_CHAIN (param);
1531 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1535 cum->maybe_vaarg = true;
1539 if ((!fntype && !libname)
1540 || (fntype && !TYPE_ARG_TYPES (fntype)))
1541 cum->maybe_vaarg = 1;
1543 if (TARGET_DEBUG_ARG)
1544 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1549 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1550 of this code is to classify each 8bytes of incoming argument by the register
1551 class and assign registers accordingly. */
1553 /* Return the union class of CLASS1 and CLASS2.
1554 See the x86-64 PS ABI for details. */
/* The visible rules implement the psABI pairwise-merge: MEMORY and the
   x87 classes force MEMORY, INTEGER dominates SSE, NO_CLASS is the
   identity, and SSE is the fallback.  NOTE(review): the listing elides
   the opening brace and the early "return" lines for rules #1 and #2
   (original lines 1559, 1562, 1567) -- verify against the full file.  */
1556 static enum x86_64_reg_class
1557 merge_classes (class1, class2)
1558 enum x86_64_reg_class class1, class2;
1560 /* Rule #1: If both classes are equal, this is the resulting class. */
1561 if (class1 == class2)
1564 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1566 if (class1 == X86_64_NO_CLASS)
1568 if (class2 == X86_64_NO_CLASS)
1571 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1572 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1573 return X86_64_MEMORY_CLASS;
1575 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* Special case first: INTEGERSI merged with SSESF stays INTEGERSI, since
   both describe a 32-bit low half of the 8-byte slot.  */
1576 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1577 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1578 return X86_64_INTEGERSI_CLASS;
1579 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1580 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1581 return X86_64_INTEGER_CLASS;
1583 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1584 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1585 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1586 return X86_64_MEMORY_CLASS;
1588 /* Rule #6: Otherwise class SSE is used. */
1589 return X86_64_SSE_CLASS;
1592 /* Classify the argument of type TYPE and mode MODE.
1593 CLASSES will be filled by the register class used to pass each word
1594 of the operand. The number of words is returned. In case the parameter
1595 should be passed in memory, 0 is returned. As a special case for zero
1596 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1598 BIT_OFFSET is used internally for handling records and specifies offset
1599 of the offset in bits modulo 256 to avoid overflow cases.
1601 See the x86-64 PS ABI for details.
1605 classify_argument (mode, type, classes, bit_offset)
1606 enum machine_mode mode;
1608 enum x86_64_reg_class classes[MAX_CLASSES];
1612 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1613 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1615 if (type && AGGREGATE_TYPE_P (type))
1619 enum x86_64_reg_class subclasses[MAX_CLASSES];
1621 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1625 for (i = 0; i < words; i++)
1626 classes[i] = X86_64_NO_CLASS;
1628 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1629 signalize memory class, so handle it as special case. */
1632 classes[0] = X86_64_NO_CLASS;
1636 /* Classify each field of record and merge classes. */
1637 if (TREE_CODE (type) == RECORD_TYPE)
1639 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1641 if (TREE_CODE (field) == FIELD_DECL)
1645 /* Bitfields are always classified as integer. Handle them
1646 early, since later code would consider them to be
1647 misaligned integers. */
1648 if (DECL_BIT_FIELD (field))
1650 for (i = int_bit_position (field) / 8 / 8;
1651 i < (int_bit_position (field)
1652 + tree_low_cst (DECL_SIZE (field), 0)
1655 merge_classes (X86_64_INTEGER_CLASS,
1660 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1661 TREE_TYPE (field), subclasses,
1662 (int_bit_position (field)
1663 + bit_offset) % 256);
1666 for (i = 0; i < num; i++)
1669 (int_bit_position (field) + bit_offset) / 8 / 8;
1671 merge_classes (subclasses[i], classes[i + pos]);
1677 /* Arrays are handled as small records. */
1678 else if (TREE_CODE (type) == ARRAY_TYPE)
1681 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1682 TREE_TYPE (type), subclasses, bit_offset);
1686 /* The partial classes are now full classes. */
1687 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1688 subclasses[0] = X86_64_SSE_CLASS;
1689 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1690 subclasses[0] = X86_64_INTEGER_CLASS;
1692 for (i = 0; i < words; i++)
1693 classes[i] = subclasses[i % num];
1695 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1696 else if (TREE_CODE (type) == UNION_TYPE)
1698 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1700 if (TREE_CODE (field) == FIELD_DECL)
1703 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1704 TREE_TYPE (field), subclasses,
1708 for (i = 0; i < num; i++)
1709 classes[i] = merge_classes (subclasses[i], classes[i]);
1716 /* Final merger cleanup. */
1717 for (i = 0; i < words; i++)
1719 /* If one class is MEMORY, everything should be passed in
1721 if (classes[i] == X86_64_MEMORY_CLASS)
1724 /* The X86_64_SSEUP_CLASS should be always preceded by
1725 X86_64_SSE_CLASS. */
1726 if (classes[i] == X86_64_SSEUP_CLASS
1727 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1728 classes[i] = X86_64_SSE_CLASS;
1730 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1731 if (classes[i] == X86_64_X87UP_CLASS
1732 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1733 classes[i] = X86_64_SSE_CLASS;
1738 /* Compute alignment needed. We align all types to natural boundaries with
1739 exception of XFmode that is aligned to 64bits. */
1740 if (mode != VOIDmode && mode != BLKmode)
1742 int mode_alignment = GET_MODE_BITSIZE (mode);
1745 mode_alignment = 128;
1746 else if (mode == XCmode)
1747 mode_alignment = 256;
1748 /* Misaligned fields are always returned in memory. */
1749 if (bit_offset % mode_alignment)
1753 /* Classification of atomic types. */
1763 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1764 classes[0] = X86_64_INTEGERSI_CLASS;
1766 classes[0] = X86_64_INTEGER_CLASS;
1770 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1773 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1774 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1777 if (!(bit_offset % 64))
1778 classes[0] = X86_64_SSESF_CLASS;
1780 classes[0] = X86_64_SSE_CLASS;
1783 classes[0] = X86_64_SSEDF_CLASS;
1786 classes[0] = X86_64_X87_CLASS;
1787 classes[1] = X86_64_X87UP_CLASS;
1790 classes[0] = X86_64_X87_CLASS;
1791 classes[1] = X86_64_X87UP_CLASS;
1792 classes[2] = X86_64_X87_CLASS;
1793 classes[3] = X86_64_X87UP_CLASS;
1796 classes[0] = X86_64_SSEDF_CLASS;
1797 classes[1] = X86_64_SSEDF_CLASS;
1800 classes[0] = X86_64_SSE_CLASS;
1809 /* Examine the argument and return set number of register required in each
1810 class. Return 0 iff parameter should be passed in memory. */
1812 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1813 enum machine_mode mode;
1815 int *int_nregs, *sse_nregs;
1818 enum x86_64_reg_class class[MAX_CLASSES];
1819 int n = classify_argument (mode, type, class, 0);
1825 for (n--; n >= 0; n--)
1828 case X86_64_INTEGER_CLASS:
1829 case X86_64_INTEGERSI_CLASS:
1832 case X86_64_SSE_CLASS:
1833 case X86_64_SSESF_CLASS:
1834 case X86_64_SSEDF_CLASS:
1837 case X86_64_NO_CLASS:
1838 case X86_64_SSEUP_CLASS:
1840 case X86_64_X87_CLASS:
1841 case X86_64_X87UP_CLASS:
1845 case X86_64_MEMORY_CLASS:
1850 /* Construct container for the argument used by GCC interface. See
1851 FUNCTION_ARG for the detailed description. */
1853 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1854 enum machine_mode mode;
1857 int nintregs, nsseregs;
1858 int *intreg, sse_regno;
1860 enum machine_mode tmpmode;
1862 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1863 enum x86_64_reg_class class[MAX_CLASSES];
1867 int needed_sseregs, needed_intregs;
1868 rtx exp[MAX_CLASSES];
1871 n = classify_argument (mode, type, class, 0);
1872 if (TARGET_DEBUG_ARG)
1875 fprintf (stderr, "Memory class\n");
1878 fprintf (stderr, "Classes:");
1879 for (i = 0; i < n; i++)
1881 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1883 fprintf (stderr, "\n");
1888 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1890 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1893 /* First construct simple cases. Avoid SCmode, since we want to use
1894 single register to pass this type. */
1895 if (n == 1 && mode != SCmode)
1898 case X86_64_INTEGER_CLASS:
1899 case X86_64_INTEGERSI_CLASS:
1900 return gen_rtx_REG (mode, intreg[0]);
1901 case X86_64_SSE_CLASS:
1902 case X86_64_SSESF_CLASS:
1903 case X86_64_SSEDF_CLASS:
1904 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1905 case X86_64_X87_CLASS:
1906 return gen_rtx_REG (mode, FIRST_STACK_REG);
1907 case X86_64_NO_CLASS:
1908 /* Zero sized array, struct or class. */
1913 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1914 return gen_rtx_REG (TImode, SSE_REGNO (sse_regno));
1916 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1917 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1918 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1919 && class[1] == X86_64_INTEGER_CLASS
1920 && (mode == CDImode || mode == TImode)
1921 && intreg[0] + 1 == intreg[1])
1922 return gen_rtx_REG (mode, intreg[0]);
1924 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1925 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1926 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1928 /* Otherwise figure out the entries of the PARALLEL. */
1929 for (i = 0; i < n; i++)
1933 case X86_64_NO_CLASS:
1935 case X86_64_INTEGER_CLASS:
1936 case X86_64_INTEGERSI_CLASS:
1937 /* Merge TImodes on aligned occasions here too. */
1938 if (i * 8 + 8 > bytes)
1939 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
1940 else if (class[i] == X86_64_INTEGERSI_CLASS)
1944 /* We've requested 24 bytes we don't have mode for. Use DImode. */
1945 if (tmpmode == BLKmode)
1947 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1948 gen_rtx_REG (tmpmode, *intreg),
1952 case X86_64_SSESF_CLASS:
1953 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1954 gen_rtx_REG (SFmode,
1955 SSE_REGNO (sse_regno)),
1959 case X86_64_SSEDF_CLASS:
1960 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1961 gen_rtx_REG (DFmode,
1962 SSE_REGNO (sse_regno)),
1966 case X86_64_SSE_CLASS:
1967 /* Pair SSE with a following SSEUP into one TImode register.  The old
	guard `i < n' was vacuously true inside the enclosing
	`for (i = 0; i < n; i++)' loop, so class[i + 1] could be read one
	past the last classified element; require a real successor.  */
1967 if (i + 1 < n && class[i + 1] == X86_64_SSEUP_CLASS)
1968 tmpmode = TImode, i++;
1971 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1972 gen_rtx_REG (tmpmode,
1973 SSE_REGNO (sse_regno)),
1981 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
1982 for (i = 0; i < nexps; i++)
1983 XVECEXP (ret, 0, i) = exp [i];
1987 /* Update the data in CUM to advance over an argument
1988 of mode MODE and data type TYPE.
1989 (TYPE is null for libcalls where that information may not be available.) */
1992 function_arg_advance (cum, mode, type, named)
1993 CUMULATIVE_ARGS *cum; /* current arg information */
1994 enum machine_mode mode; /* current arg mode */
1995 tree type; /* type of the argument or 0 if lib support */
1996 int named; /* whether or not the argument was named */
1999 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2000 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2002 if (TARGET_DEBUG_ARG)
2004 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2005 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2008 int int_nregs, sse_nregs;
2009 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2010 cum->words += words;
2011 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2013 cum->nregs -= int_nregs;
2014 cum->sse_nregs -= sse_nregs;
2015 cum->regno += int_nregs;
2016 cum->sse_regno += sse_nregs;
2019 cum->words += words;
2023 if (TARGET_SSE && mode == TImode)
2025 cum->sse_words += words;
2026 cum->sse_nregs -= 1;
2027 cum->sse_regno += 1;
2028 if (cum->sse_nregs <= 0)
2036 cum->words += words;
2037 cum->nregs -= words;
2038 cum->regno += words;
2040 if (cum->nregs <= 0)
2050 /* Define where to put the arguments to a function.
2051 Value is zero to push the argument on the stack,
2052 or a hard register in which to store the argument.
2054 MODE is the argument's machine mode.
2055 TYPE is the data type of the argument (as a tree).
2056 This is null for libcalls where that information may
2058 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2059 the preceding args and about the function being called.
2060 NAMED is nonzero if this argument is a named parameter
2061 (otherwise it is an extra parameter matching an ellipsis). */
2064 function_arg (cum, mode, type, named)
2065 CUMULATIVE_ARGS *cum; /* current arg information */
2066 enum machine_mode mode; /* current arg mode */
2067 tree type; /* type of the argument or 0 if lib support */
2068 int named; /* != 0 for normal args, == 0 for ... args */
2072 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
/* WORDS: argument size rounded up to whole stack words.  */
2073 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2075 /* Handle a hidden AL argument containing number of registers for varargs
2076 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2078 if (mode == VOIDmode)
2081 return GEN_INT (cum->maybe_vaarg
2082 ? (cum->sse_nregs < 0
/* x86-64: let construct_container classify the argument into integer/SSE
   registers starting at the current cum->regno position.  */
2090 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2091 &x86_64_int_parameter_registers [cum->regno],
2096 /* For now, pass fp/complex values on the stack. */
2105 if (words <= cum->nregs)
2106 ret = gen_rtx_REG (mode, cum->regno);
2110 ret = gen_rtx_REG (mode, cum->sse_regno);
/* Optional debug trace of the chosen argument location (register or stack).  */
2114 if (TARGET_DEBUG_ARG)
2117 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
2118 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2121 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
2123 fprintf (stderr, ", stack");
2125 fprintf (stderr, " )\n");
2131 /* Gives the alignment boundary, in bits, of an argument with the specified mode
/* Uses TYPE_ALIGN when a type is supplied, otherwise the mode's natural
   alignment; the result is never below PARM_BOUNDARY.
   NOTE(review): the condition selecting between the two sources is elided
   in this listing — presumably `type != 0'.  */
2135 ix86_function_arg_boundary (mode, type)
2136 enum machine_mode mode;
2141 return PARM_BOUNDARY;
2143 align = TYPE_ALIGN (type);
2145 align = GET_MODE_ALIGNMENT (mode);
2146 if (align < PARM_BOUNDARY)
2147 align = PARM_BOUNDARY;
2153 /* Return true if N is a possible register number of function value. */
/* Accepts register 0 (%eax/%rax), the first x87 stack register when the
   target returns floats in the 80387, and %xmm0 when SSE is enabled.
   NOTE(review): the two return statements below are alternatives chosen by
   an elided target test (presumably TARGET_64BIT vs. 32-bit).  */
2155 ix86_function_value_regno_p (regno)
2160 return ((regno) == 0
2161 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2162 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2164 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2165 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2166 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2169 /* Define how to find the value returned by a function.
2170 VALTYPE is the data type of the value (as a tree).
2171 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2172 otherwise, FUNC is 0. */
2174 ix86_function_value (valtype)
/* x86-64 path: classify the return value into registers the same way
   arguments are classified.  */
2179 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2180 REGPARM_MAX, SSE_REGPARM_MAX,
2181 x86_64_int_return_registers, 0);
2182 /* For zero sized structures, construct_container returns NULL, but we need
2183 to keep rest of compiler happy by returning a meaningful value. */
2185 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
/* Non-64-bit path: pick the conventional return register for the mode.  */
2189 return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
2192 /* Return false iff type is returned in memory. */
2194 ix86_return_in_memory (type)
2197 int needed_intregs, needed_sseregs;
/* x86-64: in memory exactly when the value cannot be classified into
   registers by examine_argument.  */
2200 return !examine_argument (TYPE_MODE (type), type, 1,
2201 &needed_intregs, &needed_sseregs);
/* 32-bit: BLKmode aggregates, 8-byte vectors, and large (>12 byte)
   non-vector, non-TImode, non-TFmode values go in memory.  */
2205 if (TYPE_MODE (type) == BLKmode
2206 || (VECTOR_MODE_P (TYPE_MODE (type))
2207 && int_size_in_bytes (type) == 8)
2208 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2209 && TYPE_MODE (type) != TFmode
2210 && !VECTOR_MODE_P (TYPE_MODE (type))))
2216 /* Define how to find the value returned by a library function
2217 assuming the value has mode MODE. */
/* NOTE(review): the conditions selecting among the three 64-bit returns
   below are elided here — presumably keyed on MODE (SSE modes, x87 modes,
   integer modes respectively).  */
2219 ix86_libcall_value (mode)
2220 enum machine_mode mode;
2230 return gen_rtx_REG (mode, FIRST_SSE_REG);
2233 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2235 return gen_rtx_REG (mode, 0);
/* 32-bit fallback: conventional return register for MODE.  */
2239 return gen_rtx_REG (mode, VALUE_REGNO (mode));
2242 /* Create the va_list data type. */
2245 ix86_build_va_list ()
2247 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2249 /* For i386 we use plain pointer to argument area. */
2251 return build_pointer_type (char_type_node);
/* x86-64: build the four-field __va_list_tag record mandated by the ABI:
   gp_offset, fp_offset, overflow_arg_area, reg_save_area.  */
2253 record = make_lang_type (RECORD_TYPE);
2254 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2256 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2257 unsigned_type_node);
2258 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2259 unsigned_type_node);
2260 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2262 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2265 DECL_FIELD_CONTEXT (f_gpr) = record;
2266 DECL_FIELD_CONTEXT (f_fpr) = record;
2267 DECL_FIELD_CONTEXT (f_ovf) = record;
2268 DECL_FIELD_CONTEXT (f_sav) = record;
/* Chain the fields onto the record and lay it out.  */
2270 TREE_CHAIN (record) = type_decl;
2271 TYPE_NAME (record) = type_decl;
2272 TYPE_FIELDS (record) = f_gpr;
2273 TREE_CHAIN (f_gpr) = f_fpr;
2274 TREE_CHAIN (f_fpr) = f_ovf;
2275 TREE_CHAIN (f_ovf) = f_sav;
2277 layout_type (record);
2279 /* The correct type is an array type of one element. */
2280 return build_array_type (record, build_index_type (size_zero_node));
2283 /* Perform any needed actions needed for a function that is receiving a
2284 variable number of arguments.
2288 MODE and TYPE are the mode and type of the current parameter.
2290 PRETEND_SIZE is a variable that should be set to the amount of stack
2291 that must be pushed by the prolog to pretend that our caller pushed
2294 Normally, this macro will push all remaining incoming registers on the
2295 stack and set PRETEND_SIZE to the length of the registers pushed. */
2298 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2299 CUMULATIVE_ARGS *cum;
2300 enum machine_mode mode;
2302 int *pretend_size ATTRIBUTE_UNUSED;
2306 CUMULATIVE_ARGS next_cum;
2307 rtx save_area = NULL_RTX, mem;
2320 /* Indicate to allocate space on the stack for varargs save area. */
2321 ix86_save_varrargs_registers = 1;
2323 fntype = TREE_TYPE (current_function_decl);
/* STDARG_P: the prototype ends in an ellipsis (last arg type is not void).  */
2324 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2325 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2326 != void_type_node));
2328 /* For varargs, we do not want to skip the dummy va_dcl argument.
2329 For stdargs, we do want to skip the last named argument. */
2332 function_arg_advance (&next_cum, mode, type, 1);
2335 save_area = frame_pointer_rtx;
2337 set = get_varargs_alias_set ();
/* Spill the remaining unnamed integer argument registers into the
   register save area.  */
2339 for (i = next_cum.regno; i < ix86_regparm; i++)
2341 mem = gen_rtx_MEM (Pmode,
2342 plus_constant (save_area, i * UNITS_PER_WORD));
2343 set_mem_alias_set (mem, set);
2344 emit_move_insn (mem, gen_rtx_REG (Pmode,
2345 x86_64_int_parameter_registers[i]));
2348 if (next_cum.sse_nregs)
2350 /* Now emit code to save SSE registers. The AX parameter contains number
2351 of SSE parameter registers used to call this function. We use
2352 sse_prologue_save insn template that produces computed jump across
2353 SSE saves. We need some preparation work to get this working. */
2355 label = gen_label_rtx ();
2356 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2358 /* Compute address to jump to :
2359 label - 5*eax + nnamed_sse_arguments*5 */
2360 tmp_reg = gen_reg_rtx (Pmode);
2361 nsse_reg = gen_reg_rtx (Pmode);
/* %al holds the count of SSE registers actually used by the caller.  */
2362 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2363 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2364 gen_rtx_MULT (Pmode, nsse_reg,
2366 if (next_cum.sse_regno)
2369 gen_rtx_CONST (DImode,
2370 gen_rtx_PLUS (DImode,
2372 GEN_INT (next_cum.sse_regno * 4))));
2374 emit_move_insn (nsse_reg, label_ref);
2375 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2377 /* Compute address of memory block we save into. We always use pointer
2378 pointing 127 bytes after first byte to store - this is needed to keep
2379 instruction size limited by 4 bytes. */
2380 tmp_reg = gen_reg_rtx (Pmode);
2381 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2382 plus_constant (save_area,
2383 8 * REGPARM_MAX + 127)));
2384 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2385 set_mem_alias_set (mem, set);
2386 set_mem_align (mem, BITS_PER_WORD);
2388 /* And finally do the dirty job! */
2389 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2390 GEN_INT (next_cum.sse_regno), label));
2395 /* Implement va_start. */
2398 ix86_va_start (stdarg_p, valist, nextarg)
2403 HOST_WIDE_INT words, n_gpr, n_fpr;
2404 tree f_gpr, f_fpr, f_ovf, f_sav;
2405 tree gpr, fpr, ovf, sav, t;
2407 /* Only 64bit target needs something special. */
2410 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
/* Pick apart the four fields of the x86-64 __va_list_tag record built by
   ix86_build_va_list.  */
2414 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2415 f_fpr = TREE_CHAIN (f_gpr);
2416 f_ovf = TREE_CHAIN (f_fpr);
2417 f_sav = TREE_CHAIN (f_ovf);
2419 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2420 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2421 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2422 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2423 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2425 /* Count number of gp and fp argument registers used. */
2426 words = current_function_args_info.words;
2427 n_gpr = current_function_args_info.regno;
2428 n_fpr = current_function_args_info.sse_regno;
2430 if (TARGET_DEBUG_ARG)
2431 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2432 (int)words, (int)n_gpr, (int)n_fpr);
/* gp_offset = bytes of integer registers already consumed (8 each).  */
2434 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2435 build_int_2 (n_gpr * 8, 0));
2436 TREE_SIDE_EFFECTS (t) = 1;
2437 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset starts past the integer save area (8*REGPARM_MAX), 16 bytes
   per SSE register consumed.  */
2439 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2440 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2441 TREE_SIDE_EFFECTS (t) = 1;
2442 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2444 /* Find the overflow area. */
2445 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2447 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2448 build_int_2 (words * UNITS_PER_WORD, 0));
2449 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2450 TREE_SIDE_EFFECTS (t) = 1;
2451 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2453 /* Find the register save area.
2454 Prologue of the function save it right above stack frame. */
2455 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2456 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2457 TREE_SIDE_EFFECTS (t) = 1;
2458 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2461 /* Implement va_arg. */
2463 ix86_va_arg (valist, type)
2466 static int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2467 tree f_gpr, f_fpr, f_ovf, f_sav;
2468 tree gpr, fpr, ovf, sav, t;
2470 rtx lab_false, lab_over = NULL_RTX;
2474 /* Only 64bit target needs something special. */
2477 return std_expand_builtin_va_arg (valist, type);
/* Locate the four __va_list_tag fields (see ix86_build_va_list).  */
2480 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2481 f_fpr = TREE_CHAIN (f_gpr);
2482 f_ovf = TREE_CHAIN (f_fpr);
2483 f_sav = TREE_CHAIN (f_ovf);
2485 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2486 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2487 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2488 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2489 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2491 size = int_size_in_bytes (type);
2492 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
/* CONTAINER describes how this type would be passed in registers;
   NULL means it is always passed in memory.  */
2494 container = construct_container (TYPE_MODE (type), type, 0,
2495 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2497 * Pull the value out of the saved registers ...
2500 addr_rtx = gen_reg_rtx (Pmode);
2504 rtx int_addr_rtx, sse_addr_rtx;
2505 int needed_intregs, needed_sseregs;
2508 lab_over = gen_label_rtx ();
2509 lab_false = gen_label_rtx ();
2511 examine_argument (TYPE_MODE (type), type, 0,
2512 &needed_intregs, &needed_sseregs);
/* Over-aligned values cannot be read directly from the save area.  */
2515 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2516 || TYPE_ALIGN (type) > 128);
2518 /* In case we are passing a structure, verify that it is consecutive block
2519 on the register save area. If not we need to do moves. */
2520 if (!need_temp && !REG_P (container))
2522 /* Verify that all registers are strictly consecutive */
2523 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2527 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2529 rtx slot = XVECEXP (container, 0, i);
2530 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int)i
2531 || INTVAL (XEXP (slot, 1)) != i * 16)
2539 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2541 rtx slot = XVECEXP (container, 0, i);
2542 if (REGNO (XEXP (slot, 0)) != (unsigned int)i
2543 || INTVAL (XEXP (slot, 1)) != i * 8)
2550 int_addr_rtx = addr_rtx;
2551 sse_addr_rtx = addr_rtx;
2555 int_addr_rtx = gen_reg_rtx (Pmode);
2556 sse_addr_rtx = gen_reg_rtx (Pmode);
2558 /* First ensure that we fit completely in registers. */
2561 emit_cmp_and_jump_insns (expand_expr
2562 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2563 GEN_INT ((REGPARM_MAX - needed_intregs +
2564 1) * 8), GE, const1_rtx, SImode,
2569 emit_cmp_and_jump_insns (expand_expr
2570 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2571 GEN_INT ((SSE_REGPARM_MAX -
2572 needed_sseregs + 1) * 16 +
2573 REGPARM_MAX * 8), GE, const1_rtx,
2574 SImode, 1, lab_false);
2577 /* Compute index to start of area used for integer regs. */
2580 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2581 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2582 if (r != int_addr_rtx)
2583 emit_move_insn (int_addr_rtx, r);
2587 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2588 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2589 if (r != sse_addr_rtx)
2590 emit_move_insn (sse_addr_rtx, r);
2597 /* Never use the memory itself, as it has the alias set. */
2598 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2599 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2600 set_mem_alias_set (mem, get_varargs_alias_set ());
2601 set_mem_align (mem, BITS_PER_UNIT);
/* Copy each register slot from the save area into the temporary,
   piece by piece.  */
2603 for (i = 0; i < XVECLEN (container, 0); i++)
2605 rtx slot = XVECEXP (container, 0, i);
2606 rtx reg = XEXP (slot, 0);
2607 enum machine_mode mode = GET_MODE (reg);
2613 if (SSE_REGNO_P (REGNO (reg)))
2615 src_addr = sse_addr_rtx;
2616 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2620 src_addr = int_addr_rtx;
2621 src_offset = REGNO (reg) * 8;
2623 src_mem = gen_rtx_MEM (mode, src_addr);
2624 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2625 src_mem = adjust_address (src_mem, mode, src_offset);
2626 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2627 emit_move_insn (dest_mem, src_mem);
/* Advance gp_offset / fp_offset past the registers just consumed.  */
2634 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2635 build_int_2 (needed_intregs * 8, 0));
2636 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2637 TREE_SIDE_EFFECTS (t) = 1;
2638 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2643 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2644 build_int_2 (needed_sseregs * 16, 0));
2645 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2646 TREE_SIDE_EFFECTS (t) = 1;
2647 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2650 emit_jump_insn (gen_jump (lab_over));
2652 emit_label (lab_false);
2655 /* ... otherwise out of the overflow area. */
2657 /* Care for on-stack alignment if needed. */
2658 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2662 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2663 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2664 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2668 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2670 emit_move_insn (addr_rtx, r);
/* Bump the overflow pointer past this argument.  */
2673 build (PLUS_EXPR, TREE_TYPE (t), t,
2674 build_int_2 (rsize * UNITS_PER_WORD, 0));
2675 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2676 TREE_SIDE_EFFECTS (t) = 1;
2677 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2680 emit_label (lab_over);
2685 /* Return nonzero if OP is general operand representable on x86_64. */
/* NOTE(review): the plain general_operand return is presumably guarded by
   an elided `if (!TARGET_64BIT)'.  On 64-bit, constants must additionally
   fit in a sign-extended 32-bit immediate.  */
2688 x86_64_general_operand (op, mode)
2690 enum machine_mode mode;
2693 return general_operand (op, mode);
2694 if (nonimmediate_operand (op, mode))
2696 return x86_64_sign_extended_value (op);
2699 /* Return nonzero if OP is general operand representable on x86_64
2700 as either sign extended or zero extended constant. */
2703 x86_64_szext_general_operand (op, mode)
2705 enum machine_mode mode;
2708 return general_operand (op, mode);
2709 if (nonimmediate_operand (op, mode))
/* Accept constants that fit either as sign- or zero-extended 32-bit.  */
2711 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2714 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2717 x86_64_nonmemory_operand (op, mode)
2719 enum machine_mode mode;
2722 return nonmemory_operand (op, mode);
2723 if (register_operand (op, mode))
/* Constants must fit in a sign-extended 32-bit immediate.  */
2725 return x86_64_sign_extended_value (op);
2728 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2731 x86_64_movabs_operand (op, mode)
2733 enum machine_mode mode;
2735 if (!TARGET_64BIT || !flag_pic)
2736 return nonmemory_operand (op, mode);
2737 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
/* Under PIC, full 64-bit constants are fine only when no symbol is
   involved (symbols would need relocation through the GOT).  */
2739 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2744 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
/* Same as x86_64_nonmemory_operand, but also accepts zero-extended
   32-bit constants.  */
2747 x86_64_szext_nonmemory_operand (op, mode)
2749 enum machine_mode mode;
2752 return nonmemory_operand (op, mode);
2753 if (register_operand (op, mode))
2755 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2758 /* Return nonzero if OP is immediate operand representable on x86_64. */
2761 x86_64_immediate_operand (op, mode)
2763 enum machine_mode mode;
2766 return immediate_operand (op, mode);
2767 return x86_64_sign_extended_value (op);
2770 /* Return nonzero if OP is immediate operand representable on x86_64
   as a zero-extended 32-bit value. */
2773 x86_64_zext_immediate_operand (op, mode)
2775 enum machine_mode mode ATTRIBUTE_UNUSED;
2777 return x86_64_zero_extended_value (op);
2780 /* Return nonzero if OP is (const_int 1), else return zero. */
2783 const_int_1_operand (op, mode)
2785 enum machine_mode mode ATTRIBUTE_UNUSED;
2787 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2790 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2791 reference and a constant. */
2794 symbolic_operand (op, mode)
2796 enum machine_mode mode ATTRIBUTE_UNUSED;
2798 switch (GET_CODE (op))
/* NOTE(review): the UNSPEC numbers 6, 7 and 15 tested below are magic
   relocation markers (presumably @GOT, @GOTOFF and @GOTPCREL) — verify
   against the UNSPEC definitions in i386.md.  */
2806 if (GET_CODE (op) == SYMBOL_REF
2807 || GET_CODE (op) == LABEL_REF
2808 || (GET_CODE (op) == UNSPEC
2809 && (XINT (op, 1) == 6
2810 || XINT (op, 1) == 7
2811 || XINT (op, 1) == 15)))
2813 if (GET_CODE (op) != PLUS
2814 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2818 if (GET_CODE (op) == SYMBOL_REF
2819 || GET_CODE (op) == LABEL_REF)
2821 /* Only @GOTOFF gets offsets. */
2822 if (GET_CODE (op) != UNSPEC
2823 || XINT (op, 1) != 7)
2826 op = XVECEXP (op, 0, 0);
2827 if (GET_CODE (op) == SYMBOL_REF
2828 || GET_CODE (op) == LABEL_REF)
2837 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2840 pic_symbolic_operand (op, mode)
2842 enum machine_mode mode ATTRIBUTE_UNUSED;
2844 if (GET_CODE (op) != CONST)
/* Look for an UNSPEC either directly under the CONST or as the left
   operand of a (plus unspec const_int).  */
2849 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2854 if (GET_CODE (op) == UNSPEC)
2856 if (GET_CODE (op) != PLUS
2857 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2860 if (GET_CODE (op) == UNSPEC)
2866 /* Return true if OP is a symbolic operand that resolves locally. */
2869 local_symbolic_operand (op, mode)
2871 enum machine_mode mode ATTRIBUTE_UNUSED;
2873 if (GET_CODE (op) == LABEL_REF)
/* Strip a (const (plus sym const_int)) wrapper down to the symbol.  */
2876 if (GET_CODE (op) == CONST
2877 && GET_CODE (XEXP (op, 0)) == PLUS
2878 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2879 op = XEXP (XEXP (op, 0), 0);
2881 if (GET_CODE (op) != SYMBOL_REF)
2884 /* These we've been told are local by varasm and encode_section_info
2886 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2889 /* There is, however, a not insubstantial body of code in the rest of
2890 the compiler that assumes it can just stick the results of
2891 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2892 /* ??? This is a hack. Should update the body of the compiler to
2893 always create a DECL an invoke ENCODE_SECTION_INFO. */
2894 if (strncmp (XSTR (op, 0), internal_label_prefix,
2895 internal_label_prefix_len) == 0)
2901 /* Test for a valid operand for a call instruction. Don't allow the
2902 arg pointer register or virtual regs since they may decay into
2903 reg + const, which the patterns can't handle. */
2906 call_insn_operand (op, mode)
2908 enum machine_mode mode ATTRIBUTE_UNUSED;
2910 /* Disallow indirect through a virtual register. This leads to
2911 compiler aborts when trying to eliminate them. */
2912 if (GET_CODE (op) == REG
2913 && (op == arg_pointer_rtx
2914 || op == frame_pointer_rtx
2915 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
2916 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
2919 /* Disallow `call 1234'. Due to varying assembler lameness this
2920 gets either rejected or translated to `call .+1234'. */
2921 if (GET_CODE (op) == CONST_INT)
2924 /* Explicitly allow SYMBOL_REF even if pic. */
2925 if (GET_CODE (op) == SYMBOL_REF)
2928 /* Half-pic doesn't allow anything but registers and constants.
2929 We've just taken care of the later. */
2931 return register_operand (op, Pmode);
2933 /* Otherwise we can allow any general_operand in the address. */
2934 return general_operand (op, Pmode);
/* Return true if OP is a symbol (possibly offset by a constant) that can
   serve as a constant call target.  */
2938 constant_call_address_operand (op, mode)
2940 enum machine_mode mode ATTRIBUTE_UNUSED;
2942 if (GET_CODE (op) == CONST
2943 && GET_CODE (XEXP (op, 0)) == PLUS
2944 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2945 op = XEXP (XEXP (op, 0), 0);
2946 return GET_CODE (op) == SYMBOL_REF;
2949 /* Match exactly zero and one. */
2952 const0_operand (op, mode)
2954 enum machine_mode mode;
2956 return op == CONST0_RTX (mode);
/* Match the canonical (const_int 1) rtx, regardless of mode.  */
2960 const1_operand (op, mode)
2962 enum machine_mode mode ATTRIBUTE_UNUSED;
2964 return op == const1_rtx;
2967 /* Match 2, 4, or 8. Used for leal multiplicands. */
2970 const248_operand (op, mode)
2972 enum machine_mode mode ATTRIBUTE_UNUSED;
2974 return (GET_CODE (op) == CONST_INT
2975 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
2978 /* True if this is a constant appropriate for an increment or decrement. */
2981 incdec_operand (op, mode)
2983 enum machine_mode mode ATTRIBUTE_UNUSED;
2985 /* On Pentium4, the inc and dec operations causes extra dependency on flag
2986 registers, since carry flag is not set. */
2987 if (TARGET_PENTIUM4 && !optimize_size)
2989 return op == const1_rtx || op == constm1_rtx;
2992 /* Return nonzero if OP is acceptable as operand of DImode shift
/* NOTE(review): which of the two returns applies is decided by an elided
   target check — presumably memory operands are allowed only on 64-bit.  */
2996 shiftdi_operand (op, mode)
2998 enum machine_mode mode ATTRIBUTE_UNUSED;
3001 return nonimmediate_operand (op, mode);
3003 return register_operand (op, mode);
3006 /* Return false if this is the stack pointer, or any other fake
3007 register eliminable to the stack pointer. Otherwise, this is
3010 This is used to prevent esp from being used as an index reg.
3011 Which would only happen in pathological cases. */
3014 reg_no_sp_operand (op, mode)
3016 enum machine_mode mode;
/* Look through a SUBREG before testing the pointer registers.  */
3019 if (GET_CODE (t) == SUBREG)
3021 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3024 return register_operand (op, mode);
/* Return true if OP is an MMX register.  */
3028 mmx_reg_operand (op, mode)
3030 enum machine_mode mode ATTRIBUTE_UNUSED;
3032 return MMX_REG_P (op);
3035 /* Return false if this is any eliminable register. Otherwise
/* … a general_operand.  Rejects the frame/arg pointers and the virtual
   registers that elimination may rewrite into reg+const.  */
3039 general_no_elim_operand (op, mode)
3041 enum machine_mode mode;
3044 if (GET_CODE (t) == SUBREG)
3046 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3047 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3048 || t == virtual_stack_dynamic_rtx)
3051 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3052 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3055 return general_operand (op, mode);
3058 /* Return false if this is any eliminable register. Otherwise
3059 register_operand or const_int. */
3062 nonmemory_no_elim_operand (op, mode)
3064 enum machine_mode mode;
/* Same eliminable-register screen as general_no_elim_operand above.  */
3067 if (GET_CODE (t) == SUBREG)
3069 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3070 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3071 || t == virtual_stack_dynamic_rtx)
3074 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3077 /* Return true if op is a Q_REGS class register. */
3080 q_regs_operand (op, mode)
3082 enum machine_mode mode;
3084 if (mode != VOIDmode && GET_MODE (op) != mode)
3086 if (GET_CODE (op) == SUBREG)
3087 op = SUBREG_REG (op);
3088 return QI_REG_P (op);
3091 /* Return true if op is a NON_Q_REGS class register. */
3094 non_q_regs_operand (op, mode)
3096 enum machine_mode mode;
3098 if (mode != VOIDmode && GET_MODE (op) != mode)
3100 if (GET_CODE (op) == SUBREG)
3101 op = SUBREG_REG (op);
3102 return NON_QI_REG_P (op);
3105 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3108 sse_comparison_operator (op, mode)
3110 enum machine_mode mode ATTRIBUTE_UNUSED;
3112 enum rtx_code code = GET_CODE (op);
3115 /* Operations supported directly. */
3125 /* These are equivalent to ones above in non-IEEE comparisons. */
/* With IEEE semantics these would mis-handle NaNs, so reject them.  */
3132 return !TARGET_IEEE_FP;
3137 /* Return 1 if OP is a valid comparison operator in valid mode. */
3139 ix86_comparison_operator (op, mode)
3141 enum machine_mode mode;
3143 enum machine_mode inmode;
3144 enum rtx_code code = GET_CODE (op);
3145 if (mode != VOIDmode && GET_MODE (op) != mode)
3147 if (GET_RTX_CLASS (code) != '<')
3149 inmode = GET_MODE (XEXP (op, 0));
/* FP comparisons are valid only when they need no splitting into
   bypass/second parts.  */
3151 if (inmode == CCFPmode || inmode == CCFPUmode)
3153 enum rtx_code second_code, bypass_code;
3154 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3155 return (bypass_code == NIL && second_code == NIL);
3162 if (inmode == CCmode || inmode == CCGCmode
3163 || inmode == CCGOCmode || inmode == CCNOmode)
3166 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3167 if (inmode == CCmode)
3171 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3179 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3182 fcmov_comparison_operator (op, mode)
3184 enum machine_mode mode;
3186 enum machine_mode inmode;
3187 enum rtx_code code = GET_CODE (op);
3188 if (mode != VOIDmode && GET_MODE (op) != mode)
3190 if (GET_RTX_CLASS (code) != '<')
3192 inmode = GET_MODE (XEXP (op, 0));
3193 if (inmode == CCFPmode || inmode == CCFPUmode)
3195 enum rtx_code second_code, bypass_code;
3196 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3197 if (bypass_code != NIL || second_code != NIL)
/* Map the FP comparison onto the integer condition fcmov tests.  */
3199 code = ix86_fp_compare_code_to_integer (code);
3201 /* i387 supports just limited amount of conditional codes. */
3204 case LTU: case GTU: case LEU: case GEU:
3205 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3208 case ORDERED: case UNORDERED:
3216 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3219 promotable_binary_operator (op, mode)
3221 enum machine_mode mode ATTRIBUTE_UNUSED;
3223 switch (GET_CODE (op))
3226 /* Modern CPUs have same latency for HImode and SImode multiply,
3227 but 386 and 486 do HImode multiply faster. */
3228 return ix86_cpu > PROCESSOR_I486;
3240 /* Nearly general operand, but accept any const_double, since we wish
3241 to be able to drop them into memory rather than have them get pulled
3245 cmp_fp_expander_operand (op, mode)
3247 enum machine_mode mode;
3249 if (mode != VOIDmode && mode != GET_MODE (op))
3251 if (GET_CODE (op) == CONST_DOUBLE)
3253 return general_operand (op, mode);
3256 /* Match an SI or HImode register for a zero_extract. */
3259 ext_register_operand (op, mode)
3261 enum machine_mode mode ATTRIBUTE_UNUSED;
3264 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3265 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3268 if (!register_operand (op, VOIDmode))
3271 /* Be careful to accept only registers having upper parts
   (ax/bx/cx/dx, hard regno < 4) or not-yet-allocated pseudos.  */
3272 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3273 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3276 /* Return 1 if this is a valid binary floating-point operation.
3277 OP is the expression matched, and MODE is its mode. */
3280 binary_fp_operator (op, mode)
3282 enum machine_mode mode;
3284 if (mode != VOIDmode && mode != GET_MODE (op))
3287 switch (GET_CODE (op))
3293 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
/* Return 1 if OP is a MULT rtx.  */
3301 mult_operator(op, mode)
3303 enum machine_mode mode ATTRIBUTE_UNUSED;
3305 return GET_CODE (op) == MULT;
/* Return 1 if OP is a DIV rtx.  */
3309 div_operator(op, mode)
3311 enum machine_mode mode ATTRIBUTE_UNUSED;
3313 return GET_CODE (op) == DIV;
/* Return 1 if OP is a commutative ('c') or ordinary binary ('2')
   arithmetic/logical operator of the requested mode.  */
3317 arith_or_logical_operator (op, mode)
3319 enum machine_mode mode;
3321 return ((mode == VOIDmode || GET_MODE (op) == mode)
3322 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3323 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3326 /* Returns 1 if OP is memory operand with a displacement. */
3329 memory_displacement_operand (op, mode)
3331 enum machine_mode mode;
3333 struct ix86_address parts;
3335 if (! memory_operand (op, mode))
3338 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3341 return parts.disp != NULL_RTX;
3344 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3345 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3347 ??? It seems likely that this will only work because cmpsi is an
3348 expander, and no actual insns use this. */
3351 cmpsi_operand (op, mode)
3353 enum machine_mode mode;
3355 if (nonimmediate_operand (op, mode))
/* Also accept (and (zero_extract x 8 8) const) — the shape produced for
   tests of the %ah/%bh/%ch/%dh byte.  */
3358 if (GET_CODE (op) == AND
3359 && GET_MODE (op) == SImode
3360 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3361 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3362 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3363 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3364 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3365 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3371 /* Returns 1 if OP is memory operand that can not be represented by the
3375 long_memory_operand (op, mode)
3377 enum machine_mode mode;
3379 if (! memory_operand (op, mode))
/* Nonzero address length means extra displacement/SIB bytes.  */
3382 return memory_address_length (op) != 0;
3385 /* Return nonzero if the rtx is known aligned. */
3388 aligned_operand (op, mode)
3390 enum machine_mode mode;
3392 struct ix86_address parts;
3394 if (!general_operand (op, mode))
3397 /* Registers and immediate operands are always "aligned". */
3398 if (GET_CODE (op) != MEM)
3401 /* Don't even try to do any aligned optimizations with volatiles. */
3402 if (MEM_VOLATILE_P (op))
3407 /* Pushes and pops are only valid on the stack pointer. */
3408 if (GET_CODE (op) == PRE_DEC
3409 || GET_CODE (op) == POST_INC)
3412 /* Decode the address. */
3413 if (! ix86_decompose_address (op, &parts))
3416 /* Look for some component that isn't known to be aligned. */
3420 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3425 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
/* Displacement must be a multiple of 4 to keep 32-bit alignment.  */
3430 if (GET_CODE (parts.disp) != CONST_INT
3431 || (INTVAL (parts.disp) & 3) != 0)
3435 /* Didn't find one -- this must be an aligned address. */
3439 /* Return true if the constant is something that can be loaded with
3440 a special instruction. Only handle 0.0 and 1.0; others are less
3444 standard_80387_constant_p (x)
3447 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3449 /* Note that on the 80387, other constants, such as pi, that we should support
3450 too. On some machines, these are much slower to load as standard constant,
3451 than to load from doubles in memory. */
3452 if (x == CONST0_RTX (GET_MODE (x)))
3454 if (x == CONST1_RTX (GET_MODE (x)))
3459 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3462 standard_sse_constant_p (x)
3465 if (GET_CODE (x) != CONST_DOUBLE)
3467 return (x == CONST0_RTX (GET_MODE (x)));
3470 /* Returns 1 if OP contains a symbol reference */
/* Recursive walk over the rtx: check 'E' vector elements and 'e'
   sub-expressions for any SYMBOL_REF or LABEL_REF.  */
3473 symbolic_reference_mentioned_p (op)
3476 register const char *fmt;
3479 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3482 fmt = GET_RTX_FORMAT (GET_CODE (op));
3483 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3489 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3490 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3494 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3501 /* Return 1 if it is appropriate to emit `ret' instructions in the
3502 body of a function. Do this only if the epilogue is simple, needing a
3503 couple of insns. Prior to reloading, we can't tell how many registers
3504 must be saved, so return 0 then. Return 0 if there is no frame
3505 marker to de-allocate.
3507 If NON_SAVING_SETJMP is defined and true, then it is not possible
3508 for the epilogue to be simple, so return 0. This is a special case
3509 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3510 until final, but jump_optimize may need to know sooner if a
3514 ix86_can_use_return_insn_p ()
3516 struct ix86_frame frame;
3518 #ifdef NON_SAVING_SETJMP
3519 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3523 if (! reload_completed || frame_pointer_needed)
3526 /* Don't allow more than 32 pop, since that's all we can do
3527 with one instruction. */
3528 if (current_function_pops_args
3529 && current_function_args_size >= 32768)
/* Simple epilogue: nothing to allocate/deallocate and no saved regs.  */
3532 ix86_compute_frame_layout (&frame);
3533 return frame.to_allocate == 0 && frame.nregs == 0;
3536 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
/* x86-64 immediates are 32 bits, sign-extended to 64; this predicate checks
   whether VALUE (integer, symbol, label, or symbol+offset CONST) is known to
   fit under the active code model (ix86_cmodel).
   NOTE(review): switch-case labels and several returns are missing from this
   extract; code lines kept byte-identical.  */
3538 x86_64_sign_extended_value (value)
3541 switch (GET_CODE (value))
3543 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3544 to be at least 32 and this all acceptable constants are
3545 represented as CONST_INT. */
3547 if (HOST_BITS_PER_WIDE_INT == 32)
3551 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
/* A CONST_INT fits iff truncating to SImode loses no information.  */
3552 return trunc_int_for_mode (val, SImode) == val;
3556 /* For certain code models, the symbolic references are known to fit. */
3558 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3560 /* For certain code models, the code is near as well. */
3562 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3564 /* We also may accept the offsetted memory references in certain special
/* Below: CONST handling — either a single GOT-style UNSPEC (code 15) or a
   PLUS of a symbol/label and a CONST_INT offset.  */
3567 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3568 && XVECLEN (XEXP (value, 0), 0) == 1
3569 && XINT (XEXP (value, 0), 1) == 15)
3571 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3573 rtx op1 = XEXP (XEXP (value, 0), 0);
3574 rtx op2 = XEXP (XEXP (value, 0), 1);
3575 HOST_WIDE_INT offset;
3577 if (ix86_cmodel == CM_LARGE)
3579 if (GET_CODE (op2) != CONST_INT)
3581 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3582 switch (GET_CODE (op1))
3585 /* For CM_SMALL assume that latest object is 1MB before
3586 end of 31bits boundary. We may also accept pretty
3587 large negative constants knowing that all objects are
3588 in the positive half of address space. */
3589 if (ix86_cmodel == CM_SMALL
3590 && offset < 1024*1024*1024
3591 && trunc_int_for_mode (offset, SImode) == offset)
3593 /* For CM_KERNEL we know that all object resist in the
3594 negative half of 32bits address space. We may not
3595 accept negative offsets, since they may be just off
3596 and we may accept pretty large positive ones. */
3597 if (ix86_cmodel == CM_KERNEL
3599 && trunc_int_for_mode (offset, SImode) == offset)
3603 /* These conditions are similar to SYMBOL_REF ones, just the
3604 constraints for code models differ. */
3605 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3606 && offset < 1024*1024*1024
3607 && trunc_int_for_mode (offset, SImode) == offset)
3609 if (ix86_cmodel == CM_KERNEL
3611 && trunc_int_for_mode (offset, SImode) == offset)
3624 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
/* Companion to x86_64_sign_extended_value: checks whether VALUE fits in a
   32-bit immediate that is ZERO-extended to 64 bits (e.g. for movl).
   NOTE(review): case labels and some returns are missing from this extract;
   code lines kept byte-identical.  */
3626 x86_64_zero_extended_value (value)
3629 switch (GET_CODE (value))
3632 if (HOST_BITS_PER_WIDE_INT == 32)
3633 return (GET_MODE (value) == VOIDmode
3634 && !CONST_DOUBLE_HIGH (value));
3638 if (HOST_BITS_PER_WIDE_INT == 32)
3639 return INTVAL (value) >= 0;
/* On 64-bit hosts: fits iff the value has no bits above bit 31.  */
3641 return !(INTVAL (value) & ~(HOST_WIDE_INT)0xffffffff);
3644 /* For certain code models, the symbolic references are known to fit. */
3646 return ix86_cmodel == CM_SMALL;
3648 /* For certain code models, the code is near as well. */
3650 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3652 /* We also may accept the offsetted memory references in certain special
3655 if (GET_CODE (XEXP (value, 0)) == PLUS)
3657 rtx op1 = XEXP (XEXP (value, 0), 0);
3658 rtx op2 = XEXP (XEXP (value, 0), 1);
3660 if (ix86_cmodel == CM_LARGE)
3662 switch (GET_CODE (op1))
3666 /* For small code model we may accept pretty large positive
3667 offsets, since one bit is available for free. Negative
3668 offsets are limited by the size of NULL pointer area
3669 specified by the ABI. */
3670 if (ix86_cmodel == CM_SMALL
3671 && GET_CODE (op2) == CONST_INT
3672 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3673 && (trunc_int_for_mode (INTVAL (op2), SImode)
3676 /* ??? For the kernel, we may accept adjustment of
3677 -0x10000000, since we know that it will just convert
3678 negative address space to positive, but perhaps this
3679 is not worthwhile. */
3682 /* These conditions are similar to SYMBOL_REF ones, just the
3683 constraints for code models differ. */
3684 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3685 && GET_CODE (op2) == CONST_INT
3686 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3687 && (trunc_int_for_mode (INTVAL (op2), SImode)
3701 /* Value should be nonzero if functions must have frame pointers.
3702 Zero means the frame pointer need not be set up (and parms may
3703 be accessed via the stack pointer) in functions that seem suitable. */
/* NOTE(review): return statements between the conditions are missing from
   this extract; code lines kept byte-identical.  */
3706 ix86_frame_pointer_required ()
3708 /* If we accessed previous frames, then the generated code expects
3709 to be able to access the saved ebp value in our frame. */
3710 if (cfun->machine->accesses_prev_frame)
3713 /* Several x86 os'es need a frame pointer for other reasons,
3714 usually pertaining to setjmp. */
3715 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3718 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3719 the frame pointer by default. Turn it back on now if we've not
3720 got a leaf function. */
3721 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
3727 /* Record that the current function accesses previous call frames. */
/* Sets the per-function flag consulted by ix86_frame_pointer_required so a
   frame pointer is forced whenever outer frames are addressed.  */
3730 ix86_setup_frame_addresses ()
3732 cfun->machine->accesses_prev_frame = 1;
/* Buffer for the internal label of the deep-branch-prediction PIC thunk;
   empty ([0] == 0) until load_pic_register generates the label.  */
3735 static char pic_label_name[32];
3737 /* This function generates code for -fpic that loads %ebx with
3738 the return address of the caller and then returns. */
/* Emitted at end-of-assembly: writes the get-PC thunk (mov (%esp),%reg; ret)
   referenced via pic_label_name, but only if load_pic_register ever created
   the label under TARGET_DEEP_BRANCH_PREDICTION.
   NOTE(review): extraction gaps (locals, braces, section-switch fallback);
   code lines kept byte-identical.  */
3741 ix86_asm_file_end (file)
3746 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
3749 /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related
3750 to updating relocations to a section being discarded such that this
3751 doesn't work. Ought to detect this at configure time. */
3753 /* The trick here is to create a linkonce section containing the
3754 pic label thunk, but to refer to it with an internal label.
3755 Because the label is internal, we don't have inter-dso name
3756 binding issues on hosts that don't support ".hidden".
3758 In order to use these macros, however, we must create a fake
3760 if (targetm.have_named_sections)
3762 tree decl = build_decl (FUNCTION_DECL,
3763 get_identifier ("i686.get_pc_thunk"),
3765 DECL_ONE_ONLY (decl) = 1;
3766 UNIQUE_SECTION (decl, 0);
3767 named_section (decl, NULL);
3774 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
3775 internal (non-global) label that's being emitted, it didn't make
3776 sense to have .type information for local labels. This caused
3777 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
3778 me debug info for a label that you're declaring non-global?) this
3779 was changed to call ASM_OUTPUT_LABEL() instead. */
3781 ASM_OUTPUT_LABEL (file, pic_label_name);
/* Thunk body: copy the return address at (%esp) into the PIC register,
   then return — the caller ends up with its own PC in that register.  */
3783 xops[0] = pic_offset_table_rtx;
3784 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3785 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3786 output_asm_insn ("ret", xops);
/* Emit prologue code that loads the PIC register with the address of
   _GLOBAL_OFFSET_TABLE_.  With deep branch prediction a call to the shared
   get-PC thunk (see ix86_asm_file_end) is used; otherwise a call/pop pair.
   NOTE(review): local declarations are missing from this extract.  */
3790 load_pic_register ()
3797 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3799 if (TARGET_DEEP_BRANCH_PREDICTION)
3801 if (! pic_label_name[0])
3802 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
3803 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
3807 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
3810 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
/* Without the thunk, the PC pushed by the call must be popped here.  */
3812 if (! TARGET_DEEP_BRANCH_PREDICTION)
3813 emit_insn (gen_popsi1 (pic_offset_table_rtx));
3815 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
3818 /* Generate an "push" pattern for input ARG. */
/* Builds (set (mem (pre_dec sp)) arg) — the RTL form of a stack push.
   NOTE(review): signature and parameter lines are missing from this
   extract; code lines kept byte-identical.  */
3824 return gen_rtx_SET (VOIDmode,
3826 gen_rtx_PRE_DEC (Pmode,
3827 stack_pointer_rtx)),
3831 /* Return 1 if we need to save REGNO. */
3831 /* Return 1 if we need to save REGNO. */
3833 ix86_save_reg (regno, maybe_eh_return)
3835 int maybe_eh_return;
3839 && regno == PIC_OFFSET_TABLE_REGNUM
3840 && (current_function_uses_pic_offset_table
3841 || current_function_uses_const_pool
3842 || current_function_calls_eh_return))
3845 if (current_function_calls_eh_return && maybe_eh_return)
3850 unsigned test = EH_RETURN_DATA_REGNO(i);
3851 if (test == INVALID_REGNUM)
3853 if (test == (unsigned) regno)
3858 return (regs_ever_live[regno]
3859 && !call_used_regs[regno]
3860 && !fixed_regs[regno]
3861 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
3864 /* Return number of registers to be saved on the stack. */
/* NOTE(review): the function signature and counter setup are missing from
   this extract — presumably this is ix86_nsaved_regs counting ix86_save_reg
   hits; confirm against the full source.  */
3872 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
3873 if (ix86_save_reg (regno, true))
3878 /* Return the offset between two registers, one to be eliminated, and the other
3879 its replacement, at the start of a routine. */
/* Offsets are read straight out of the computed frame layout; unexpected
   FROM/TO combinations fall through (aborts appear to be on lines missing
   from this extract).  */
3882 ix86_initial_elimination_offset (from, to)
3886 struct ix86_frame frame;
3887 ix86_compute_frame_layout (&frame);
3889 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3890 return frame.hard_frame_pointer_offset;
3891 else if (from == FRAME_POINTER_REGNUM
3892 && to == HARD_FRAME_POINTER_REGNUM)
3893 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
3896 if (to != STACK_POINTER_REGNUM)
3898 else if (from == ARG_POINTER_REGNUM)
3899 return frame.stack_pointer_offset;
3900 else if (from != FRAME_POINTER_REGNUM)
3903 return frame.stack_pointer_offset - frame.frame_pointer_offset;
3907 /* Fill structure ix86_frame about frame of currently computed function. */
/* Computes saved-register count, padding, va_arg area, red zone, and the
   offsets of the frame/hard-frame/stack pointers, all relative to the frame
   top.  NOTE(review): several lines (locals such as `offset', abort calls,
   else branches, and the `#if 0'/debug guards around the fprintf dump) are
   missing from this extract; code lines kept byte-identical.  */
3910 ix86_compute_frame_layout (frame)
3911 struct ix86_frame *frame;
3913 HOST_WIDE_INT total_size;
3914 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
3916 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
3917 HOST_WIDE_INT size = get_frame_size ();
3919 frame->nregs = ix86_nsaved_regs ();
3922 /* Skip return value and save base pointer. */
3923 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
3925 frame->hard_frame_pointer_offset = offset;
3927 /* Do some sanity checking of stack_alignment_needed and
3928 preferred_alignment, since i386 port is the only using those features
3929 that may break easily. */
3931 if (size && !stack_alignment_needed)
3933 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
3935 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3937 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3940 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
3941 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
3943 /* Register save area */
3944 offset += frame->nregs * UNITS_PER_WORD;
3947 if (ix86_save_varrargs_registers)
3949 offset += X86_64_VARARGS_SIZE;
3950 frame->va_arg_size = X86_64_VARARGS_SIZE;
3953 frame->va_arg_size = 0;
3955 /* Align start of frame for local function. */
3956 frame->padding1 = ((offset + stack_alignment_needed - 1)
3957 & -stack_alignment_needed) - offset;
3959 offset += frame->padding1;
3961 /* Frame pointer points here. */
3962 frame->frame_pointer_offset = offset;
3966 /* Add outgoing arguments area. */
3967 if (ACCUMULATE_OUTGOING_ARGS)
3969 offset += current_function_outgoing_args_size;
3970 frame->outgoing_arguments_size = current_function_outgoing_args_size;
3973 frame->outgoing_arguments_size = 0;
3975 /* Align stack boundary. */
3976 frame->padding2 = ((offset + preferred_alignment - 1)
3977 & -preferred_alignment) - offset;
3979 offset += frame->padding2;
3981 /* We've reached end of stack frame. */
3982 frame->stack_pointer_offset = offset;
3984 /* Size prologue needs to allocate. */
3985 frame->to_allocate =
3986 (size + frame->padding1 + frame->padding2
3987 + frame->outgoing_arguments_size + frame->va_arg_size);
/* x86-64 leaf functions with an unchanging SP may place locals in the
   128-byte red zone below SP instead of allocating them.  */
3989 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
3990 && current_function_is_leaf)
3992 frame->red_zone_size = frame->to_allocate;
3993 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
3994 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
3997 frame->red_zone_size = 0;
3998 frame->to_allocate -= frame->red_zone_size;
3999 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout (guard lines missing from extract).  */
4001 fprintf (stderr, "nregs: %i\n", frame->nregs);
4002 fprintf (stderr, "size: %i\n", size);
4003 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4004 fprintf (stderr, "padding1: %i\n", frame->padding1);
4005 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4006 fprintf (stderr, "padding2: %i\n", frame->padding2);
4007 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4008 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4009 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4010 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4011 frame->hard_frame_pointer_offset);
4012 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4016 /* Emit code to save registers in the prologue. */
/* Pushes every register ix86_save_reg selects, highest regno first, marking
   each insn frame-related for unwind info.  */
4019 ix86_emit_save_regs ()
4024 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4025 if (ix86_save_reg (regno, true))
4027 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4028 RTX_FRAME_RELATED_P (insn) = 1;
4032 /* Emit code to save registers using MOV insns. First register
4033 is restored from POINTER + OFFSET. */
/* Alternative to push-based saving: stores each selected register to
   successive words at POINTER + OFFSET, ascending regno order.  */
4035 ix86_emit_save_regs_using_mov (pointer, offset)
4037 HOST_WIDE_INT offset;
4042 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4043 if (ix86_save_reg (regno, true))
4045 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4047 gen_rtx_REG (Pmode, regno));
4048 RTX_FRAME_RELATED_P (insn) = 1;
4049 offset += UNITS_PER_WORD;
4053 /* Expand the prologue into a bunch of separate insns. */
/* Emits frame-pointer setup, register saves (push or mov form), stack
   allocation (direct subtract or _alloca probe for large frames), the PIC
   register load, and a scheduling barrier for profiling.
   NOTE(review): extraction gaps (locals, braces, some else branches);
   code lines kept byte-identical.  */
4056 ix86_expand_prologue ()
4059 int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
4060 || current_function_uses_const_pool)
4062 struct ix86_frame frame;
4064 HOST_WIDE_INT allocate;
4068 use_fast_prologue_epilogue
4069 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4070 if (TARGET_PROLOGUE_USING_MOVE)
4071 use_mov = use_fast_prologue_epilogue;
4073 ix86_compute_frame_layout (&frame);
4075 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4076 slower on all targets. Also sdb doesn't like it. */
4078 if (frame_pointer_needed)
4080 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4081 RTX_FRAME_RELATED_P (insn) = 1;
4083 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4084 RTX_FRAME_RELATED_P (insn) = 1;
4087 allocate = frame.to_allocate;
4088 /* In case we are dealing only with single register and empty frame,
4089 push is equivalent of the mov+add sequence. */
4090 if (allocate == 0 && frame.nregs <= 1)
4094 ix86_emit_save_regs ();
4096 allocate += frame.nregs * UNITS_PER_WORD;
4100 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4102 insn = emit_insn (gen_pro_epilogue_adjust_stack
4103 (stack_pointer_rtx, stack_pointer_rtx,
4104 GEN_INT (-allocate)));
4105 RTX_FRAME_RELATED_P (insn) = 1;
4109 /* ??? Is this only valid for Win32? */
/* Large allocation with stack probing: pass the size in %eax and call
   _alloca so each page is touched.  */
4116 arg0 = gen_rtx_REG (SImode, 0);
4117 emit_move_insn (arg0, GEN_INT (allocate));
4119 sym = gen_rtx_MEM (FUNCTION_MODE,
4120 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4121 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4123 CALL_INSN_FUNCTION_USAGE (insn)
4124 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4125 CALL_INSN_FUNCTION_USAGE (insn));
4129 if (!frame_pointer_needed || !frame.to_allocate)
4130 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4132 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4133 -frame.nregs * UNITS_PER_WORD);
4136 #ifdef SUBTARGET_PROLOGUE
4141 load_pic_register ();
4143 /* If we are profiling, make sure no instructions are scheduled before
4144 the call to mcount. However, if -fpic, the above call will have
4146 if (profile_flag && ! pic_reg_used)
4147 emit_insn (gen_blockage ());
4150 /* Emit code to restore saved registers using MOV insns. First register
4151 is restored from POINTER + OFFSET. */
/* Mirror of ix86_emit_save_regs_using_mov; MAYBE_EH_RETURN is forwarded to
   ix86_save_reg so the EH data registers can be skipped on the normal path.  */
4153 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4156 int maybe_eh_return;
4160 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4161 if (ix86_save_reg (regno, maybe_eh_return))
4163 emit_move_insn (gen_rtx_REG (Pmode, regno),
4164 adjust_address (gen_rtx_MEM (Pmode, pointer),
4166 offset += UNITS_PER_WORD;
4170 /* Restore function stack, frame, and registers. */
/* STYLE selects the epilogue flavor (2 appears to be the eh_return path;
   sibcall epilogues omit the final return — see comment at 4306).
   NOTE(review): extraction gaps (locals, braces, some conditions); code
   lines kept byte-identical.  */
4173 ix86_expand_epilogue (style)
4177 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4178 struct ix86_frame frame;
4179 HOST_WIDE_INT offset;
4181 ix86_compute_frame_layout (&frame);
4183 /* Calculate start of saved registers relative to ebp. Special care
4184 must be taken for the normal return case of a function using
4185 eh_return: the eax and edx registers are marked as saved, but not
4186 restored along this path. */
4187 offset = frame.nregs;
4188 if (current_function_calls_eh_return && style != 2)
4190 offset *= -UNITS_PER_WORD;
4192 /* If we're only restoring one register and sp is not valid then
4193 using a move instruction to restore the register since it's
4194 less work than reloading sp and popping the register.
4196 The default code result in stack adjustment using add/lea instruction,
4197 while this code results in LEAVE instruction (or discrete equivalent),
4198 so it is profitable in some other cases as well. Especially when there
4199 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4200 and there is exactly one register to pop. This heruistic may need some
4201 tuning in future. */
4202 if ((!sp_valid && frame.nregs <= 1)
4203 || (TARGET_EPILOGUE_USING_MOVE
4204 && use_fast_prologue_epilogue
4205 && (frame.nregs > 1 || frame.to_allocate))
4206 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4207 || (frame_pointer_needed && TARGET_USE_LEAVE
4208 && use_fast_prologue_epilogue && frame.nregs == 1)
4209 || current_function_calls_eh_return)
4211 /* Restore registers. We can use ebp or esp to address the memory
4212 locations. If both are available, default to ebp, since offsets
4213 are known to be small. Only exception is esp pointing directly to the
4214 end of block of saved registers, where we may simplify addressing
4217 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4218 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4219 frame.to_allocate, style == 2);
4221 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4222 offset, style == 2);
4224 /* eh_return epilogues need %ecx added to the stack pointer. */
4227 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4229 if (frame_pointer_needed)
4231 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4232 tmp = plus_constant (tmp, UNITS_PER_WORD);
4233 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4235 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4236 emit_move_insn (hard_frame_pointer_rtx, tmp);
4238 emit_insn (gen_pro_epilogue_adjust_stack
4239 (stack_pointer_rtx, sa, const0_rtx));
4243 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4244 tmp = plus_constant (tmp, (frame.to_allocate
4245 + frame.nregs * UNITS_PER_WORD));
4246 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4249 else if (!frame_pointer_needed)
4250 emit_insn (gen_pro_epilogue_adjust_stack
4251 (stack_pointer_rtx, stack_pointer_rtx,
4252 GEN_INT (frame.to_allocate
4253 + frame.nregs * UNITS_PER_WORD)));
4254 /* If not an i386, mov & pop is faster than "leave". */
4255 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4256 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4259 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4260 hard_frame_pointer_rtx,
4263 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4265 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
/* Pop-based path: deallocate the frame first, then pop each saved
   register, then tear down the frame pointer.  */
4270 /* First step is to deallocate the stack frame so that we can
4271 pop the registers. */
4274 if (!frame_pointer_needed)
4276 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4277 hard_frame_pointer_rtx,
4280 else if (frame.to_allocate)
4281 emit_insn (gen_pro_epilogue_adjust_stack
4282 (stack_pointer_rtx, stack_pointer_rtx,
4283 GEN_INT (frame.to_allocate)));
4285 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4286 if (ix86_save_reg (regno, false))
4289 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4291 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4293 if (frame_pointer_needed)
4295 /* Leave results in shorter dependency chains on CPUs that are
4296 able to grok it fast. */
4297 if (TARGET_USE_LEAVE)
4298 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4299 else if (TARGET_64BIT)
4300 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4302 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4306 /* Sibcall epilogues don't want a return instruction. */
4310 if (current_function_pops_args && current_function_args_size)
4312 rtx popc = GEN_INT (current_function_pops_args);
4314 /* i386 can only pop 64K bytes. If asked to pop more, pop
4315 return address, do explicit add, and jump indirectly to the
4318 if (current_function_pops_args >= 65536)
4320 rtx ecx = gen_rtx_REG (SImode, 2);
4322 /* There are is no "pascal" calling convention in 64bit ABI. */
4326 emit_insn (gen_popsi1 (ecx));
4327 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4328 emit_jump_insn (gen_return_indirect_internal (ecx));
4331 emit_jump_insn (gen_return_pop_internal (popc));
4334 emit_jump_insn (gen_return_internal ());
4337 /* Extract the parts of an RTL expression that is a valid memory address
4338 for an instruction. Return false if the structure of the address is
/* Decomposes ADDR into base + index*scale + disp, writing the parts into
   OUT; handles the PLUS/MULT/ASHIFT shapes lea and mem operands produce.
   NOTE(review): extraction gaps (returns, braces, OUT assignment at end);
   code lines kept byte-identical.  */
4342 ix86_decompose_address (addr, out)
4344 struct ix86_address *out;
4346 rtx base = NULL_RTX;
4347 rtx index = NULL_RTX;
4348 rtx disp = NULL_RTX;
4349 HOST_WIDE_INT scale = 1;
4350 rtx scale_rtx = NULL_RTX;
4352 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4354 else if (GET_CODE (addr) == PLUS)
4356 rtx op0 = XEXP (addr, 0);
4357 rtx op1 = XEXP (addr, 1);
4358 enum rtx_code code0 = GET_CODE (op0);
4359 enum rtx_code code1 = GET_CODE (op1);
4361 if (code0 == REG || code0 == SUBREG)
4363 if (code1 == REG || code1 == SUBREG)
4364 index = op0, base = op1; /* index + base */
4366 base = op0, disp = op1; /* base + displacement */
4368 else if (code0 == MULT)
4370 index = XEXP (op0, 0);
4371 scale_rtx = XEXP (op0, 1);
4372 if (code1 == REG || code1 == SUBREG)
4373 base = op1; /* index*scale + base */
4375 disp = op1; /* index*scale + disp */
4377 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4379 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4380 scale_rtx = XEXP (XEXP (op0, 0), 1);
4381 base = XEXP (op0, 1);
4384 else if (code0 == PLUS)
4386 index = XEXP (op0, 0); /* index + base + disp */
4387 base = XEXP (op0, 1);
4393 else if (GET_CODE (addr) == MULT)
4395 index = XEXP (addr, 0); /* index*scale */
4396 scale_rtx = XEXP (addr, 1);
4398 else if (GET_CODE (addr) == ASHIFT)
4402 /* We're called for lea too, which implements ashift on occasion. */
4403 index = XEXP (addr, 0);
4404 tmp = XEXP (addr, 1);
4405 if (GET_CODE (tmp) != CONST_INT)
/* ASHIFT: the shift count becomes log2 of the scale; counts above 3
   (scale > 8) cannot be encoded.  */
4407 scale = INTVAL (tmp);
4408 if ((unsigned HOST_WIDE_INT) scale > 3)
4413 disp = addr; /* displacement */
4415 /* Extract the integral value of scale. */
4418 if (GET_CODE (scale_rtx) != CONST_INT)
4420 scale = INTVAL (scale_rtx);
4423 /* Allow arg pointer and stack pointer as index if there is not scaling */
4424 if (base && index && scale == 1
4425 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4426 || index == stack_pointer_rtx))
4433 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4434 if ((base == hard_frame_pointer_rtx
4435 || base == frame_pointer_rtx
4436 || base == arg_pointer_rtx) && !disp)
4439 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4440 Avoid this by transforming to [%esi+0]. */
4441 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4442 && base && !index && !disp
4444 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4447 /* Special case: encode reg+reg instead of reg*2. */
4448 if (!base && index && scale && scale == 2)
4449 base = index, scale = 1;
4451 /* Special case: scaling cannot be encoded without base or displacement. */
4452 if (!base && !disp && index && scale != 1)
4463 /* Return cost of the memory address x.
4464 For i386, it is better to use a complex address than let gcc copy
4465 the address into a reg and make a new pseudo. But not if the address
4466 requires to two regs - that would mean more pseudos with longer
/* NOTE(review): the `cost' local, its updates, and the final return are on
   lines missing from this extract; code lines kept byte-identical.  */
4469 ix86_address_cost (x)
4472 struct ix86_address parts;
4475 if (!ix86_decompose_address (x, &parts))
4478 /* More complex memory references are better. */
4479 if (parts.disp && parts.disp != const0_rtx)
4482 /* Attempt to minimize number of registers in the address. */
/* Hard registers are cheaper than pseudos here (pseudos >=
   FIRST_PSEUDO_REGISTER may require a reload).  */
4484 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4486 && (!REG_P (parts.index)
4487 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4491 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4493 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4494 && parts.base != parts.index)
4497 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
4498 since it's predecode logic can't detect the length of instructions
4499 and it degenerates to vector decoded. Increase cost of such
4500 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4501 to split such addresses or even refuse such addresses at all.
4503 Following addressing modes are affected:
4508 The first and last case may be avoidable by explicitly coding the zero in
4509 memory address, but I don't have AMD-K6 machine handy to check this
4513 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4514 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4515 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4521 /* If X is a machine specific address (i.e. a symbol or label being
4522 referenced as a displacement from the GOT implemented using an
4523 UNSPEC), then return the base term. Otherwise return X. */
/* Two recognizers: a 64-bit form (CONST wrapping UNSPEC 15) and a 32-bit
   PIC form (pic reg + CONST(UNSPEC 7 @GOTOFF [+ offset])); both unwrap to
   the underlying SYMBOL_REF/LABEL_REF.
   NOTE(review): the TARGET_64BIT guard and returns are on missing lines;
   code kept byte-identical.  */
4526 ix86_find_base_term (x)
4533 if (GET_CODE (x) != CONST)
4536 if (GET_CODE (term) == PLUS
4537 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4538 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4539 term = XEXP (term, 0);
4540 if (GET_CODE (term) != UNSPEC
4541 || XVECLEN (term, 0) != 1
4542 || XINT (term, 1) != 15)
4545 term = XVECEXP (term, 0, 0);
4547 if (GET_CODE (term) != SYMBOL_REF
4548 && GET_CODE (term) != LABEL_REF)
4554 if (GET_CODE (x) != PLUS
4555 || XEXP (x, 0) != pic_offset_table_rtx
4556 || GET_CODE (XEXP (x, 1)) != CONST)
4559 term = XEXP (XEXP (x, 1), 0);
4561 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4562 term = XEXP (term, 0);
4564 if (GET_CODE (term) != UNSPEC
4565 || XVECLEN (term, 0) != 1
4566 || XINT (term, 1) != 7)
4569 term = XVECEXP (term, 0, 0);
4571 if (GET_CODE (term) != SYMBOL_REF
4572 && GET_CODE (term) != LABEL_REF)
4578 /* Determine if a given CONST RTX is a valid memory displacement
/* Under PIC, a displacement must be a recognized GOT construct: on 64-bit
   an UNSPEC 15 wrapping a symbol/label (or a direct local symbol); on
   32-bit an UNSPEC 6 (@GOT) or 7 (@GOTOFF), optionally plus a CONST_INT.
   NOTE(review): TARGET_64BIT guards and returns sit on missing lines;
   code kept byte-identical.  */
4582 legitimate_pic_address_disp_p (disp)
4585 /* In 64bit mode we can allow direct addresses of symbols and labels
4586 when they are not dynamic symbols. */
4590 if (GET_CODE (disp) == CONST)
4592 /* ??? Handle PIC code models */
4593 if (GET_CODE (x) == PLUS
4594 && (GET_CODE (XEXP (x, 1)) == CONST_INT
4595 && ix86_cmodel == CM_SMALL_PIC
4596 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4597 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4599 if (local_symbolic_operand (x, Pmode))
4602 if (GET_CODE (disp) != CONST)
4604 disp = XEXP (disp, 0);
4608 /* We are unsafe to allow PLUS expressions. This limit allowed distance
4609 of GOT tables. We should not need these anyway. */
4610 if (GET_CODE (disp) != UNSPEC
4611 || XVECLEN (disp, 0) != 1
4612 || XINT (disp, 1) != 15)
4615 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4616 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4621 if (GET_CODE (disp) == PLUS)
4623 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4625 disp = XEXP (disp, 0);
4628 if (GET_CODE (disp) != UNSPEC
4629 || XVECLEN (disp, 0) != 1)
4632 /* Must be @GOT or @GOTOFF. */
4633 switch (XINT (disp, 1))
4636 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
4638 case 7: /* @GOTOFF */
4639 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4645 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
4646 memory address for an instruction. The MODE argument is the machine mode
4647 for the MEM expression that wants to use this address.
4649 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
4650 convert common non-canonical forms to canonical form so that they will
/* Validates each decomposed part (base, index, scale, disp) in turn;
   `reason'/`reason_rtx' carry the diagnostic printed on the shared error
   exit when TARGET_DEBUG_ADDR.
   NOTE(review): the success/error labels, gotos, and several guards are on
   lines missing from this extract; code kept byte-identical.  */
4654 legitimate_address_p (mode, addr, strict)
4655 enum machine_mode mode;
4659 struct ix86_address parts;
4660 rtx base, index, disp;
4661 HOST_WIDE_INT scale;
4662 const char *reason = NULL;
4663 rtx reason_rtx = NULL_RTX;
4665 if (TARGET_DEBUG_ADDR)
4668 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
4669 GET_MODE_NAME (mode), strict);
4673 if (! ix86_decompose_address (addr, &parts))
4675 reason = "decomposition failed";
4680 index = parts.index;
4682 scale = parts.scale;
4684 /* Validate base register.
4686 Don't allow SUBREG's here, it can lead to spill failures when the base
4687 is one word out of a two word structure, which is represented internally
4694 if (GET_CODE (base) != REG)
4696 reason = "base is not a register";
4700 if (GET_MODE (base) != Pmode)
4702 reason = "base is not in Pmode";
4706 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
4707 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
4709 reason = "base is not valid";
4714 /* Validate index register.
4716 Don't allow SUBREG's here, it can lead to spill failures when the index
4717 is one word out of a two word structure, which is represented internally
4724 if (GET_CODE (index) != REG)
4726 reason = "index is not a register";
4730 if (GET_MODE (index) != Pmode)
4732 reason = "index is not in Pmode";
4736 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
4737 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
4739 reason = "index is not valid";
4744 /* Validate scale factor. */
4747 reason_rtx = GEN_INT (scale);
4750 reason = "scale without index";
4754 if (scale != 2 && scale != 4 && scale != 8)
4756 reason = "scale is not a valid multiplier";
4761 /* Validate displacement. */
4766 if (!CONSTANT_ADDRESS_P (disp))
4768 reason = "displacement is not constant";
4774 if (!x86_64_sign_extended_value (disp))
4776 reason = "displacement is out of range";
4782 if (GET_CODE (disp) == CONST_DOUBLE)
4784 reason = "displacement is a const_double";
4789 if (flag_pic && SYMBOLIC_CONST (disp))
4791 if (TARGET_64BIT && (index || base))
4793 reason = "non-constant pic memory reference";
4796 if (! legitimate_pic_address_disp_p (disp))
4798 reason = "displacement is an invalid pic construct";
4802 /* This code used to verify that a symbolic pic displacement
4803 includes the pic_offset_table_rtx register.
4805 While this is good idea, unfortunately these constructs may
4806 be created by "adds using lea" optimization for incorrect
4815 This code is nonsensical, but results in addressing
4816 GOT table with pic_offset_table_rtx base. We can't
4817 just refuse it easily, since it gets matched by
4818 "addsi3" pattern, that later gets split to lea in the
4819 case output register differs from input. While this
4820 can be handled by separate addsi pattern for this case
4821 that never results in lea, this seems to be easier and
4822 correct fix for crash to disable this test. */
4824 else if (HALF_PIC_P ())
4826 if (! HALF_PIC_ADDRESS_P (disp)
4827 || (base != NULL_RTX || index != NULL_RTX))
4829 reason = "displacement is an invalid half-pic reference";
4835 /* Everything looks valid. */
4836 if (TARGET_DEBUG_ADDR)
4837 fprintf (stderr, "Success.\n");
4841 if (TARGET_DEBUG_ADDR)
4843 fprintf (stderr, "Error: %s\n", reason);
4844 debug_rtx (reason_rtx);
4849 /* Return an unique alias set for the GOT. */
/* Lazily allocates the alias set on first call (the `if (set == -1)' guard
   appears to be on a line missing from this extract) and caches it in a
   function-local static for subsequent calls.  */
4851 static HOST_WIDE_INT
4852 ix86_GOT_alias_set ()
4854 static HOST_WIDE_INT set = -1;
4856 set = new_alias_set ();
4860 /* Return a legitimate reference for ORIG (an address) using the
4861 register REG. If REG is 0, a new pseudo is generated.
4863 There are two types of references that must be handled:
4865 1. Global data references must load the address from the GOT, via
4866 the PIC reg. An insn is emitted to do this load, and the reg is
4869 2. Static data references, constant pool addresses, and code labels
4870 compute the address as an offset from the GOT, whose base is in
4871 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
4872 differentiate them from global data objects. The returned
4873 address is the PIC reg + an unspec constant.
4875 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
4876 reg also appears in the address. */
/* NOTE(review): many source lines are elided in this view (braces,
   returns, TARGET_64BIT guards); comments below on elided behavior are
   inferences to be confirmed against the full source.  */
4879 legitimize_pic_address (orig, reg)
4887 if (local_symbolic_operand (addr, Pmode))
4889 /* In 64bit mode we can address such objects directly. */
4894 /* This symbol may be referenced via a displacement from the PIC
4895 base address (@GOTOFF). */
4897 current_function_uses_pic_offset_table = 1;
/* UNSPEC number 7 is rendered as "@GOTOFF" by output_pic_addr_const.  */
4898 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
4899 new = gen_rtx_CONST (Pmode, new);
4900 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4904 emit_move_insn (reg, new);
4909 else if (GET_CODE (addr) == SYMBOL_REF)
4913 current_function_uses_pic_offset_table = 1;
/* UNSPEC number 15 is rendered as "@GOTPCREL(%RIP)" by
   output_pic_addr_const, i.e. a RIP-relative GOT load; presumably this
   arm is guarded by TARGET_64BIT on an elided line -- confirm.  */
4914 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 15);
4915 new = gen_rtx_CONST (Pmode, new);
4916 new = gen_rtx_MEM (Pmode, new);
/* The GOT slot does not change after load; mark it and give it the
   dedicated GOT alias set.  */
4917 RTX_UNCHANGING_P (new) = 1;
4918 set_mem_alias_set (new, ix86_GOT_alias_set ());
4921 reg = gen_reg_rtx (Pmode);
4922 /* Use directly gen_movsi, otherwise the address is loaded
4923 into register for CSE. We don't want to CSE this addresses,
4924 instead we CSE addresses from the GOT table, so skip this. */
4925 emit_insn (gen_movsi (reg, new));
4930 /* This symbol must be referenced via a load from the
4931 Global Offset Table (@GOT). */
4933 current_function_uses_pic_offset_table = 1;
/* UNSPEC number 6 is rendered as "@GOT" by output_pic_addr_const.  */
4934 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
4935 new = gen_rtx_CONST (Pmode, new);
4936 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4937 new = gen_rtx_MEM (Pmode, new);
4938 RTX_UNCHANGING_P (new) = 1;
4939 set_mem_alias_set (new, ix86_GOT_alias_set ());
4942 reg = gen_reg_rtx (Pmode);
4943 emit_move_insn (reg, new);
/* Fallback path: strip a CONST wrapper and handle PLUS combinations.  */
4949 if (GET_CODE (addr) == CONST)
4951 addr = XEXP (addr, 0);
4952 if (GET_CODE (addr) == UNSPEC)
4954 /* Check that the unspec is one of the ones we generate? */
4956 else if (GET_CODE (addr) != PLUS)
4959 if (GET_CODE (addr) == PLUS)
4961 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
4963 /* Check first to see if this is a constant offset from a @GOTOFF
4964 symbol reference. */
4965 if (local_symbolic_operand (op0, Pmode)
4966 && GET_CODE (op1) == CONST_INT)
4970 current_function_uses_pic_offset_table = 1;
4971 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
4972 new = gen_rtx_PLUS (Pmode, new, op1);
4973 new = gen_rtx_CONST (Pmode, new);
4974 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4978 emit_move_insn (reg, new);
4984 /* ??? We need to limit offsets here. */
/* General PLUS: legitimize each half recursively, then recombine,
   folding any constant term into the base with plus_constant.  */
4989 base = legitimize_pic_address (XEXP (addr, 0), reg);
4990 new = legitimize_pic_address (XEXP (addr, 1),
4991 base == reg ? NULL_RTX : reg);
4993 if (GET_CODE (new) == CONST_INT)
4994 new = plus_constant (base, INTVAL (new));
4997 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
4999 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5000 new = XEXP (new, 1);
5002 new = gen_rtx_PLUS (Pmode, base, new);
5010 /* Try machine-dependent ways of modifying an illegitimate address
5011 to be legitimate. If we find one, return the new, valid address.
5012 This macro is used in only one place: `memory_address' in explow.c.
5014 OLDX is the address as it was before break_out_memory_refs was called.
5015 In some cases it is useful to look at this to decide what needs to be done.
5017 MODE and WIN are passed so that this macro can use
5018 GO_IF_LEGITIMATE_ADDRESS.
5020 It is always safe for this macro to do nothing. It exists to recognize
5021 opportunities to optimize the output.
5023 For the 80386, we handle X+REG by loading X into a register R and
5024 using R+REG. R will go in a general reg and indexing will be used.
5025 However, if REG is a broken-out memory address or multiplication,
5026 nothing needs to be done because REG can certainly go in a general reg.
5028 When -fpic is used, special handling is needed for symbolic references.
5029 See comments by legitimize_pic_address in i386.c for details. */
/* NOTE(review): several source lines (braces, `changed = 1' markers,
   returns) are elided in this view.  */
5032 legitimize_address (x, oldx, mode)
5034 register rtx oldx ATTRIBUTE_UNUSED;
5035 enum machine_mode mode;
5040 if (TARGET_DEBUG_ADDR)
5042 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5043 GET_MODE_NAME (mode));
/* Symbolic addresses under -fpic are handled entirely by the PIC
   legitimizer.  */
5047 if (flag_pic && SYMBOLIC_CONST (x))
5048 return legitimize_pic_address (x, 0);
5050 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5051 if (GET_CODE (x) == ASHIFT
5052 && GET_CODE (XEXP (x, 1)) == CONST_INT
5053 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5056 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5057 GEN_INT (1 << log));
5060 if (GET_CODE (x) == PLUS)
5062 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5064 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5065 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5066 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5069 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5070 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5071 GEN_INT (1 << log));
5074 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5075 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5076 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5079 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5080 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5081 GEN_INT (1 << log));
5084 /* Put multiply first if it isn't already. */
5085 if (GET_CODE (XEXP (x, 1)) == MULT)
5087 rtx tmp = XEXP (x, 0);
5088 XEXP (x, 0) = XEXP (x, 1);
5093 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5094 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5095 created by virtual register instantiation, register elimination, and
5096 similar optimizations. */
5097 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5100 x = gen_rtx_PLUS (Pmode,
5101 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5102 XEXP (XEXP (x, 1), 0)),
5103 XEXP (XEXP (x, 1), 1));
5107 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5108 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5109 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5110 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5111 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5112 && CONSTANT_P (XEXP (x, 1)))
5115 rtx other = NULL_RTX;
/* Exactly one of the two constants must be a CONST_INT; fold it into
   `other' via plus_constant below.  */
5117 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5119 constant = XEXP (x, 1);
5120 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5122 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5124 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5125 other = XEXP (x, 1);
5133 x = gen_rtx_PLUS (Pmode,
5134 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5135 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5136 plus_constant (other, INTVAL (constant)));
/* Stop as soon as a transformation yields a legitimate address.  */
5140 if (changed && legitimate_address_p (mode, x, FALSE))
5143 if (GET_CODE (XEXP (x, 0)) == MULT)
5146 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5149 if (GET_CODE (XEXP (x, 1)) == MULT)
5152 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5156 && GET_CODE (XEXP (x, 1)) == REG
5157 && GET_CODE (XEXP (x, 0)) == REG)
5160 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5163 x = legitimize_pic_address (x, 0);
5166 if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: force the non-register half into a fresh pseudo.  */
5169 if (GET_CODE (XEXP (x, 0)) == REG)
5171 register rtx temp = gen_reg_rtx (Pmode);
5172 register rtx val = force_operand (XEXP (x, 1), temp);
5174 emit_move_insn (temp, val);
5180 else if (GET_CODE (XEXP (x, 1)) == REG)
5182 register rtx temp = gen_reg_rtx (Pmode);
5183 register rtx val = force_operand (XEXP (x, 0), temp);
5185 emit_move_insn (temp, val);
5195 /* Print an integer constant expression in assembler syntax. Addition
5196 and subtraction are the only arithmetic that may appear in these
5197 expressions. FILE is the stdio stream to write to, X is the rtx, and
5198 CODE is the operand print code from the output string. */
/* NOTE(review): the switch's case labels and break statements are on
   elided lines; groupings below are inferred from the visible bodies.  */
5201 output_pic_addr_const (file, x, code)
5208 switch (GET_CODE (x))
/* SYMBOL_REF: emit the name, plus @PLT for code 'P' on non-static
   symbols (SYMBOL_REF_FLAG clear).  */
5218 assemble_name (file, XSTR (x, 0));
5219 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
5220 fputs ("@PLT", file);
/* Code labels: emit the internal Lnn label name.  */
5227 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5228 assemble_name (asm_out_file, buf);
5232 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5236 /* This used to output parentheses around the expression,
5237 but that does not work on the 386 (either ATT or BSD assembler). */
5238 output_pic_addr_const (file, XEXP (x, 0), code);
5242 if (GET_MODE (x) == VOIDmode)
5244 /* We can use %d if the number is <32 bits and positive. */
5245 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5246 fprintf (file, "0x%lx%08lx",
5247 (unsigned long) CONST_DOUBLE_HIGH (x),
5248 (unsigned long) CONST_DOUBLE_LOW (x));
5250 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5253 /* We can't handle floating point constants;
5254 PRINT_OPERAND must handle them. */
5255 output_operand_lossage ("floating constant misused");
5259 /* Some assemblers need integer constants to appear first. */
5260 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5262 output_pic_addr_const (file, XEXP (x, 0), code);
5264 output_pic_addr_const (file, XEXP (x, 1), code);
5266 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5268 output_pic_addr_const (file, XEXP (x, 1), code);
5270 output_pic_addr_const (file, XEXP (x, 0), code);
/* MINUS: dialect 0 (AT&T) brackets with (), dialect 1 (Intel) with [].  */
5277 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
5278 output_pic_addr_const (file, XEXP (x, 0), code);
5280 output_pic_addr_const (file, XEXP (x, 1), code);
5281 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
/* UNSPEC: print the wrapped operand, then the relocation suffix keyed
   by the unspec number (6 -> @GOT, 7 -> @GOTOFF, 15 -> @GOTPCREL).  */
5285 if (XVECLEN (x, 0) != 1)
5287 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5288 switch (XINT (x, 1))
5291 fputs ("@GOT", file);
5294 fputs ("@GOTOFF", file);
5297 fputs ("@PLT", file);
5300 fputs ("@GOTPCREL(%RIP)", file);
5303 output_operand_lossage ("invalid UNSPEC as operand");
5309 output_operand_lossage ("invalid expression as operand");
5313 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5314 We need to handle our special PIC relocations. */
5317 i386_dwarf_output_addr_const (file, x)
/* Pointer-sized directive: .quad for 64-bit, .long otherwise.
   NOTE(review): the guard selecting between the two fprintf calls is on
   an elided line -- confirm against the full source.  */
5322 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
5326 fprintf (file, "%s", ASM_LONG);
/* Use the PIC-aware printer when needed so @GOTOFF etc. survive.  */
5329 output_pic_addr_const (file, x, '\0');
5331 output_addr_const (file, x);
5335 /* In the name of slightly smaller debug output, and to cater to
5336 general assembler losage, recognize PIC+GOTOFF and turn it back
5337 into a direct symbol reference. */
5340 i386_simplify_dwarf_addr (orig_x)
/* First form: (const (unspec [sym] 15)), i.e. @GOTPCREL -- strip to the
   bare symbol.  NOTE(review): the guard above this test (presumably
   TARGET_64BIT) is on an elided line.  */
5347 if (GET_CODE (x) != CONST
5348 || GET_CODE (XEXP (x, 0)) != UNSPEC
5349 || XINT (XEXP (x, 0), 1) != 15)
5351 return XVECEXP (XEXP (x, 0), 0, 0);
/* Second form: (plus pic_reg (const ...)) wrapping a @GOT (6) or
   @GOTOFF (7) unspec, optionally with an integer addend.  */
5354 if (GET_CODE (x) != PLUS
5355 || GET_CODE (XEXP (x, 0)) != REG
5356 || GET_CODE (XEXP (x, 1)) != CONST)
5359 x = XEXP (XEXP (x, 1), 0);
5360 if (GET_CODE (x) == UNSPEC
5361 && (XINT (x, 1) == 6
5362 || XINT (x, 1) == 7))
5363 return XVECEXP (x, 0, 0);
5365 if (GET_CODE (x) == PLUS
5366 && GET_CODE (XEXP (x, 0)) == UNSPEC
5367 && GET_CODE (XEXP (x, 1)) == CONST_INT
5368 && (XINT (XEXP (x, 0), 1) == 6
5369 || XINT (XEXP (x, 0), 1) == 7))
5370 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
/* Emit the instruction-suffix mnemonic for comparison CODE in condition
   mode MODE to FILE.  REVERSE inverts the condition; FP selects the
   fcmov-style spelling where it differs from the integer one.
   NOTE(review): the switch over CODE and most suffix assignments are on
   elided lines; only representative arms are visible here.  */
5376 put_condition_code (code, mode, reverse, fp, file)
5378 enum machine_mode mode;
5384 if (mode == CCFPmode || mode == CCFPUmode)
5386 enum rtx_code second_code, bypass_code;
/* FP compares must be reducible to a single integer condition here;
   multi-step (bypass/second) comparisons are not representable.  */
5387 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
5388 if (bypass_code != NIL || second_code != NIL)
5390 code = ix86_fp_compare_code_to_integer (code);
5394 code = reverse_condition (code);
5405 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
5410 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
5411 Those same assemblers have the same but opposite losage on cmov. */
5414 suffix = fp ? "nbe" : "a";
5417 if (mode == CCNOmode || mode == CCGOCmode)
5419 else if (mode == CCmode || mode == CCGCmode)
5430 if (mode == CCNOmode || mode == CCGOCmode)
5432 else if (mode == CCmode || mode == CCGCmode)
5441 suffix = fp ? "nb" : "ae";
5444 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
5454 suffix = fp ? "u" : "p";
5457 suffix = fp ? "nu" : "np";
5462 fputs (suffix, file);
/* Print register X to FILE using the size/name selected by print code
   CODE ('b' = byte, 'w' = word, 'k' = dword, 'q' = qword, 'h' = high
   byte, 'y' = "st(0)" spelling).  NOTE(review): the case labels of the
   size switch and the abort/return lines are elided in this view.  */
5466 print_reg (x, code, file)
/* These pseudo/condition registers must never reach assembly output.  */
5471 if (REGNO (x) == ARG_POINTER_REGNUM
5472 || REGNO (x) == FRAME_POINTER_REGNUM
5473 || REGNO (x) == FLAGS_REG
5474 || REGNO (x) == FPSR_REG)
/* AT&T dialect (or empty label prefix) prints a '%' before the name --
   presumably; the emitted character is on an elided line.  */
5477 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
/* Translate the print code into a byte size used by the switch below.  */
5480 if (code == 'w' || MMX_REG_P (x))
5482 else if (code == 'b')
5484 else if (code == 'k')
5486 else if (code == 'q')
5488 else if (code == 'y')
5490 else if (code == 'h')
5493 code = GET_MODE_SIZE (GET_MODE (x));
5495 /* Irritatingly, AMD extended registers use different naming convention
5496 from the normal registers. */
5497 if (REX_INT_REG_P (x))
5504 error ("extended registers have no high halves");
5507 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
5510 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
5513 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
5516 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
5519 error ("unsupported operand size for extended register");
5527 if (STACK_TOP_P (x))
5529 fputs ("st(0)", file);
/* 4- and 8-byte integer regs get the 'e'/'r' prefix before the 16-bit
   name from hi_reg_name.  */
5536 if (! ANY_FP_REG_P (x))
5537 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
5541 fputs (hi_reg_name[REGNO (x)], file);
5544 fputs (qi_reg_name[REGNO (x)], file);
5547 fputs (qi_high_reg_name[REGNO (x)], file);
5555 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
5556 C -- print opcode suffix for set/cmov insn.
5557 c -- like C, but print reversed condition
5558 F,f -- likewise, but for floating-point.
5559 R -- print the prefix for register names.
5560 z -- print the opcode suffix for the size of the current operand.
5561 * -- print a star (in certain assembler syntax)
5562 A -- print an absolute memory reference.
5563 w -- print the operand as if it's a "word" (HImode) even if it isn't.
5564 s -- print a shift double count, followed by the assemblers argument
5566 b -- print the QImode name of the register for the indicated operand.
5567 %b0 would print %al if operands[0] is reg 0.
5568 w -- likewise, print the HImode name of the register.
5569 k -- likewise, print the SImode name of the register.
5570 q -- likewise, print the DImode name of the register.
5571 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
5572 y -- print "st(0)" instead of "st" as a register.
5573 D -- print condition for SSE cmp instruction.
5574 P -- if PIC, print an @PLT suffix.
5575 X -- don't print any sort of PIC '@' suffix for a symbol.
/* NOTE(review): the case labels of the big code-letter switch and the
   literal suffix strings emitted for most dialect branches are on
   elided lines; groupings below are inferred from the visible bodies.  */
5579 print_operand (file, x, code)
5589 if (ASSEMBLER_DIALECT == 0)
5594 if (ASSEMBLER_DIALECT == 0)
5596 else if (ASSEMBLER_DIALECT == 1)
5598 /* Intel syntax. For absolute addresses, registers should not
5599 be surrounded by braces. */
5600 if (GET_CODE (x) != REG)
5603 PRINT_OPERAND (file, x, 0);
5609 PRINT_OPERAND (file, x, 0);
5614 if (ASSEMBLER_DIALECT == 0)
5619 if (ASSEMBLER_DIALECT == 0)
5624 if (ASSEMBLER_DIALECT == 0)
5629 if (ASSEMBLER_DIALECT == 0)
5634 if (ASSEMBLER_DIALECT == 0)
5639 if (ASSEMBLER_DIALECT == 0)
5644 /* 387 opcodes don't get size suffixes if the operands are
5647 if (STACK_REG_P (x))
5650 /* this is the size of op from size of operand */
5651 switch (GET_MODE_SIZE (GET_MODE (x)))
5654 #ifdef HAVE_GAS_FILDS_FISTS
5660 if (GET_MODE (x) == SFmode)
5675 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5677 #ifdef GAS_MNEMONICS
5703 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
5705 PRINT_OPERAND (file, x, 0);
5711 /* Little bit of braindamage here. The SSE compare instructions
5712 does use completely different names for the comparisons that the
5713 fp conditional moves. */
5714 switch (GET_CODE (x))
5729 fputs ("unord", file);
5733 fputs ("neq", file);
5737 fputs ("nlt", file);
5741 fputs ("nle", file);
5744 fputs ("ord", file);
/* 'C'/'F': integer resp. FP condition suffix, direct sense.  */
5752 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
5755 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
5758 /* Like above, but reverse condition */
5760 /* Check to see if argument to %c is really a constant
5761 and not a condition code which needs to be reversed. */
5762 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
5764 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
5767 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
5770 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* Branch prediction hint prefixes, keyed off the REG_BR_PROB note.  */
5776 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
5779 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5782 int pred_val = INTVAL (XEXP (x, 0));
/* Only hint when the prediction is decisive (outside 45%..55%).  */
5784 if (pred_val < REG_BR_PROB_BASE * 45 / 100
5785 || pred_val > REG_BR_PROB_BASE * 55 / 100)
5787 int taken = pred_val > REG_BR_PROB_BASE / 2;
5788 int cputaken = final_forward_branch_p (current_output_insn) == 0;
5790 /* Emit hints only in the case default branch prediction
5791 heruistics would fail. */
5792 if (taken != cputaken)
5794 /* We use 3e (DS) prefix for taken branches and
5795 2e (CS) prefix for not taken branches. */
5797 fputs ("ds ; ", file);
5799 fputs ("cs ; ", file);
5808 sprintf (str, "invalid operand code `%c'", code);
5809 output_operand_lossage (str);
/* No (or handled) code letter: print the operand itself by kind.  */
5814 if (GET_CODE (x) == REG)
5816 PRINT_REG (x, code, file);
5819 else if (GET_CODE (x) == MEM)
5821 /* No `byte ptr' prefix for call instructions. */
5822 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
5825 switch (GET_MODE_SIZE (GET_MODE (x)))
5827 case 1: size = "BYTE"; break;
5828 case 2: size = "WORD"; break;
5829 case 4: size = "DWORD"; break;
5830 case 8: size = "QWORD"; break;
5831 case 12: size = "XWORD"; break;
5832 case 16: size = "XMMWORD"; break;
5837 /* Check for explicit size override (codes 'b', 'w' and 'k') */
5840 else if (code == 'w')
5842 else if (code == 'k')
5846 fputs (" PTR ", file);
5850 if (flag_pic && CONSTANT_ADDRESS_P (x))
5851 output_pic_addr_const (file, x, code);
5852 /* Avoid (%rip) for call operands. */
5853 else if (CONSTANT_ADDRESS_P (x) && code =='P'
5854 && GET_CODE (x) != CONST_INT)
5855 output_addr_const (file, x);
5860 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
/* SFmode immediates are emitted as their raw 32-bit hex pattern.  */
5865 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5866 REAL_VALUE_TO_TARGET_SINGLE (r, l);
5868 if (ASSEMBLER_DIALECT == 0)
5870 fprintf (file, "0x%lx", l);
5873 /* These float cases don't actually occur as immediate operands. */
5874 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
5879 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5880 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
5881 fprintf (file, "%s", dstr);
5884 else if (GET_CODE (x) == CONST_DOUBLE
5885 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
5890 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5891 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
5892 fprintf (file, "%s", dstr);
/* Remaining constants: immediate marker ('$' in AT&T, presumably) or
   OFFSET FLAT: for symbolic operands in Intel syntax.  */
5898 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
5900 if (ASSEMBLER_DIALECT == 0)
5903 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
5904 || GET_CODE (x) == LABEL_REF)
5906 if (ASSEMBLER_DIALECT == 0)
5909 fputs ("OFFSET FLAT:", file);
5912 if (GET_CODE (x) == CONST_INT)
5913 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5915 output_pic_addr_const (file, x, code);
5917 output_addr_const (file, x);
5921 /* Print a memory operand whose address is ADDR. */
/* Decomposes ADDR into base/index/displacement/scale and prints it in
   the current assembler dialect (AT&T disp(base,index,scale) vs Intel
   [base+index*scale+disp]).  NOTE(review): braces and several dialect
   branches are on elided lines.  */
5924 print_operand_address (file, addr)
5928 struct ix86_address parts;
5929 rtx base, index, disp;
5932 if (! ix86_decompose_address (addr, &parts))
5936 index = parts.index;
5938 scale = parts.scale;
5940 if (!base && !index)
5942 /* Displacement only requires special attention. */
5944 if (GET_CODE (disp) == CONST_INT)
5946 if (ASSEMBLER_DIALECT != 0)
5948 if (USER_LABEL_PREFIX[0] == 0)
5950 fputs ("ds:", file);
5952 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
5955 output_pic_addr_const (file, addr, 0);
5957 output_addr_const (file, addr);
5959 /* Use one byte shorter RIP relative addressing for 64bit mode. */
5960 if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
5961 fputs ("(%rip)", file);
/* AT&T syntax: disp(base,index,scale).  */
5965 if (ASSEMBLER_DIALECT == 0)
5970 output_pic_addr_const (file, disp, 0);
5971 else if (GET_CODE (disp) == LABEL_REF)
5972 output_asm_label (disp);
5974 output_addr_const (file, disp);
5979 PRINT_REG (base, 0, file);
5983 PRINT_REG (index, 0, file);
5985 fprintf (file, ",%d", scale);
/* Intel syntax: [base+index*scale+disp].  */
5991 rtx offset = NULL_RTX;
5995 /* Pull out the offset of a symbol; print any symbol itself. */
5996 if (GET_CODE (disp) == CONST
5997 && GET_CODE (XEXP (disp, 0)) == PLUS
5998 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6000 offset = XEXP (XEXP (disp, 0), 1);
6001 disp = gen_rtx_CONST (VOIDmode,
6002 XEXP (XEXP (disp, 0), 0));
6006 output_pic_addr_const (file, disp, 0);
6007 else if (GET_CODE (disp) == LABEL_REF)
6008 output_asm_label (disp);
6009 else if (GET_CODE (disp) == CONST_INT)
6012 output_addr_const (file, disp);
6018 PRINT_REG (base, 0, file);
6021 if (INTVAL (offset) >= 0)
6023 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6027 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6034 PRINT_REG (index, 0, file);
6036 fprintf (file, "*%d", scale);
6043 /* Split one or more DImode RTL references into pairs of SImode
6044 references. The RTL can be REG, offsettable MEM, integer constant, or
6045 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6046 split and "num" is its length. lo_half and hi_half are output arrays
6047 that parallel "operands". */
/* NOTE(review): the loop header iterating `num' downward and the braces
   are on elided lines.  */
6050 split_di (operands, num, lo_half, hi_half)
6053 rtx lo_half[], hi_half[];
6057 rtx op = operands[num];
6059 /* simplify_subreg refuse to split volatile memory addresses,
6060 but we still have to handle it. */
6061 if (GET_CODE (op) == MEM)
6063 lo_half[num] = adjust_address (op, SImode, 0);
6064 hi_half[num] = adjust_address (op, SImode, 4);
/* Non-MEM: take SImode subregs at byte offsets 0 and 4; constants have
   VOIDmode, so pretend their inner mode is DImode.  */
6068 lo_half[num] = simplify_gen_subreg (SImode, op,
6069 GET_MODE (op) == VOIDmode
6070 ? DImode : GET_MODE (op), 0);
6071 hi_half[num] = simplify_gen_subreg (SImode, op,
6072 GET_MODE (op) == VOIDmode
6073 ? DImode : GET_MODE (op), 4);
6077 /* Split one or more TImode RTL references into pairs of SImode
6078 references. The RTL can be REG, offsettable MEM, integer constant, or
6079 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6080 split and "num" is its length. lo_half and hi_half are output arrays
6081 that parallel "operands". */
/* DImode halves at byte offsets 0 and 8 (cf. split_di, which produces
   SImode halves at 0 and 4).  NOTE(review): the loop header and braces
   are on elided lines.  */
6084 split_ti (operands, num, lo_half, hi_half)
6087 rtx lo_half[], hi_half[];
6091 rtx op = operands[num];
6093 /* simplify_subreg refuse to split volatile memory addresses, but we
6094 still have to handle it. */
6095 if (GET_CODE (op) == MEM)
6097 lo_half[num] = adjust_address (op, DImode, 0);
6098 hi_half[num] = adjust_address (op, DImode, 8);
6102 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6103 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6108 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6109 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6110 is the expression of the binary operation. The output may either be
6111 emitted here, or returned to the caller, like all output_* functions.
6113 There is no guarantee that the operands are the same mode, as they
6114 might be within FLOAT or FLOAT_EXTEND expressions. */
6116 #ifndef SYSV386_COMPAT
6117 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6118 wants to fix the assemblers because that causes incompatibility
6119 with gcc. No-one wants to fix gcc because that causes
6120 incompatibility with assemblers... You can use the option of
6121 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6122 #define SYSV386_COMPAT 1
/* NOTE(review): case labels, opcode-name strcpy calls, aborts and
   returns of this function are on elided lines; comments below describe
   only the visible structure.  */
6126 output_387_binary_op (insn, operands)
6130 static char buf[30];
/* Bitwise | (not ||) deliberately avoids short-circuiting; result is
   still a 0/1 flag.  */
6133 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
6135 #ifdef ENABLE_CHECKING
6136 /* Even if we do not want to check the inputs, this documents input
6137 constraints. Which helps in understanding the following code. */
6138 if (STACK_REG_P (operands[0])
6139 && ((REG_P (operands[1])
6140 && REGNO (operands[0]) == REGNO (operands[1])
6141 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6142 || (REG_P (operands[2])
6143 && REGNO (operands[0]) == REGNO (operands[2])
6144 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6145 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
/* Pick the base mnemonic; integer-mode operands select the fi* forms.  */
6151 switch (GET_CODE (operands[3]))
6154 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6155 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6163 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6164 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6172 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6173 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6181 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6182 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
/* SSE path: scalar single/double suffix chosen by destination mode.  */
6196 if (GET_MODE (operands[0]) == SFmode)
6197 strcat (buf, "ss\t{%2, %0|%0, %2}");
6199 strcat (buf, "sd\t{%2, %0|%0, %2}");
6204 switch (GET_CODE (operands[3]))
/* Commutative ops: canonicalize so operands[0] == operands[1].  */
6208 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6210 rtx temp = operands[2];
6211 operands[2] = operands[1];
6215 /* know operands[0] == operands[1]. */
6217 if (GET_CODE (operands[2]) == MEM)
6223 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6225 if (STACK_TOP_P (operands[0]))
6226 /* How is it that we are storing to a dead operand[2]?
6227 Well, presumably operands[1] is dead too. We can't
6228 store the result to st(0) as st(0) gets popped on this
6229 instruction. Instead store to operands[2] (which I
6230 think has to be st(1)). st(1) will be popped later.
6231 gcc <= 2.8.1 didn't have this check and generated
6232 assembly code that the Unixware assembler rejected. */
6233 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6235 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6239 if (STACK_TOP_P (operands[0]))
6240 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6242 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (MINUS, DIV): direction matters.  */
6247 if (GET_CODE (operands[1]) == MEM)
6253 if (GET_CODE (operands[2]) == MEM)
6259 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6262 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6263 derived assemblers, confusingly reverse the direction of
6264 the operation for fsub{r} and fdiv{r} when the
6265 destination register is not st(0). The Intel assembler
6266 doesn't have this brain damage. Read !SYSV386_COMPAT to
6267 figure out what the hardware really does. */
6268 if (STACK_TOP_P (operands[0]))
6269 p = "{p\t%0, %2|rp\t%2, %0}";
6271 p = "{rp\t%2, %0|p\t%0, %2}";
6273 if (STACK_TOP_P (operands[0]))
6274 /* As above for fmul/fadd, we can't store to st(0). */
6275 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6277 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6282 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6285 if (STACK_TOP_P (operands[0]))
6286 p = "{rp\t%0, %1|p\t%1, %0}";
6288 p = "{p\t%1, %0|rp\t%0, %1}";
6290 if (STACK_TOP_P (operands[0]))
6291 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
6293 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
6298 if (STACK_TOP_P (operands[0]))
6300 if (STACK_TOP_P (operands[1]))
6301 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6303 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
6306 else if (STACK_TOP_P (operands[1]))
6309 p = "{\t%1, %0|r\t%0, %1}";
6311 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
6317 p = "{r\t%2, %0|\t%0, %2}";
6319 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6332 /* Output code to initialize control word copies used by
6333 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
6334 is set to control word rounding downwards. */
6336 emit_i387_cw_initialization (normal, round_down)
6337 rtx normal, round_down;
6339 rtx reg = gen_reg_rtx (HImode);
/* Save the current control word, then set the rounding-control bits
   (mask 0xc00) to "round toward zero" for truncation.  */
6341 emit_insn (gen_x86_fnstcw_1 (normal));
6342 emit_move_insn (reg, normal);
/* Prefer the insv form when partial-register stalls are not a concern;
   NOTE(review): the rest of this condition is on an elided line.  */
6343 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
6345 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
6347 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
6348 emit_move_insn (round_down, reg);
6351 /* Output code for INSN to convert a float to a signed int. OPERANDS
6352 are the insn operands. The output may be [HSD]Imode and the input
6353 operand may be [SDX]Fmode. */
6356 output_fix_trunc (insn, operands)
6360 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6361 int dimode_p = GET_MODE (operands[0]) == DImode;
6363 /* Jump through a hoop or two for DImode, since the hardware has no
6364 non-popping instruction. We used to do this a different way, but
6365 that was somewhat fragile and broke with post-reload splitters. */
6366 if (dimode_p && !stack_top_dies)
6367 output_asm_insn ("fld\t%y1", operands);
6369 if (!STACK_TOP_P (operands[1]))
6372 if (GET_CODE (operands[0]) != MEM)
/* Switch to the truncating control word (%3), store, then restore the
   saved control word (%2); see emit_i387_cw_initialization.  */
6375 output_asm_insn ("fldcw\t%3", operands);
6376 if (stack_top_dies || dimode_p)
6377 output_asm_insn ("fistp%z0\t%0", operands);
6379 output_asm_insn ("fist%z0\t%0", operands);
6380 output_asm_insn ("fldcw\t%2", operands);
6385 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
6386 should be used and 2 when fnstsw should be used. UNORDERED_P is true
6387 when fucom should be used. */
/* NOTE(review): several guards, returns and table rows are on elided
   lines; the alt[] index encoding is documented at its definition.  */
6390 output_fp_compare (insn, operands, eflags_p, unordered_p)
6393 int eflags_p, unordered_p;
6396 rtx cmp_op0 = operands[0];
6397 rtx cmp_op1 = operands[1];
/* Bitwise | deliberately avoids short-circuiting; 0/1 flag result.  */
6398 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
6403 cmp_op1 = operands[2];
/* SSE path: [u]comiss / [u]comisd by mode.  */
6407 if (GET_MODE (operands[0]) == SFmode)
6409 return "ucomiss\t{%1, %0|%0, %1}";
6411 return "comiss\t{%1, %0|%0, %y}";
6414 return "ucomisd\t{%1, %0|%0, %1}";
6416 return "comisd\t{%1, %0|%0, %y}";
6419 if (! STACK_TOP_P (cmp_op0))
6422 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6424 if (STACK_REG_P (cmp_op1)
6426 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
6427 && REGNO (cmp_op1) != FIRST_STACK_REG)
6429 /* If both the top of the 387 stack dies, and the other operand
6430 is also a stack register that dies, then this must be a
6431 `fcompp' float compare */
6435 /* There is no double popping fcomi variant. Fortunately,
6436 eflags is immune from the fstp's cc clobbering. */
6438 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
6440 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
6448 return "fucompp\n\tfnstsw\t%0";
6450 return "fcompp\n\tfnstsw\t%0";
6463 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
6465 static const char * const alt[24] =
6477 "fcomi\t{%y1, %0|%0, %y1}",
6478 "fcomip\t{%y1, %0|%0, %y1}",
6479 "fucomi\t{%y1, %0|%0, %y1}",
6480 "fucomip\t{%y1, %0|%0, %y1}",
6487 "fcom%z2\t%y2\n\tfnstsw\t%0",
6488 "fcomp%z2\t%y2\n\tfnstsw\t%0",
6489 "fucom%z2\t%y2\n\tfnstsw\t%0",
6490 "fucomp%z2\t%y2\n\tfnstsw\t%0",
6492 "ficom%z2\t%y2\n\tfnstsw\t%0",
6493 "ficomp%z2\t%y2\n\tfnstsw\t%0",
/* Build the 4-bit index per the encoding comment above.  */
6501 mask = eflags_p << 3;
6502 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
6503 mask |= unordered_p << 1;
6504 mask |= stack_top_dies;
/* Output an element of a jump-table address vector: a .long (or .quad
   in 64-bit mode -- the selecting guard is on an elided line) holding
   internal label L<VALUE>.  */
6517 ix86_output_addr_vec_elt (file, value)
6521 const char *directive = ASM_LONG;
6526 directive = ASM_QUAD;
6532 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Output an element of an ADDR_DIFF_VEC jump table as a relative
   offset.  Three encodings: a label difference against label REL,
   an @GOTOFF entry when the assembler supports GOTOFF in data, or a
   _GLOBAL_OFFSET_TABLE_-relative expression as fallback.
   NOTE(review): the condition selecting the first branch is on an
   elided line -- confirm against the full source.  */
6536 ix86_output_addr_diff_elt (file, value, rel)
6541 fprintf (file, "%s%s%d-.+4+(.-%s%d)\n",
6542 ASM_LONG, LPREFIX, value, LPREFIX, rel);
6543 else if (HAVE_AS_GOTOFF_IN_DATA)
6544 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
6546 asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
6547 ASM_LONG, LPREFIX, value);
6550 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
6554 ix86_expand_clear (dest)
6559 /* We play register width games, which are only valid after reload. */
6560 if (!reload_completed)
6563 /* Avoid HImode and its attendant prefix byte. */
6564 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
6565 dest = gen_rtx_REG (SImode, REGNO (dest));
6567 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
6569 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
6570 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
/* The xor form clobbers the flags register (hard reg 17), so wrap the
   set in a PARALLEL with an explicit CLOBBER.  */
6572 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
6573 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
/* Expander for mov<mode> patterns: massage OPERANDS so the resulting
   SET is representable by a single machine insn (PIC legitimization,
   no mem->mem moves, FP constants forced to the constant pool).
   NOTE(review): several lines are elided in this dump; the visible
   structure matches the classic i386 move expander.  */
6580 ix86_expand_move (mode, operands)
6581      enum machine_mode mode;
6584   int strict = (reload_in_progress || reload_completed);
6587   if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
6589       /* Emit insns to move operands[1] into operands[0].  */
6591       if (GET_CODE (operands[0]) == MEM)
6592 	operands[1] = force_reg (Pmode, operands[1]);
6595 	  rtx temp = operands[0];
6596 	  if (GET_CODE (temp) != REG)
6597 	    temp = gen_reg_rtx (Pmode);
6598 	  temp = legitimize_pic_address (operands[1], temp);
6599 	  if (temp == operands[0])
/* Non-PIC path: break mem->mem moves apart, since x86 mov cannot
   take two memory operands (push is the exception, handled below).  */
6606       if (GET_CODE (operands[0]) == MEM
6607 	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
6608 	      || !push_operand (operands[0], mode))
6609 	  && GET_CODE (operands[1]) == MEM)
6610 	operands[1] = force_reg (mode, operands[1]);
6612       if (push_operand (operands[0], mode)
6613 	  && ! general_no_elim_operand (operands[1], mode))
6614 	operands[1] = copy_to_mode_reg (mode, operands[1]);
6616       /* Force large constants in 64bit compilation into register
6617 	 to get them CSEed.  */
6618       if (TARGET_64BIT && mode == DImode
6619 	  && immediate_operand (operands[1], mode)
6620 	  && !x86_64_zero_extended_value (operands[1])
6621 	  && !register_operand (operands[0], mode)
6622 	  && optimize && !reload_completed && !reload_in_progress)
6623 	operands[1] = copy_to_mode_reg (mode, operands[1]);
6625       if (FLOAT_MODE_P (mode))
6627 	  /* If we are loading a floating point constant to a register,
6628 	     force the value to memory now, since we'll get better code
6629 	     out the back end.  */
6633 	  else if (GET_CODE (operands[1]) == CONST_DOUBLE
6634 		   && register_operand (operands[0], mode))
6635 	    operands[1] = validize_mem (force_const_mem (mode, operands[1]));
6639   insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
6644 /* Attempt to expand a binary operator.  Make the expansion closer to the
6645    actual machine, then just general_operand, which will allow 3 separate
6646    memory references (one output, two input) in a single insn.  */
6649 ix86_expand_binary_operator (code, mode, operands)
6651      enum machine_mode mode;
6654   int matching_memory;
6655   rtx src1, src2, dst, op, clob;
6661   /* Recognize <var1> = <value> <op> <var1> for commutative operators */
6662   if (GET_RTX_CLASS (code) == 'c'
6663       && (rtx_equal_p (dst, src2)
6664 	  || immediate_operand (src1, mode)))
/* (Swap of src1/src2 happens on an elided line here.)  */
6671   /* If the destination is memory, and we do not have matching source
6672      operands, do things in registers.  */
6673   matching_memory = 0;
6674   if (GET_CODE (dst) == MEM)
6676       if (rtx_equal_p (dst, src1))
6677 	matching_memory = 1;
6678       else if (GET_RTX_CLASS (code) == 'c'
6679 	       && rtx_equal_p (dst, src2))
6680 	matching_memory = 2;
6682 	dst = gen_reg_rtx (mode);
6685   /* Both source operands cannot be in memory.  */
6686   if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
6688       if (matching_memory != 2)
6689 	src2 = force_reg (mode, src2);
6691 	src1 = force_reg (mode, src1);
6694   /* If the operation is not commutable, source 1 cannot be a constant
6695      or non-matching memory.  */
6696   if ((CONSTANT_P (src1)
6697        || (!matching_memory && GET_CODE (src1) == MEM))
6698       && GET_RTX_CLASS (code) != 'c')
6699     src1 = force_reg (mode, src1);
6701   /* If optimizing, copy to regs to improve CSE */
6702   if (optimize && ! no_new_pseudos)
6704       if (GET_CODE (dst) == MEM)
6705 	dst = gen_reg_rtx (mode);
6706       if (GET_CODE (src1) == MEM)
6707 	src1 = force_reg (mode, src1);
6708       if (GET_CODE (src2) == MEM)
6709 	src2 = force_reg (mode, src2);
6712   /* Emit the instruction.  */
6714   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
6715   if (reload_in_progress)
6717       /* Reload doesn't know about the flags register, and doesn't know that
6718 	 it doesn't want to clobber it.  We can only do this with PLUS.  */
/* Most x86 ALU ops clobber EFLAGS; represent that with an explicit
   CLOBBER in a PARALLEL so later passes know.  */
6725       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6726       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6729   /* Fix up the destination if needed.  */
6730   if (dst != operands[0])
6731     emit_move_insn (operands[0], dst);
6734 /* Return TRUE or FALSE depending on whether the binary operator meets the
6735    appropriate constraints.  */
6738 ix86_binary_operator_ok (code, mode, operands)
6740      enum machine_mode mode ATTRIBUTE_UNUSED;
6743   /* Both source operands cannot be in memory.  */
6744   if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
6746   /* If the operation is not commutable, source 1 cannot be a constant.  */
6747   if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
6749   /* If the destination is memory, we must have a matching source operand.  */
6750   if (GET_CODE (operands[0]) == MEM
6751       && ! (rtx_equal_p (operands[0], operands[1])
6752 	    || (GET_RTX_CLASS (code) == 'c'
6753 		&& rtx_equal_p (operands[0], operands[2]))))
6755   /* If the operation is not commutable and the source 1 is memory, we must
6756      have a matching destination.  */
6757   if (GET_CODE (operands[1]) == MEM
6758       && GET_RTX_CLASS (code) != 'c'
6759       && ! rtx_equal_p (operands[0], operands[1]))
6764 /* Attempt to expand a unary operator.  Make the expansion closer to the
6765    actual machine, then just general_operand, which will allow 2 separate
6766    memory references (one output, one input) in a single insn.  */
6769 ix86_expand_unary_operator (code, mode, operands)
6771      enum machine_mode mode;
6774   int matching_memory;
6775   rtx src, dst, op, clob;
6780   /* If the destination is memory, and we do not have matching source
6781      operands, do things in registers.  */
6782   matching_memory = 0;
6783   if (GET_CODE (dst) == MEM)
6785       if (rtx_equal_p (dst, src))
6786 	matching_memory = 1;
6788 	dst = gen_reg_rtx (mode);
6791   /* When source operand is memory, destination must match.  */
6792   if (!matching_memory && GET_CODE (src) == MEM)
6793     src = force_reg (mode, src);
6795   /* If optimizing, copy to regs to improve CSE */
6796   if (optimize && ! no_new_pseudos)
6798       if (GET_CODE (dst) == MEM)
6799 	dst = gen_reg_rtx (mode);
6800       if (GET_CODE (src) == MEM)
6801 	src = force_reg (mode, src);
6804   /* Emit the instruction.  */
6806   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
/* NOT is the one x86 unary ALU op that does NOT touch EFLAGS, hence
   the special-case alongside reload_in_progress here.  */
6807   if (reload_in_progress || code == NOT)
6809       /* Reload doesn't know about the flags register, and doesn't know that
6810          it doesn't want to clobber it.  */
6817       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6818       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6821   /* Fix up the destination if needed.  */
6822   if (dst != operands[0])
6823     emit_move_insn (operands[0], dst);
6826 /* Return TRUE or FALSE depending on whether the unary operator meets the
6827    appropriate constraints.  */
6830 ix86_unary_operator_ok (code, mode, operands)
6831      enum rtx_code code ATTRIBUTE_UNUSED;
6832      enum machine_mode mode ATTRIBUTE_UNUSED;
6833      rtx operands[2] ATTRIBUTE_UNUSED;
6835   /* If one of operands is memory, source and destination must match.  */
6836   if ((GET_CODE (operands[0]) == MEM
6837        || GET_CODE (operands[1]) == MEM)
6838       && ! rtx_equal_p (operands[0], operands[1]))
6843 /* Return TRUE or FALSE depending on whether the first SET in INSN
6844    has source and destination with matching CC modes, and that the
6845    CC mode is at least as constrained as REQ_MODE.  */
6848 ix86_match_ccmode (insn, req_mode)
6850      enum machine_mode req_mode;
6853   enum machine_mode set_mode;
6855   set = PATTERN (insn);
6856   if (GET_CODE (set) == PARALLEL)
6857     set = XVECEXP (set, 0, 0);
6858   if (GET_CODE (set) != SET)
6860   if (GET_CODE (SET_SRC (set)) != COMPARE)
6863   set_mode = GET_MODE (SET_DEST (set));
/* The cases below filter by SET_MODE; the switch dispatch and the
   individual accept/reject returns are on elided lines -- the visible
   tests encode which requested modes each set_mode may satisfy.  */
6867       if (req_mode != CCNOmode
6868 	  && (req_mode != CCmode
6869 	      || XEXP (SET_SRC (set), 1) != const0_rtx))
6873       if (req_mode == CCGCmode)
6877       if (req_mode == CCGOCmode || req_mode == CCNOmode)
6881       if (req_mode == CCZmode)
6891   return (GET_MODE (SET_SRC (set)) == set_mode);
6894 /* Generate insn patterns to do an integer compare of OPERANDS.  */
6897 ix86_expand_int_compare (code, op0, op1)
6901   enum machine_mode cmpmode;
/* Pick the least-constrained CC mode that still decides CODE, then set
   the flags register to (compare op0 op1) in that mode.  */
6904   cmpmode = SELECT_CC_MODE (code, op0, op1);
6905   flags = gen_rtx_REG (cmpmode, FLAGS_REG);
6907   /* This is very simple, but making the interface the same as in the
6908      FP case makes the rest of the code easier.  */
6909   tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
6910   emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
6912   /* Return the test that should be put into the flags user, i.e.
6913      the bcc, scc, or cmov instruction.  */
6914   return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
6917 /* Figure out whether to use ordered or unordered fp comparisons.
6918    Return the appropriate mode to use.  */
6921 ix86_fp_compare_mode (code)
6922      enum rtx_code code ATTRIBUTE_UNUSED;
6924   /* ??? In order to make all comparisons reversible, we do all comparisons
6925      non-trapping when compiling for IEEE.  Once gcc is able to distinguish
6926      all forms trapping and nontrapping comparisons, we can make inequality
6927      comparisons trapping again, since it results in better code when using
6928      FCOM based compares.  */
6929   return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the condition-code mode needed to represent comparison CODE of
   OP0 and OP1.  FP operands defer to ix86_fp_compare_mode; for integers
   the case labels document which EFLAGS bits each code needs.  The
   return statements for each group are on elided lines.  */
6933 ix86_cc_mode (code, op0, op1)
6937   if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
6938     return ix86_fp_compare_mode (code);
6941       /* Only zero flag is needed.  */
6943     case NE:			/* ZF!=0 */
6945       /* Codes needing carry flag.  */
6946     case GEU:			/* CF=0 */
6947     case GTU:			/* CF=0 & ZF=0 */
6948     case LTU:			/* CF=1 */
6949     case LEU:			/* CF=1 | ZF=1 */
6951       /* Codes possibly doable only with sign flag when
6952          comparing against zero.  */
6953     case GE:			/* SF=OF   or   SF=0 */
6954     case LT:			/* SF<>OF  or   SF=1 */
6955       if (op1 == const0_rtx)
6958       /* For other cases Carry flag is not required.  */
6960       /* Codes doable only with sign flag when comparing
6961          against zero, but we miss jump instruction for it
6962          so we need to use relational tests agains overflow
6963          that thus needs to be zero.  */
6964     case GT:			/* ZF=0 & SF=OF */
6965     case LE:			/* ZF=1 | SF<>OF */
6966       if (op1 == const0_rtx)
6970       /* strcmp pattern do (use flags) and combine may ask us for proper
6979 /* Return true if we should use an FCOMI instruction for this fp comparison.  */
6982 ix86_use_fcomi_compare (code)
6983      enum rtx_code code ATTRIBUTE_UNUSED;
6985   enum rtx_code swapped_code = swap_condition (code);
/* FCOMI wins if it is the cheapest strategy for either the comparison
   as written or its operand-swapped form.  */
6986   return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
6987 	  || (ix86_fp_comparison_cost (swapped_code)
6988 	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
6991 /* Swap, force into registers, or otherwise massage the two operands
6992    to a fp comparison.  The operands are updated in place; the new
6993    comparsion code is returned.  */
6995 static enum rtx_code
6996 ix86_prepare_fp_compare_args (code, pop0, pop1)
7000   enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7001   rtx op0 = *pop0, op1 = *pop1;
7002   enum machine_mode op_mode = GET_MODE (op0);
7003   int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7005   /* All of the unordered compare instructions only work on registers.
7006      The same is true of the XFmode compare instructions.  The same is
7007      true of the fcomi compare instructions.  */
7010       && (fpcmp_mode == CCFPUmode
7011 	  || op_mode == XFmode
7012 	  || op_mode == TFmode
7013 	  || ix86_use_fcomi_compare (code)))
7015       op0 = force_reg (op_mode, op0);
7016       op1 = force_reg (op_mode, op1);
7020       /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
7021 	 things around if they appear profitable, otherwise force op0
/* standard_80387_constant_p() != 0 means the constant can be loaded
   with fld1/fldz etc.; prefer keeping such a constant as op1.  */
7024       if (standard_80387_constant_p (op0) == 0
7025 	  || (GET_CODE (op0) == MEM
7026 	      && ! (standard_80387_constant_p (op1) == 0
7027 		    || GET_CODE (op1) == MEM)))
7030 	  tmp = op0, op0 = op1, op1 = tmp;
7031 	  code = swap_condition (code);
7034       if (GET_CODE (op0) != REG)
7035 	op0 = force_reg (op_mode, op0);
7037       if (CONSTANT_P (op1))
7039 	  if (standard_80387_constant_p (op1))
7040 	    op1 = force_reg (op_mode, op1);
7042 	    op1 = validize_mem (force_const_mem (op_mode, op1));
7046   /* Try to rearrange the comparison to make it cheaper.  */
7047   if (ix86_fp_comparison_cost (code)
7048       > ix86_fp_comparison_cost (swap_condition (code))
7049       && (GET_CODE (op0) == REG || !reload_completed))
7052       tmp = op0, op0 = op1, op1 = tmp;
7053       code = swap_condition (code);
7054       if (GET_CODE (op0) != REG)
7055 	op0 = force_reg (op_mode, op0);
7063 /* Convert comparison codes we use to represent FP comparison to integer
7064    code that will result in proper branch.  Return UNKNOWN if no such code
7066 static enum rtx_code
7067 ix86_fp_compare_code_to_integer (code)
/* NOTE(review): the entire body of this function (the code->code
   mapping switch) falls on elided lines in this dump; nothing about
   its behavior can be confirmed from what is visible here.  */
7097 /* Split comparison code CODE into comparisons we can do using branch
7098    instructions.  BYPASS_CODE is comparison code for branch that will
7099    branch around FIRST_CODE and SECOND_CODE.  If some of branches
7100    is not required, set value to NIL.
7101    We never require more than two branches.  */
7103 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
7104      enum rtx_code code, *bypass_code, *first_code, *second_code;
7110   /* The fcomi comparison sets flags as follows:
/* First group: codes decidable with a single flags test after
   fcomi/sahf, even on unordered operands (no extra branch needed).  */
7120     case GT:			/* GTU - CF=0 & ZF=0 */
7121     case GE:			/* GEU - CF=0 */
7122     case ORDERED:		/* PF=0 */
7123     case UNORDERED:		/* PF=1 */
7124     case UNEQ:			/* EQ - ZF=1 */
7125     case UNLT:			/* LTU - CF=1 */
7126     case UNLE:			/* LEU - CF=1 | ZF=1 */
7127     case LTGT:			/* EQ - ZF=0 */
/* Remaining codes mis-evaluate on unordered input: guard with a
   bypass branch (branch around on UNORDERED) or a second branch
   (also taken on UNORDERED), as each case records.  */
7129     case LT:			/* LTU - CF=1 - fails on unordered */
7131       *bypass_code = UNORDERED;
7133     case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
7135       *bypass_code = UNORDERED;
7137     case EQ:			/* EQ - ZF=1 - fails on unordered */
7139       *bypass_code = UNORDERED;
7141     case NE:			/* NE - ZF=0 - fails on unordered */
7143       *second_code = UNORDERED;
7145     case UNGE:			/* GEU - CF=0 - fails on unordered */
7147       *second_code = UNORDERED;
7149     case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
7151       *second_code = UNORDERED;
/* Without IEEE conformance the unordered guards can be dropped.  */
7156   if (!TARGET_IEEE_FP)
7163 /* Return cost of comparison done fcom + arithmetics operations on AX.
7164    All following functions do use number of instructions as an cost metrics.
7165    In future this should be tweaked to compute bytes for optimize_size and
7166    take into account performance of various instructions on various CPUs.  */
7168 ix86_fp_comparison_arithmetics_cost (code)
7171   if (!TARGET_IEEE_FP)
7173   /* The cost of code output by ix86_expand_fp_compare.  */
/* NOTE(review): the per-code cost switch (lines 7174-7200) is elided
   in this dump; only the !TARGET_IEEE_FP early path is visible.  */
7201 /* Return cost of comparison done using fcomi operation.
7202    See ix86_fp_comparison_arithmetics_cost for the metrics.  */
7204 ix86_fp_comparison_fcomi_cost (code)
7207   enum rtx_code bypass_code, first_code, second_code;
7208   /* Return arbitarily high cost when instruction is not supported - this
7209      prevents gcc from using it.  */
/* Base cost 2 (fcomi + jcc); +1 if an extra bypass/second branch is
   needed per ix86_fp_comparison_codes.  */
7212   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7213   return (bypass_code != NIL || second_code != NIL) + 2;
7216 /* Return cost of comparison done using sahf operation.
7217    See ix86_fp_comparison_arithmetics_cost for the metrics.  */
7219 ix86_fp_comparison_sahf_cost (code)
7222   enum rtx_code bypass_code, first_code, second_code;
7223   /* Return arbitarily high cost when instruction is not preferred - this
7224      avoids gcc from using it.  */
7225   if (!TARGET_USE_SAHF && !optimize_size)
/* Base cost 3 (fcom + fnstsw + sahf, then jcc); +1 for an extra
   bypass/second branch when required.  */
7227   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7228   return (bypass_code != NIL || second_code != NIL) + 3;
7231 /* Compute cost of the comparison done using any method.
7232    See ix86_fp_comparison_arithmetics_cost for the metrics.  */
7234 ix86_fp_comparison_cost (code)
7237   int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
7240   fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
7241   sahf_cost = ix86_fp_comparison_sahf_cost (code);
/* Take the minimum of the three strategies' costs; the min updates for
   sahf/fcomi happen on the elided lines after each comparison.  */
7243   min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
7244   if (min > sahf_cost)
7246   if (min > fcomi_cost)
7251 /* Generate insn patterns to do a floating point compare of OPERANDS.  */
7254 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
7256      rtx op0, op1, scratch;
7260   enum machine_mode fpcmp_mode, intcmp_mode;
7262   int cost = ix86_fp_comparison_cost (code);
7263   enum rtx_code bypass_code, first_code, second_code;
7265   fpcmp_mode = ix86_fp_compare_mode (code);
7266   code = ix86_prepare_fp_compare_args (code, &op0, &op1);
7269     *second_test = NULL_RTX;
7271     *bypass_test = NULL_RTX;
7273   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7275   /* Do fcomi/sahf based test when profitable.  */
7276   if ((bypass_code == NIL || bypass_test)
7277       && (second_code == NIL || second_test)
7278       && ix86_fp_comparison_arithmetics_cost (code) > cost)
/* fcomi path: set EFLAGS directly from the FP compare.  */
7282 	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7283 	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* sahf path: fnstsw into a HImode scratch, then sahf to load AH
   into EFLAGS.  UNSPEC 9 is the fnstsw marker in this file's md.  */
7289 	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7290 	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7292 	    scratch = gen_reg_rtx (HImode);
7293 	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7294 	  emit_insn (gen_x86_sahf_1 (scratch));
7297       /* The FP codes work out to act like unsigned.  */
7298       intcmp_mode = fpcmp_mode;
7300       if (bypass_code != NIL)
7301 	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
7302 				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
7304       if (second_code != NIL)
7305 	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
7306 				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Arithmetic path: fnstsw, then bit-twiddle AH so one integer test
   decides the comparison.  The masks test C0/C2/C3 status bits
   (0x45 = C0|C2|C3, 0x40 = C3, 0x05 = C0|C2, 0x01 = C0).  */
7311       /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
7312       tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7313       tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7315 	scratch = gen_reg_rtx (HImode);
7316       emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7318       /* In the unordered case, we have to check C2 for NaN's, which
7319 	 doesn't happen to work out to anything nice combination-wise.
7320 	 So do some bit twiddling on the value we've got in AH to come
7321 	 up with an appropriate set of condition codes.  */
7323       intcmp_mode = CCNOmode;
7328 	  if (code == GT || !TARGET_IEEE_FP)
7330 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7335 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7336 	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7337 	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
7338 	      intcmp_mode = CCmode;
7344 	  if (code == LT && TARGET_IEEE_FP)
7346 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7347 	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
7348 	      intcmp_mode = CCmode;
7353 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
7359 	  if (code == GE || !TARGET_IEEE_FP)
7361 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
7366 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7367 	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7374 	  if (code == LE && TARGET_IEEE_FP)
7376 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7377 	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7378 	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7379 	      intcmp_mode = CCmode;
7384 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7390 	  if (code == EQ && TARGET_IEEE_FP)
7392 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7393 	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7394 	      intcmp_mode = CCmode;
7399 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7406 	  if (code == NE && TARGET_IEEE_FP)
7408 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7409 	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7415 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7421 	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7425 	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7434   /* Return the test that should be put into the flags user, i.e.
7435      the bcc, scc, or cmov instruction.  */
7436   return gen_rtx_fmt_ee (code, VOIDmode,
7437 			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand a comparison of the globals ix86_compare_op0/op1 with CODE,
   dispatching to the FP or integer expander; returns the flags-user
   test rtx.  *SECOND_TEST / *BYPASS_TEST (when non-null) receive any
   auxiliary tests the FP path requires.  */
7442 ix86_expand_compare (code, second_test, bypass_test)
7444      rtx *second_test, *bypass_test;
7447   op0 = ix86_compare_op0;
7448   op1 = ix86_compare_op1;
7451     *second_test = NULL_RTX;
7453     *bypass_test = NULL_RTX;
7455   if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7456     ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
7457 				  second_test, bypass_test);
7459     ret = ix86_expand_int_compare (code, op0, op1);
7464 /* Return true if the CODE will result in nontrivial jump sequence.  */
7466 ix86_fp_jump_nontrivial_p (code)
7469   enum rtx_code bypass_code, first_code, second_code;
/* Nontrivial == more than one branch is needed (a bypass or second
   comparison exists for this FP code).  */
7472   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7473   return bypass_code != NIL || second_code != NIL;
/* Expand a conditional branch on comparison CODE of the globals
   ix86_compare_op0/op1, jumping to LABEL when true.  Dispatches on the
   comparison mode: plain integer modes emit compare+jcc, FP modes may
   need multi-branch sequences, and DImode (on 32-bit) is split into
   word-sized compares.  */
7477 ix86_expand_branch (code, label)
7483   switch (GET_MODE (ix86_compare_op0))
7489       tmp = ix86_expand_compare (code, NULL, NULL);
7490       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7491 				  gen_rtx_LABEL_REF (VOIDmode, label),
7493       emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
7503 	enum rtx_code bypass_code, first_code, second_code;
7505 	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
7508 	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7510 	/* Check whether we will use the natural sequence with one jump.  If
7511 	   so, we can expand jump early.  Otherwise delay expansion by
7512 	   creating compound insn to not confuse optimizers.  */
7513 	if (bypass_code == NIL && second_code == NIL
7516 	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
7517 				  gen_rtx_LABEL_REF (VOIDmode, label),
7522 	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
7523 				  ix86_compare_op0, ix86_compare_op1);
7524 	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7525 					gen_rtx_LABEL_REF (VOIDmode, label),
7527 	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
7529 	    use_fcomi = ix86_use_fcomi_compare (code);
/* Compound jump clobbers FP status (reg 18) and EFLAGS (reg 17),
   plus a HImode scratch for the fnstsw result when not using fcomi.  */
7530 	    vec = rtvec_alloc (3 + !use_fcomi);
7531 	    RTVEC_ELT (vec, 0) = tmp;
7533 	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
7535 	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
7538 		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
7540 	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
7548       /* Expand DImode branch into multiple compare+branch.  */
7550 	rtx lo[2], hi[2], label2;
7551 	enum rtx_code code1, code2, code3;
7553 	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
7555 	    tmp = ix86_compare_op0;
7556 	    ix86_compare_op0 = ix86_compare_op1;
7557 	    ix86_compare_op1 = tmp;
7558 	    code = swap_condition (code);
7560 	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
7561 	split_di (&ix86_compare_op1, 1, lo+1, hi+1);
7563 	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
7564 	   avoid two branches.  This costs one extra insn, so disable when
7565 	   optimizing for size.  */
7567 	if ((code == EQ || code == NE)
7569 		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
7574 	    if (hi[1] != const0_rtx)
7575 	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
7576 				   NULL_RTX, 0, OPTAB_WIDEN);
7579 	    if (lo[1] != const0_rtx)
7580 	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
7581 				   NULL_RTX, 0, OPTAB_WIDEN);
7583 	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
7584 				NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse: the OR result compared against zero decides EQ/NE.  */
7586 	    ix86_compare_op0 = tmp;
7587 	    ix86_compare_op1 = const0_rtx;
7588 	    ix86_expand_branch (code, label);
7592 	/* Otherwise, if we are doing less-than or greater-or-equal-than,
7593 	   op1 is a constant and the low word is zero, then we can just
7594 	   examine the high word.  */
7596 	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
7599 	    case LT: case LTU: case GE: case GEU:
7600 	      ix86_compare_op0 = hi[0];
7601 	      ix86_compare_op1 = hi[1];
7602 	      ix86_expand_branch (code, label);
7608 	/* Otherwise, we need two or three jumps.  */
7610 	label2 = gen_label_rtx ();
7613 	code2 = swap_condition (code);
7614 	code3 = unsigned_condition (code);
7618 	  case LT: case GT: case LTU: case GTU:
7621 	  case LE:   code1 = LT;  code2 = GT;  break;
7622 	  case GE:   code1 = GT;  code2 = LT;  break;
7623 	  case LEU:  code1 = LTU; code2 = GTU; break;
7624 	  case GEU:  code1 = GTU; code2 = LTU; break;
7626 	  case EQ:   code1 = NIL; code2 = NE;  break;
7627 	  case NE:   code2 = NIL; break;
7635 	 * if (hi(a) < hi(b)) goto true;
7636 	 * if (hi(a) > hi(b)) goto false;
7637 	 * if (lo(a) < lo(b)) goto true;
7641 	ix86_compare_op0 = hi[0];
7642 	ix86_compare_op1 = hi[1];
7645 	  ix86_expand_branch (code1, label);
7647 	  ix86_expand_branch (code2, label2);
/* Low words compare unsigned regardless of the original signedness
   (code3 = unsigned_condition above).  */
7649 	ix86_compare_op0 = lo[0];
7650 	ix86_compare_op1 = lo[1];
7651 	ix86_expand_branch (code3, label);
7654 	  emit_label (label2);
7663 /* Split branch based on floating point condition.  */
7665 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
7667      rtx op1, op2, target1, target2, tmp;
7670   rtx label = NULL_RTX;
7672   int bypass_probability = -1, second_probability = -1, probability = -1;
/* Normalize so target2 is pc_rtx (fallthrough), reversing the code
   with the maybe-unordered-safe reversal when needed.  */
7675   if (target2 != pc_rtx)
7678       code = reverse_condition_maybe_unordered (code);
7683   condition = ix86_expand_fp_compare (code, op1, op2,
7684 				      tmp, &second, &bypass);
7686   if (split_branch_probability >= 0)
7688       /* Distribute the probabilities across the jumps.
7689 	 Assume the BYPASS and SECOND to be always test
7691       probability = split_branch_probability;
7693       /* Value of 1 is low enough to make no need for probability
7694 	 to be updated.  Later we may run some experiments and see
7695 	 if unordered values are more frequent in practice.  */
7697 	bypass_probability = 1;
7699 	second_probability = 1;
/* Bypass branch jumps over the main test on unordered operands.  */
7701   if (bypass != NULL_RTX)
7703       label = gen_label_rtx ();
7704       i = emit_jump_insn (gen_rtx_SET
7706 			   gen_rtx_IF_THEN_ELSE (VOIDmode,
7708 						 gen_rtx_LABEL_REF (VOIDmode,
7711 	if (bypass_probability >= 0)
7713 	    = gen_rtx_EXPR_LIST (REG_BR_PROB,
7714 				 GEN_INT (bypass_probability),
7717   i = emit_jump_insn (gen_rtx_SET
7719 		       gen_rtx_IF_THEN_ELSE (VOIDmode,
7720 					     condition, target1, target2)));
7721   if (probability >= 0)
7723 	= gen_rtx_EXPR_LIST (REG_BR_PROB,
7724 			     GEN_INT (probability),
/* Second branch also takes TARGET1 (e.g. the UNORDERED leg of NE).  */
7726   if (second != NULL_RTX)
7728       i = emit_jump_insn (gen_rtx_SET
7730 			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
7732 	if (second_probability >= 0)
7734 	    = gen_rtx_EXPR_LIST (REG_BR_PROB,
7735 				 GEN_INT (second_probability),
7738   if (label != NULL_RTX)
/* Expand a setcc of comparison CODE into QImode DEST.  Returns 1 (DONE)
   on success, 0 (FAIL) so the caller falls back -- e.g. for 32-bit
   DImode comparisons.  When the FP compare needs auxiliary tests, the
   partial results are combined with and/or into DEST.  */
7743 ix86_expand_setcc (code, dest)
7747   rtx ret, tmp, tmpreg;
7748   rtx second_test, bypass_test;
7750   if (GET_MODE (ix86_compare_op0) == DImode
7752     return 0; /* FAIL */
7754   if (GET_MODE (dest) != QImode)
7757   ret = ix86_expand_compare (code, &second_test, &bypass_test);
7758   PUT_MODE (ret, QImode);
7763   emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
7764   if (bypass_test || second_test)
7766       rtx test = second_test;
7768       rtx tmp2 = gen_reg_rtx (QImode);
/* A bypass test is combined inverted (reverse + AND); a second test
   is combined directly (OR) -- the selection is on elided lines.  */
7775 	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
7777       PUT_MODE (test, QImode);
7778       emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
7781 	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
7783 	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
7786   return 1; /* DONE */
7790 ix86_expand_int_movcc (operands)
7793 enum rtx_code code = GET_CODE (operands[1]), compare_code;
7794 rtx compare_seq, compare_op;
7795 rtx second_test, bypass_test;
7796 enum machine_mode mode = GET_MODE (operands[0]);
7798 /* When the compare code is not LTU or GEU, we can not use sbbl case.
7799 In case comparsion is done with immediate, we can convert it to LTU or
7800 GEU by altering the integer. */
7802 if ((code == LEU || code == GTU)
7803 && GET_CODE (ix86_compare_op1) == CONST_INT
7805 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
7806 && GET_CODE (operands[2]) == CONST_INT
7807 && GET_CODE (operands[3]) == CONST_INT)
7813 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
7817 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
7818 compare_seq = gen_sequence ();
7821 compare_code = GET_CODE (compare_op);
7823 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
7824 HImode insns, we'd be swallowed in word prefix ops. */
7827 && (mode != DImode || TARGET_64BIT)
7828 && GET_CODE (operands[2]) == CONST_INT
7829 && GET_CODE (operands[3]) == CONST_INT)
7831 rtx out = operands[0];
7832 HOST_WIDE_INT ct = INTVAL (operands[2]);
7833 HOST_WIDE_INT cf = INTVAL (operands[3]);
7836 if ((compare_code == LTU || compare_code == GEU)
7837 && !second_test && !bypass_test)
7840 /* Detect overlap between destination and compare sources. */
7843 /* To simplify rest of code, restrict to the GEU case. */
7844 if (compare_code == LTU)
7849 compare_code = reverse_condition (compare_code);
7850 code = reverse_condition (code);
7854 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
7855 || reg_overlap_mentioned_p (out, ix86_compare_op1))
7856 tmp = gen_reg_rtx (mode);
7858 emit_insn (compare_seq);
7860 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
7862 emit_insn (gen_x86_movsicc_0_m1 (tmp));
7874 tmp = expand_simple_binop (mode, PLUS,
7876 tmp, 1, OPTAB_DIRECT);
7887 tmp = expand_simple_binop (mode, IOR,
7889 tmp, 1, OPTAB_DIRECT);
7891 else if (diff == -1 && ct)
7901 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
7903 tmp = expand_simple_binop (mode, PLUS,
7905 tmp, 1, OPTAB_DIRECT);
7912 * andl cf - ct, dest
7917 tmp = expand_simple_binop (mode, AND,
7919 GEN_INT (trunc_int_for_mode
7921 tmp, 1, OPTAB_DIRECT);
7923 tmp = expand_simple_binop (mode, PLUS,
7925 tmp, 1, OPTAB_DIRECT);
7929 emit_move_insn (out, tmp);
7931 return 1; /* DONE */
7938 tmp = ct, ct = cf, cf = tmp;
7940 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
7942 /* We may be reversing unordered compare to normal compare, that
7943 is not valid in general (we may convert non-trapping condition
7944 to trapping one), however on i386 we currently emit all
7945 comparisons unordered. */
7946 compare_code = reverse_condition_maybe_unordered (compare_code);
7947 code = reverse_condition_maybe_unordered (code);
7951 compare_code = reverse_condition (compare_code);
7952 code = reverse_condition (code);
7955 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
7956 || diff == 3 || diff == 5 || diff == 9)
7957 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
7963 * lea cf(dest*(ct-cf)),dest
7967 * This also catches the degenerate setcc-only case.
7973 out = emit_store_flag (out, code, ix86_compare_op0,
7974 ix86_compare_op1, VOIDmode, 0, 1);
7977 /* On x86_64 the lea instruction operates on Pmode, so we need to get arithmetics
7978 done in proper mode to match. */
7985 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
7989 tmp = gen_rtx_PLUS (mode, tmp, out1);
7995 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
7999 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
8005 clob = gen_rtx_REG (CCmode, FLAGS_REG);
8006 clob = gen_rtx_CLOBBER (VOIDmode, clob);
8008 tmp = gen_rtx_SET (VOIDmode, out, tmp);
8009 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8013 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8015 if (out != operands[0])
8016 emit_move_insn (operands[0], out);
8018 return 1; /* DONE */
8022 * General case: Jumpful:
8023 * xorl dest,dest cmpl op1, op2
8024 * cmpl op1, op2 movl ct, dest
8026 * decl dest movl cf, dest
8027 * andl (cf-ct),dest 1:
8032 * This is reasonably steep, but branch mispredict costs are
8033 * high on modern cpus, so consider failing only if optimizing
8036 * %%% Parameterize branch_cost on the tuning architecture, then
8037 * use that. The 80386 couldn't care less about mispredicts.
8040 if (!optimize_size && !TARGET_CMOVE)
8046 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8048 /* We may be reversing unordered compare to normal compare,
8049 that is not valid in general (we may convert non-trapping
8050 condition to trapping one), however on i386 we currently
8051 emit all comparisons unordered. */
8052 compare_code = reverse_condition_maybe_unordered (compare_code);
8053 code = reverse_condition_maybe_unordered (code);
8057 compare_code = reverse_condition (compare_code);
8058 code = reverse_condition (code);
8062 out = emit_store_flag (out, code, ix86_compare_op0,
8063 ix86_compare_op1, VOIDmode, 0, 1);
8065 out = expand_simple_binop (mode, PLUS,
8067 out, 1, OPTAB_DIRECT);
8068 out = expand_simple_binop (mode, AND,
8070 GEN_INT (trunc_int_for_mode
8072 out, 1, OPTAB_DIRECT);
8073 out = expand_simple_binop (mode, PLUS,
8075 out, 1, OPTAB_DIRECT);
8076 if (out != operands[0])
8077 emit_move_insn (operands[0], out);
8079 return 1; /* DONE */
8085 /* Try a few things more with specific constants and a variable. */
8088 rtx var, orig_out, out, tmp;
8091 return 0; /* FAIL */
8093 /* If one of the two operands is an interesting constant, load a
8094 constant with the above and mask it in with a logical operation. */
8096 if (GET_CODE (operands[2]) == CONST_INT)
8099 if (INTVAL (operands[2]) == 0)
8100 operands[3] = constm1_rtx, op = and_optab;
8101 else if (INTVAL (operands[2]) == -1)
8102 operands[3] = const0_rtx, op = ior_optab;
8104 return 0; /* FAIL */
8106 else if (GET_CODE (operands[3]) == CONST_INT)
8109 if (INTVAL (operands[3]) == 0)
8110 operands[2] = constm1_rtx, op = and_optab;
8111 else if (INTVAL (operands[3]) == -1)
8112 operands[2] = const0_rtx, op = ior_optab;
8114 return 0; /* FAIL */
8117 return 0; /* FAIL */
8119 orig_out = operands[0];
8120 tmp = gen_reg_rtx (mode);
8123 /* Recurse to get the constant loaded. */
8124 if (ix86_expand_int_movcc (operands) == 0)
8125 return 0; /* FAIL */
8127 /* Mask in the interesting variable. */
8128 out = expand_binop (mode, op, var, tmp, orig_out, 0,
8130 if (out != orig_out)
8131 emit_move_insn (orig_out, out);
8133 return 1; /* DONE */
8137 * For comparison with above,
8147 if (! nonimmediate_operand (operands[2], mode))
8148 operands[2] = force_reg (mode, operands[2]);
8149 if (! nonimmediate_operand (operands[3], mode))
8150 operands[3] = force_reg (mode, operands[3]);
8152 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8154 rtx tmp = gen_reg_rtx (mode);
8155 emit_move_insn (tmp, operands[3]);
8158 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8160 rtx tmp = gen_reg_rtx (mode);
8161 emit_move_insn (tmp, operands[2]);
8164 if (! register_operand (operands[2], VOIDmode)
8165 && ! register_operand (operands[3], VOIDmode))
8166 operands[2] = force_reg (mode, operands[2]);
8168 emit_insn (compare_seq);
8169 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8170 gen_rtx_IF_THEN_ELSE (mode,
8171 compare_op, operands[2],
8174 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8175 gen_rtx_IF_THEN_ELSE (mode,
8180 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8181 gen_rtx_IF_THEN_ELSE (mode,
8186 return 1; /* DONE */
/* Expand a floating point conditional move.  operands[0] is the
   destination, operands[1] the comparison code, operands[2]/[3] the two
   arms; the values being compared live in ix86_compare_op0/op1.
   NOTE(review): several interior lines are elided in this view; return
   value convention not visible here -- presumably nonzero on success,
   matching ix86_expand_int_movcc above.  TODO confirm.  */
8190 ix86_expand_fp_movcc (operands)
8195 rtx compare_op, second_test, bypass_test;
8197 /* For SF/DFmode conditional moves based on comparisons
8198 in same mode, we may want to use SSE min/max instructions. */
8199 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
8200 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
8201 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
8202 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
8204 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
8205 /* We may be called from the post-reload splitter. */
8206 && (!REG_P (operands[0])
8207 || SSE_REG_P (operands[0])
8208 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
8210 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
8211 code = GET_CODE (operands[1]);
8213 /* See if we have (cross) match between comparison operands and
8214 conditional move operands. */
8215 if (rtx_equal_p (operands[2], op1))
8220 code = reverse_condition_maybe_unordered (code);
8222 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
8224 /* Check for min operation. */
8227 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8228 if (memory_operand (op0, VOIDmode))
8229 op0 = force_reg (GET_MODE (operands[0]), op0);
8230 if (GET_MODE (operands[0]) == SFmode)
8231 emit_insn (gen_minsf3 (operands[0], op0, op1));
8233 emit_insn (gen_mindf3 (operands[0], op0, op1));
8236 /* Check for max operation. */
8239 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8240 if (memory_operand (op0, VOIDmode))
8241 op0 = force_reg (GET_MODE (operands[0]), op0);
8242 if (GET_MODE (operands[0]) == SFmode)
8243 emit_insn (gen_maxsf3 (operands[0], op0, op1));
8245 emit_insn (gen_maxdf3 (operands[0], op0, op1));
8249 /* Manage condition to be sse_comparison_operator. In case we are
8250 in non-ieee mode, try to canonicalize the destination operand
8251 to be first in the comparison - this helps reload to avoid extra
8253 if (!sse_comparison_operator (operands[1], VOIDmode)
8254 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
8256 rtx tmp = ix86_compare_op0;
8257 ix86_compare_op0 = ix86_compare_op1;
8258 ix86_compare_op1 = tmp;
/* Swapping the compared values requires swapping the condition too.  */
8259 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
8260 VOIDmode, ix86_compare_op0,
8263 /* Similary try to manage result to be first operand of conditional
8264 move. We also don't support the NE comparison on SSE, so try to
8266 if ((rtx_equal_p (operands[0], operands[3])
8267 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
8268 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
8270 rtx tmp = operands[2];
8271 operands[2] = operands[3];
/* Exchanging the move arms requires reversing the (possibly
   unordered) condition.  */
8273 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
8274 (GET_CODE (operands[1])),
8275 VOIDmode, ix86_compare_op0,
8278 if (GET_MODE (operands[0]) == SFmode)
8279 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
8280 operands[2], operands[3],
8281 ix86_compare_op0, ix86_compare_op1));
8283 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
8284 operands[2], operands[3],
8285 ix86_compare_op0, ix86_compare_op1));
8289 /* The floating point conditional move instructions don't directly
8290 support conditions resulting from a signed integer comparison. */
8292 code = GET_CODE (operands[1]);
8293 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8295 /* The floating point conditional move instructions don't directly
8296 support signed integer comparisons. */
8298 if (!fcmov_comparison_operator (compare_op, VOIDmode))
8300 if (second_test != NULL || bypass_test != NULL)
/* fcmov cannot express this condition; materialize it as a 0/1 byte
   with setcc and redo the comparison against zero instead.  */
8302 tmp = gen_reg_rtx (QImode);
8303 ix86_expand_setcc (code, tmp);
8305 ix86_compare_op0 = tmp;
8306 ix86_compare_op1 = const0_rtx;
8307 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
/* If the extra tests must still read an arm that overlaps the
   destination, copy that arm into a fresh register first.  */
8309 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8311 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8312 emit_move_insn (tmp, operands[3]);
8315 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8317 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8318 emit_move_insn (tmp, operands[2]);
/* Emit the (up to three) conditional moves implementing the main
   comparison plus any bypass/second test.  */
8322 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8323 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8328 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8329 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8334 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8335 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8343 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
8344 works for floating pointer parameters and nonoffsetable memories.
8345 For pushes, it returns just stack offsets; the values will be saved
8346 in the right order. Maximally three parts are generated. */
/* OPERAND is the value to split, PARTS receives up to three word-sized
   pieces, MODE is the mode of OPERAND.  NOTE(review): the return value
   (presumably the number of parts) is set on elided lines -- confirm
   against the full source.  */
8349 ix86_split_to_parts (operand, parts, mode)
8352 enum machine_mode mode;
/* Part count: 32-bit target uses 4-byte parts (TFmode always 3);
   the second form presumably covers the 64-bit 8-byte-part case.  */
8357 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
8359 size = (GET_MODE_SIZE (mode) + 4) / 8;
8361 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
8363 if (size < 2 || size > 3)
8366 /* Optimize constant pool reference to immediates. This is used by fp moves,
8367 that force all constants to memory to allow combining. */
8369 if (GET_CODE (operand) == MEM
8370 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
8371 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
8372 operand = get_pool_constant (XEXP (operand, 0));
8374 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
8376 /* The only non-offsetable memories we handle are pushes. */
8377 if (! push_operand (operand, VOIDmode))
/* For a push, all parts refer to the same (auto-modified) stack
   address; the caller stores them in the right order.  */
8380 operand = copy_rtx (operand);
8381 PUT_MODE (operand, Pmode);
8382 parts[0] = parts[1] = parts[2] = operand;
8384 else if (!TARGET_64BIT)
8387 split_di (&operand, 1, &parts[0], &parts[1]);
8390 if (REG_P (operand))
/* Hard multi-register values may only be decomposed after reload.  */
8392 if (!reload_completed)
8394 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
8395 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8397 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
8399 else if (offsettable_memref_p (operand))
8401 operand = adjust_address (operand, SImode, 0);
8403 parts[1] = adjust_address (operand, SImode, 4);
8405 parts[2] = adjust_address (operand, SImode, 8);
8407 else if (GET_CODE (operand) == CONST_DOUBLE)
8412 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8417 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8418 parts[2] = GEN_INT (trunc_int_for_mode (l[2], SImode));
8421 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
8426 parts[1] = GEN_INT (trunc_int_for_mode (l[1], SImode));
8427 parts[0] = GEN_INT (trunc_int_for_mode (l[0], SImode));
/* 64-bit target: split into DImode (and possibly SImode) pieces.  */
8436 split_ti (&operand, 1, &parts[0], &parts[1]);
8437 if (mode == XFmode || mode == TFmode)
8439 if (REG_P (operand))
8441 if (!reload_completed)
8443 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
8444 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8446 else if (offsettable_memref_p (operand))
8448 operand = adjust_address (operand, DImode, 0);
8450 parts[1] = adjust_address (operand, SImode, 8);
8452 else if (GET_CODE (operand) == CONST_DOUBLE)
8457 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8458 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8459 /* Do not use shift by 32 to avoid warning on 32bit systems. */
8460 if (HOST_BITS_PER_WIDE_INT >= 64)
8462 = GEN_INT (trunc_int_for_mode
8463 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
8464 + ((((HOST_WIDE_INT)l[1]) << 31) << 1),
8467 parts[0] = immed_double_const (l[0], l[1], DImode);
8468 parts[1] = GEN_INT (trunc_int_for_mode (l[2], SImode));
8478 /* Emit insns to perform a move or push of DI, DF, and XF values.
8479 Return false when normal moves are needed; true when all required
8480 insns have been emitted. Operands 2-4 contain the input values
8481 int the correct order; operands 5-7 contain the output values. */
8484 ix86_split_long_move (operands)
8491 enum machine_mode mode = GET_MODE (operands[0]);
8493 /* The DFmode expanders may ask us to move double.
8494 For 64bit target this is single move. By hiding the fact
8495 here we simplify i386.md splitters. */
8496 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
8498 /* Optimize constant pool reference to immediates. This is used by
8499 fp moves, that force all constants to memory to allow combining. */
8501 if (GET_CODE (operands[1]) == MEM
8502 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8503 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
8504 operands[1] = get_pool_constant (XEXP (operands[1], 0));
8505 if (push_operand (operands[0], VOIDmode))
8507 operands[0] = copy_rtx (operands[0]);
8508 PUT_MODE (operands[0], Pmode);
/* 8-byte value on a 64-bit target is a single DImode move.  */
8511 operands[0] = gen_lowpart (DImode, operands[0]);
8512 operands[1] = gen_lowpart (DImode, operands[1]);
8513 emit_move_insn (operands[0], operands[1]);
8517 /* The only non-offsettable memory we handle is push. */
8518 if (push_operand (operands[0], VOIDmode))
8520 else if (GET_CODE (operands[0]) == MEM
8521 && ! offsettable_memref_p (operands[0]))
/* Decompose both operands into word-sized parts.  */
8524 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
8525 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
8527 /* When emitting push, take care for source operands on the stack. */
8528 if (push && GET_CODE (operands[1]) == MEM
8529 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
8532 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
8533 XEXP (part[1][2], 0));
8534 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
8535 XEXP (part[1][1], 0));
8538 /* We need to do copy in the right order in case an address register
8539 of the source overlaps the destination. */
8540 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
/* Count how many destination parts collide with the source address.  */
8542 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
8544 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8547 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
8550 /* Collision in the middle part can be handled by reordering. */
8551 if (collisions == 1 && nparts == 3
8552 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8555 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
8556 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
8559 /* If there are more collisions, we can't handle it by reordering.
8560 Do an lea to the last part and use only one colliding move. */
8561 else if (collisions > 1)
8564 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
8565 XEXP (part[1][0], 0)));
8566 part[1][0] = change_address (part[1][0],
8567 TARGET_64BIT ? DImode : SImode,
8568 part[0][nparts - 1]);
8569 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
8571 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
8581 /* We use only first 12 bytes of TFmode value, but for pushing we
8582 are required to adjust stack as if we were pushing real 16byte
8584 if (mode == TFmode && !TARGET_64BIT)
8585 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
8587 emit_move_insn (part[0][2], part[1][2]);
8592 /* In 64bit mode we don't have 32bit push available. In case this is
8593 register, it is OK - we will just use larger counterpart. We also
8594 retype memory - these comes from attempt to avoid REX prefix on
8595 moving of second half of TFmode value. */
8596 if (GET_MODE (part[1][1]) == SImode)
8598 if (GET_CODE (part[1][1]) == MEM)
8599 part[1][1] = adjust_address (part[1][1], DImode, 0);
8600 else if (REG_P (part[1][1]))
8601 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
8604 if (GET_MODE (part[1][0]) == SImode)
8605 part[1][0] = part[1][1];
8608 emit_move_insn (part[0][1], part[1][1]);
8609 emit_move_insn (part[0][0], part[1][0]);
8613 /* Choose correct order to not overwrite the source before it is copied. */
8614 if ((REG_P (part[0][0])
8615 && REG_P (part[1][1])
8616 && (REGNO (part[0][0]) == REGNO (part[1][1])
8618 && REGNO (part[0][0]) == REGNO (part[1][2]))))
8620 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* High-to-low order: destination low part overlaps a source part.  */
8624 operands[2] = part[0][2];
8625 operands[3] = part[0][1];
8626 operands[4] = part[0][0];
8627 operands[5] = part[1][2];
8628 operands[6] = part[1][1];
8629 operands[7] = part[1][0];
8633 operands[2] = part[0][1];
8634 operands[3] = part[0][0];
8635 operands[5] = part[1][1];
8636 operands[6] = part[1][0];
/* Low-to-high order: the safe default.  */
8643 operands[2] = part[0][0];
8644 operands[3] = part[0][1];
8645 operands[4] = part[0][2];
8646 operands[5] = part[1][0];
8647 operands[6] = part[1][1];
8648 operands[7] = part[1][2];
8652 operands[2] = part[0][0];
8653 operands[3] = part[0][1];
8654 operands[5] = part[1][0];
8655 operands[6] = part[1][1];
8658 emit_move_insn (operands[2], operands[5]);
8659 emit_move_insn (operands[3], operands[6]);
8661 emit_move_insn (operands[4], operands[7]);
/* Split a 64-bit left shift (DImode ashift) into SImode operations.
   OPERANDS are the usual dest/src/count triple; SCRATCH, if non-NULL,
   is a spare SImode register usable after reload.  */
8667 ix86_split_ashldi (operands, scratch)
8668 rtx *operands, scratch;
8670 rtx low[2], high[2];
8673 if (GET_CODE (operands[2]) == CONST_INT)
8675 split_di (operands, 2, low, high);
8676 count = INTVAL (operands[2]) & 63;
/* Shift by >= 32: low word becomes zero, high word gets the old
   low word shifted by (count - 32).  */
8680 emit_move_insn (high[0], low[1]);
8681 emit_move_insn (low[0], const0_rtx);
8684 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
/* Shift by < 32: shld propagates low bits into the high word.  */
8688 if (!rtx_equal_p (operands[0], operands[1]))
8689 emit_move_insn (operands[0], operands[1]);
8690 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
8691 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
/* Variable count: shift both halves, then fix up for counts >= 32.  */
8696 if (!rtx_equal_p (operands[0], operands[1]))
8697 emit_move_insn (operands[0], operands[1]);
8699 split_di (operands, 1, low, high);
8701 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
8702 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
8704 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8706 if (! no_new_pseudos)
8707 scratch = force_reg (SImode, const0_rtx);
8709 emit_move_insn (scratch, const0_rtx)
8711 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
8715 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a 64-bit arithmetic right shift into SImode operations.
   Sign is propagated by filling the high word with copies of the
   sign bit (ashrsi by 31).  SCRATCH as in ix86_split_ashldi.  */
8720 ix86_split_ashrdi (operands, scratch)
8721 rtx *operands, scratch;
8723 rtx low[2], high[2];
8726 if (GET_CODE (operands[2]) == CONST_INT)
8728 split_di (operands, 2, low, high);
8729 count = INTVAL (operands[2]) & 63;
/* Shift by >= 32: low word gets the old high word shifted by
   (count - 32); high word becomes the sign extension.  */
8733 emit_move_insn (low[0], high[1]);
8735 if (! reload_completed)
8736 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
8739 emit_move_insn (high[0], low[0]);
8740 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
8744 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
/* Shift by < 32: shrd propagates high bits into the low word.  */
8748 if (!rtx_equal_p (operands[0], operands[1]))
8749 emit_move_insn (operands[0], operands[1]);
8750 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
8751 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count: shift both halves, then fix up for counts >= 32.  */
8756 if (!rtx_equal_p (operands[0], operands[1]))
8757 emit_move_insn (operands[0], operands[1]);
8759 split_di (operands, 1, low, high);
8761 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
8762 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
8764 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8766 if (! no_new_pseudos)
8767 scratch = gen_reg_rtx (SImode);
/* Sign-fill the scratch so the adjust pattern can cmove it in.  */
8768 emit_move_insn (scratch, high[0]);
8769 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
8770 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
8774 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a 64-bit logical right shift into SImode operations.
   Like ix86_split_ashrdi, but the high word is zero-filled.  */
8779 ix86_split_lshrdi (operands, scratch)
8780 rtx *operands, scratch;
8782 rtx low[2], high[2];
8785 if (GET_CODE (operands[2]) == CONST_INT)
8787 split_di (operands, 2, low, high);
8788 count = INTVAL (operands[2]) & 63;
/* Shift by >= 32: low word gets the old high word shifted by
   (count - 32); high word becomes zero.  */
8792 emit_move_insn (low[0], high[1]);
8793 emit_move_insn (high[0], const0_rtx);
8796 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
/* Shift by < 32: shrd propagates high bits into the low word.  */
8800 if (!rtx_equal_p (operands[0], operands[1]))
8801 emit_move_insn (operands[0], operands[1]);
8802 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
8803 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count: shift both halves, then fix up for counts >= 32.  */
8808 if (!rtx_equal_p (operands[0], operands[1]))
8809 emit_move_insn (operands[0], operands[1]);
8811 split_di (operands, 1, low, high);
8813 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
8814 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
8816 /* Heh. By reversing the arguments, we can reuse this pattern. */
8817 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8819 if (! no_new_pseudos)
8820 scratch = force_reg (SImode, const0_rtx);
8822 emit_move_insn (scratch, const0_rtx);
8824 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
8828 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
8832 /* Helper function for the string operations below. Dest VARIABLE whether
8833 it is aligned to VALUE bytes. If true, jump to the label. */
/* Emits (VARIABLE & VALUE) == 0 test + conditional jump; returns the
   label rtx so the caller can emit it after the unaligned-case code.
   NOTE(review): VALUE is ANDed directly, so callers pass the mask bits
   (e.g. 1, 2, 4), not an alignment boundary.  */
8835 ix86_expand_aligntest (variable, value)
8839 rtx label = gen_label_rtx ();
8840 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
8841 if (GET_MODE (variable) == DImode)
8842 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
8844 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
8845 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
8850 /* Adjust COUNTER by the VALUE. */
/* Subtracts VALUE from COUNTREG in its own mode (DImode or SImode).  */
8852 ix86_adjust_counter (countreg, value)
8854 HOST_WIDE_INT value;
8856 if (GET_MODE (countreg) == DImode)
8857 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
8859 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
8862 /* Zero extend possibly SImode EXP to Pmode register. */
/* Returns a fresh Pmode register holding EXP: VOIDmode constants are
   forced into a register, Pmode values are copied, and SImode values
   are zero-extended (only reachable when Pmode is DImode).  */
8864 ix86_zero_extend_to_Pmode (exp)
8868 if (GET_MODE (exp) == VOIDmode)
8869 return force_reg (Pmode, exp);
8870 if (GET_MODE (exp) == Pmode)
8871 return copy_to_mode_reg (Pmode, exp);
8872 r = gen_reg_rtx (Pmode);
8873 emit_insn (gen_zero_extendsidi2 (r, exp));
8877 /* Expand string move (memcpy) operation. Use i386 string operations when
8878 profitable. expand_clrstr contains similar code. */
/* DST/SRC are MEMs, COUNT_EXP the byte count, ALIGN_EXP the known
   alignment.  NOTE(review): returns 0 to fall back to the library call
   and nonzero on success, presumably -- the return statements are on
   elided lines; confirm against the full source.  */
8880 ix86_expand_movstr (dst, src, count_exp, align_exp)
8881 rtx dst, src, count_exp, align_exp;
8883 rtx srcreg, destreg, countreg;
8884 enum machine_mode counter_mode;
8885 HOST_WIDE_INT align = 0;
8886 unsigned HOST_WIDE_INT count = 0;
8891 if (GET_CODE (align_exp) == CONST_INT)
8892 align = INTVAL (align_exp);
8894 /* This simple hack avoids all inlining code and simplifies code below. */
8895 if (!TARGET_ALIGN_STRINGOPS)
8898 if (GET_CODE (count_exp) == CONST_INT)
8899 count = INTVAL (count_exp);
8901 /* Figure out proper mode for counter. For 32bits it is always SImode,
8902 for 64bits use SImode when possible, otherwise DImode.
8903 Set count to number of bytes copied when known at compile time. */
8904 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
8905 || x86_64_zero_extended_value (count_exp))
8906 counter_mode = SImode;
8908 counter_mode = DImode;
8910 if (counter_mode != SImode && counter_mode != DImode)
8913 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
8914 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* String insns require the direction flag cleared.  */
8916 emit_insn (gen_cld ());
8918 /* When optimizing for size emit simple rep ; movsb instruction for
8919 counts not divisible by 4. */
8921 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
8923 countreg = ix86_zero_extend_to_Pmode (count_exp);
8925 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
8926 destreg, srcreg, countreg));
8928 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
8929 destreg, srcreg, countreg));
8932 /* For constant aligned (or small unaligned) copies use rep movsl
8933 followed by code copying the rest. For PentiumPro ensure 8 byte
8934 alignment to allow rep movsl acceleration. */
8938 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
8939 || optimize_size || count < (unsigned int)64)
8941 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
8942 if (count & ~(size - 1))
/* Word count = byte count / word size, masked for 32-bit targets.  */
8944 countreg = copy_to_mode_reg (counter_mode,
8945 GEN_INT ((count >> (size == 4 ? 2 : 3))
8946 & (TARGET_64BIT ? -1 : 0x3fffffff)));
8947 countreg = ix86_zero_extend_to_Pmode (countreg);
8951 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
8952 destreg, srcreg, countreg));
8954 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
8955 destreg, srcreg, countreg));
8958 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
8959 destreg, srcreg, countreg));
/* Copy the leftover tail (< word size) with single string moves.  */
8961 if (size == 8 && (count & 0x04))
8962 emit_insn (gen_strmovsi (destreg, srcreg));
8964 emit_insn (gen_strmovhi (destreg, srcreg));
8966 emit_insn (gen_strmovqi (destreg, srcreg));
8968 /* The generic code based on the glibc implementation:
8969 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
8970 allowing accelerated copying there)
8971 - copy the data using rep movsl
8978 /* In case we don't know anything about the alignment, default to
8979 library version, since it is usually equally fast and result in
8981 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
8987 if (TARGET_SINGLE_STRINGOP)
8988 emit_insn (gen_cld ());
8990 countreg2 = gen_reg_rtx (Pmode);
8991 countreg = copy_to_mode_reg (counter_mode, count_exp);
8993 /* We don't use loops to align destination and to copy parts smaller
8994 than 4 bytes, because gcc is able to optimize such code better (in
8995 the case the destination or the count really is aligned, gcc is often
8996 able to predict the branches) and also it is friendlier to the
8997 hardware branch prediction.
8999 Using loops is benefical for generic case, because we can
9000 handle small counts using the loops. Many CPUs (such as Athlon)
9001 have large REP prefix setup costs.
9003 This is quite costy. Maybe we can revisit this decision later or
9004 add some customizability to this code. */
9007 && align < (TARGET_PENTIUMPRO && (count == 0
9008 || count >= (unsigned int)260)
9009 ? 8 : UNITS_PER_WORD))
/* Small counts skip the alignment prologue entirely.  */
9011 label = gen_label_rtx ();
9012 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9013 LEU, 0, counter_mode, 1, label);
/* Align destination to 2, 4 (and on PPro, 8) byte boundaries, one
   byte/halfword/word at a time, adjusting the count as we go.  */
9017 rtx label = ix86_expand_aligntest (destreg, 1);
9018 emit_insn (gen_strmovqi (destreg, srcreg));
9019 ix86_adjust_counter (countreg, 1);
9021 LABEL_NUSES (label) = 1;
9025 rtx label = ix86_expand_aligntest (destreg, 2);
9026 emit_insn (gen_strmovhi (destreg, srcreg));
9027 ix86_adjust_counter (countreg, 2);
9029 LABEL_NUSES (label) = 1;
9032 && ((TARGET_PENTIUMPRO && (count == 0
9033 || count >= (unsigned int)260))
9036 rtx label = ix86_expand_aligntest (destreg, 4);
9037 emit_insn (gen_strmovsi (destreg, srcreg));
9038 ix86_adjust_counter (countreg, 4);
9040 LABEL_NUSES (label) = 1;
9043 if (!TARGET_SINGLE_STRINGOP)
9044 emit_insn (gen_cld ());
/* Main body: rep movs with count divided down to word units.  */
9047 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9049 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
9050 destreg, srcreg, countreg2));
9054 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9055 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
9056 destreg, srcreg, countreg2));
9062 LABEL_NUSES (label) = 1;
/* Epilogue: copy the remaining 1-7 bytes, testing the count bits
   when the residue is not known at compile time.  */
9064 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9065 emit_insn (gen_strmovsi (destreg, srcreg));
9066 if ((align <= 4 || count == 0) && TARGET_64BIT)
9068 rtx label = ix86_expand_aligntest (countreg, 4);
9069 emit_insn (gen_strmovsi (destreg, srcreg));
9071 LABEL_NUSES (label) = 1;
9073 if (align > 2 && count != 0 && (count & 2))
9074 emit_insn (gen_strmovhi (destreg, srcreg));
9075 if (align <= 2 || count == 0)
9077 rtx label = ix86_expand_aligntest (countreg, 2);
9078 emit_insn (gen_strmovhi (destreg, srcreg));
9080 LABEL_NUSES (label) = 1;
9082 if (align > 1 && count != 0 && (count & 1))
9083 emit_insn (gen_strmovqi (destreg, srcreg));
9084 if (align <= 1 || count == 0)
9086 rtx label = ix86_expand_aligntest (countreg, 1);
9087 emit_insn (gen_strmovqi (destreg, srcreg));
9089 LABEL_NUSES (label) = 1;
9093 insns = get_insns ();
/* Attach proper memory attributes to the generated insns.  */
9096 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
9101 /* Expand string clear operation (bzero). Use i386 string operations when
9102 profitable. expand_movstr contains similar code. */
/* SRC is the destination MEM (named "src" historically), COUNT_EXP the
   byte count, ALIGN_EXP the known alignment.  Mirrors
   ix86_expand_movstr but stores zeros via rep stos.  NOTE(review):
   return convention on elided lines -- presumably 0 = fall back to
   library, nonzero = expanded inline; confirm.  */
9104 ix86_expand_clrstr (src, count_exp, align_exp)
9105 rtx src, count_exp, align_exp;
9107 rtx destreg, zeroreg, countreg;
9108 enum machine_mode counter_mode;
9109 HOST_WIDE_INT align = 0;
9110 unsigned HOST_WIDE_INT count = 0;
9112 if (GET_CODE (align_exp) == CONST_INT)
9113 align = INTVAL (align_exp);
9115 /* This simple hack avoids all inlining code and simplifies code below. */
9116 if (!TARGET_ALIGN_STRINGOPS)
9119 if (GET_CODE (count_exp) == CONST_INT)
9120 count = INTVAL (count_exp);
9121 /* Figure out proper mode for counter. For 32bits it is always SImode,
9122 for 64bits use SImode when possible, otherwise DImode.
9123 Set count to number of bytes copied when known at compile time. */
9124 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9125 || x86_64_zero_extended_value (count_exp))
9126 counter_mode = SImode;
9128 counter_mode = DImode;
9130 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* String insns require the direction flag cleared.  */
9132 emit_insn (gen_cld ());
9134 /* When optimizing for size emit simple rep ; movsb instruction for
9135 counts not divisible by 4. */
9137 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9139 countreg = ix86_zero_extend_to_Pmode (count_exp);
9140 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
9142 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
9143 destreg, countreg));
9145 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
9146 destreg, countreg));
9150 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9151 || optimize_size || count < (unsigned int)64))
9153 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9154 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
9155 if (count & ~(size - 1))
/* Word count = byte count / word size, masked for 32-bit targets.  */
9157 countreg = copy_to_mode_reg (counter_mode,
9158 GEN_INT ((count >> (size == 4 ? 2 : 3))
9159 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9160 countreg = ix86_zero_extend_to_Pmode (countreg);
9164 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
9165 destreg, countreg));
9167 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
9168 destreg, countreg));
9171 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
9172 destreg, countreg));
/* Clear the leftover tail (< word size) with single string stores,
   reusing subwords of the zero register.  */
9174 if (size == 8 && (count & 0x04))
9175 emit_insn (gen_strsetsi (destreg,
9176 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9178 emit_insn (gen_strsethi (destreg,
9179 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9181 emit_insn (gen_strsetqi (destreg,
9182 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9189 /* In case we don't know anything about the alignment, default to
9190 library version, since it is usually equally fast and result in
9192 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9195 if (TARGET_SINGLE_STRINGOP)
9196 emit_insn (gen_cld ());
9198 countreg2 = gen_reg_rtx (Pmode);
9199 countreg = copy_to_mode_reg (counter_mode, count_exp);
9200 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
9203 && align < (TARGET_PENTIUMPRO && (count == 0
9204 || count >= (unsigned int)260)
9205 ? 8 : UNITS_PER_WORD))
/* Small counts skip the alignment prologue entirely.  */
9207 label = gen_label_rtx ();
9208 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9209 LEU, 0, counter_mode, 1, label);
/* Align destination to 2, 4 (and on PPro, 8) byte boundaries,
   storing one byte/halfword/word and adjusting the count.  */
9213 rtx label = ix86_expand_aligntest (destreg, 1);
9214 emit_insn (gen_strsetqi (destreg,
9215 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9216 ix86_adjust_counter (countreg, 1);
9218 LABEL_NUSES (label) = 1;
9222 rtx label = ix86_expand_aligntest (destreg, 2);
9223 emit_insn (gen_strsethi (destreg,
9224 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9225 ix86_adjust_counter (countreg, 2);
9227 LABEL_NUSES (label) = 1;
9229 if (align <= 4 && TARGET_PENTIUMPRO && (count == 0
9230 || count >= (unsigned int)260))
9232 rtx label = ix86_expand_aligntest (destreg, 4);
9233 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
9234 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
9236 ix86_adjust_counter (countreg, 4);
9238 LABEL_NUSES (label) = 1;
9241 if (!TARGET_SINGLE_STRINGOP)
9242 emit_insn (gen_cld ());
/* Main body: rep stos with count divided down to word units.  */
9245 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9247 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
9248 destreg, countreg2));
9252 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9253 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
9254 destreg, countreg2));
9260 LABEL_NUSES (label) = 1;
/* Epilogue: clear the remaining 1-7 bytes, testing the count bits
   when the residue is not known at compile time.
   NOTE(review): the aligntest on the 4-byte epilogue path below uses
   value 2 where the movstr counterpart uses 4 -- looks suspicious but
   cannot be verified from the elided context; check the full source.  */
9262 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9263 emit_insn (gen_strsetsi (destreg,
9264 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9265 if (TARGET_64BIT && (align <= 4 || count == 0))
9267 rtx label = ix86_expand_aligntest (destreg, 2);
9268 emit_insn (gen_strsetsi (destreg,
9269 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9271 LABEL_NUSES (label) = 1;
9273 if (align > 2 && count != 0 && (count & 2))
9274 emit_insn (gen_strsethi (destreg,
9275 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9276 if (align <= 2 || count == 0)
9278 rtx label = ix86_expand_aligntest (destreg, 2);
9279 emit_insn (gen_strsethi (destreg,
9280 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9282 LABEL_NUSES (label) = 1;
9284 if (align > 1 && count != 0 && (count & 1))
9285 emit_insn (gen_strsetqi (destreg,
9286 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9287 if (align <= 1 || count == 0)
9289 rtx label = ix86_expand_aligntest (destreg, 1);
9290 emit_insn (gen_strsetqi (destreg,
9291 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9293 LABEL_NUSES (label) = 1;
9298 /* Expand strlen. */
/* OUT receives the length of the string at SRC; EOSCHAR is the
   terminator (const0_rtx for plain strlen), ALIGN the known alignment.
   Two strategies: an unrolled word-at-a-time scan (via
   ix86_expand_strlensi_unroll_1) when profitable, otherwise
   repnz scasb whose count register is then inverted.
   NOTE(review): return convention is on elided lines -- presumably
   nonzero when the expansion was emitted; confirm.  */
9300 ix86_expand_strlen (out, src, eoschar, align)
9301 rtx out, src, eoschar, align;
9303 rtx addr, scratch1, scratch2, scratch3, scratch4;
9305 /* The generic case of strlen expander is long. Avoid it's
9306 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
9308 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9309 && !TARGET_INLINE_ALL_STRINGOPS
9311 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
9314 addr = force_reg (Pmode, XEXP (src, 0));
9315 scratch1 = gen_reg_rtx (Pmode);
9317 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9320 /* Well it seems that some optimizer does not combine a call like
9321 foo(strlen(bar), strlen(bar));
9322 when the move and the subtraction is done here. It does calculate
9323 the length just once when these instructions are done inside of
9324 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
9325 often used and I use one fewer register for the lifetime of
9326 output_strlen_unroll() this is better. */
9328 emit_move_insn (out, addr);
9330 ix86_expand_strlensi_unroll_1 (out, align);
9332 /* strlensi_unroll_1 returns the address of the zero at the end of
9333 the string, like memchr(), so compute the length by subtracting
9334 the start address. */
9336 emit_insn (gen_subdi3 (out, out, addr));
9338 emit_insn (gen_subsi3 (out, out, addr));
/* scasb path: scan with count = -1, then length = ~count - 1.  */
9342 scratch2 = gen_reg_rtx (Pmode);
9343 scratch3 = gen_reg_rtx (Pmode);
9344 scratch4 = force_reg (Pmode, constm1_rtx);
9346 emit_move_insn (scratch3, addr);
9347 eoschar = force_reg (QImode, eoschar);
9349 emit_insn (gen_cld ());
9352 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
9353 align, scratch4, scratch3));
9354 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
9355 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
9359 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
9360 align, scratch4, scratch3));
9361 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
9362 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
9368 /* Expand the appropriate insns for doing strlen if not just doing
9371 out = result, initialized with the start address
9372 align_rtx = alignment of the address.
9373 scratch = scratch register, initialized with the startaddress when
9374 not aligned, otherwise undefined
9376 This is just the body. It needs the initialisations mentioned above and
9377 some address computing at the end.  These things are done in i386.md.  */
/* NOTE(review): lossy extraction -- braces, blank lines and some statements
   are missing.  Visible structure: align OUT to a 4-byte boundary checking
   each unaligned byte for NUL, then scan 4 bytes per iteration using the
   classic "(x - 0x01010101) & ~x & 0x80808080" zero-byte test, and finally
   back up to the exact NUL address.  */
9380 ix86_expand_strlensi_unroll_1 (out, align_rtx)
9385 rtx align_2_label = NULL_RTX;
9386 rtx align_3_label = NULL_RTX;
9387 rtx align_4_label = gen_label_rtx ();
9388 rtx end_0_label = gen_label_rtx ();
9390 rtx tmpreg = gen_reg_rtx (SImode);
9391 rtx scratch = gen_reg_rtx (SImode);
9394 if (GET_CODE (align_rtx) == CONST_INT)
9395 align = INTVAL (align_rtx);
9397 /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
9399 /* Is there a known alignment and is it less than 4?  */
9402 rtx scratch1 = gen_reg_rtx (Pmode);
9403 emit_move_insn (scratch1, out);
9404 /* Is there a known alignment and is it not 2? */
9407 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
9408 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
9410 /* Leave just the 3 lower bits.  */
9411 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
9412 NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on (addr & 3): 0 -> already aligned, 2 -> two bytes to check,
   3 -> one byte, 1 -> fall through and check up to three bytes.  */
9414 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9415 Pmode, 1, align_4_label);
9416 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
9417 Pmode, 1, align_2_label);
9418 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
9419 Pmode, 1, align_3_label);
9423 /* Since the alignment is 2, we have to check 2 or 0 bytes;
9424 check if is aligned to 4 - byte.  */
9426 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
9427 NULL_RTX, 0, OPTAB_WIDEN);
9429 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9430 Pmode, 1, align_4_label);
9433 mem = gen_rtx_MEM (QImode, out);
9435 /* Now compare the bytes.  */
9437 /* Compare the first n unaligned byte on a byte per byte basis.  */
9438 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
9439 QImode, 1, end_0_label);
9441 /* Increment the address.  */
9443 emit_insn (gen_adddi3 (out, out, const1_rtx));
9445 emit_insn (gen_addsi3 (out, out, const1_rtx));
9447 /* Not needed with an alignment of 2 */
9450 emit_label (align_2_label);
9452 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9456 emit_insn (gen_adddi3 (out, out, const1_rtx));
9458 emit_insn (gen_addsi3 (out, out, const1_rtx));
9460 emit_label (align_3_label);
9463 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9467 emit_insn (gen_adddi3 (out, out, const1_rtx));
9469 emit_insn (gen_addsi3 (out, out, const1_rtx));
9472 /* Generate loop to check 4 bytes at a time.  It is not a good idea to
9473 align this loop.  It gives only huge programs, but does not help to
9475 emit_label (align_4_label);
9477 mem = gen_rtx_MEM (SImode, out);
9478 emit_move_insn (scratch, mem);
/* Advance past the word just loaded; OUT ends up 4 past the word that
   contains the NUL, corrected below.  */
9480 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
9482 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
9484 /* This formula yields a nonzero result iff one of the bytes is zero.
9485 This saves three branches inside loop and many cycles.  */
9487 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
9488 emit_insn (gen_one_cmplsi2 (scratch, scratch));
9489 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
9490 emit_insn (gen_andsi3 (tmpreg, tmpreg,
9491 GEN_INT (trunc_int_for_mode
9492 (0x80808080, SImode))));
9493 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* Branchless variant (presumably guarded by a CMOV-capability test that
   was lost in extraction): use conditional moves to pick which half-word
   holds the zero byte.  */
9498 rtx reg = gen_reg_rtx (SImode);
9499 rtx reg2 = gen_reg_rtx (Pmode);
9500 emit_move_insn (reg, tmpreg);
9501 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
9503 /* If zero is not in the first two bytes, move two bytes forward.  */
9504 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9505 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9506 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9507 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
9508 gen_rtx_IF_THEN_ELSE (SImode, tmp,
9511 /* Emit lea manually to avoid clobbering of flags.  */
9512 emit_insn (gen_rtx_SET (SImode, reg2,
9513 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
9515 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9516 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9517 emit_insn (gen_rtx_SET (VOIDmode, out,
9518 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* Branchy variant for CPUs without cheap cmov.  */
9525 rtx end_2_label = gen_label_rtx ();
9526 /* Is zero in the first two bytes? */
9528 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9529 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9530 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
9531 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9532 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
9534 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9535 JUMP_LABEL (tmp) = end_2_label;
9537 /* Not in the first two.  Move two bytes forward.  */
9538 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
9540 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
9542 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
9544 emit_label (end_2_label);
9548 /* Avoid branch in fixing the byte.  */
/* addqi3_cc sets carry from the low byte's 0x80 marker; subtract 3 or 4
   (3 plus borrow) to land OUT exactly on the NUL byte.  */
9549 tmpreg = gen_lowpart (QImode, tmpreg);
9550 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
9552 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
9554 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
9556 emit_label (end_0_label)
9559 /* Clear stack slot assignments remembered from previous functions.
9560 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocates a zeroed per-function machine_function record on P.
   xcalloc aborts on OOM, so no NULL check is needed here.  */
9564 ix86_init_machine_status (p)
9567 p->machine = (struct machine_function *)
9568 xcalloc (1, sizeof (struct machine_function));
9571 /* Mark machine specific bits of P for GC. */
/* Walks every (mode, slot) entry of the per-function stack_locals table
   and marks the cached rtx so the garbage collector keeps it live.  */
9573 ix86_mark_machine_status (p)
9576 struct machine_function *machine = p->machine;
9577 enum machine_mode mode;
9583 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
9584 mode = (enum machine_mode) ((int) mode + 1))
9585 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
9586 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
/* Release P's machine_function record; the body is not visible in this
   extraction -- presumably free (p->machine) plus clearing the pointer.  */
9590 ix86_free_machine_status (p)
9597 /* Return a MEM corresponding to a stack slot with mode MODE.
9598 Allocate a new slot if necessary.
9600 The RTL for a function can have several slots available: N is
9601 which slot to use. */
/* Slots are cached in ix86_stack_locals[mode][n] and lazily created via
   assign_stack_local.  The out-of-range branch's body (presumably abort)
   is among the lines lost in extraction.  */
9604 assign_386_stack_local (mode, n)
9605 enum machine_mode mode;
9608 if (n < 0 || n >= MAX_386_STACK_LOCALS)
9611 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
9612 ix86_stack_locals[(int) mode][n]
9613 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
9615 return ix86_stack_locals[(int) mode][n];
9618 /* Calculate the length of the memory address in the instruction
9619 encoding. Does not include the one-byte modrm, opcode, or prefix. */
/* Decomposes ADDR into base/index/disp and returns the number of extra
   encoding bytes (SIB byte and/or displacement).  The actual returned
   constants live on lines dropped by the extraction.  */
9622 memory_address_length (addr)
9625 struct ix86_address parts;
9626 rtx base, index, disp;
/* Auto-inc/dec forms take no extra bytes -- early-out (return dropped).  */
9629 if (GET_CODE (addr) == PRE_DEC
9630 || GET_CODE (addr) == POST_INC
9631 || GET_CODE (addr) == PRE_MODIFY
9632 || GET_CODE (addr) == POST_MODIFY)
9635 if (! ix86_decompose_address (addr, &parts))
9639 index = parts.index;
9643 /* Register Indirect. */
9644 if (base && !index && !disp)
9646 /* Special cases: ebp and esp need the two-byte modrm form. */
9647 if (addr == stack_pointer_rtx
9648 || addr == arg_pointer_rtx
9649 || addr == frame_pointer_rtx
9650 || addr == hard_frame_pointer_rtx)
9654 /* Direct Addressing. */
9655 else if (disp && !base && !index)
9660 /* Find the length of the displacement constant. */
/* 'K' constraint = signed 8-bit immediate, i.e. a 1-byte disp8.  */
9663 if (GET_CODE (disp) == CONST_INT
9664 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
9670 /* An index requires the two-byte modrm form. */
9678 /* Compute default value for "length_immediate" attribute. When SHORTFORM is set
9679 expect that insn have 8bit immediate alternative. */
/* Scans the recognized insn's operands for a constant and sizes its
   encoding by the insn's mode attribute; with SHORTFORM, an 8-bit-range
   CONST_INT ('K' constraint) counts as one byte.  */
9681 ix86_attr_length_immediate_default (insn, shortform)
9687 extract_insn_cached (insn);
9688 for (i = recog_data.n_operands - 1; i >= 0; --i)
9689 if (CONSTANT_P (recog_data.operand[i]))
9694 && GET_CODE (recog_data.operand[i]) == CONST_INT
9695 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
9699 switch (get_attr_mode (insn))
9710 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
9715 fatal_insn ("unknown insn mode", insn);
9721 /* Compute default value for "length_address" attribute. */
/* Returns the encoding length of the first MEM operand's address, or
   (on a dropped line, presumably) 0 when the insn has no MEM operand.  */
9723 ix86_attr_length_address_default (insn)
9727 extract_insn_cached (insn);
9728 for (i = recog_data.n_operands - 1; i >= 0; --i)
9729 if (GET_CODE (recog_data.operand[i]) == MEM)
9731 return memory_address_length (XEXP (recog_data.operand[i], 0));
9737 /* Return the maximum number of instructions a cpu can issue. */
/* NOTE(review): the function header and the per-case return values were
   lost in extraction; only the switch labels over ix86_cpu survive.  */
9744 case PROCESSOR_PENTIUM:
9748 case PROCESSOR_PENTIUMPRO:
9749 case PROCESSOR_PENTIUM4:
9750 case PROCESSOR_ATHLON:
9758 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
9759 by DEP_INSN and nothing set by DEP_INSN. */
9762 ix86_flags_dependant (insn, dep_insn, insn_type)
9764 enum attr_type insn_type;
9768 /* Simplify the test for uninteresting insns. */
9769 if (insn_type != TYPE_SETCC
9770 && insn_type != TYPE_ICMOV
9771 && insn_type != TYPE_FCMOV
9772 && insn_type != TYPE_IBR)
9775 if ((set = single_set (dep_insn)) != 0)
9777 set = SET_DEST (set);
9780 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
9781 && XVECLEN (PATTERN (dep_insn), 0) == 2
9782 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
9783 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
9785 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
9786 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
9791 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
9794 /* This test is true if the dependent insn reads the flags but
9795 not any other potentially set register. */
9796 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
9799 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
9805 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
9806 address with operands set by DEP_INSN. */
/* For LEA, the "address" is the SET_SRC of the pattern; otherwise the
   first MEM operand's address is used.  Returns whether DEP_INSN modifies
   any register appearing in that address (an AGI stall source).  */
9809 ix86_agi_dependant (insn, dep_insn, insn_type)
9811 enum attr_type insn_type;
9815 if (insn_type == TYPE_LEA
9818 addr = PATTERN (insn);
9819 if (GET_CODE (addr) == SET)
9821 else if (GET_CODE (addr) == PARALLEL
9822 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
9823 addr = XVECEXP (addr, 0, 0);
9826 addr = SET_SRC (addr);
9831 extract_insn_cached (insn);
9832 for (i = recog_data.n_operands - 1; i >= 0; --i)
9833 if (GET_CODE (recog_data.operand[i]) == MEM)
9835 addr = XEXP (recog_data.operand[i], 0);
/* Dropped lines presumably return 0 when no MEM operand was found.  */
9842 return modified_in_p (addr, dep_insn);
/* Scheduler hook: adjust the latency COST of the dependence LINK between
   DEP_INSN (producer) and INSN (consumer), per-CPU.  NOTE(review): lossy
   extraction -- braces, cost assignments and some breaks are missing;
   comments describe only the visible tests.  */
9846 ix86_adjust_cost (insn, link, dep_insn, cost)
9847 rtx insn, link, dep_insn;
9850 enum attr_type insn_type, dep_insn_type;
9851 enum attr_memory memory, dep_memory;
9853 int dep_insn_code_number;
9855 /* Anti and output depenancies have zero cost on all CPUs. */
9856 if (REG_NOTE_KIND (link) != 0)
9859 dep_insn_code_number = recog_memoized (dep_insn);
9861 /* If we can't recognize the insns, we can't really do anything. */
9862 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
9865 insn_type = get_attr_type (insn);
9866 dep_insn_type = get_attr_type (dep_insn);
9870 case PROCESSOR_PENTIUM:
9871 /* Address Generation Interlock adds a cycle of latency. */
9872 if (ix86_agi_dependant (insn, dep_insn, insn_type))
9875 /* ??? Compares pair with jump/setcc. */
9876 if (ix86_flags_dependant (insn, dep_insn, insn_type))
9879 /* Floating point stores require value to be ready one cycle ealier. */
9880 if (insn_type == TYPE_FMOV
9881 && get_attr_memory (insn) == MEMORY_STORE
9882 && !ix86_agi_dependant (insn, dep_insn, insn_type))
9886 case PROCESSOR_PENTIUMPRO:
9887 memory = get_attr_memory (insn);
9888 dep_memory = get_attr_memory (dep_insn);
9890 /* Since we can't represent delayed latencies of load+operation,
9891 increase the cost here for non-imov insns. */
9892 if (dep_insn_type != TYPE_IMOV
9893 && dep_insn_type != TYPE_FMOV
9894 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
9897 /* INT->FP conversion is expensive. */
9898 if (get_attr_fp_int_src (dep_insn))
9901 /* There is one cycle extra latency between an FP op and a store. */
9902 if (insn_type == TYPE_FMOV
9903 && (set = single_set (dep_insn)) != NULL_RTX
9904 && (set2 = single_set (insn)) != NULL_RTX
9905 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
9906 && GET_CODE (SET_DEST (set2)) == MEM)
9909 /* Show ability of reorder buffer to hide latency of load by executing
9910 in parallel with previous instruction in case
9911 previous instruction is not needed to compute the address. */
9912 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
9913 && !ix86_agi_dependant (insn, dep_insn, insn_type))
9915 /* Claim moves to take one cycle, as core can issue one load
9916 at time and the next load can start cycle later. */
9917 if (dep_insn_type == TYPE_IMOV
9918 || dep_insn_type == TYPE_FMOV)
/* Dropped lines here presumably belong to a K6-class case label.  */
9926 memory = get_attr_memory (insn);
9927 dep_memory = get_attr_memory (dep_insn);
9928 /* The esp dependency is resolved before the instruction is really
9930 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
9931 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
9934 /* Since we can't represent delayed latencies of load+operation,
9935 increase the cost here for non-imov insns. */
9936 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
9937 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
9939 /* INT->FP conversion is expensive. */
9940 if (get_attr_fp_int_src (dep_insn))
9943 /* Show ability of reorder buffer to hide latency of load by executing
9944 in parallel with previous instruction in case
9945 previous instruction is not needed to compute the address. */
9946 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
9947 && !ix86_agi_dependant (insn, dep_insn, insn_type))
9949 /* Claim moves to take one cycle, as core can issue one load
9950 at time and the next load can start cycle later. */
9951 if (dep_insn_type == TYPE_IMOV
9952 || dep_insn_type == TYPE_FMOV)
9961 case PROCESSOR_ATHLON:
9962 memory = get_attr_memory (insn);
9963 dep_memory = get_attr_memory (dep_insn);
9965 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
9967 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
9972 /* Show ability of reorder buffer to hide latency of load by executing
9973 in parallel with previous instruction in case
9974 previous instruction is not needed to compute the address. */
9975 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
9976 && !ix86_agi_dependant (insn, dep_insn, insn_type))
9978 /* Claim moves to take one cycle, as core can issue one load
9979 at time and the next load can start cycle later. */
9980 if (dep_insn_type == TYPE_IMOV
9981 || dep_insn_type == TYPE_FMOV)
/* Per-cycle PPro decoder state; presumably also holds the rtx decode[3]
   slots referenced below -- the remaining fields were lost in extraction.  */
9998 struct ppro_sched_data
10001 int issued_this_cycle;
/* Length attribute of INSN, tolerating unrecognizable insns (the fallback
   return on an unrecognized insn is among the dropped lines).  */
10006 ix86_safe_length (insn)
10009 if (recog_memoized (insn) >= 0)
10010 return get_attr_length(insn);
/* NOTE(review): despite the name, this returns the full length attribute,
   same as ix86_safe_length -- matches the historical source, but verify
   whether a length_prefix attribute was intended.  */
10016 ix86_safe_length_prefix (insn)
10019 if (recog_memoized (insn) >= 0)
10020 return get_attr_length(insn);
/* Memory attribute of INSN, or MEMORY_UNKNOWN if it isn't recognizable.  */
10025 static enum attr_memory
10026 ix86_safe_memory (insn)
10029 if (recog_memoized (insn) >= 0)
10030 return get_attr_memory(insn);
10032 return MEMORY_UNKNOWN;
/* Pentium pairing attribute of INSN; PENT_PAIR_NP (non-pairable) when the
   insn isn't recognizable.  */
10035 static enum attr_pent_pair
10036 ix86_safe_pent_pair (insn)
10039 if (recog_memoized (insn) >= 0)
10040 return get_attr_pent_pair(insn);
10042 return PENT_PAIR_NP;
/* PPro uop-count class of INSN; conservatively PPRO_UOPS_MANY when the
   insn isn't recognizable.  */
10045 static enum attr_ppro_uops
10046 ix86_safe_ppro_uops (insn)
10049 if (recog_memoized (insn) >= 0)
10050 return get_attr_ppro_uops (insn);
10052 return PPRO_UOPS_MANY;
/* Debug helper: print the INSN_UIDs currently occupying the three PPro
   decoder slots to DUMP, if slot 0 is in use.  */
10056 ix86_dump_ppro_packet (dump)
10059 if (ix86_sched_data.ppro.decode[0])
10061 fprintf (dump, "PPRO packet: %d",
10062 INSN_UID (ix86_sched_data.ppro.decode[0]));
10063 if (ix86_sched_data.ppro.decode[1])
10064 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
10065 if (ix86_sched_data.ppro.decode[2])
10066 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
10067 fputc ('\n', dump);
10071 /* We're beginning a new block. Initialize data structures as necessary. */
/* Scheduler hook: reset all per-block scheduling state to zero.  */
10074 ix86_sched_init (dump, sched_verbose, veclen)
10075 FILE *dump ATTRIBUTE_UNUSED;
10076 int sched_verbose ATTRIBUTE_UNUSED;
10077 int veclen ATTRIBUTE_UNUSED;
10079 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
10082 /* Shift INSN to SLOT, and shift everything else down. */
/* Rotates *INSNP up to SLOT in the ready array; the save/restore of the
   moved insn around this loop is among the dropped lines.  */
10085 ix86_reorder_insn (insnp, slot)
10092 insnp[0] = insnp[1];
10093 while (++insnp != slot);
10098 /* Find an instruction with given pairability and minimal amount of cycles
10099 lost by the fact that the CPU waits for both pipelines to finish before
10100 reading next instructions. Also take care that both instructions together
10101 can not exceed 7 bytes. */
/* Scans READY..E_READY (backwards) for an insn of pairing class TYPE that
   best balances latency against FIRST; returns the best slot (return of
   bestinsnp is among the dropped lines).  NULL-equivalent when FIRST alone
   is too long to pair.  */
10104 ix86_pent_find_pair (e_ready, ready, type, first)
10107 enum attr_pent_pair type;
10110 int mincycles, cycles;
10111 enum attr_pent_pair tmp;
10112 enum attr_memory memory;
10113 rtx *insnp, *bestinsnp = NULL;
/* Pentium pairing rule: both insns must fit in 7 bytes plus prefixes.  */
10115 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
10118 memory = ix86_safe_memory (first);
10119 cycles = result_ready_cost (first);
10120 mincycles = INT_MAX;
10122 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
10123 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
10124 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
10126 enum attr_memory second_memory;
10127 int secondcycles, currentcycles;
10129 second_memory = ix86_safe_memory (*insnp);
10130 secondcycles = result_ready_cost (*insnp);
/* Cost model: penalize latency imbalance between the two pipes.  */
10131 currentcycles = abs (cycles - secondcycles);
10133 if (secondcycles >= 1 && cycles >= 1)
10135 /* Two read/modify/write instructions together takes two
10137 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
10138 currentcycles += 2;
10140 /* Read modify/write instruction followed by read/modify
10141 takes one cycle longer. */
10142 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
10143 && tmp != PENT_PAIR_UV
10144 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
10145 currentcycles += 1;
10147 if (currentcycles < mincycles)
10148 bestinsnp = insnp, mincycles = currentcycles;
10154 /* Subroutines of ix86_sched_reorder. */
/* Pentium pairing pass over the ready queue: try to place a compatible
   partner next to the head insn (*E_READY) so the U and V pipes both
   issue.  Pairing classes: PU = U-pipe only, PV = V-pipe only, UV = either.  */
10157 ix86_sched_reorder_pentium (ready, e_ready)
10161 enum attr_pent_pair pair1, pair2;
10164 /* This wouldn't be necessary if Haifa knew that static insn ordering
10165 is important to which pipe an insn is issued to. So we have to make
10166 some minor rearrangements. */
10168 pair1 = ix86_safe_pent_pair (*e_ready);
10170 /* If the first insn is non-pairable, let it be. */
10171 if (pair1 == PENT_PAIR_NP)
10174 pair2 = PENT_PAIR_NP;
10177 /* If the first insn is UV or PV pairable, search for a PU
10178 insn to go with. */
10179 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
10181 insnp = ix86_pent_find_pair (e_ready-1, ready,
10182 PENT_PAIR_PU, *e_ready);
10184 pair2 = PENT_PAIR_PU;
10187 /* If the first insn is PU or UV pairable, search for a PV
10188 insn to go with. */
10189 if (pair2 == PENT_PAIR_NP
10190 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
10192 insnp = ix86_pent_find_pair (e_ready-1, ready,
10193 PENT_PAIR_PV, *e_ready);
10195 pair2 = PENT_PAIR_PV;
10198 /* If the first insn is pairable, search for a UV
10199 insn to go with. */
10200 if (pair2 == PENT_PAIR_NP)
10202 insnp = ix86_pent_find_pair (e_ready-1, ready,
10203 PENT_PAIR_UV, *e_ready);
10205 pair2 = PENT_PAIR_UV;
10208 if (pair2 == PENT_PAIR_NP)
10211 /* Found something! Decide if we need to swap the order. */
/* Order so that the U-pipe-only insn issues first; for two UV insns the
   load goes second when the head is a read/modify/write.  */
10212 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
10213 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
10214 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
10215 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
10216 ix86_reorder_insn (insnp, e_ready);
10218 ix86_reorder_insn (insnp, e_ready - 1);
/* PPro decoder-aware reorder: fill the 4-1-1 decode template -- one
   complex (MANY/FEW uop) insn in slot 0, then up to two single-uop insns
   in slots 1-2.  Records the count in ix86_sched_data.ppro.
   NOTE(review): lossy extraction -- some loop/brace lines are missing.  */
10222 ix86_sched_reorder_ppro (ready, e_ready)
10227 enum attr_ppro_uops cur_uops;
10228 int issued_this_cycle;
10232 /* At this point .ppro.decode contains the state of the three
10233 decoders from last "cycle". That is, those insns that were
10234 actually independent. But here we're scheduling for the
10235 decoder, and we may find things that are decodable in the
10238 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
10239 issued_this_cycle = 0;
10242 cur_uops = ix86_safe_ppro_uops (*insnp);
10244 /* If the decoders are empty, and we've a complex insn at the
10245 head of the priority queue, let it issue without complaint. */
10246 if (decode[0] == NULL)
10248 if (cur_uops == PPRO_UOPS_MANY)
10250 decode[0] = *insnp;
10254 /* Otherwise, search for a 2-4 uop unsn to issue. */
10255 while (cur_uops != PPRO_UOPS_FEW)
10257 if (insnp == ready)
10259 cur_uops = ix86_safe_ppro_uops (*--insnp);
10262 /* If so, move it to the head of the line. */
10263 if (cur_uops == PPRO_UOPS_FEW)
10264 ix86_reorder_insn (insnp, e_ready);
10266 /* Issue the head of the queue. */
10267 issued_this_cycle = 1;
10268 decode[0] = *e_ready--;
10271 /* Look for simple insns to fill in the other two slots. */
10272 for (i = 1; i < 3; ++i)
10273 if (decode[i] == NULL)
10275 if (ready >= e_ready)
10279 cur_uops = ix86_safe_ppro_uops (*insnp);
10280 while (cur_uops != PPRO_UOPS_ONE)
10282 if (insnp == ready)
10284 cur_uops = ix86_safe_ppro_uops (*--insnp);
10287 /* Found one. Move it to the head of the queue and issue it. */
10288 if (cur_uops == PPRO_UOPS_ONE)
10290 ix86_reorder_insn (insnp, e_ready);
10291 decode[i] = *e_ready--;
10292 issued_this_cycle++;
10296 /* ??? Didn't find one. Ideally, here we would do a lazy split
10297 of 2-uop insns, issue one and queue the other. */
10301 if (issued_this_cycle == 0)
10302 issued_this_cycle = 1;
10303 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
10306 /* We are about to begin issuing insns for this clock cycle.
10307 Override the default sort algorithm to better slot instructions. */
/* Target hook: dispatch to the CPU-specific reorder routine, then report
   the issue rate as the number of insns schedulable this cycle.  */
10309 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
10310 FILE *dump ATTRIBUTE_UNUSED;
10311 int sched_verbose ATTRIBUTE_UNUSED;
10314 int clock_var ATTRIBUTE_UNUSED;
10316 int n_ready = *n_readyp;
10317 rtx *e_ready = ready + n_ready - 1;
10327 case PROCESSOR_PENTIUM:
10328 ix86_sched_reorder_pentium (ready, e_ready);
10331 case PROCESSOR_PENTIUMPRO:
10332 ix86_sched_reorder_ppro (ready, e_ready);
10337 return ix86_issue_rate ();
10340 /* We are about to issue INSN. Return the number of insns left on the
10341 ready queue that can be issued this cycle. */
/* For PPro, also maintain the 3-slot decode[] packet: a MANY-uop insn
   occupies a packet alone, a FEW-uop insn starts a new packet, and a
   single-uop insn fills the first free slot (flushing when full).  */
10344 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
10348 int can_issue_more;
/* Default (non-PPro) case: plain decrement.  */
10354 return can_issue_more - 1;
10356 case PROCESSOR_PENTIUMPRO:
10358 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
10360 if (uops == PPRO_UOPS_MANY)
10363 ix86_dump_ppro_packet (dump);
10364 ix86_sched_data.ppro.decode[0] = insn;
10365 ix86_sched_data.ppro.decode[1] = NULL;
10366 ix86_sched_data.ppro.decode[2] = NULL;
10368 ix86_dump_ppro_packet (dump);
10369 ix86_sched_data.ppro.decode[0] = NULL;
10371 else if (uops == PPRO_UOPS_FEW)
10374 ix86_dump_ppro_packet (dump);
10375 ix86_sched_data.ppro.decode[0] = insn;
10376 ix86_sched_data.ppro.decode[1] = NULL;
10377 ix86_sched_data.ppro.decode[2] = NULL;
10381 for (i = 0; i < 3; ++i)
10382 if (ix86_sched_data.ppro.decode[i] == NULL)
10384 ix86_sched_data.ppro.decode[i] = insn;
/* Packet full: dump and reset all three decoder slots.  */
10392 ix86_dump_ppro_packet (dump);
10393 ix86_sched_data.ppro.decode[0] = NULL;
10394 ix86_sched_data.ppro.decode[1] = NULL;
10395 ix86_sched_data.ppro.decode[2] = NULL;
10399 return --ix86_sched_data.ppro.issued_this_cycle;
10403 /* Walk through INSNS and look for MEM references whose address is DSTREG or
10404 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
/* Driver: applies ix86_set_move_mem_attrs_1 to every insn pattern in the
   chain starting at INSNS.  */
10408 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
10410 rtx dstref, srcref, dstreg, srcreg;
10414 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
10416 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
10420 /* Subroutine of above to actually do the updating by recursively walking
/* Recursive worker: on a MEM whose address is exactly DSTREG/SRCREG
   (pointer identity), copy the attributes of DSTREF/SRCREF onto it, then
   recurse into all rtx sub-expressions ('e') and vectors ('E').  */
10424 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
10426 rtx dstref, srcref, dstreg, srcreg;
10428 enum rtx_code code = GET_CODE (x);
10429 const char *format_ptr = GET_RTX_FORMAT (code);
10432 if (code == MEM && XEXP (x, 0) == dstreg)
10433 MEM_COPY_ATTRIBUTES (x, dstref);
10434 else if (code == MEM && XEXP (x, 0) == srcreg)
10435 MEM_COPY_ATTRIBUTES (x, srcref);
10437 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
10439 if (*format_ptr == 'e')
10440 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
10442 else if (*format_ptr == 'E')
10443 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10444 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
10449 /* Compute the alignment given to a constant that is being placed in memory.
10450 EXP is the constant and ALIGN is the alignment that the object would
10452 The value of this function is used instead of that alignment to align
/* Raise alignment (in bits) for DFmode reals to 64, 128-bit-mode reals to
   128, and long string constants; the returned values themselves are on
   dropped lines.  */
10456 ix86_constant_alignment (exp, align)
10460 if (TREE_CODE (exp) == REAL_CST)
10462 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
10464 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
10467 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
10474 /* Compute the alignment for a static variable.
10475 TYPE is the data type, and ALIGN is the alignment that
10476 the object would ordinarily have. The value of this function is used
10477 instead of that alignment to align the object. */
/* Bumps: large aggregates (>= 256 bits) to 256; on x86-64, aggregates
   >= 128 bits to 128; DFmode-element arrays/complex/records to 64;
   128-bit-mode data to 128.  Return values are on dropped lines.  */
10480 ix86_data_alignment (type, align)
10484 if (AGGREGATE_TYPE_P (type)
10485 && TYPE_SIZE (type)
10486 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10487 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
10488 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
10491 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
10492 to 16byte boundary. */
10495 if (AGGREGATE_TYPE_P (type)
10496 && TYPE_SIZE (type)
10497 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10498 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
10499 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10503 if (TREE_CODE (type) == ARRAY_TYPE)
10505 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10507 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10510 else if (TREE_CODE (type) == COMPLEX_TYPE)
10513 if (TYPE_MODE (type) == DCmode && align < 64)
10515 if (TYPE_MODE (type) == XCmode && align < 128)
10518 else if ((TREE_CODE (type) == RECORD_TYPE
10519 || TREE_CODE (type) == UNION_TYPE
10520 || TREE_CODE (type) == QUAL_UNION_TYPE)
10521 && TYPE_FIELDS (type))
10523 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10525 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10528 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10529 || TREE_CODE (type) == INTEGER_TYPE)
10531 if (TYPE_MODE (type) == DFmode && align < 64)
10533 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10540 /* Compute the alignment for a local variable.
10541 TYPE is the data type, and ALIGN is the alignment that
10542 the object would ordinarily have. The value of this macro is used
10543 instead of that alignment to align the object. */
/* Same shape as ix86_data_alignment but with the local-variable
   thresholds (x86-64 aggregates >= 16 *bits* here vs. 128 there -- the
   comment says bytes; values/returns are on dropped lines).  */
10546 ix86_local_alignment (type, align)
10550 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
10551 to 16byte boundary. */
10554 if (AGGREGATE_TYPE_P (type)
10555 && TYPE_SIZE (type)
10556 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10557 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
10558 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10561 if (TREE_CODE (type) == ARRAY_TYPE)
10563 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10565 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10568 else if (TREE_CODE (type) == COMPLEX_TYPE)
10570 if (TYPE_MODE (type) == DCmode && align < 64)
10572 if (TYPE_MODE (type) == XCmode && align < 128)
10575 else if ((TREE_CODE (type) == RECORD_TYPE
10576 || TREE_CODE (type) == UNION_TYPE
10577 || TREE_CODE (type) == QUAL_UNION_TYPE)
10578 && TYPE_FIELDS (type))
10580 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10582 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10585 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10586 || TREE_CODE (type) == INTEGER_TYPE)
10589 if (TYPE_MODE (type) == DFmode && align < 64)
10591 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10597 /* Emit RTL insns to initialize the variable parts of a trampoline.
10598 FNADDR is an RTX for the address of the function's pure code.
10599 CXT is an RTX for the static chain value for the function. */
/* 32-bit layout: 0xb9 = mov ecx,imm32 (static chain), 0xe9 = jmp rel32.
   64-bit layout (TARGET_64BIT branch, its test line dropped): load FNADDR
   into r11 (0x41bb movl / 0x49bb movabs), CXT into r10 (0x49ba movabs),
   then 0x49ff 0xe3 = jmp *r11.  */
10601 x86_initialize_trampoline (tramp, fnaddr, cxt)
10602 rtx tramp, fnaddr, cxt;
10606 /* Compute offset from the end of the jmp to the target function. */
10607 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
10608 plus_constant (tramp, 10),
10609 NULL_RTX, 1, OPTAB_DIRECT);
10610 emit_move_insn (gen_rtx_MEM (QImode, tramp),
10611 GEN_INT (trunc_int_for_mode (0xb9, QImode)));
10612 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
10613 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
10614 GEN_INT (trunc_int_for_mode (0xe9, QImode)));
10615 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
10620 /* Try to load address using shorter movl instead of movabs.
10621 We may want to support movq for kernel mode, but kernel does not use
10622 trampolines at the moment. */
10623 if (x86_64_zero_extended_value (fnaddr))
10625 fnaddr = copy_to_mode_reg (DImode, fnaddr);
10626 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10627 GEN_INT (trunc_int_for_mode (0xbb41, HImode)));
10628 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
10629 gen_lowpart (SImode, fnaddr));
10634 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10635 GEN_INT (trunc_int_for_mode (0xbb49, HImode)));
10636 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10640 /* Load static chain using movabs to r10. */
10641 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10642 GEN_INT (trunc_int_for_mode (0xba49, HImode)));
10643 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10646 /* Jump to the r11 */
10647 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10648 GEN_INT (trunc_int_for_mode (0xff49, HImode)));
10649 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
10650 GEN_INT (trunc_int_for_mode (0xe3, QImode)));
/* Sanity check: emitted bytes must fit the declared TRAMPOLINE_SIZE
   (failure branch, presumably abort, is on a dropped line).  */
10652 if (offset > TRAMPOLINE_SIZE)
/* Register target-specific builtin NAME with function type TYPE and
   builtin code CODE, but only when some bit of MASK is enabled in
   target_flags -- i.e. only when the corresponding ISA is on.  */
10657 #define def_builtin(MASK, NAME, TYPE, CODE) \
10659 if ((MASK) & target_flags) \
10660 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL); \
/* Description of a single MMX/SSE builtin; drives the bdesc_* tables
   below and the registration loops in ix86_init_mmx_sse_builtins.  */
10663 struct builtin_description
10665 const unsigned int mask;	/* target_flags bits gating the builtin */
10666 const enum insn_code icode;	/* insn pattern used to expand it */
10667 const char *const name;	/* user-visible builtin name, or 0 */
10668 const enum ix86_builtins code;	/* IX86_BUILTIN_* identifier */
10669 const enum rtx_code comparison;	/* rtx code for comparison builtins */
10670 const unsigned int flag;	/* extra expansion flag; from the tables it
				   appears to mean "swap operands" for the
				   GT/GE compares -- TODO confirm against
				   the expander */
/* SSE comis/ucomis builtins.  Note the GT/GE entries reuse the LT/LE
   rtx codes with flag == 1; presumably the expander swaps the operands
   for those -- verify there.  */
10673 static const struct builtin_description bdesc_comi[] =
10675 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
10676 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
10677 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
10678 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
10679 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
10680 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
10681 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
10682 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
10683 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
10684 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
10685 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
10686 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
/* Two-operand MMX/SSE builtins.  Entries with a zero name field are
   registered individually (with hand-built function types) later in
   ix86_init_mmx_sse_builtins rather than by the generic 2-arg loop.  */
10689 static const struct builtin_description bdesc_2arg[] =
/* SSE packed/scalar float arithmetic.  */
10692 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
10693 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
10694 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
10695 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
10696 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
10697 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
10698 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
10699 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
/* SSE comparisons; GT/GE again via LT/LE with the swap flag set.  */
10701 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
10702 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
10703 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
10704 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
10705 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
10706 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
10707 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
10708 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
10709 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
10710 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
10711 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
10712 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
10713 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
10714 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
10715 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
10716 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
10717 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
10718 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
10719 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
10720 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
10721 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
10722 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
10723 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
10724 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
/* SSE min/max and logical ops.  */
10726 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
10727 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
10728 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
10729 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
10731 { MASK_SSE, CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
10732 { MASK_SSE, CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
10733 { MASK_SSE, CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
10734 { MASK_SSE, CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
/* SSE element moves and unpacks.  */
10736 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
10737 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
10738 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
10739 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
10740 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
/* MMX integer arithmetic (wrapping, saturating signed/unsigned).  */
10743 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
10744 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
10745 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
10746 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
10747 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
10748 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
10750 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
10751 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
10752 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
10753 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
10754 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
10755 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
10756 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
10757 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
10759 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
10760 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
10761 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
/* MMX logical ops.  */
10763 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
10764 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
10765 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
10766 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
/* Averages, compares, min/max (SSE integer extensions on MMX regs).  */
10768 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
10769 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
10771 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
10772 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
10773 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
10774 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
10775 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
10776 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
10778 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
10779 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
10780 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
10781 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
/* MMX unpacks.  */
10783 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
10784 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
10785 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
10786 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
10787 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
10788 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
/* Nameless entries: registered by hand with special function types.  */
10791 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
10792 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
10793 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
10795 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
10796 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
10798 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
10799 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
10800 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
10801 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
10802 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
10803 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
10805 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
10806 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
10807 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
10808 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
10809 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
10810 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
10812 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
10813 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
10814 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
10815 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
10817 { MASK_SSE, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
10818 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
/* One-operand MMX/SSE builtins: mask extraction, square root /
   reciprocal approximations, and float<->int conversions.  All entries
   are nameless and registered individually with explicit types.  */
10822 static const struct builtin_description bdesc_1arg[] =
10824 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
10825 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
10827 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
10828 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
10829 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
10831 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
10832 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
10833 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
10834 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
/* Set up the ix86 target-specific builtins.  The visible body only
   dispatches to the MMX/SSE initializer.  */
10839 ix86_init_builtins ()
10842     ix86_init_mmx_sse_builtins ();
10845 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
10846 is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
10849 ix86_init_mmx_sse_builtins ()
10851   const struct builtin_description * d;
10853   tree endlink = void_list_node;
      /* Pointer types used by the load/store builtins below.  */
10855   tree pchar_type_node = build_pointer_type (char_type_node);
10856   tree pfloat_type_node = build_pointer_type (float_type_node);
10857   tree pv2si_type_node = build_pointer_type (V2SI_type_node);
10858   tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
      /* Build the FUNCTION_TYPE nodes for every builtin signature.  The
	 naming convention is RET_ftype_ARG1_ARG2...; each is a
	 build_function_type over a tree_cons argument list terminated
	 by endlink.  */
10861   tree int_ftype_v4sf_v4sf
10862     = build_function_type (integer_type_node,
10863 			   tree_cons (NULL_TREE, V4SF_type_node,
10864 				      tree_cons (NULL_TREE,
10867   tree v4si_ftype_v4sf_v4sf
10868     = build_function_type (V4SI_type_node,
10869 			   tree_cons (NULL_TREE, V4SF_type_node,
10870 				      tree_cons (NULL_TREE,
10873   /* MMX/SSE/integer conversions.  */
10874   tree int_ftype_v4sf
10875     = build_function_type (integer_type_node,
10876 			   tree_cons (NULL_TREE, V4SF_type_node,
10878   tree int_ftype_v8qi
10879     = build_function_type (integer_type_node,
10880 			   tree_cons (NULL_TREE, V8QI_type_node,
10882   tree int_ftype_v2si
10883     = build_function_type (integer_type_node,
10884 			   tree_cons (NULL_TREE, V2SI_type_node,
10886   tree v2si_ftype_int
10887     = build_function_type (V2SI_type_node,
10888 			   tree_cons (NULL_TREE, integer_type_node,
10890   tree v4sf_ftype_v4sf_int
10891     = build_function_type (V4SF_type_node,
10892 			   tree_cons (NULL_TREE, V4SF_type_node,
10893 				      tree_cons (NULL_TREE, integer_type_node,
10895   tree v4sf_ftype_v4sf_v2si
10896     = build_function_type (V4SF_type_node,
10897 			   tree_cons (NULL_TREE, V4SF_type_node,
10898 				      tree_cons (NULL_TREE, V2SI_type_node,
10900   tree int_ftype_v4hi_int
10901     = build_function_type (integer_type_node,
10902 			   tree_cons (NULL_TREE, V4HI_type_node,
10903 				      tree_cons (NULL_TREE, integer_type_node,
10905   tree v4hi_ftype_v4hi_int_int
10906     = build_function_type (V4HI_type_node,
10907 			   tree_cons (NULL_TREE, V4HI_type_node,
10908 				      tree_cons (NULL_TREE, integer_type_node,
10909 						 tree_cons (NULL_TREE,
10912   /* Miscellaneous.  */
10913   tree v8qi_ftype_v4hi_v4hi
10914     = build_function_type (V8QI_type_node,
10915 			   tree_cons (NULL_TREE, V4HI_type_node,
10916 				      tree_cons (NULL_TREE, V4HI_type_node,
10918   tree v4hi_ftype_v2si_v2si
10919     = build_function_type (V4HI_type_node,
10920 			   tree_cons (NULL_TREE, V2SI_type_node,
10921 				      tree_cons (NULL_TREE, V2SI_type_node,
10923   tree v4sf_ftype_v4sf_v4sf_int
10924     = build_function_type (V4SF_type_node,
10925 			   tree_cons (NULL_TREE, V4SF_type_node,
10926 				      tree_cons (NULL_TREE, V4SF_type_node,
10927 						 tree_cons (NULL_TREE,
10930   tree v4hi_ftype_v8qi_v8qi
10931     = build_function_type (V4HI_type_node,
10932 			   tree_cons (NULL_TREE, V8QI_type_node,
10933 				      tree_cons (NULL_TREE, V8QI_type_node,
10935   tree v2si_ftype_v4hi_v4hi
10936     = build_function_type (V2SI_type_node,
10937 			   tree_cons (NULL_TREE, V4HI_type_node,
10938 				      tree_cons (NULL_TREE, V4HI_type_node,
10940   tree v4hi_ftype_v4hi_int
10941     = build_function_type (V4HI_type_node,
10942 			   tree_cons (NULL_TREE, V4HI_type_node,
10943 				      tree_cons (NULL_TREE, integer_type_node,
10945   tree v4hi_ftype_v4hi_di
10946     = build_function_type (V4HI_type_node,
10947 			   tree_cons (NULL_TREE, V4HI_type_node,
10948 				      tree_cons (NULL_TREE,
10949 						 long_long_integer_type_node,
10951   tree v2si_ftype_v2si_di
10952     = build_function_type (V2SI_type_node,
10953 			   tree_cons (NULL_TREE, V2SI_type_node,
10954 				      tree_cons (NULL_TREE,
10955 						 long_long_integer_type_node,
10957   tree void_ftype_void
10958     = build_function_type (void_type_node, endlink);
10959   tree void_ftype_pchar_int
10960     = build_function_type (void_type_node,
10961 			   tree_cons (NULL_TREE, pchar_type_node,
10962 				      tree_cons (NULL_TREE, integer_type_node,
10964   tree void_ftype_unsigned
10965     = build_function_type (void_type_node,
10966 			   tree_cons (NULL_TREE, unsigned_type_node,
10968   tree unsigned_ftype_void
10969     = build_function_type (unsigned_type_node, endlink);
10971     = build_function_type (long_long_unsigned_type_node, endlink);
10973     = build_function_type (intTI_type_node, endlink);
10974   tree v2si_ftype_v4sf
10975     = build_function_type (V2SI_type_node,
10976 			   tree_cons (NULL_TREE, V4SF_type_node,
10978   /* Loads/stores.  */
10979   tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
10980 				  tree_cons (NULL_TREE, V8QI_type_node,
10981 					     tree_cons (NULL_TREE,
10984   tree void_ftype_v8qi_v8qi_pchar
10985     = build_function_type (void_type_node, maskmovq_args);
10986   tree v4sf_ftype_pfloat
10987     = build_function_type (V4SF_type_node,
10988 			   tree_cons (NULL_TREE, pfloat_type_node,
10990   tree v4sf_ftype_float
10991     = build_function_type (V4SF_type_node,
10992 			   tree_cons (NULL_TREE, float_type_node,
10994   tree v4sf_ftype_float_float_float_float
10995     = build_function_type (V4SF_type_node,
10996 			   tree_cons (NULL_TREE, float_type_node,
10997 				      tree_cons (NULL_TREE, float_type_node,
10998 						 tree_cons (NULL_TREE,
11000 							    tree_cons (NULL_TREE,
11003   /* @@@ the type is bogus */
11004   tree v4sf_ftype_v4sf_pv2si
11005     = build_function_type (V4SF_type_node,
11006 			   tree_cons (NULL_TREE, V4SF_type_node,
11007 				      tree_cons (NULL_TREE, pv2si_type_node,
11009   tree void_ftype_pv2si_v4sf
11010     = build_function_type (void_type_node,
11011 			   tree_cons (NULL_TREE, pv2si_type_node,
11012 				      tree_cons (NULL_TREE, V4SF_type_node,
11014   tree void_ftype_pfloat_v4sf
11015     = build_function_type (void_type_node,
11016 			   tree_cons (NULL_TREE, pfloat_type_node,
11017 				      tree_cons (NULL_TREE, V4SF_type_node,
11019   tree void_ftype_pdi_di
11020     = build_function_type (void_type_node,
11021 			   tree_cons (NULL_TREE, pdi_type_node,
11022 				      tree_cons (NULL_TREE,
11023 						 long_long_unsigned_type_node,
11025   /* Normal vector unops.  */
11026   tree v4sf_ftype_v4sf
11027     = build_function_type (V4SF_type_node,
11028 			   tree_cons (NULL_TREE, V4SF_type_node,
11031   /* Normal vector binops.  */
11032   tree v4sf_ftype_v4sf_v4sf
11033     = build_function_type (V4SF_type_node,
11034 			   tree_cons (NULL_TREE, V4SF_type_node,
11035 				      tree_cons (NULL_TREE, V4SF_type_node,
11037   tree v8qi_ftype_v8qi_v8qi
11038     = build_function_type (V8QI_type_node,
11039 			   tree_cons (NULL_TREE, V8QI_type_node,
11040 				      tree_cons (NULL_TREE, V8QI_type_node,
11042   tree v4hi_ftype_v4hi_v4hi
11043     = build_function_type (V4HI_type_node,
11044 			   tree_cons (NULL_TREE, V4HI_type_node,
11045 				      tree_cons (NULL_TREE, V4HI_type_node,
11047   tree v2si_ftype_v2si_v2si
11048     = build_function_type (V2SI_type_node,
11049 			   tree_cons (NULL_TREE, V2SI_type_node,
11050 				      tree_cons (NULL_TREE, V2SI_type_node,
11052   tree ti_ftype_ti_ti
11053     = build_function_type (intTI_type_node,
11054 			   tree_cons (NULL_TREE, intTI_type_node,
11055 				      tree_cons (NULL_TREE, intTI_type_node,
11057   tree di_ftype_di_di
11058     = build_function_type (long_long_unsigned_type_node,
11059 			   tree_cons (NULL_TREE, long_long_unsigned_type_node,
11060 				      tree_cons (NULL_TREE,
11061 						 long_long_unsigned_type_node,
      /* 3DNow! operand types (V2SF-based).  */
11064   tree v2si_ftype_v2sf
11065     = build_function_type (V2SI_type_node,
11066 			   tree_cons (NULL_TREE, V2SF_type_node,
11068   tree v2sf_ftype_v2si
11069     = build_function_type (V2SF_type_node,
11070 			   tree_cons (NULL_TREE, V2SI_type_node,
11072   tree v2si_ftype_v2si
11073     = build_function_type (V2SI_type_node,
11074 			   tree_cons (NULL_TREE, V2SI_type_node,
11076   tree v2sf_ftype_v2sf
11077     = build_function_type (V2SF_type_node,
11078 			   tree_cons (NULL_TREE, V2SF_type_node,
11080   tree v2sf_ftype_v2sf_v2sf
11081     = build_function_type (V2SF_type_node,
11082 			   tree_cons (NULL_TREE, V2SF_type_node,
11083 				      tree_cons (NULL_TREE,
11086   tree v2si_ftype_v2sf_v2sf
11087     = build_function_type (V2SI_type_node,
11088 			   tree_cons (NULL_TREE, V2SF_type_node,
11089 				      tree_cons (NULL_TREE,
11093   tree void_ftype_pchar
11094     = build_function_type (void_type_node,
11095 			   tree_cons (NULL_TREE, pchar_type_node,
11098   /* Add all builtins that are more or less simple operations on two
      operands, choosing the function type from the insn's operand mode.  */
11100   for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
11102       /* Use one of the operands; the target can have a different mode for
11103 	 mask-generating compares.  */
11104       enum machine_mode mode;
11109       mode = insn_data[d->icode].operand[1].mode;
11114 	  type = v4sf_ftype_v4sf_v4sf;
11117 	  type = v8qi_ftype_v8qi_v8qi;
11120 	  type = v4hi_ftype_v4hi_v4hi;
11123 	  type = v2si_ftype_v2si_v2si;
11126 	  type = ti_ftype_ti_ti;
11129 	  type = di_ftype_di_di;
11136       /* Override for comparisons.  */
11137       if (d->icode == CODE_FOR_maskcmpv4sf3
11138 	  || d->icode == CODE_FOR_maskncmpv4sf3
11139 	  || d->icode == CODE_FOR_vmmaskcmpv4sf3
11140 	  || d->icode == CODE_FOR_vmmaskncmpv4sf3)
11141 	type = v4si_ftype_v4sf_v4sf;
11143       def_builtin (d->mask, d->name, type, d->code);
11146   /* Add the remaining MMX insns with somewhat more complicated types.  */
11147   def_builtin (MASK_MMX, "__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
11148   def_builtin (MASK_MMX, "__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
11149   def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
11150   def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
11151   def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
11152   def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
      /* MMX shifts: one builtin per direction/width taking a DImode count.  */
11153   def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
11154   def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
11155   def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
11157   def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
11158   def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
11159   def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
11161   def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
11162   def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
11164   def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
11165   def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
11167   /* comi/ucomi insns.  */
11168   for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
11169     def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
      /* Pack and convert builtins (the nameless bdesc_2arg entries).  */
11171   def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
11172   def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
11173   def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
11175   def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
11176   def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
11177   def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
11178   def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
11179   def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
11180   def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
11182   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
11183   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
11185   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
      /* SSE load/store builtins.  */
11187   def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
11188   def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
11189   def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
11190   def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
11191   def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
11192   def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
11194   def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
11195   def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
11196   def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
11197   def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
11199   def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
11200   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
11201   def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
11202   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
11204   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
11205   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
11207   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
11209   def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
11210   def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
11211   def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
11212   def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
11213   def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
11214   def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
11216   def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
11218   /* Original 3DNow!  */
11219   def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
11220   def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
11221   def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
11222   def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
11223   def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
11224   def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
11225   def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
11226   def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
11227   def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
11228   def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
11229   def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
11230   def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
11231   def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
11232   def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
11233   def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
11234   def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
11235   def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
11236   def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
11237   def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
11238   def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
11239   def_builtin (MASK_3DNOW, "__builtin_ia32_prefetch_3dnow", void_ftype_pchar, IX86_BUILTIN_PREFETCH_3DNOW);
11240   def_builtin (MASK_3DNOW, "__builtin_ia32_prefetchw", void_ftype_pchar, IX86_BUILTIN_PREFETCHW);
11242   /* 3DNow! extension as used in the Athlon CPU.  */
11243   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
11244   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
11245   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
11246   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
11247   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
11248   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
11250   /* Composite intrinsics.  */
11251   def_builtin (MASK_SSE, "__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
11252   def_builtin (MASK_SSE, "__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
11253   def_builtin (MASK_SSE, "__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
11254   def_builtin (MASK_SSE, "__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
11255   def_builtin (MASK_SSE, "__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
11256   def_builtin (MASK_SSE, "__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
11257   def_builtin (MASK_SSE, "__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
11260 /* Errors in the source file can cause expand_expr to return const0_rtx
11261 where we expect a vector.  To avoid crashing, use one of the vector
11262 clear instructions.  X is the operand; MODE the vector mode we
      expected.  Returns X itself when it is already a vector, otherwise a
      freshly zeroed register of MODE.  */
11264 safe_vector_operand (x, mode)
11266      enum machine_mode mode;
      /* Anything other than const0_rtx is assumed to be a usable vector.  */
11268   if (x != const0_rtx)
11270   x = gen_reg_rtx (mode);
      /* Clear via the MMX or SSE clear pattern depending on the register
	 class MODE lives in; subreg to DImode/TImode when needed so the
	 clear insn sees its expected mode.  */
11272   if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
11273     emit_insn (gen_mmx_clrdi (mode == DImode ? x
11274 			      : gen_rtx_SUBREG (DImode, x, 0)));
11276     emit_insn (gen_sse_clrti (mode == TImode ? x
11277 			      : gen_rtx_SUBREG (TImode, x, 0)));
11281 /* Subroutine of ix86_expand_builtin to take care of binop insns.
      ICODE is the insn pattern to emit, ARGLIST the builtin's two
      arguments, TARGET the suggested result rtx (may be replaced).
      Returns the rtx holding the result.  */
11284 ix86_expand_binop_builtin (icode, arglist, target)
11285      enum insn_code icode;
11290   tree arg0 = TREE_VALUE (arglist);
11291   tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11292   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11293   rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11294   enum machine_mode tmode = insn_data[icode].operand[0].mode;
11295   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11296   enum machine_mode mode1 = insn_data[icode].operand[2].mode;
      /* Guard against error-induced const0_rtx vector operands.  */
11298   if (VECTOR_MODE_P (mode0))
11299     op0 = safe_vector_operand (op0, mode0);
11300   if (VECTOR_MODE_P (mode1))
11301     op1 = safe_vector_operand (op1, mode1);
      /* Make sure TARGET is a fresh register acceptable to the pattern.  */
11304       || GET_MODE (target) != tmode
11305       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11306     target = gen_reg_rtx (tmode);
11308   /* In case the insn wants input operands in modes different from
11309      the result, abort.  */
11310   if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
      /* Force operands the pattern's predicates reject into registers.  */
11313   if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11314     op0 = copy_to_mode_reg (mode0, op0);
11315   if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11316     op1 = copy_to_mode_reg (mode1, op1);
11318   pat = GEN_FCN (icode) (target, op0, op1);
11325 /* Subroutine of ix86_expand_builtin to take care of stores. */
/* Expand a store builtin: ARG0 is the destination pointer (wrapped in a MEM),
   ARG1 the value.  When SHUFFLE >= 0 the value is first permuted with
   sse_shufps using SHUFFLE as the immediate mask (used by the composite
   storeps1/storerps intrinsics); SHUFFLE == -1 means a plain store.
   NOTE(review): gap-sampled -- return type, braces, the `if (shuffle >= 0)`
   guard before the shufps emit, and the final emit/return are missing.  */
11328 ix86_expand_store_builtin (icode, arglist, shuffle)
11329 enum insn_code icode;
11334 tree arg0 = TREE_VALUE (arglist);
11335 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11336 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11337 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11338 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
11339 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
11341 if (VECTOR_MODE_P (mode1))
11342 op1 = safe_vector_operand (op1, mode1);
/* Destination: memory at the pointer value of ARG0.  */
11344 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
/* A shuffle clobbers its operand, so always copy to a fresh reg then.  */
11345 if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11346 op1 = copy_to_mode_reg (mode1, op1);
11348 emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
11349 pat = GEN_FCN (icode) (op0, op1);
11355 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
/* Expand a one-operand builtin.  When DO_LOAD is nonzero, ARG0 is a pointer
   and the operand becomes a MEM at that address (load intrinsics); otherwise
   the operand is predicate-checked and copied to a register as needed.
   NOTE(review): gap-sampled -- return type, the start of the TARGET test,
   the else introducing the non-load path, and the emit/return are missing.  */
11358 ix86_expand_unop_builtin (icode, arglist, target, do_load)
11359 enum insn_code icode;
11365 tree arg0 = TREE_VALUE (arglist);
11366 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11367 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11368 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11371 || GET_MODE (target) != tmode
11372 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11373 target = gen_reg_rtx (tmode);
/* do_load path: dereference the pointer argument.  */
11375 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11378 if (VECTOR_MODE_P (mode0))
11379 op0 = safe_vector_operand (op0, mode0);
11381 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11382 op0 = copy_to_mode_reg (mode0, op0);
11385 pat = GEN_FCN (icode) (target, op0);
11392 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
11393 sqrtss, rsqrtss, rcpss. */
/* Like ix86_expand_unop_builtin, but the scalar SSE patterns take the source
   twice (the insn merges the scalar result into the upper elements of the
   same operand), hence GEN_FCN (icode) (target, op0, op0) at the end.
   NOTE(review): gap-sampled -- return type, start of TARGET test and the
   emit/return lines are missing from view.  */
11396 ix86_expand_unop1_builtin (icode, arglist, target)
11397 enum insn_code icode;
11402 tree arg0 = TREE_VALUE (arglist);
11403 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11404 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11405 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11408 || GET_MODE (target) != tmode
11409 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11410 target = gen_reg_rtx (tmode);
11412 if (VECTOR_MODE_P (mode0))
11413 op0 = safe_vector_operand (op0, mode0);
11415 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11416 op0 = copy_to_mode_reg (mode0, op0);
/* Source operand passed twice -- see the function comment.  */
11418 pat = GEN_FCN (icode) (target, op0, op0);
11425 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expand an SSE vector comparison described by D (icode + rtx comparison
   code).  The comparison code is wrapped in an rtx (op2) and handed to the
   mask-compare pattern along with both operands.
   NOTE(review): gap-sampled -- return type, the operand-swap condition and
   body (only the tmp copy is visible), the start of the TARGET test and the
   emit/return lines are missing.  */
11428 ix86_expand_sse_compare (d, arglist, target)
11429 const struct builtin_description *d;
11434 tree arg0 = TREE_VALUE (arglist);
11435 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11436 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11437 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11439 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
11440 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
11441 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
11442 enum rtx_code comparison = d->comparison;
11444 if (VECTOR_MODE_P (mode0))
11445 op0 = safe_vector_operand (op0, mode0);
11446 if (VECTOR_MODE_P (mode1))
11447 op1 = safe_vector_operand (op1, mode1);
11449 /* Swap operands if we have a comparison that isn't available in
/* (swap body is partially out of view; tmp holds a copy of op1 during it).  */
11453 rtx tmp = gen_reg_rtx (mode1);
11454 emit_move_insn (tmp, op1);
11460 || GET_MODE (target) != tmode
11461 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
11462 target = gen_reg_rtx (tmode);
11464 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
11465 op0 = copy_to_mode_reg (mode0, op0);
11466 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
11467 op1 = copy_to_mode_reg (mode1, op1);
/* op2 carries the comparison code itself into the pattern.  */
11469 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
11470 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
11477 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expand a comiss/ucomiss-style builtin: emit the flag-setting compare, then
   materialize the boolean result by setting the low QImode part of an
   SImode target from the FLAGS register via STRICT_LOW_PART.
   NOTE(review): gap-sampled -- return type, the operand-swap block, the
   TARGET suitability test preceding the gen_reg_rtx (SImode) line, and the
   final return are missing from view.  */
11480 ix86_expand_sse_comi (d, arglist, target)
11481 const struct builtin_description *d;
11486 tree arg0 = TREE_VALUE (arglist);
11487 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11488 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11489 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11491 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
11492 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
11493 enum rtx_code comparison = d->comparison;
11495 if (VECTOR_MODE_P (mode0))
11496 op0 = safe_vector_operand (op0, mode0);
11497 if (VECTOR_MODE_P (mode1))
11498 op1 = safe_vector_operand (op1, mode1);
11500 /* Swap operands if we have a comparison that isn't available in
/* Result register: zero the full SImode reg, then write only its QImode
   low part below so the upper bits stay defined.  */
11509 target = gen_reg_rtx (SImode);
11510 emit_move_insn (target, const0_rtx);
11511 target = gen_rtx_SUBREG (QImode, target, 0);
11513 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
11514 op0 = copy_to_mode_reg (mode0, op0);
11515 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
11516 op1 = copy_to_mode_reg (mode1, op1);
11518 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
11519 pat = GEN_FCN (d->icode) (op0, op1, op2);
/* Convert the flags produced by the compare into a 0/1 value.  */
11523 emit_insn (gen_rtx_SET (VOIDmode,
11524 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
11525 gen_rtx_fmt_ee (comparison, QImode,
11526 gen_rtx_REG (CCmode, FLAGS_REG),
11532 /* Expand an expression EXP that calls a built-in function,
11533 with result going to TARGET if that's convenient
11534 (and in mode MODE if that's convenient).
11535 SUBTARGET may be used as the target for computing one of EXP's operands.
11536 IGNORE is nonzero if the value is to be ignored. */
/* Central dispatcher for IX86_BUILTIN_* codes: special-cased builtins are
   expanded inline in the switch; everything else is looked up in the
   bdesc_2arg / bdesc_1arg / bdesc_comi tables at the bottom and routed to
   the matching helper above.
   NOTE(review): this excerpt is gap-sampled -- the switch statement itself,
   many `break`/`return target` lines, brace pairs and some predicate-test
   openings are not visible; the code lines below are kept byte-identical.  */
11539 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
11542 rtx subtarget ATTRIBUTE_UNUSED;
11543 enum machine_mode mode ATTRIBUTE_UNUSED;
11544 int ignore ATTRIBUTE_UNUSED;
11546 const struct builtin_description *d;
11548 enum insn_code icode;
/* EXP is a CALL_EXPR: operand 0 is the (addr of the) FUNCTION_DECL,
   operand 1 the argument TREE_LIST.  */
11549 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
11550 tree arglist = TREE_OPERAND (exp, 1);
11551 tree arg0, arg1, arg2, arg3;
11552 rtx op0, op1, op2, pat;
11553 enum machine_mode tmode, mode0, mode1, mode2;
11554 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
/* No-operand state-management builtins.  */
11558 case IX86_BUILTIN_EMMS:
11559 emit_insn (gen_emms ());
11562 case IX86_BUILTIN_SFENCE:
11563 emit_insn (gen_sfence ());
/* Bit-copy int<->__m64 conversions via SImode subregs of a DImode reg.  */
11566 case IX86_BUILTIN_M_FROM_INT:
11567 target = gen_reg_rtx (DImode);
11568 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
11569 emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
11572 case IX86_BUILTIN_M_TO_INT:
11573 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
11574 op0 = copy_to_mode_reg (DImode, op0);
11575 target = gen_reg_rtx (SImode);
11576 emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
/* pextrw: second argument must be an immediate selector.  */
11579 case IX86_BUILTIN_PEXTRW:
11580 icode = CODE_FOR_mmx_pextrw;
11581 arg0 = TREE_VALUE (arglist);
11582 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11583 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11584 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11585 tmode = insn_data[icode].operand[0].mode;
11586 mode0 = insn_data[icode].operand[1].mode;
11587 mode1 = insn_data[icode].operand[2].mode;
11589 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11590 op0 = copy_to_mode_reg (mode0, op0);
11591 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11593 /* @@@ better error message */
11594 error ("selector must be an immediate");
11598 || GET_MODE (target) != tmode
11599 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11600 target = gen_reg_rtx (tmode);
11601 pat = GEN_FCN (icode) (target, op0, op1);
/* pinsrw: three operands, third must be an immediate selector.  */
11607 case IX86_BUILTIN_PINSRW:
11608 icode = CODE_FOR_mmx_pinsrw;
11609 arg0 = TREE_VALUE (arglist);
11610 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11611 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11612 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11613 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11614 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11615 tmode = insn_data[icode].operand[0].mode;
11616 mode0 = insn_data[icode].operand[1].mode;
11617 mode1 = insn_data[icode].operand[2].mode;
11618 mode2 = insn_data[icode].operand[3].mode;
11620 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11621 op0 = copy_to_mode_reg (mode0, op0);
11622 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11623 op1 = copy_to_mode_reg (mode1, op1);
11624 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
11626 /* @@@ better error message */
11627 error ("selector must be an immediate")
11631 || GET_MODE (target) != tmode
11632 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11633 target = gen_reg_rtx (tmode);
11634 pat = GEN_FCN (icode) (target, op0, op1, op2);
11640 case IX86_BUILTIN_MASKMOVQ:
11641 icode = CODE_FOR_mmx_maskmovq;
11642 /* Note the arg order is different from the operand order. */
11643 arg1 = TREE_VALUE (arglist);
11644 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
11645 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11646 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11647 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11648 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11649 mode0 = insn_data[icode].operand[0].mode;
11650 mode1 = insn_data[icode].operand[1].mode;
11651 mode2 = insn_data[icode].operand[2].mode;
/* NOTE(review): the first predicate test below uses operand[1] for op0 while
   its mode is mode0 from operand[0] -- looks like a possible index slip, but
   the surrounding lines are missing so leaving byte-identical; verify against
   the upstream i386.c.  */
11653 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11654 op0 = copy_to_mode_reg (mode0, op0);
11655 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11656 op1 = copy_to_mode_reg (mode1, op1);
11657 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
11658 op2 = copy_to_mode_reg (mode2, op2)
11659 pat = GEN_FCN (icode) (op0, op1, op2);
/* Scalar SSE unops that need the source operand duplicated.  */
11665 case IX86_BUILTIN_SQRTSS:
11666 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
11667 case IX86_BUILTIN_RSQRTSS:
11668 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
11669 case IX86_BUILTIN_RCPSS:
11670 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
/* Loads/stores route through the generic helpers; do_load/shuffle flags.  */
11672 case IX86_BUILTIN_LOADAPS:
11673 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
11675 case IX86_BUILTIN_LOADUPS:
11676 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
11678 case IX86_BUILTIN_STOREAPS:
11679 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
11680 case IX86_BUILTIN_STOREUPS:
11681 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);
11683 case IX86_BUILTIN_LOADSS:
11684 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
11686 case IX86_BUILTIN_STORESS:
11687 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
11689 case IX86_BUILTIN_LOADHPS:
11690 case IX86_BUILTIN_LOADLPS:
11691 icode = (fcode == IX86_BUILTIN_LOADHPS
11692 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
11693 arg0 = TREE_VALUE (arglist);
11694 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11695 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11696 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11697 tmode = insn_data[icode].operand[0].mode;
11698 mode0 = insn_data[icode].operand[1].mode;
11699 mode1 = insn_data[icode].operand[2].mode;
11701 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11702 op0 = copy_to_mode_reg (mode0, op0);
/* Second operand is a memory load through the pointer argument.  */
11703 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
11705 || GET_MODE (target) != tmode
11706 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11707 target = gen_reg_rtx (tmode);
11708 pat = GEN_FCN (icode) (target, op0, op1);
11714 case IX86_BUILTIN_STOREHPS:
11715 case IX86_BUILTIN_STORELPS:
11716 icode = (fcode == IX86_BUILTIN_STOREHPS
11717 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
11718 arg0 = TREE_VALUE (arglist);
11719 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11720 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11721 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11722 mode0 = insn_data[icode].operand[1].mode;
11723 mode1 = insn_data[icode].operand[2].mode;
11725 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11726 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11727 op1 = copy_to_mode_reg (mode1, op1);
/* movhps/movlps pattern wants the memory both as dest and first source.  */
11729 pat = GEN_FCN (icode) (op0, op0, op1);
11735 case IX86_BUILTIN_MOVNTPS:
11736 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
11737 case IX86_BUILTIN_MOVNTQ:
11738 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);
/* MXCSR access bounces through a stack slot.  */
11740 case IX86_BUILTIN_LDMXCSR:
11741 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
11742 target = assign_386_stack_local (SImode, 0);
11743 emit_move_insn (target, op0);
11744 emit_insn (gen_ldmxcsr (target));
11747 case IX86_BUILTIN_STMXCSR:
11748 target = assign_386_stack_local (SImode, 0);
11749 emit_insn (gen_stmxcsr (target));
11750 return copy_to_mode_reg (SImode, target);
11752 case IX86_BUILTIN_PREFETCH:
11753 icode = CODE_FOR_prefetch_sse;
11754 arg0 = TREE_VALUE (arglist);
11755 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11756 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11757 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11758 mode0 = insn_data[icode].operand[0].mode;
11759 mode1 = insn_data[icode].operand[1].mode;
11761 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11763 /* @@@ better error message */
11764 error ("selector must be an immediate");
11768 op0 = copy_to_mode_reg (Pmode, op0);
11769 pat = GEN_FCN (icode) (op0, op1);
11775 case IX86_BUILTIN_SHUFPS:
11776 icode = CODE_FOR_sse_shufps;
11777 arg0 = TREE_VALUE (arglist);
11778 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11779 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11780 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11781 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11782 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11783 tmode = insn_data[icode].operand[0].mode;
11784 mode0 = insn_data[icode].operand[1].mode;
11785 mode1 = insn_data[icode].operand[2].mode;
11786 mode2 = insn_data[icode].operand[3].mode;
11788 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11789 op0 = copy_to_mode_reg (mode0, op0);
11790 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11791 op1 = copy_to_mode_reg (mode1, op1);
11792 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
11794 /* @@@ better error message */
11795 error ("mask must be an immediate");
11799 || GET_MODE (target) != tmode
11800 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11801 target = gen_reg_rtx (tmode);
11802 pat = GEN_FCN (icode) (target, op0, op1, op2);
11808 case IX86_BUILTIN_PSHUFW:
11809 icode = CODE_FOR_mmx_pshufw;
11810 arg0 = TREE_VALUE (arglist);
11811 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11812 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11813 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11814 tmode = insn_data[icode].operand[0].mode;
11815 mode1 = insn_data[icode].operand[1].mode;
11816 mode2 = insn_data[icode].operand[2].mode;
11818 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
11819 op0 = copy_to_mode_reg (mode1, op0);
11820 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
11822 /* @@@ better error message */
11823 error ("mask must be an immediate");
11827 || GET_MODE (target) != tmode
11828 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11829 target = gen_reg_rtx (tmode);
11830 pat = GEN_FCN (icode) (target, op0, op1);
/* 3DNow! builtins.  */
11836 case IX86_BUILTIN_FEMMS:
11837 emit_insn (gen_femms ());
11840 case IX86_BUILTIN_PAVGUSB:
11841 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
11843 case IX86_BUILTIN_PF2ID:
11844 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
11846 case IX86_BUILTIN_PFACC:
11847 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
11849 case IX86_BUILTIN_PFADD:
11850 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
11852 case IX86_BUILTIN_PFCMPEQ:
11853 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
11855 case IX86_BUILTIN_PFCMPGE:
11856 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
11858 case IX86_BUILTIN_PFCMPGT:
11859 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
11861 case IX86_BUILTIN_PFMAX:
11862 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
11864 case IX86_BUILTIN_PFMIN:
11865 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
11867 case IX86_BUILTIN_PFMUL:
11868 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
11870 case IX86_BUILTIN_PFRCP:
11871 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
11873 case IX86_BUILTIN_PFRCPIT1:
11874 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
11876 case IX86_BUILTIN_PFRCPIT2:
11877 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
11879 case IX86_BUILTIN_PFRSQIT1:
11880 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
11882 case IX86_BUILTIN_PFRSQRT:
11883 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
11885 case IX86_BUILTIN_PFSUB:
11886 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
11888 case IX86_BUILTIN_PFSUBR:
11889 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
11891 case IX86_BUILTIN_PI2FD:
11892 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
11894 case IX86_BUILTIN_PMULHRW:
11895 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
11897 case IX86_BUILTIN_PREFETCH_3DNOW:
11898 case IX86_BUILTIN_PREFETCHW:
11899 icode = CODE_FOR_prefetch_3dnow;
11900 arg0 = TREE_VALUE (arglist);
11901 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
/* Second operand distinguishes plain prefetch (0) from prefetchw (1).  */
11902 op1 = (fcode == IX86_BUILTIN_PREFETCH_3DNOW ? const0_rtx : const1_rtx);
11903 mode0 = insn_data[icode].operand[0].mode;
11904 pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0), op1);
/* 3DNow! extensions (Athlon).  */
11910 case IX86_BUILTIN_PF2IW:
11911 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
11913 case IX86_BUILTIN_PFNACC:
11914 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
11916 case IX86_BUILTIN_PFPNACC:
11917 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
11919 case IX86_BUILTIN_PI2FW:
11920 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
11922 case IX86_BUILTIN_PSWAPDSI:
11923 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
11925 case IX86_BUILTIN_PSWAPDSF:
11926 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
11928 /* Composite intrinsics. */
/* setps1: splat a scalar through a stack slot, loadss, then shufps(0).  */
11929 case IX86_BUILTIN_SETPS1:
11930 target = assign_386_stack_local (SFmode, 0);
11931 arg0 = TREE_VALUE (arglist);
11932 emit_move_insn (adjust_address (target, SFmode, 0),
11933 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
11934 op0 = gen_reg_rtx (V4SFmode);
11935 emit_insn (gen_sse_loadss (op0, adjust_address (target, V4SFmode, 0)));
11936 emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
/* setps: spill the four scalars to consecutive stack slots, movaps back.  */
11939 case IX86_BUILTIN_SETPS:
11940 target = assign_386_stack_local (V4SFmode, 0);
11941 arg0 = TREE_VALUE (arglist);
11942 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11943 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11944 arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
11945 emit_move_insn (adjust_address (target, SFmode, 0),
11946 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
11947 emit_move_insn (adjust_address (target, SFmode, 4),
11948 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
11949 emit_move_insn (adjust_address (target, SFmode, 8),
11950 expand_expr (arg2, NULL_RTX, VOIDmode, 0));
11951 emit_move_insn (adjust_address (target, SFmode, 12),
11952 expand_expr (arg3, NULL_RTX, VOIDmode, 0));
11953 op0 = gen_reg_rtx (V4SFmode);
11954 emit_insn (gen_sse_movaps (op0, target));
11957 case IX86_BUILTIN_CLRPS:
11958 target = gen_reg_rtx (TImode);
11959 emit_insn (gen_sse_clrti (target));
/* loadrps/loadps1: load, then permute into place with shufps.  */
11962 case IX86_BUILTIN_LOADRPS:
11963 target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
11964 gen_reg_rtx (V4SFmode), 1);
11965 emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
11968 case IX86_BUILTIN_LOADPS1:
11969 target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
11970 gen_reg_rtx (V4SFmode), 1);
11971 emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
11974 case IX86_BUILTIN_STOREPS1:
11975 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
11976 case IX86_BUILTIN_STORERPS:
11977 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);
11979 case IX86_BUILTIN_MMX_ZERO:
11980 target = gen_reg_rtx (DImode);
11981 emit_insn (gen_mmx_clrdi (target));
/* Table-driven fallback: binary ops (compares specially routed), then
   unary ops, then comi-style comparisons.  */
11988 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
11989 if (d->code == fcode)
11991 /* Compares are treated specially. */
11992 if (d->icode == CODE_FOR_maskcmpv4sf3
11993 || d->icode == CODE_FOR_vmmaskcmpv4sf3
11994 || d->icode == CODE_FOR_maskncmpv4sf3
11995 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
11996 return ix86_expand_sse_compare (d, arglist, target);
11998 return ix86_expand_binop_builtin (d->icode, arglist, target);
12001 for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
12002 if (d->code == fcode)
12003 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
12005 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
12006 if (d->code == fcode)
12007 return ix86_expand_sse_comi (d, arglist, target);
12009 /* @@@ Should really do something sensible here. */
12013 /* Store OPERAND to the memory after reload is completed. This means
12014 that we can't easily use assign_stack_local. */
/* Spill OPERAND of MODE to memory post-reload and return a MEM for it.
   With a 64-bit red zone the slot lives just below the stack pointer;
   otherwise the value is pushed (pair of SImode pushes for DImode on
   32-bit, SImode push for promoted HImode, etc.).
   NOTE(review): gap-sampled -- the return type, the abort for
   !reload_completed, several push emits, switch structure and the final
   return are not visible here.  */
12016 ix86_force_to_memory (mode, operand)
12017 enum machine_mode mode;
/* Must only run after reload: stack layout is fixed by then.  */
12021 if (!reload_completed)
12023 if (TARGET_64BIT && TARGET_RED_ZONE)
/* Red zone: address below the stack pointer is safe to use directly.  */
12025 result = gen_rtx_MEM (mode,
12026 gen_rtx_PLUS (Pmode,
12028 GEN_INT (-RED_ZONE_SIZE)));
12029 emit_move_insn (result, operand);
12031 else if (TARGET_64BIT && !TARGET_RED_ZONE)
/* No red zone: push the (DImode lowpart of the) value instead.  */
12037 operand = gen_lowpart (DImode, operand);
12041 gen_rtx_SET (VOIDmode,
12042 gen_rtx_MEM (DImode,
12043 gen_rtx_PRE_DEC (DImode,
12044 stack_pointer_rtx)),
12050 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit DImode: split into two SImode pushes (high then low word order
   determined by split_di).  */
12059 split_di (&operand, 1, operands, operands + 1);
12061 gen_rtx_SET (VOIDmode,
12062 gen_rtx_MEM (SImode,
12063 gen_rtx_PRE_DEC (Pmode,
12064 stack_pointer_rtx)),
12067 gen_rtx_SET (VOIDmode,
12068 gen_rtx_MEM (SImode,
12069 gen_rtx_PRE_DEC (Pmode,
12070 stack_pointer_rtx)),
12075 /* It is better to store HImodes as SImodes. */
12076 if (!TARGET_PARTIAL_REG_STALL)
12077 operand = gen_lowpart (SImode, operand);
12081 gen_rtx_SET (VOIDmode,
12082 gen_rtx_MEM (GET_MODE (operand),
12083 gen_rtx_PRE_DEC (SImode,
12084 stack_pointer_rtx)),
12090 result = gen_rtx_MEM (mode, stack_pointer_rtx);
12095 /* Free operand from the memory. */
/* Undo ix86_force_to_memory: pop the pushed slot by adjusting the stack
   pointer.  A red-zone store needed no push, hence the !RED_ZONE guard.
   NOTE(review): gap-sampled -- the return type, the `size` assignments the
   two mode tests feed, and parts of the LEA emit are missing.  */
12097 ix86_free_from_memory (mode)
12098 enum machine_mode mode;
12100 if (!TARGET_64BIT || !TARGET_RED_ZONE)
/* Slot size depends on how the value was pushed (8 for DImode/64-bit,
   2 only for HImode kept narrow under partial-reg-stall, else 4).  */
12104 if (mode == DImode || TARGET_64BIT)
12106 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
12110 /* Use LEA to deallocate stack space. In peephole2 it will be converted
12111 to pop or add instruction if registers are available. */
12112 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12113 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12118 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
12119 QImode must go into class Q_REGS.
12120 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
12121 movdf to do mem-to-mem moves through integer regs. */
/* Implements PREFERRED_RELOAD_CLASS: given value X about to be reloaded into
   CLASS, return the class actually worth using (possibly NO_REGS to force
   the constant pool).
   NOTE(review): gap-sampled -- the return type and most of the individual
   `return`/`else` lines inside the branches are not visible.  */
12123 ix86_preferred_reload_class (x, class)
12125 enum reg_class class;
/* Non-trivial FP constants: steer away from SSE (can't load them) and
   prefer general regs / constant pool.  */
12127 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
12129 /* SSE can't load any constant directly yet. */
12130 if (SSE_CLASS_P (class))
12132 /* Floats can load 0 and 1. */
12133 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
12135 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
12136 if (MAYBE_SSE_CLASS_P (class))
12137 return (reg_class_subset_p (class, GENERAL_REGS)
12138 ? GENERAL_REGS : FLOAT_REGS);
12142 /* General regs can load everything. */
12143 if (reg_class_subset_p (class, GENERAL_REGS))
12144 return GENERAL_REGS;
12145 /* In case we haven't resolved FLOAT or SSE yet, give up. */
12146 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
/* MMX regs can't hold arbitrary constants either.  */
12149 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
/* QImode values must land in byte-addressable registers.  */
12151 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
12156 /* If we are copying between general and FP registers, we need a memory
12157 location. The same is true for SSE and MMX registers.
12159 The macro can't work reliably when one of the CLASSES is class containing
12160 registers from multiple units (SSE, MMX, integer). We avoid this by never
12161 combining those units in single alternative in the machine description.
12162 Ensure that this constraint holds to avoid unexpected surprises.
12164 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
12165 enforce these sanity checks. */
/* Implements SECONDARY_MEMORY_NEEDED: nonzero when a CLASS1->CLASS2 copy of
   MODE must bounce through memory.  STRICT gates the mixed-unit sanity
   check (the abort path lies in the lines missing from this excerpt).
   NOTE(review): gap-sampled -- return type, the STRICT abort body and the
   brace structure are not visible.  */
12167 ix86_secondary_memory_needed (class1, class2, mode, strict)
12168 enum reg_class class1, class2;
12169 enum machine_mode mode;
/* Any class mixing units (MAYBE_* true but exact-unit false) violates the
   invariant documented above.  */
12172 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
12173 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
12174 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
12175 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
12176 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
12177 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
/* Cross-unit copies need memory, except SImode moves between SSE/MMX and
   integer regs (movd handles those directly).  */
12184 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
12185 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
12186 && (mode) != SImode)
12187 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12188 && (mode) != SImode));
12190 /* Return the cost of moving data from a register in class CLASS1 to
12191 one in class CLASS2.
12193 It is not required that the cost always equal 2 when FROM is the same as TO;
12194 on some machines it is expensive to move between registers if they are not
12195 general registers. */
/* Implements REGISTER_MOVE_COST using the per-processor ix86_cost tables.
   NOTE(review): gap-sampled -- return type, the add_cost setup around the
   CLASS_MAX_NREGS test, and the final general-register return are missing.  */
12197 ix86_register_move_cost (mode, class1, class2)
12198 enum machine_mode mode;
12199 enum reg_class class1, class2;
12201 /* In case we require secondary memory, compute cost of the store followed
12202 by load. In case of copying from general_purpose_register we may emit
12203 multiple stores followed by single load causing memory size mismatch
12204 stall. Count this as arbitarily high cost of 20. */
/* STRICT=0: costing only, don't enforce the mixed-unit sanity aborts.  */
12205 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
12208 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
12210 return (MEMORY_MOVE_COST (mode, class1, 0)
12211 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
12213 /* Moves between SSE/MMX and integer unit are expensive. */
12214 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12215 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
12216 return ix86_cost->mmxsse_to_integer;
12217 if (MAYBE_FLOAT_CLASS_P (class1))
12218 return ix86_cost->fp_move;
12219 if (MAYBE_SSE_CLASS_P (class1))
12220 return ix86_cost->sse_move;
12221 if (MAYBE_MMX_CLASS_P (class1))
12222 return ix86_cost->mmx_move;
12226 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* Implements HARD_REGNO_MODE_OK: flag regs take only CCmode, FP/SSE/MMX regs
   take their unit's valid modes, general regs take int and FP scalar modes,
   with a special case restricting QImode to the low four regs in 32-bit mode
   when partial-register stalls matter.
   NOTE(review): gap-sampled -- return type, braces and a few `return 0/1`
   lines between the tests are missing from view.  */
12228 ix86_hard_regno_mode_ok (regno, mode)
12230 enum machine_mode mode;
12232 /* Flags and only flags can only hold CCmode values. */
12233 if (CC_REGNO_P (regno))
12234 return GET_MODE_CLASS (mode) == MODE_CC;
/* CC / RANDOM / PARTIAL_INT modes belong nowhere else.  */
12235 if (GET_MODE_CLASS (mode) == MODE_CC
12236 || GET_MODE_CLASS (mode) == MODE_RANDOM
12237 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
12239 if (FP_REGNO_P (regno))
12240 return VALID_FP_MODE_P (mode);
12241 if (SSE_REGNO_P (regno))
12242 return VALID_SSE_REG_MODE (mode);
12243 if (MMX_REGNO_P (regno))
12244 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
12245 /* We handle both integer and floats in the general purpose registers.
12246 In future we should be able to handle vector modes as well. */
12247 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
12249 /* Take care for QImode values - they can be in non-QI regs, but then
12250 they do cause partial register stalls. */
12251 if (regno < 4 || mode != QImode || TARGET_64BIT)
12253 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
12256 /* Return the cost of moving data of mode M between a
12257 register and memory. A value of 2 is the default; this cost is
12258 relative to those in `REGISTER_MOVE_COST'.
12260 If moving between registers and memory is more expensive than
12261 between two registers, you should define this macro to express the
12264 Model also increased moving costs of QImode registers in non
/* Implements MEMORY_MOVE_COST from the ix86_cost load/store tables, indexed
   by unit (FP / SSE / MMX / integer) and mode size.  IN nonzero means a
   load, zero a store.
   NOTE(review): gap-sampled -- return type, the `index` computations fed by
   the switch statements, switch cases and several returns are not visible.  */
12268 ix86_memory_move_cost (mode, class, in)
12269 enum machine_mode mode;
12270 enum reg_class class;
12273 if (FLOAT_CLASS_P (class))
12291 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
12293 if (SSE_CLASS_P (class))
12296 switch (GET_MODE_SIZE (mode))
12310 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
12312 if (MMX_CLASS_P (class))
12315 switch (GET_MODE_SIZE (mode))
12326 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer classes: cost keyed on operand size.  */
12328 switch (GET_MODE_SIZE (mode))
/* Byte loads outside Q_REGS are modeled as movzbl; byte stores outside
   Q_REGS carry an extra penalty of 4.  */
12332 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
12333 : ix86_cost->movzbl_load);
12335 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
12336 : ix86_cost->int_store[0] + 4);
12339 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
12341 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
12342 if (mode == TFmode)
12344 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
12345 * (int) GET_MODE_SIZE (mode) / 4);
12349 #ifdef DO_GLOBAL_CTORS_BODY
12351 ix86_svr3_asm_out_constructor (symbol, priority)
12353 int priority ATTRIBUTE_UNUSED;
12356 fputs ("\tpushl $", asm_out_file);
12357 assemble_name (asm_out_file, XSTR (symbol, 0));
12358 fputc ('\n', asm_out_file);