1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
28 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "basic-block.h"
44 #include "target-def.h"
45 #include "langhooks.h"
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
51 /* Processor costs (relative to an add) */
53 struct processor_costs size_cost = { /* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of storing fp registers */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
88 /* Processor costs (relative to an add) */
90 struct processor_costs i386_cost = { /* 386 specific costs */
91 1, /* cost of an add instruction */
92 1, /* cost of a lea instruction */
93 3, /* variable shift costs */
94 2, /* constant shift costs */
95 6, /* cost of starting a multiply */
96 1, /* cost of multiply per each bit set */
97 23, /* cost of a divide/mod */
98 3, /* cost of movsx */
99 2, /* cost of movzx */
100 15, /* "large" insn */
102 4, /* cost for loading QImode using movzbl */
103 {2, 4, 2}, /* cost of loading integer registers
104 in QImode, HImode and SImode.
105 Relative to reg-reg move (2). */
106 {2, 4, 2}, /* cost of storing integer registers */
107 2, /* cost of reg,reg fld/fst */
108 {8, 8, 8}, /* cost of loading fp registers
109 in SFmode, DFmode and XFmode */
110 {8, 8, 8}, /* cost of storing fp registers */
111 2, /* cost of moving MMX register */
112 {4, 8}, /* cost of loading MMX registers
113 in SImode and DImode */
114 {4, 8}, /* cost of storing MMX registers
115 in SImode and DImode */
116 2, /* cost of moving SSE register */
117 {4, 8, 16}, /* cost of loading SSE registers
118 in SImode, DImode and TImode */
119 {4, 8, 16}, /* cost of storing SSE registers
120 in SImode, DImode and TImode */
121 3, /* MMX or SSE register to integer */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
127 struct processor_costs i486_cost = { /* 486 specific costs */
128 1, /* cost of an add instruction */
129 1, /* cost of a lea instruction */
130 3, /* variable shift costs */
131 2, /* constant shift costs */
132 12, /* cost of starting a multiply */
133 1, /* cost of multiply per each bit set */
134 40, /* cost of a divide/mod */
135 3, /* cost of movsx */
136 2, /* cost of movzx */
137 15, /* "large" insn */
139 4, /* cost for loading QImode using movzbl */
140 {2, 4, 2}, /* cost of loading integer registers
141 in QImode, HImode and SImode.
142 Relative to reg-reg move (2). */
143 {2, 4, 2}, /* cost of storing integer registers */
144 2, /* cost of reg,reg fld/fst */
145 {8, 8, 8}, /* cost of loading fp registers
146 in SFmode, DFmode and XFmode */
147 {8, 8, 8}, /* cost of storing fp registers */
148 2, /* cost of moving MMX register */
149 {4, 8}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {4, 8}, /* cost of storing MMX registers
152 in SImode and DImode */
153 2, /* cost of moving SSE register */
154 {4, 8, 16}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {4, 8, 16}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of prefetch block */
160 0, /* number of parallel prefetches */
164 struct processor_costs pentium_cost = {
165 1, /* cost of an add instruction */
166 1, /* cost of a lea instruction */
167 4, /* variable shift costs */
168 1, /* constant shift costs */
169 11, /* cost of starting a multiply */
170 0, /* cost of multiply per each bit set */
171 25, /* cost of a divide/mod */
172 3, /* cost of movsx */
173 2, /* cost of movzx */
174 8, /* "large" insn */
176 6, /* cost for loading QImode using movzbl */
177 {2, 4, 2}, /* cost of loading integer registers
178 in QImode, HImode and SImode.
179 Relative to reg-reg move (2). */
180 {2, 4, 2}, /* cost of storing integer registers */
181 2, /* cost of reg,reg fld/fst */
182 {2, 2, 6}, /* cost of loading fp registers
183 in SFmode, DFmode and XFmode */
184 {4, 4, 6}, /* cost of storing fp registers */
185 8, /* cost of moving MMX register */
186 {8, 8}, /* cost of loading MMX registers
187 in SImode and DImode */
188 {8, 8}, /* cost of storing MMX registers
189 in SImode and DImode */
190 2, /* cost of moving SSE register */
191 {4, 8, 16}, /* cost of loading SSE registers
192 in SImode, DImode and TImode */
193 {4, 8, 16}, /* cost of storing SSE registers
194 in SImode, DImode and TImode */
195 3, /* MMX or SSE register to integer */
196 0, /* size of prefetch block */
197 0, /* number of parallel prefetches */
201 struct processor_costs pentiumpro_cost = {
202 1, /* cost of an add instruction */
203 1, /* cost of a lea instruction */
204 1, /* variable shift costs */
205 1, /* constant shift costs */
206 4, /* cost of starting a multiply */
207 0, /* cost of multiply per each bit set */
208 17, /* cost of a divide/mod */
209 1, /* cost of movsx */
210 1, /* cost of movzx */
211 8, /* "large" insn */
213 2, /* cost for loading QImode using movzbl */
214 {4, 4, 4}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 2, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {2, 2, 6}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
221 {4, 4, 6}, /* cost of storing fp registers */
222 2, /* cost of moving MMX register */
223 {2, 2}, /* cost of loading MMX registers
224 in SImode and DImode */
225 {2, 2}, /* cost of storing MMX registers
226 in SImode and DImode */
227 2, /* cost of moving SSE register */
228 {2, 2, 8}, /* cost of loading SSE registers
229 in SImode, DImode and TImode */
230 {2, 2, 8}, /* cost of storing SSE registers
231 in SImode, DImode and TImode */
232 3, /* MMX or SSE register to integer */
233 32, /* size of prefetch block */
234 6, /* number of parallel prefetches */
238 struct processor_costs k6_cost = {
239 1, /* cost of an add instruction */
240 2, /* cost of a lea instruction */
241 1, /* variable shift costs */
242 1, /* constant shift costs */
243 3, /* cost of starting a multiply */
244 0, /* cost of multiply per each bit set */
245 18, /* cost of a divide/mod */
246 2, /* cost of movsx */
247 2, /* cost of movzx */
248 8, /* "large" insn */
250 3, /* cost for loading QImode using movzbl */
251 {4, 5, 4}, /* cost of loading integer registers
252 in QImode, HImode and SImode.
253 Relative to reg-reg move (2). */
254 {2, 3, 2}, /* cost of storing integer registers */
255 4, /* cost of reg,reg fld/fst */
256 {6, 6, 6}, /* cost of loading fp registers
257 in SFmode, DFmode and XFmode */
258 {4, 4, 4}, /* cost of storing fp registers */
259 2, /* cost of moving MMX register */
260 {2, 2}, /* cost of loading MMX registers
261 in SImode and DImode */
262 {2, 2}, /* cost of storing MMX registers
263 in SImode and DImode */
264 2, /* cost of moving SSE register */
265 {2, 2, 8}, /* cost of loading SSE registers
266 in SImode, DImode and TImode */
267 {2, 2, 8}, /* cost of storing SSE registers
268 in SImode, DImode and TImode */
269 6, /* MMX or SSE register to integer */
270 32, /* size of prefetch block */
271 1, /* number of parallel prefetches */
275 struct processor_costs athlon_cost = {
276 1, /* cost of an add instruction */
277 2, /* cost of a lea instruction */
278 1, /* variable shift costs */
279 1, /* constant shift costs */
280 5, /* cost of starting a multiply */
281 0, /* cost of multiply per each bit set */
282 42, /* cost of a divide/mod */
283 1, /* cost of movsx */
284 1, /* cost of movzx */
285 8, /* "large" insn */
287 4, /* cost for loading QImode using movzbl */
288 {4, 5, 4}, /* cost of loading integer registers
289 in QImode, HImode and SImode.
290 Relative to reg-reg move (2). */
291 {2, 3, 2}, /* cost of storing integer registers */
292 4, /* cost of reg,reg fld/fst */
293 {6, 6, 20}, /* cost of loading fp registers
294 in SFmode, DFmode and XFmode */
295 {4, 4, 16}, /* cost of storing fp registers */
296 2, /* cost of moving MMX register */
297 {2, 2}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {2, 2}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {2, 2, 8}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {2, 2, 8}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
306 6, /* MMX or SSE register to integer */
307 64, /* size of prefetch block */
308 6, /* number of parallel prefetches */
312 struct processor_costs pentium4_cost = {
313 1, /* cost of an add instruction */
314 1, /* cost of a lea instruction */
315 8, /* variable shift costs */
316 8, /* constant shift costs */
317 30, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 112, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 16, /* "large" insn */
324 2, /* cost for loading QImode using movzbl */
325 {4, 5, 4}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {2, 3, 2}, /* cost of storing integer registers */
329 2, /* cost of reg,reg fld/fst */
330 {2, 2, 6}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {4, 4, 6}, /* cost of storing fp registers */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 12, /* cost of moving SSE register */
339 {12, 12, 12}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 10, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
348 const struct processor_costs *ix86_cost = &pentium_cost;
350 /* Processor feature/optimization bitmasks. */
351 #define m_386 (1<<PROCESSOR_I386)
352 #define m_486 (1<<PROCESSOR_I486)
353 #define m_PENT (1<<PROCESSOR_PENTIUM)
354 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
355 #define m_K6 (1<<PROCESSOR_K6)
356 #define m_ATHLON (1<<PROCESSOR_ATHLON)
357 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
359 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
360 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
361 const int x86_zero_extend_with_and = m_486 | m_PENT;
362 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
363 const int x86_double_with_add = ~m_386;
364 const int x86_use_bit_test = m_386;
365 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
366 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
367 const int x86_3dnow_a = m_ATHLON;
368 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
369 const int x86_branch_hints = m_PENT4;
370 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
371 const int x86_partial_reg_stall = m_PPRO;
372 const int x86_use_loop = m_K6;
373 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
374 const int x86_use_mov0 = m_K6;
375 const int x86_use_cltd = ~(m_PENT | m_K6);
376 const int x86_read_modify_write = ~m_PENT;
377 const int x86_read_modify = ~(m_PENT | m_PPRO);
378 const int x86_split_long_moves = m_PPRO;
379 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
380 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
381 const int x86_single_stringop = m_386 | m_PENT4;
382 const int x86_qimode_math = ~(0);
383 const int x86_promote_qi_regs = 0;
384 const int x86_himode_math = ~(m_PPRO);
385 const int x86_promote_hi_regs = m_PPRO;
386 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
387 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
388 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
389 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
390 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
391 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
392 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
393 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
394 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
395 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
396 const int x86_decompose_lea = m_PENT4;
397 const int x86_shift1 = ~m_486;
398 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
400 /* In case the average insn count for single function invocation is
401 lower than this constant, emit fast (but longer) prologue and
403 #define FAST_PROLOGUE_INSN_COUNT 30
405 /* Set by prologue expander and used by epilogue expander to determine
407 static int use_fast_prologue_epilogue;
409 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
410 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
411 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
412 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
414 /* Array of the smallest class containing reg number REGNO, indexed by
415 REGNO. Used by REGNO_REG_CLASS in i386.h. */
417 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
420 AREG, DREG, CREG, BREG,
422 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
424 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
425 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
428 /* flags, fpsr, dirflag, frame */
429 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
430 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
432 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
434 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
435 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
436 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
440 /* The "default" register map used in 32bit mode. */
442 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
444 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
445 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
446 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
447 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
448 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
449 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
450 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
453 static int const x86_64_int_parameter_registers[6] =
455 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
456 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
459 static int const x86_64_int_return_registers[4] =
461 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
464 /* The "default" register map used in 64bit mode. */
465 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
467 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
468 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
469 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
470 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
471 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
472 8,9,10,11,12,13,14,15, /* extended integer registers */
473 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
476 /* Define the register numbers to be used in Dwarf debugging information.
477 The SVR4 reference port C compiler uses the following register numbers
478 in its Dwarf output code:
479 0 for %eax (gcc regno = 0)
480 1 for %ecx (gcc regno = 2)
481 2 for %edx (gcc regno = 1)
482 3 for %ebx (gcc regno = 3)
483 4 for %esp (gcc regno = 7)
484 5 for %ebp (gcc regno = 6)
485 6 for %esi (gcc regno = 4)
486 7 for %edi (gcc regno = 5)
487 The following three DWARF register numbers are never generated by
488 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
489 believes these numbers have these meanings.
490 8 for %eip (no gcc equivalent)
491 9 for %eflags (gcc regno = 17)
492 10 for %trapno (no gcc equivalent)
493 It is not at all clear how we should number the FP stack registers
494 for the x86 architecture. If the version of SDB on x86/svr4 were
495 a bit less brain dead with respect to floating-point then we would
496 have a precedent to follow with respect to DWARF register numbers
497 for x86 FP registers, but the SDB on x86/svr4 is so completely
498 broken with respect to FP registers that it is hardly worth thinking
499 of it as something to strive for compatibility with.
500 The version of x86/svr4 SDB I have at the moment does (partially)
501 seem to believe that DWARF register number 11 is associated with
502 the x86 register %st(0), but that's about all. Higher DWARF
503 register numbers don't seem to be associated with anything in
504 particular, and even for DWARF regno 11, SDB only seems to under-
505 stand that it should say that a variable lives in %st(0) (when
506 asked via an `=' command) if we said it was in DWARF regno 11,
507 but SDB still prints garbage when asked for the value of the
508 variable in question (via a `/' command).
509 (Also note that the labels SDB prints for various FP stack regs
510 when doing an `x' command are all wrong.)
511 Note that these problems generally don't affect the native SVR4
512 C compiler because it doesn't allow the use of -O with -g and
513 because when it is *not* optimizing, it allocates a memory
514 location for each floating-point variable, and the memory
515 location is what gets described in the DWARF AT_location
516 attribute for the variable in question.
517 Regardless of the severe mental illness of the x86/svr4 SDB, we
518 do something sensible here and we use the following DWARF
519 register numbers. Note that these are all stack-top-relative
521 11 for %st(0) (gcc regno = 8)
522 12 for %st(1) (gcc regno = 9)
523 13 for %st(2) (gcc regno = 10)
524 14 for %st(3) (gcc regno = 11)
525 15 for %st(4) (gcc regno = 12)
526 16 for %st(5) (gcc regno = 13)
527 17 for %st(6) (gcc regno = 14)
528 18 for %st(7) (gcc regno = 15)
530 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
532 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
533 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
534 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
535 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
536 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
537 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
538 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
541 /* Test and compare insns in i386.md store the information needed to
542 generate branch and scc insns here. */
544 rtx ix86_compare_op0 = NULL_RTX;
545 rtx ix86_compare_op1 = NULL_RTX;
547 /* The encoding characters for the four TLS models present in ELF. */
549 static char const tls_model_chars[] = " GLil";
551 #define MAX_386_STACK_LOCALS 3
552 /* Size of the register save area. */
553 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
555 /* Define the structure for the machine field in struct function. */
556 struct machine_function GTY(())
558 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
559 const char *some_ld_name;
560 int save_varrargs_registers;
561 int accesses_prev_frame;
564 #define ix86_stack_locals (cfun->machine->stack_locals)
565 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
567 /* Structure describing stack frame layout.
568 Stack grows downward:
574 saved frame pointer if frame_pointer_needed
575 <- HARD_FRAME_POINTER
581 > to_allocate <- FRAME_POINTER
593 int outgoing_arguments_size;
596 HOST_WIDE_INT to_allocate;
597 /* The offsets relative to ARG_POINTER. */
598 HOST_WIDE_INT frame_pointer_offset;
599 HOST_WIDE_INT hard_frame_pointer_offset;
600 HOST_WIDE_INT stack_pointer_offset;
603 /* Used to enable/disable debugging features. */
604 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
605 /* Code model option as passed by user. */
606 const char *ix86_cmodel_string;
608 enum cmodel ix86_cmodel;
610 const char *ix86_asm_string;
611 enum asm_dialect ix86_asm_dialect = ASM_ATT;
613 const char *ix86_tls_dialect_string;
614 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
616 /* Which unit we are generating floating point math for. */
617 enum fpmath_unit ix86_fpmath;
619 /* Which cpu are we scheduling for. */
620 enum processor_type ix86_cpu;
621 /* Which instruction set architecture to use. */
622 enum processor_type ix86_arch;
624 /* Strings to hold which cpu and instruction set architecture to use. */
625 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
626 const char *ix86_arch_string; /* for -march=<xxx> */
627 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
629 /* # of registers to use to pass arguments. */
630 const char *ix86_regparm_string;
632 /* true if sse prefetch instruction is not NOOP. */
633 int x86_prefetch_sse;
635 /* ix86_regparm_string as a number */
638 /* Alignment to use for loops and jumps: */
640 /* Power of two alignment for loops. */
641 const char *ix86_align_loops_string;
643 /* Power of two alignment for non-loop jumps. */
644 const char *ix86_align_jumps_string;
646 /* Power of two alignment for stack boundary in bytes. */
647 const char *ix86_preferred_stack_boundary_string;
649 /* Preferred alignment for stack boundary in bits. */
650 int ix86_preferred_stack_boundary;
652 /* Values 1-5: see jump.c */
653 int ix86_branch_cost;
654 const char *ix86_branch_cost_string;
656 /* Power of two alignment for functions. */
657 const char *ix86_align_funcs_string;
659 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
660 static char internal_label_prefix[16];
661 static int internal_label_prefix_len;
663 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
664 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
665 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
666 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
668 static const char *get_some_local_dynamic_name PARAMS ((void));
669 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
670 static rtx maybe_get_pool_constant PARAMS ((rtx));
671 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
672 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
674 static rtx get_thread_pointer PARAMS ((void));
675 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
676 static rtx gen_push PARAMS ((rtx));
677 static int memory_address_length PARAMS ((rtx addr));
678 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
679 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
680 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
681 static void ix86_dump_ppro_packet PARAMS ((FILE *));
682 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
683 static struct machine_function * ix86_init_machine_status PARAMS ((void));
684 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
685 static int ix86_nsaved_regs PARAMS ((void));
686 static void ix86_emit_save_regs PARAMS ((void));
687 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
688 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
689 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
690 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
691 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
692 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
693 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
694 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
695 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
696 static int ix86_issue_rate PARAMS ((void));
697 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
698 static void ix86_sched_init PARAMS ((FILE *, int, int));
699 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
700 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
701 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
702 static int ia32_multipass_dfa_lookahead PARAMS ((void));
703 static void ix86_init_mmx_sse_builtins PARAMS ((void));
707 rtx base, index, disp;
711 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
713 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
714 static const char *ix86_strip_name_encoding PARAMS ((const char *))
717 struct builtin_description;
718 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
720 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
722 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
723 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
724 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
725 static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
727 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
728 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
729 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
730 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
734 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
736 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
737 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
738 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
739 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
740 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
741 static int ix86_save_reg PARAMS ((unsigned int, int));
742 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
743 static int ix86_comp_type_attributes PARAMS ((tree, tree));
744 const struct attribute_spec ix86_attribute_table[];
745 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
746 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
747 static int ix86_value_regno PARAMS ((enum machine_mode));
749 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
750 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
753 /* Register class used for passing given 64bit part of the argument.
754 These represent classes as documented by the PS ABI, with the exception
755 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
756 use SF or DFmode move instead of DImode to avoid reformatting penalties.
758 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
759 whenever possible (upper half does contain padding).
761 enum x86_64_reg_class
764 X86_64_INTEGER_CLASS,
765 X86_64_INTEGERSI_CLASS,
774 static const char * const x86_64_reg_class_name[] =
775 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
777 #define MAX_CLASSES 4
778 static int classify_argument PARAMS ((enum machine_mode, tree,
779 enum x86_64_reg_class [MAX_CLASSES],
781 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
783 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
785 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
786 enum x86_64_reg_class));
788 /* Initialize the GCC target structure. */
789 #undef TARGET_ATTRIBUTE_TABLE
790 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
791 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
792 # undef TARGET_MERGE_DECL_ATTRIBUTES
793 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
796 #undef TARGET_COMP_TYPE_ATTRIBUTES
797 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
799 #undef TARGET_INIT_BUILTINS
800 #define TARGET_INIT_BUILTINS ix86_init_builtins
802 #undef TARGET_EXPAND_BUILTIN
803 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
805 #undef TARGET_ASM_FUNCTION_EPILOGUE
806 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
808 #undef TARGET_ASM_OPEN_PAREN
809 #define TARGET_ASM_OPEN_PAREN ""
810 #undef TARGET_ASM_CLOSE_PAREN
811 #define TARGET_ASM_CLOSE_PAREN ""
813 #undef TARGET_ASM_ALIGNED_HI_OP
814 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
815 #undef TARGET_ASM_ALIGNED_SI_OP
816 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
818 #undef TARGET_ASM_ALIGNED_DI_OP
819 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
822 #undef TARGET_ASM_UNALIGNED_HI_OP
823 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
824 #undef TARGET_ASM_UNALIGNED_SI_OP
825 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
826 #undef TARGET_ASM_UNALIGNED_DI_OP
827 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
829 #undef TARGET_SCHED_ADJUST_COST
830 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
831 #undef TARGET_SCHED_ISSUE_RATE
832 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
833 #undef TARGET_SCHED_VARIABLE_ISSUE
834 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
835 #undef TARGET_SCHED_INIT
836 #define TARGET_SCHED_INIT ix86_sched_init
837 #undef TARGET_SCHED_REORDER
838 #define TARGET_SCHED_REORDER ix86_sched_reorder
839 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
840 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
841 ia32_use_dfa_pipeline_interface
842 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
843 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
844 ia32_multipass_dfa_lookahead
847 #undef TARGET_HAVE_TLS
848 #define TARGET_HAVE_TLS true
851 struct gcc_target targetm = TARGET_INITIALIZER;
853 /* Sometimes certain combinations of command options do not make
854 sense on a particular target machine. You can define a macro
855 `OVERRIDE_OPTIONS' to take account of this. This macro, if
856 defined, is executed once just after all the command options have
859 Don't use this macro to turn on various extra optimizations for
860 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
866 /* Comes from final.c -- no real reason to change it. */
867 #define MAX_CODE_ALIGN 16
871 const struct processor_costs *cost; /* Processor costs */
872 const int target_enable; /* Target flags to enable. */
873 const int target_disable; /* Target flags to disable. */
874 const int align_loop; /* Default alignments. */
875 const int align_loop_max_skip;
876 const int align_jump;
877 const int align_jump_max_skip;
878 const int align_func;
879 const int branch_cost;
881 const processor_target_table[PROCESSOR_max] =
883 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
884 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
885 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
886 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
887 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
888 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
889 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
892 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
895 const char *const name; /* processor name or nickname. */
896 const enum processor_type processor;
902 PTA_PREFETCH_SSE = 8,
907 const processor_alias_table[] =
909 {"i386", PROCESSOR_I386, 0},
910 {"i486", PROCESSOR_I486, 0},
911 {"i586", PROCESSOR_PENTIUM, 0},
912 {"pentium", PROCESSOR_PENTIUM, 0},
913 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
914 {"i686", PROCESSOR_PENTIUMPRO, 0},
915 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
916 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
917 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
918 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
919 PTA_MMX | PTA_PREFETCH_SSE},
920 {"k6", PROCESSOR_K6, PTA_MMX},
921 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
922 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
923 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
925 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
926 | PTA_3DNOW | PTA_3DNOW_A},
927 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
928 | PTA_3DNOW_A | PTA_SSE},
929 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
930 | PTA_3DNOW_A | PTA_SSE},
931 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
932 | PTA_3DNOW_A | PTA_SSE},
935 int const pta_size = ARRAY_SIZE (processor_alias_table);
937 #ifdef SUBTARGET_OVERRIDE_OPTIONS
938 SUBTARGET_OVERRIDE_OPTIONS;
941 if (!ix86_cpu_string && ix86_arch_string)
942 ix86_cpu_string = ix86_arch_string;
943 if (!ix86_cpu_string)
944 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
945 if (!ix86_arch_string)
946 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
948 if (ix86_cmodel_string != 0)
950 if (!strcmp (ix86_cmodel_string, "small"))
951 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
953 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
954 else if (!strcmp (ix86_cmodel_string, "32"))
956 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
957 ix86_cmodel = CM_KERNEL;
958 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
959 ix86_cmodel = CM_MEDIUM;
960 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
961 ix86_cmodel = CM_LARGE;
963 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
969 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
971 if (ix86_asm_string != 0)
973 if (!strcmp (ix86_asm_string, "intel"))
974 ix86_asm_dialect = ASM_INTEL;
975 else if (!strcmp (ix86_asm_string, "att"))
976 ix86_asm_dialect = ASM_ATT;
978 error ("bad value (%s) for -masm= switch", ix86_asm_string);
980 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
981 error ("code model `%s' not supported in the %s bit mode",
982 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
983 if (ix86_cmodel == CM_LARGE)
984 sorry ("code model `large' not supported yet");
985 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
986 sorry ("%i-bit mode not compiled in",
987 (target_flags & MASK_64BIT) ? 64 : 32);
989 for (i = 0; i < pta_size; i++)
990 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
992 ix86_arch = processor_alias_table[i].processor;
993 /* Default cpu tuning to the architecture. */
994 ix86_cpu = ix86_arch;
995 if (processor_alias_table[i].flags & PTA_MMX
996 && !(target_flags & MASK_MMX_SET))
997 target_flags |= MASK_MMX;
998 if (processor_alias_table[i].flags & PTA_3DNOW
999 && !(target_flags & MASK_3DNOW_SET))
1000 target_flags |= MASK_3DNOW;
1001 if (processor_alias_table[i].flags & PTA_3DNOW_A
1002 && !(target_flags & MASK_3DNOW_A_SET))
1003 target_flags |= MASK_3DNOW_A;
1004 if (processor_alias_table[i].flags & PTA_SSE
1005 && !(target_flags & MASK_SSE_SET))
1006 target_flags |= MASK_SSE;
1007 if (processor_alias_table[i].flags & PTA_SSE2
1008 && !(target_flags & MASK_SSE2_SET))
1009 target_flags |= MASK_SSE2;
1010 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1011 x86_prefetch_sse = true;
1016 error ("bad value (%s) for -march= switch", ix86_arch_string);
1018 for (i = 0; i < pta_size; i++)
1019 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1021 ix86_cpu = processor_alias_table[i].processor;
1024 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1025 x86_prefetch_sse = true;
1027 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1030 ix86_cost = &size_cost;
1032 ix86_cost = processor_target_table[ix86_cpu].cost;
1033 target_flags |= processor_target_table[ix86_cpu].target_enable;
1034 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1036 /* Arrange to set up i386_stack_locals for all functions. */
1037 init_machine_status = ix86_init_machine_status;
1039 /* Validate -mregparm= value. */
1040 if (ix86_regparm_string)
1042 i = atoi (ix86_regparm_string);
1043 if (i < 0 || i > REGPARM_MAX)
1044 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1050 ix86_regparm = REGPARM_MAX;
1052 /* If the user has provided any of the -malign-* options,
1053 warn and use that value only if -falign-* is not set.
1054 Remove this code in GCC 3.2 or later. */
1055 if (ix86_align_loops_string)
1057 warning ("-malign-loops is obsolete, use -falign-loops");
1058 if (align_loops == 0)
1060 i = atoi (ix86_align_loops_string);
1061 if (i < 0 || i > MAX_CODE_ALIGN)
1062 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1064 align_loops = 1 << i;
1068 if (ix86_align_jumps_string)
1070 warning ("-malign-jumps is obsolete, use -falign-jumps");
1071 if (align_jumps == 0)
1073 i = atoi (ix86_align_jumps_string);
1074 if (i < 0 || i > MAX_CODE_ALIGN)
1075 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1077 align_jumps = 1 << i;
1081 if (ix86_align_funcs_string)
1083 warning ("-malign-functions is obsolete, use -falign-functions");
1084 if (align_functions == 0)
1086 i = atoi (ix86_align_funcs_string);
1087 if (i < 0 || i > MAX_CODE_ALIGN)
1088 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1090 align_functions = 1 << i;
1094 /* Default align_* from the processor table. */
1095 if (align_loops == 0)
1097 align_loops = processor_target_table[ix86_cpu].align_loop;
1098 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1100 if (align_jumps == 0)
1102 align_jumps = processor_target_table[ix86_cpu].align_jump;
1103 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1105 if (align_functions == 0)
1107 align_functions = processor_target_table[ix86_cpu].align_func;
1110 /* Validate -mpreferred-stack-boundary= value, or provide default.
1111 The default of 128 bits is for Pentium III's SSE __m128, but we
1112 don't want additional code to keep the stack aligned when
1113 optimizing for code size. */
1114 ix86_preferred_stack_boundary = (optimize_size
1115 ? TARGET_64BIT ? 64 : 32
1117 if (ix86_preferred_stack_boundary_string)
1119 i = atoi (ix86_preferred_stack_boundary_string);
1120 if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
1121 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1122 TARGET_64BIT ? 3 : 2);
1124 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1127 /* Validate -mbranch-cost= value, or provide default. */
1128 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1129 if (ix86_branch_cost_string)
1131 i = atoi (ix86_branch_cost_string);
1133 error ("-mbranch-cost=%d is not between 0 and 5", i);
1135 ix86_branch_cost = i;
1138 if (ix86_tls_dialect_string)
1140 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1141 ix86_tls_dialect = TLS_DIALECT_GNU;
1142 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1143 ix86_tls_dialect = TLS_DIALECT_SUN;
1145 error ("bad value (%s) for -mtls-dialect= switch",
1146 ix86_tls_dialect_string);
1149 /* Keep nonleaf frame pointers. */
1150 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1151 flag_omit_frame_pointer = 1;
1153 /* If we're doing fast math, we don't care about comparison order
1154 wrt NaNs. This lets us use a shorter comparison sequence. */
1155 if (flag_unsafe_math_optimizations)
1156 target_flags &= ~MASK_IEEE_FP;
1158 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1159 since the insns won't need emulation. */
1160 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1161 target_flags &= ~MASK_NO_FANCY_MATH_387;
1165 if (TARGET_ALIGN_DOUBLE)
1166 error ("-malign-double makes no sense in the 64bit mode");
1168 error ("-mrtd calling convention not supported in the 64bit mode");
1169 /* Enable by default the SSE and MMX builtins. */
1170 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1171 ix86_fpmath = FPMATH_SSE;
1174 ix86_fpmath = FPMATH_387;
1176 if (ix86_fpmath_string != 0)
1178 if (! strcmp (ix86_fpmath_string, "387"))
1179 ix86_fpmath = FPMATH_387;
1180 else if (! strcmp (ix86_fpmath_string, "sse"))
1184 warning ("SSE instruction set disabled, using 387 arithmetics");
1185 ix86_fpmath = FPMATH_387;
1188 ix86_fpmath = FPMATH_SSE;
1190 else if (! strcmp (ix86_fpmath_string, "387,sse")
1191 || ! strcmp (ix86_fpmath_string, "sse,387"))
1195 warning ("SSE instruction set disabled, using 387 arithmetics");
1196 ix86_fpmath = FPMATH_387;
1198 else if (!TARGET_80387)
1200 warning ("387 instruction set disabled, using SSE arithmetics");
1201 ix86_fpmath = FPMATH_SSE;
1204 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1207 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1210 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1214 target_flags |= MASK_MMX;
1215 x86_prefetch_sse = true;
1218 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1221 target_flags |= MASK_MMX;
1222 /* If we are targetting the Athlon architecture, enable the 3Dnow/MMX
1223 extensions it adds. */
1224 if (x86_3dnow_a & (1 << ix86_arch))
1225 target_flags |= MASK_3DNOW_A;
1227 if ((x86_accumulate_outgoing_args & CPUMASK)
1228 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
1230 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1232 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1235 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1236 p = strchr (internal_label_prefix, 'X');
1237 internal_label_prefix_len = p - internal_label_prefix;
/* Set i386 defaults that depend on the -O level (this is the body
   of the OPTIMIZATION_OPTIONS macro -- see the comment at original
   line 860 above).
   NOTE(review): truncated listing -- the return type, the `level'
   parameter declaration, braces and several lines are missing from
   this extract.  */
1243 optimization_options (level, size)
1245 int size ATTRIBUTE_UNUSED;
1247 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1248 make the problem with not enough registers even worse. */
1249 #ifdef INSN_SCHEDULING
1251 flag_schedule_insns = 0;
/* Frame pointer is rarely needed in 64-bit mode when optimizing.  */
1253 if (TARGET_64BIT && optimize >= 1)
1254 flag_omit_frame_pointer = 1;
1257 flag_pcc_struct_return = 0;
1258 flag_asynchronous_unwind_tables = 1;
1262 /* Table of valid machine attributes. */
/* NOTE(review): truncated listing -- the opening brace of the
   initializer, the #endif matching the #ifdef below, and the
   closing `};' were dropped during extraction.  */
1263 const struct attribute_spec ix86_attribute_table[] =
1265 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1266 /* Stdcall attribute says callee is responsible for popping arguments
1267 if they are not variable. */
1268 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1269 /* Cdecl attribute says the callee is a normal C declaration */
1270 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1271 /* Regparm attribute specifies how many integer arguments are to be
1272 passed in registers. */
1273 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1274 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1275 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1276 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1277 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
/* Terminator entry.  */
1279 { NULL, 0, 0, false, false, false, NULL }
1282 /* Handle a "cdecl" or "stdcall" attribute;
1283 arguments as in struct attribute_spec.handler. */
/* NOTE(review): truncated listing -- the return type, some
   parameter declarations, braces and the closing lines are
   missing from this extract.  */
1285 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1288 tree args ATTRIBUTE_UNUSED;
1289 int flags ATTRIBUTE_UNUSED;
/* Only function-ish nodes may carry these calling-convention
   attributes; warn and drop the attribute otherwise.  */
1292 if (TREE_CODE (*node) != FUNCTION_TYPE
1293 && TREE_CODE (*node) != METHOD_TYPE
1294 && TREE_CODE (*node) != FIELD_DECL
1295 && TREE_CODE (*node) != TYPE_DECL)
1297 warning ("`%s' attribute only applies to functions",
1298 IDENTIFIER_POINTER (name));
1299 *no_add_attrs = true;
/* Reached on some other (truncated) condition -- presumably
   64-bit mode, where these attributes are meaningless; verify
   against the full source.  */
1304 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1305 *no_add_attrs = true;
1311 /* Handle a "regparm" attribute;
1312 arguments as in struct attribute_spec.handler. */
/* NOTE(review): truncated listing -- return type, some parameter
   declarations, braces and closing lines are missing.  */
1314 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1318 int flags ATTRIBUTE_UNUSED;
/* regparm only makes sense on function-ish nodes.  */
1321 if (TREE_CODE (*node) != FUNCTION_TYPE
1322 && TREE_CODE (*node) != METHOD_TYPE
1323 && TREE_CODE (*node) != FIELD_DECL
1324 && TREE_CODE (*node) != TYPE_DECL)
1326 warning ("`%s' attribute only applies to functions",
1327 IDENTIFIER_POINTER (name));
1328 *no_add_attrs = true;
/* Validate the single argument: must be an integer constant
   no larger than REGPARM_MAX.  */
1334 cst = TREE_VALUE (args);
1335 if (TREE_CODE (cst) != INTEGER_CST)
1337 warning ("`%s' attribute requires an integer constant argument",
1338 IDENTIFIER_POINTER (name));
1339 *no_add_attrs = true;
1341 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1343 warning ("argument to `%s' attribute larger than %d",
1344 IDENTIFIER_POINTER (name), REGPARM_MAX);
1345 *no_add_attrs = true;
1352 /* Return 0 if the attributes for two types are incompatible, 1 if they
1353 are compatible, and 2 if they are nearly compatible (which causes a
1354 warning to be generated). */
/* NOTE(review): truncated listing -- return type, parameter
   declarations, braces and the return statements are missing.  */
1357 ix86_comp_type_attributes (type1, type2)
1361 /* Check for mismatch of non-default calling convention. */
/* Under -mrtd the default convention is stdcall, so the
   non-default ("override") attribute to look for flips.  */
1362 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1364 if (TREE_CODE (type1) != FUNCTION_TYPE)
1367 /* Check for mismatched return types (cdecl vs stdcall). */
1368 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1369 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1374 /* Value is the number of bytes of arguments automatically
1375 popped when returning from a subroutine call.
1376 FUNDECL is the declaration node of the function (as a tree),
1377 FUNTYPE is the data type of the function (as a tree),
1378 or for a library call it is an identifier node for the subroutine name.
1379 SIZE is the number of bytes of arguments passed on the stack.
1381 On the 80386, the RTD insn may be used to pop them if the number
1382 of args is fixed, but if the number is variable then the caller
1383 must pop them all. RTD can't be used for library calls now
1384 because the library is compiled with the Unix compiler.
1385 Use of RTD is a selectable option, since it is incompatible with
1386 standard Unix calling sequences. If the option is not selected,
1387 the caller must always pop the args.
1389 The attribute stdcall is equivalent to RTD on a per module basis. */
/* NOTE(review): truncated listing -- return type, parameter
   declarations, braces and several return statements are missing.  */
1392 ix86_return_pops_args (fundecl, funtype, size)
/* Library calls (IDENTIFIER_NODE fundecl) never use RTD.  */
1397 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1399 /* Cdecl functions override -mrtd, and never pop the stack. */
1400 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1402 /* Stdcall functions will pop the stack if not variable args. */
1403 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
/* A prototype ending in void_type_node means the arg list is
   fixed, so the callee may pop.  */
1407 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1408 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1409 == void_type_node)))
1413 /* Lose any fake structure return argument if it is passed on the stack. */
1414 if (aggregate_value_p (TREE_TYPE (funtype))
1417 int nregs = ix86_regparm;
/* A regparm attribute on the type overrides the global default.  */
1421 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));
1424 nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
/* The hidden struct-return pointer occupies one word.  */
1428 return GET_MODE_SIZE (Pmode);
1434 /* Argument support functions. */
1436 /* Return true when register may be used to pass function parameters. */
/* NOTE(review): truncated listing -- return type, parameter
   declaration, braces and the TARGET_64BIT branch structure are
   partially missing.  */
1438 ix86_function_arg_regno_p (regno)
/* 32-bit case: low integer regs up to REGPARM_MAX, plus SSE regs
   when SSE is enabled and the register is not fixed.  */
1443 return (regno < REGPARM_MAX
1444 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1445 if (SSE_REGNO_P (regno) && TARGET_SSE)
1447 /* RAX is used as hidden argument to va_arg functions. */
/* 64-bit case: scan the ABI-defined integer parameter registers.  */
1450 for (i = 0; i < REGPARM_MAX; i++)
1451 if (regno == x86_64_int_parameter_registers[i])
1456 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1457 for a call to a function whose data type is FNTYPE.
1458 For a library call, FNTYPE is 0. */
/* NOTE(review): truncated listing -- return type, braces, the
   zero_cum assignment and some debug-print lines are missing.  */
1461 init_cumulative_args (cum, fntype, libname)
1462 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1463 tree fntype; /* tree ptr for function decl */
1464 rtx libname; /* SYMBOL_REF of library name or 0 */
1466 static CUMULATIVE_ARGS zero_cum;
1467 tree param, next_param;
1469 if (TARGET_DEBUG_ARG)
1471 fprintf (stderr, "\ninit_cumulative_args (");
1473 fprintf (stderr, "fntype code = %s, ret code = %s",
1474 tree_code_name[(int) TREE_CODE (fntype)],
1475 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1477 fprintf (stderr, "no fntype");
1480 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1485 /* Set up the number of registers to use for passing arguments. */
1486 cum->nregs = ix86_regparm;
1487 cum->sse_nregs = SSE_REGPARM_MAX;
/* In 32-bit mode a regparm attribute on the function type
   overrides the global -mregparm default.  */
1488 if (fntype && !TARGET_64BIT)
1490 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1493 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1495 cum->maybe_vaarg = false;
1497 /* Determine if this function has variable arguments. This is
1498 indicated by the last argument being 'void_type_mode' if there
1499 are no variable arguments. If there are variable arguments, then
1500 we won't pass anything in registers */
1504 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1505 param != 0; param = next_param)
1507 next_param = TREE_CHAIN (param);
/* Last entry not void_type_node => varargs function.  */
1508 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1512 cum->maybe_vaarg = true;
/* No prototype at all also counts as possibly-varargs.  */
1516 if ((!fntype && !libname)
1517 || (fntype && !TYPE_ARG_TYPES (fntype)))
1518 cum->maybe_vaarg = 1;
1520 if (TARGET_DEBUG_ARG)
1521 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1526 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1527 of this code is to classify each 8bytes of incoming argument by the register
1528 class and assign registers accordingly. */
1530 /* Return the union class of CLASS1 and CLASS2.
1531 See the x86-64 PS ABI for details. */
/* NOTE(review): truncated listing -- the opening brace and the
   bodies of several rules (their return statements) are missing.  */
1533 static enum x86_64_reg_class
1534 merge_classes (class1, class2)
1535 enum x86_64_reg_class class1, class2;
1537 /* Rule #1: If both classes are equal, this is the resulting class. */
1538 if (class1 == class2)
1541 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1543 if (class1 == X86_64_NO_CLASS)
1545 if (class2 == X86_64_NO_CLASS)
1548 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1549 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1550 return X86_64_MEMORY_CLASS;
1552 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI + SSESF stays INTEGERSI (both are 32-bit halves).  */
1553 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1554 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1555 return X86_64_INTEGERSI_CLASS;
1556 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1557 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1558 return X86_64_INTEGER_CLASS;
1560 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1561 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1562 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1563 return X86_64_MEMORY_CLASS;
1565 /* Rule #6: Otherwise class SSE is used. */
1566 return X86_64_SSE_CLASS;
1569 /* Classify the argument of type TYPE and mode MODE.
1570 CLASSES will be filled by the register class used to pass each word
1571 of the operand. The number of words is returned. In case the parameter
1572 should be passed in memory, 0 is returned. As a special case for zero
1573 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1575 BIT_OFFSET is used internally for handling records and specifies offset
1576 of the offset in bits modulo 256 to avoid overflow cases.
1578 See the x86-64 PS ABI for details.
1582 classify_argument (mode, type, classes, bit_offset)
1583 enum machine_mode mode;
1585 enum x86_64_reg_class classes[MAX_CLASSES];
1589 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1590 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1592 if (type && AGGREGATE_TYPE_P (type))
1596 enum x86_64_reg_class subclasses[MAX_CLASSES];
1598 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1602 for (i = 0; i < words; i++)
1603 classes[i] = X86_64_NO_CLASS;
1605 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1606 signalize memory class, so handle it as special case. */
1609 classes[0] = X86_64_NO_CLASS;
1613 /* Classify each field of record and merge classes. */
1614 if (TREE_CODE (type) == RECORD_TYPE)
1616 /* For classes first merge in the field of the subclasses. */
1617 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1619 tree bases = TYPE_BINFO_BASETYPES (type);
1620 int n_bases = TREE_VEC_LENGTH (bases);
1623 for (i = 0; i < n_bases; ++i)
1625 tree binfo = TREE_VEC_ELT (bases, i);
1627 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1628 tree type = BINFO_TYPE (binfo);
1630 num = classify_argument (TYPE_MODE (type),
1632 (offset + bit_offset) % 256);
1635 for (i = 0; i < num; i++)
1637 int pos = (offset + bit_offset) / 8 / 8;
1639 merge_classes (subclasses[i], classes[i + pos]);
1643 /* And now merge the fields of structure. */
1644 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1646 if (TREE_CODE (field) == FIELD_DECL)
1650 /* Bitfields are always classified as integer. Handle them
1651 early, since later code would consider them to be
1652 misaligned integers. */
1653 if (DECL_BIT_FIELD (field))
1655 for (i = int_bit_position (field) / 8 / 8;
1656 i < (int_bit_position (field)
1657 + tree_low_cst (DECL_SIZE (field), 0)
1660 merge_classes (X86_64_INTEGER_CLASS,
1665 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1666 TREE_TYPE (field), subclasses,
1667 (int_bit_position (field)
1668 + bit_offset) % 256);
1671 for (i = 0; i < num; i++)
1674 (int_bit_position (field) + bit_offset) / 8 / 8;
1676 merge_classes (subclasses[i], classes[i + pos]);
1682 /* Arrays are handled as small records. */
1683 else if (TREE_CODE (type) == ARRAY_TYPE)
1686 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1687 TREE_TYPE (type), subclasses, bit_offset);
1691 /* The partial classes are now full classes. */
1692 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1693 subclasses[0] = X86_64_SSE_CLASS;
1694 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1695 subclasses[0] = X86_64_INTEGER_CLASS;
1697 for (i = 0; i < words; i++)
1698 classes[i] = subclasses[i % num];
1700 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1701 else if (TREE_CODE (type) == UNION_TYPE
1702 || TREE_CODE (type) == QUAL_UNION_TYPE)
1704 /* For classes first merge in the field of the subclasses. */
1705 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1707 tree bases = TYPE_BINFO_BASETYPES (type);
1708 int n_bases = TREE_VEC_LENGTH (bases);
1711 for (i = 0; i < n_bases; ++i)
1713 tree binfo = TREE_VEC_ELT (bases, i);
1715 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1716 tree type = BINFO_TYPE (binfo);
1718 num = classify_argument (TYPE_MODE (type),
1720 (offset + bit_offset) % 256);
1723 for (i = 0; i < num; i++)
1725 int pos = (offset + bit_offset) / 8 / 8;
1727 merge_classes (subclasses[i], classes[i + pos]);
1731 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1733 if (TREE_CODE (field) == FIELD_DECL)
1736 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1737 TREE_TYPE (field), subclasses,
1741 for (i = 0; i < num; i++)
1742 classes[i] = merge_classes (subclasses[i], classes[i]);
1749 /* Final merger cleanup. */
1750 for (i = 0; i < words; i++)
1752 /* If one class is MEMORY, everything should be passed in
1754 if (classes[i] == X86_64_MEMORY_CLASS)
1757 /* The X86_64_SSEUP_CLASS should be always preceded by
1758 X86_64_SSE_CLASS. */
1759 if (classes[i] == X86_64_SSEUP_CLASS
1760 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1761 classes[i] = X86_64_SSE_CLASS;
1763 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1764 if (classes[i] == X86_64_X87UP_CLASS
1765 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1766 classes[i] = X86_64_SSE_CLASS;
1771 /* Compute alignment needed. We align all types to natural boundaries with
1772 exception of XFmode that is aligned to 64bits. */
1773 if (mode != VOIDmode && mode != BLKmode)
1775 int mode_alignment = GET_MODE_BITSIZE (mode);
1778 mode_alignment = 128;
1779 else if (mode == XCmode)
1780 mode_alignment = 256;
1781 /* Misaligned fields are always returned in memory. */
1782 if (bit_offset % mode_alignment)
1786 /* Classification of atomic types. */
1796 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1797 classes[0] = X86_64_INTEGERSI_CLASS;
1799 classes[0] = X86_64_INTEGER_CLASS;
1803 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1806 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1807 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1810 if (!(bit_offset % 64))
1811 classes[0] = X86_64_SSESF_CLASS;
1813 classes[0] = X86_64_SSE_CLASS;
1816 classes[0] = X86_64_SSEDF_CLASS;
1819 classes[0] = X86_64_X87_CLASS;
1820 classes[1] = X86_64_X87UP_CLASS;
1823 classes[0] = X86_64_X87_CLASS;
1824 classes[1] = X86_64_X87UP_CLASS;
1825 classes[2] = X86_64_X87_CLASS;
1826 classes[3] = X86_64_X87UP_CLASS;
1829 classes[0] = X86_64_SSEDF_CLASS;
1830 classes[1] = X86_64_SSEDF_CLASS;
1833 classes[0] = X86_64_SSE_CLASS;
1841 classes[0] = X86_64_SSE_CLASS;
1842 classes[1] = X86_64_SSEUP_CLASS;
1848 classes[0] = X86_64_SSE_CLASS;
1858 /* Examine the argument and return set number of register required in each
1859 class. Return 0 iff parameter should be passed in memory. */
/* NOTE(review): truncated listing -- return type, braces, the
   switch statement header and the increment/return statements
   inside the case arms are missing.  */
1861 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1862 enum machine_mode mode;
1864 int *int_nregs, *sse_nregs;
1867 enum x86_64_reg_class class[MAX_CLASSES];
1868 int n = classify_argument (mode, type, class, 0);
/* Walk the classified 8-byte chunks, counting integer vs SSE
   registers needed (counting code elided in this extract).  */
1874 for (n--; n >= 0; n--)
1877 case X86_64_INTEGER_CLASS:
1878 case X86_64_INTEGERSI_CLASS:
1881 case X86_64_SSE_CLASS:
1882 case X86_64_SSESF_CLASS:
1883 case X86_64_SSEDF_CLASS:
1886 case X86_64_NO_CLASS:
1887 case X86_64_SSEUP_CLASS:
1889 case X86_64_X87_CLASS:
1890 case X86_64_X87UP_CLASS:
1894 case X86_64_MEMORY_CLASS:
1899 /* Construct container for the argument used by GCC interface. See
1900 FUNCTION_ARG for the detailed description. */
1902 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1903 enum machine_mode mode;
1906 int nintregs, nsseregs;
1910 enum machine_mode tmpmode;
1912 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1913 enum x86_64_reg_class class[MAX_CLASSES];
1917 int needed_sseregs, needed_intregs;
1918 rtx exp[MAX_CLASSES];
1921 n = classify_argument (mode, type, class, 0);
1922 if (TARGET_DEBUG_ARG)
1925 fprintf (stderr, "Memory class\n");
1928 fprintf (stderr, "Classes:");
1929 for (i = 0; i < n; i++)
1931 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1933 fprintf (stderr, "\n");
1938 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1940 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1943 /* First construct simple cases. Avoid SCmode, since we want to use
1944 single register to pass this type. */
1945 if (n == 1 && mode != SCmode)
1948 case X86_64_INTEGER_CLASS:
1949 case X86_64_INTEGERSI_CLASS:
1950 return gen_rtx_REG (mode, intreg[0]);
1951 case X86_64_SSE_CLASS:
1952 case X86_64_SSESF_CLASS:
1953 case X86_64_SSEDF_CLASS:
1954 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1955 case X86_64_X87_CLASS:
1956 return gen_rtx_REG (mode, FIRST_STACK_REG);
1957 case X86_64_NO_CLASS:
1958 /* Zero sized array, struct or class. */
1963 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1964 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1966 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1967 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1968 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1969 && class[1] == X86_64_INTEGER_CLASS
1970 && (mode == CDImode || mode == TImode)
1971 && intreg[0] + 1 == intreg[1])
1972 return gen_rtx_REG (mode, intreg[0]);
1974 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1975 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1976 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1978 /* Otherwise figure out the entries of the PARALLEL. */
1979 for (i = 0; i < n; i++)
1983 case X86_64_NO_CLASS:
1985 case X86_64_INTEGER_CLASS:
1986 case X86_64_INTEGERSI_CLASS:
1987 /* Merge TImodes on aligned occassions here too. */
1988 if (i * 8 + 8 > bytes)
1989 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
1990 else if (class[i] == X86_64_INTEGERSI_CLASS)
1994 /* We've requested 24 bytes we don't have mode for. Use DImode. */
1995 if (tmpmode == BLKmode)
1997 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1998 gen_rtx_REG (tmpmode, *intreg),
2002 case X86_64_SSESF_CLASS:
2003 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2004 gen_rtx_REG (SFmode,
2005 SSE_REGNO (sse_regno)),
2009 case X86_64_SSEDF_CLASS:
2010 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2011 gen_rtx_REG (DFmode,
2012 SSE_REGNO (sse_regno)),
2016 case X86_64_SSE_CLASS:
2017 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
2018 tmpmode = TImode, i++;
2021 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2022 gen_rtx_REG (tmpmode,
2023 SSE_REGNO (sse_regno)),
2031 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2032 for (i = 0; i < nexps; i++)
2033 XVECEXP (ret, 0, i) = exp [i];
2037 /* Update the data in CUM to advance over an argument
2038 of mode MODE and data type TYPE.
2039 (TYPE is null for libcalls where that information may not be available.) */
/* NOTE(review): truncated listing -- return type, braces, and the
   TARGET_64BIT / 32-bit branch structure are partially missing.  */
2042 function_arg_advance (cum, mode, type, named)
2043 CUMULATIVE_ARGS *cum; /* current arg information */
2044 enum machine_mode mode; /* current arg mode */
2045 tree type; /* type of the argument or 0 if lib support */
2046 int named; /* whether or not the argument was named */
2049 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2050 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2052 if (TARGET_DEBUG_ARG)
2054 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2055 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
/* x86-64: consume the registers computed by examine_argument, or
   fall back to stack words when the argument goes in memory.  */
2058 int int_nregs, sse_nregs;
2059 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2060 cum->words += words;
2061 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2063 cum->nregs -= int_nregs;
2064 cum->sse_nregs -= sse_nregs;
2065 cum->regno += int_nregs;
2066 cum->sse_regno += sse_nregs;
2069 cum->words += words;
/* 32-bit: TImode SSE vectors consume one SSE register.  */
2073 if (TARGET_SSE && mode == TImode)
2075 cum->sse_words += words;
2076 cum->sse_nregs -= 1;
2077 cum->sse_regno += 1;
2078 if (cum->sse_nregs <= 0)
/* 32-bit integer args consume one register per word.  */
2086 cum->words += words;
2087 cum->nregs -= words;
2088 cum->regno += words;
2090 if (cum->nregs <= 0)
2100 /* Define where to put the arguments to a function.
2101 Value is zero to push the argument on the stack,
2102 or a hard register in which to store the argument.
2104 MODE is the argument's machine mode.
2105 TYPE is the data type of the argument (as a tree).
2106 This is null for libcalls where that information may
2108 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2109 the preceding args and about the function being called.
2110 NAMED is nonzero if this argument is a named parameter
2111 (otherwise it is an extra parameter matching an ellipsis). */
2114 function_arg (cum, mode, type, named)
2115 CUMULATIVE_ARGS *cum; /* current arg information */
2116 enum machine_mode mode; /* current arg mode */
2117 tree type; /* type of the argument or 0 if lib support */
2118 int named; /* != 0 for normal args, == 0 for ... args */
2122 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2123 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2125 /* Handle a hidden AL argument containing number of registers for varargs
2126 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2128 if (mode == VOIDmode)
2131 return GEN_INT (cum->maybe_vaarg
2132 ? (cum->sse_nregs < 0
/* x86-64: build the (reg, offset) PARALLEL describing where the
   argument lives (see construct_container).  */
2140 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2141 &x86_64_int_parameter_registers [cum->regno],
2146 /* For now, pass fp/complex values on the stack. */
/* ia32 regparm path: use an integer register only if the whole
   argument fits in the remaining ones.  */
2155 if (words <= cum->nregs)
2156 ret = gen_rtx_REG (mode, cum->regno);
2160 ret = gen_rtx_REG (mode, cum->sse_regno);
2164 if (TARGET_DEBUG_ARG)
2167 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2168 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2171 print_simple_rtl (stderr, ret);
2173 fprintf (stderr, ", stack");
2175 fprintf (stderr, " )\n");
2181 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2185 ix86_function_arg_boundary (mode, type)
2186 enum machine_mode mode;
2191 return PARM_BOUNDARY;
/* Take the alignment from the type when one is given, else from the
   mode, and never report less than PARM_BOUNDARY.  */
2193 align = TYPE_ALIGN (type);
2195 align = GET_MODE_ALIGNMENT (mode);
2196 if (align < PARM_BOUNDARY)
2197 align = PARM_BOUNDARY;
2203 /* Return true if N is a possible register number of function value. */
2205 ix86_function_value_regno_p (regno)
/* First return: presumably the TARGET_64BIT case (guard line not
   visible in this sampled listing -- TODO confirm).  */
2210 return ((regno) == 0
2211 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2212 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
/* NOTE(review): the final FIRST_FLOAT_REG test below is redundant --
   FIRST_FLOAT_REG is already accepted unconditionally two lines up.  */
2214 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2215 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2216 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2219 /* Define how to find the value returned by a function.
2220 VALTYPE is the data type of the value (as a tree).
2221 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2222 otherwise, FUNC is 0. */
2224 ix86_function_value (valtype)
/* x86-64: let construct_container pick the return register(s).  */
2229 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2230 REGPARM_MAX, SSE_REGPARM_MAX,
2231 x86_64_int_return_registers, 0);
2232 /* For zero sized structures, construct_container returns NULL, but we need
2233 to keep rest of compiler happy by returning meaningful value. */
2235 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
/* ia32: single register chosen by ix86_value_regno.  */
2239 return gen_rtx_REG (TYPE_MODE (valtype),
2240 ix86_value_regno (TYPE_MODE (valtype)));
2243 /* Return false iff type is returned in memory. */
2245 ix86_return_in_memory (type)
2248 int needed_intregs, needed_sseregs;
/* x86-64: in memory exactly when the type cannot be classified into
   registers at all.  */
2251 return !examine_argument (TYPE_MODE (type), type, 1,
2252 &needed_intregs, &needed_sseregs);
/* ia32: BLKmode aggregates, 8-byte vectors, and large (>12 byte)
   non-TImode/TFmode scalars go in memory.  */
2256 if (TYPE_MODE (type) == BLKmode
2257 || (VECTOR_MODE_P (TYPE_MODE (type))
2258 && int_size_in_bytes (type) == 8)
2259 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2260 && TYPE_MODE (type) != TFmode
2261 && !VECTOR_MODE_P (TYPE_MODE (type))))
2267 /* Define how to find the value returned by a library function
2268 assuming the value has mode MODE. */
2270 ix86_libcall_value (mode)
2271 enum machine_mode mode;
/* NOTE(review): sampled listing -- the mode tests selecting between
   these three returns are not visible; presumably FP modes pick SSE or
   x87 registers and the default is register 0.  TODO confirm.  */
2281 return gen_rtx_REG (mode, FIRST_SSE_REG);
2284 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2286 return gen_rtx_REG (mode, 0);
/* ia32 fallback: same choice as for normal function values.  */
2290 return gen_rtx_REG (mode, ix86_value_regno (mode));
2293 /* Given a mode, return the register to use for a return value. */
2296 ix86_value_regno (mode)
2297 enum machine_mode mode;
/* x87 for floats (when the ABI says so), SSE for TImode/vectors;
   the default return (presumably register 0) is not visible in this
   sampled listing.  */
2299 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2300 return FIRST_FLOAT_REG;
2301 if (mode == TImode || VECTOR_MODE_P (mode))
2302 return FIRST_SSE_REG;
2306 /* Create the va_list data type. */
2309 ix86_build_va_list ()
2311 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2313 /* For i386 we use plain pointer to argument area. */
2315 return build_pointer_type (char_type_node);
/* x86-64: build the four-field __va_list_tag record
   { gp_offset, fp_offset, overflow_arg_area, reg_save_area }.  */
2317 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2318 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2320 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2321 unsigned_type_node);
2322 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2323 unsigned_type_node);
2324 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2326 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2329 DECL_FIELD_CONTEXT (f_gpr) = record;
2330 DECL_FIELD_CONTEXT (f_fpr) = record;
2331 DECL_FIELD_CONTEXT (f_ovf) = record;
2332 DECL_FIELD_CONTEXT (f_sav) = record;
2334 TREE_CHAIN (record) = type_decl;
2335 TYPE_NAME (record) = type_decl;
2336 TYPE_FIELDS (record) = f_gpr;
2337 TREE_CHAIN (f_gpr) = f_fpr;
2338 TREE_CHAIN (f_fpr) = f_ovf;
2339 TREE_CHAIN (f_ovf) = f_sav;
2341 layout_type (record);
2343 /* The correct type is an array type of one element. */
2344 return build_array_type (record, build_index_type (size_zero_node));
2347 /* Perform any needed actions needed for a function that is receiving a
2348 variable number of arguments.
2352 MODE and TYPE are the mode and type of the current parameter.
2354 PRETEND_SIZE is a variable that should be set to the amount of stack
2355 that must be pushed by the prolog to pretend that our caller pushed
2358 Normally, this macro will push all remaining incoming registers on the
2359 stack and set PRETEND_SIZE to the length of the registers pushed. */
2362 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2363 CUMULATIVE_ARGS *cum;
2364 enum machine_mode mode;
2366 int *pretend_size ATTRIBUTE_UNUSED;
2370 CUMULATIVE_ARGS next_cum;
2371 rtx save_area = NULL_RTX, mem;
2384 /* Indicate to allocate space on the stack for varargs save area. */
2385 ix86_save_varrargs_registers = 1;
2387 fntype = TREE_TYPE (current_function_decl);
2388 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2389 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2390 != void_type_node));
2392 /* For varargs, we do not want to skip the dummy va_dcl argument.
2393 For stdargs, we do want to skip the last named argument. */
2396 function_arg_advance (&next_cum, mode, type, 1);
2399 save_area = frame_pointer_rtx;
2401 set = get_varargs_alias_set ();
/* Spill the unused integer parameter registers into the save area.  */
2403 for (i = next_cum.regno; i < ix86_regparm; i++)
2405 mem = gen_rtx_MEM (Pmode,
2406 plus_constant (save_area, i * UNITS_PER_WORD));
2407 set_mem_alias_set (mem, set);
2408 emit_move_insn (mem, gen_rtx_REG (Pmode,
2409 x86_64_int_parameter_registers[i]));
2412 if (next_cum.sse_nregs)
2414 /* Now emit code to save SSE registers. The AX parameter contains number
2415 of SSE parameter registers used to call this function. We use
2416 sse_prologue_save insn template that produces computed jump across
2417 SSE saves. We need some preparation work to get this working. */
2419 label = gen_label_rtx ();
2420 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2422 /* Compute address to jump to :
2423 label - 5*eax + nnamed_sse_arguments*5 */
2424 tmp_reg = gen_reg_rtx (Pmode);
2425 nsse_reg = gen_reg_rtx (Pmode);
2426 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2427 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2428 gen_rtx_MULT (Pmode, nsse_reg,
2430 if (next_cum.sse_regno)
2433 gen_rtx_CONST (DImode,
2434 gen_rtx_PLUS (DImode,
2436 GEN_INT (next_cum.sse_regno * 4))));
2438 emit_move_insn (nsse_reg, label_ref);
2439 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2441 /* Compute address of memory block we save into. We always use pointer
2442 pointing 127 bytes after first byte to store - this is needed to keep
2443 instruction size limited by 4 bytes. */
2444 tmp_reg = gen_reg_rtx (Pmode);
2445 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2446 plus_constant (save_area,
2447 8 * REGPARM_MAX + 127)));
2448 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2449 set_mem_alias_set (mem, set);
2450 set_mem_align (mem, BITS_PER_WORD);
2452 /* And finally do the dirty job! */
2453 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2454 GEN_INT (next_cum.sse_regno), label));
2459 /* Implement va_start. */
2462 ix86_va_start (valist, nextarg)
2466 HOST_WIDE_INT words, n_gpr, n_fpr;
2467 tree f_gpr, f_fpr, f_ovf, f_sav;
2468 tree gpr, fpr, ovf, sav, t;
2470 /* Only 64bit target needs something special. */
2473 std_expand_builtin_va_start (valist, nextarg);
/* Pick apart the four fields of the __va_list_tag record built by
   ix86_build_va_list.  */
2477 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2478 f_fpr = TREE_CHAIN (f_gpr);
2479 f_ovf = TREE_CHAIN (f_fpr);
2480 f_sav = TREE_CHAIN (f_ovf);
2482 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2483 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2484 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2485 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2486 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2488 /* Count number of gp and fp argument registers used. */
2489 words = current_function_args_info.words;
2490 n_gpr = current_function_args_info.regno;
2491 n_fpr = current_function_args_info.sse_regno;
2493 if (TARGET_DEBUG_ARG)
2494 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2495 (int) words, (int) n_gpr, (int) n_fpr);
/* gp_offset counts 8-byte integer slots; fp_offset counts 16-byte SSE
   slots placed after the REGPARM_MAX integer slots.  */
2497 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2498 build_int_2 (n_gpr * 8, 0));
2499 TREE_SIDE_EFFECTS (t) = 1;
2500 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2502 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2503 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2504 TREE_SIDE_EFFECTS (t) = 1;
2505 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2507 /* Find the overflow area. */
2508 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2510 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2511 build_int_2 (words * UNITS_PER_WORD, 0));
2512 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2513 TREE_SIDE_EFFECTS (t) = 1;
2514 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2516 /* Find the register save area.
2517 Prologue of the function save it right above stack frame. */
2518 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2519 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2520 TREE_SIDE_EFFECTS (t) = 1;
2521 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2524 /* Implement va_arg. */
2526 ix86_va_arg (valist, type)
2529 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2530 tree f_gpr, f_fpr, f_ovf, f_sav;
2531 tree gpr, fpr, ovf, sav, t;
2533 rtx lab_false, lab_over = NULL_RTX;
2537 /* Only 64bit target needs something special. */
2540 return std_expand_builtin_va_arg (valist, type);
2543 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2544 f_fpr = TREE_CHAIN (f_gpr);
2545 f_ovf = TREE_CHAIN (f_fpr);
2546 f_sav = TREE_CHAIN (f_ovf);
2548 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2549 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2550 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2551 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2552 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2554 size = int_size_in_bytes (type);
2555 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
/* Classify TYPE as if it were an argument; a NULL container means it
   is always passed in memory.  */
2557 container = construct_container (TYPE_MODE (type), type, 0,
2558 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2560 * Pull the value out of the saved registers ...
2563 addr_rtx = gen_reg_rtx (Pmode);
2567 rtx int_addr_rtx, sse_addr_rtx;
2568 int needed_intregs, needed_sseregs;
2571 lab_over = gen_label_rtx ();
2572 lab_false = gen_label_rtx ();
2574 examine_argument (TYPE_MODE (type), type, 0,
2575 &needed_intregs, &needed_sseregs);
2578 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2579 || TYPE_ALIGN (type) > 128);
2581 /* In case we are passing structure, verify that it is consecutive block
2582 on the register save area. If not we need to do moves. */
2583 if (!need_temp && !REG_P (container))
2585 /* Verify that all registers are strictly consecutive */
2586 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2590 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2592 rtx slot = XVECEXP (container, 0, i);
2593 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2594 || INTVAL (XEXP (slot, 1)) != i * 16)
2602 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2604 rtx slot = XVECEXP (container, 0, i);
2605 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2606 || INTVAL (XEXP (slot, 1)) != i * 8)
2613 int_addr_rtx = addr_rtx;
2614 sse_addr_rtx = addr_rtx;
2618 int_addr_rtx = gen_reg_rtx (Pmode);
2619 sse_addr_rtx = gen_reg_rtx (Pmode);
2621 /* First ensure that we fit completely in registers. */
2624 emit_cmp_and_jump_insns (expand_expr
2625 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2626 GEN_INT ((REGPARM_MAX - needed_intregs +
2627 1) * 8), GE, const1_rtx, SImode,
2632 emit_cmp_and_jump_insns (expand_expr
2633 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2634 GEN_INT ((SSE_REGPARM_MAX -
2635 needed_sseregs + 1) * 16 +
2636 REGPARM_MAX * 8), GE, const1_rtx,
2637 SImode, 1, lab_false);
2640 /* Compute index to start of area used for integer regs. */
2643 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2644 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2645 if (r != int_addr_rtx)
2646 emit_move_insn (int_addr_rtx, r);
2650 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2651 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2652 if (r != sse_addr_rtx)
2653 emit_move_insn (sse_addr_rtx, r);
/* need_temp path: copy each piece of the value out of the save area
   into a stack temporary so it ends up contiguous.  */
2660 /* Never use the memory itself, as it has the alias set. */
2661 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2662 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2663 set_mem_alias_set (mem, get_varargs_alias_set ());
2664 set_mem_align (mem, BITS_PER_UNIT);
2666 for (i = 0; i < XVECLEN (container, 0); i++)
2668 rtx slot = XVECEXP (container, 0, i);
2669 rtx reg = XEXP (slot, 0);
2670 enum machine_mode mode = GET_MODE (reg);
2676 if (SSE_REGNO_P (REGNO (reg)))
2678 src_addr = sse_addr_rtx;
2679 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2683 src_addr = int_addr_rtx;
2684 src_offset = REGNO (reg) * 8;
2686 src_mem = gen_rtx_MEM (mode, src_addr);
2687 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2688 src_mem = adjust_address (src_mem, mode, src_offset);
2689 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2690 emit_move_insn (dest_mem, src_mem);
/* Bump gp_offset / fp_offset past the registers just consumed.  */
2697 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2698 build_int_2 (needed_intregs * 8, 0));
2699 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2700 TREE_SIDE_EFFECTS (t) = 1;
2701 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2706 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2707 build_int_2 (needed_sseregs * 16, 0));
2708 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2709 TREE_SIDE_EFFECTS (t) = 1;
2710 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2713 emit_jump_insn (gen_jump (lab_over));
2715 emit_label (lab_false);
2718 /* ... otherwise out of the overflow area. */
2720 /* Care for on-stack alignment if needed. */
2721 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2725 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2726 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2727 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2731 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2733 emit_move_insn (addr_rtx, r);
/* Advance overflow_arg_area past this argument.  */
2736 build (PLUS_EXPR, TREE_TYPE (t), t,
2737 build_int_2 (rsize * UNITS_PER_WORD, 0));
2738 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2739 TREE_SIDE_EFFECTS (t) = 1;
2740 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2743 emit_label (lab_over);
2748 /* Return nonzero if OP is general operand representable on x86_64. */
2751 x86_64_general_operand (op, mode)
2753 enum machine_mode mode;
/* Presumably guarded by !TARGET_64BIT (guard not visible in this
   sampled listing): plain general_operand suffices on ia32.  */
2756 return general_operand (op, mode);
2757 if (nonimmediate_operand (op, mode))
/* Otherwise constants must fit in a sign-extended 32-bit immediate.  */
2759 return x86_64_sign_extended_value (op);
2762 /* Return nonzero if OP is general operand representable on x86_64
2763 as either sign extended or zero extended constant. */
2766 x86_64_szext_general_operand (op, mode)
2768 enum machine_mode mode;
/* ia32 fallback (guard line not visible in this sampled listing).  */
2771 return general_operand (op, mode);
2772 if (nonimmediate_operand (op, mode))
/* Accept constants representable either sign- or zero-extended.  */
2774 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2777 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2780 x86_64_nonmemory_operand (op, mode)
2782 enum machine_mode mode;
/* ia32 fallback (guard line not visible in this sampled listing).  */
2785 return nonmemory_operand (op, mode);
2786 if (register_operand (op, mode))
/* Constants must fit a sign-extended 32-bit immediate.  */
2788 return x86_64_sign_extended_value (op);
2791 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2794 x86_64_movabs_operand (op, mode)
2796 enum machine_mode mode;
2798 if (!TARGET_64BIT || !flag_pic)
2799 return nonmemory_operand (op, mode);
2800 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
/* Under PIC, allow any constant that mentions no symbols.  */
2802 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2807 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2810 x86_64_szext_nonmemory_operand (op, mode)
2812 enum machine_mode mode;
/* ia32 fallback (guard line not visible in this sampled listing).  */
2815 return nonmemory_operand (op, mode);
2816 if (register_operand (op, mode))
2818 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2821 /* Return nonzero if OP is immediate operand representable on x86_64. */
2824 x86_64_immediate_operand (op, mode)
2826 enum machine_mode mode;
/* ia32 fallback (guard line not visible in this sampled listing).  */
2829 return immediate_operand (op, mode);
2830 return x86_64_sign_extended_value (op);
2833 /* Return nonzero if OP is immediate operand representable on x86_64. */
2836 x86_64_zext_immediate_operand (op, mode)
2838 enum machine_mode mode ATTRIBUTE_UNUSED;
/* True when OP fits a zero-extended 32-bit immediate.  */
2840 return x86_64_zero_extended_value (op);
2843 /* Return nonzero if OP is (const_int 1), else return zero. */
2846 const_int_1_operand (op, mode)
2848 enum machine_mode mode ATTRIBUTE_UNUSED;
/* Matches exactly (const_int 1); MODE is ignored.  */
2850 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2853 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2854 for shift & compare patterns, as shifting by 0 does not change flags),
2855 else return zero. */
2858 const_int_1_31_operand (op, mode)
2860 enum machine_mode mode ATTRIBUTE_UNUSED;
/* Shift counts 1..31 only -- a shift by 0 would leave flags unset.  */
2862 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
2865 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2866 reference and a constant. */
2869 symbolic_operand (op, mode)
2871 enum machine_mode mode ATTRIBUTE_UNUSED;
/* NOTE(review): sampled listing -- the switch's case labels (and the
   unwrapping of CONST, presumably) are not visible below.  */
2873 switch (GET_CODE (op))
2881 if (GET_CODE (op) == SYMBOL_REF
2882 || GET_CODE (op) == LABEL_REF
2883 || (GET_CODE (op) == UNSPEC
2884 && (XINT (op, 1) == UNSPEC_GOT
2885 || XINT (op, 1) == UNSPEC_GOTOFF
2886 || XINT (op, 1) == UNSPEC_GOTPCREL)))
/* symbol + const_int forms.  */
2888 if (GET_CODE (op) != PLUS
2889 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2893 if (GET_CODE (op) == SYMBOL_REF
2894 || GET_CODE (op) == LABEL_REF)
2896 /* Only @GOTOFF gets offsets. */
2897 if (GET_CODE (op) != UNSPEC
2898 || XINT (op, 1) != UNSPEC_GOTOFF)
2901 op = XVECEXP (op, 0, 0)
2902 if (GET_CODE (op) == SYMBOL_REF
2903 || GET_CODE (op) == LABEL_REF)
2912 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2915 pic_symbolic_operand (op, mode)
2917 enum machine_mode mode ATTRIBUTE_UNUSED;
2919 if (GET_CODE (op) != CONST)
/* Look through (const ...) for an UNSPEC, possibly offset by a
   const_int.  (Sampled listing: the branch bodies are not visible.)  */
2924 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2929 if (GET_CODE (op) == UNSPEC)
2931 if (GET_CODE (op) != PLUS
2932 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2935 if (GET_CODE (op) == UNSPEC)
2941 /* Return true if OP is a symbolic operand that resolves locally. */
2944 local_symbolic_operand (op, mode)
2946 enum machine_mode mode ATTRIBUTE_UNUSED;
2948 if (GET_CODE (op) == LABEL_REF)
/* Strip a (const (plus SYM const_int)) wrapper.  */
2951 if (GET_CODE (op) == CONST
2952 && GET_CODE (XEXP (op, 0)) == PLUS
2953 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2954 op = XEXP (XEXP (op, 0), 0);
2956 if (GET_CODE (op) != SYMBOL_REF)
2959 /* These we've been told are local by varasm and encode_section_info
2961 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2964 /* There is, however, a not insubstantial body of code in the rest of
2965 the compiler that assumes it can just stick the results of
2966 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2967 /* ??? This is a hack. Should update the body of the compiler to
2968 always create a DECL and invoke targetm.encode_section_info. */
2969 if (strncmp (XSTR (op, 0), internal_label_prefix,
2970 internal_label_prefix_len) == 0)
2976 /* Test for various thread-local symbols. See ix86_encode_section_info. */
2979 tls_symbolic_operand (op, mode)
2981 enum machine_mode mode ATTRIBUTE_UNUSED;
2983 const char *symbol_str;
2985 if (GET_CODE (op) != SYMBOL_REF)
2987 symbol_str = XSTR (op, 0);
/* TLS symbols are encoded as "%<model-char>name"; the index into
   tls_model_chars identifies the TLS model.  */
2989 if (symbol_str[0] != '%')
2991 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
2995 tls_symbolic_operand_1 (op, kind)
2997 enum tls_model kind;
2999 const char *symbol_str;
3001 if (GET_CODE (op) != SYMBOL_REF)
3003 symbol_str = XSTR (op, 0);
/* Match only symbols encoded with exactly the TLS model KIND.  */
3005 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
/* Predicate: OP is a global-dynamic TLS symbol.  */
3009 global_dynamic_symbolic_operand (op, mode)
3011 enum machine_mode mode ATTRIBUTE_UNUSED;
3013 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
/* Predicate: OP is a local-dynamic TLS symbol.  */
3017 local_dynamic_symbolic_operand (op, mode)
3019 enum machine_mode mode ATTRIBUTE_UNUSED;
3021 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
/* Predicate: OP is an initial-exec TLS symbol.  */
3025 initial_exec_symbolic_operand (op, mode)
3027 enum machine_mode mode ATTRIBUTE_UNUSED;
3029 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
/* Predicate: OP is a local-exec TLS symbol.  */
3033 local_exec_symbolic_operand (op, mode)
3035 enum machine_mode mode ATTRIBUTE_UNUSED;
3037 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3040 /* Test for a valid operand for a call instruction. Don't allow the
3041 arg pointer register or virtual regs since they may decay into
3042 reg + const, which the patterns can't handle. */
3045 call_insn_operand (op, mode)
3047 enum machine_mode mode ATTRIBUTE_UNUSED;
3049 /* Disallow indirect through a virtual register. This leads to
3050 compiler aborts when trying to eliminate them. */
3051 if (GET_CODE (op) == REG
3052 && (op == arg_pointer_rtx
3053 || op == frame_pointer_rtx
3054 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3055 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3058 /* Disallow `call 1234'. Due to varying assembler lameness this
3059 gets either rejected or translated to `call .+1234'. */
3060 if (GET_CODE (op) == CONST_INT)
3063 /* Explicitly allow SYMBOL_REF even if pic. */
3064 if (GET_CODE (op) == SYMBOL_REF)
3067 /* Otherwise we can allow any general_operand in the address. */
3068 return general_operand (op, Pmode)
/* True when OP is a symbol (possibly offset by a constant) that can be
   used directly as a call target.  */
3072 constant_call_address_operand (op, mode)
3074 enum machine_mode mode ATTRIBUTE_UNUSED;
3076 if (GET_CODE (op) == CONST
3077 && GET_CODE (XEXP (op, 0)) == PLUS
3078 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3079 op = XEXP (XEXP (op, 0), 0);
3080 return GET_CODE (op) == SYMBOL_REF;
3083 /* Match exactly zero and one. */
/* Match the canonical zero constant of MODE.  */
3086 const0_operand (op, mode)
3088 enum machine_mode mode;
3090 return op == CONST0_RTX (mode);
/* Match the shared (const_int 1) rtx; MODE is ignored.  */
3094 const1_operand (op, mode)
3096 enum machine_mode mode ATTRIBUTE_UNUSED;
3098 return op == const1_rtx;
3101 /* Match 2, 4, or 8. Used for leal multiplicands. */
3104 const248_operand (op, mode)
3106 enum machine_mode mode ATTRIBUTE_UNUSED;
/* Valid scale factors for lea's index.  */
3108 return (GET_CODE (op) == CONST_INT
3109 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3112 /* True if this is a constant appropriate for an increment or decrement. */
3115 incdec_operand (op, mode)
3117 enum machine_mode mode ATTRIBUTE_UNUSED;
3119 /* On Pentium4, the inc and dec operations cause extra dependency on flag
3120 registers, since carry flag is not set. */
3121 if (TARGET_PENTIUM4 && !optimize_size)
3123 return op == const1_rtx || op == constm1_rtx;
3126 /* Return nonzero if OP is acceptable as operand of DImode shift
3130 shiftdi_operand (op, mode)
3132 enum machine_mode mode ATTRIBUTE_UNUSED;
/* Presumably 64-bit targets also allow memory operands here (the
   TARGET_64BIT guard line is not visible in this sampled listing).  */
3135 return nonimmediate_operand (op, mode);
3137 return register_operand (op, mode);
3140 /* Return false if this is the stack pointer, or any other fake
3141 register eliminable to the stack pointer. Otherwise, this is
3144 This is used to prevent esp from being used as an index reg.
3145 Which would only happen in pathological cases. */
3148 reg_no_sp_operand (op, mode)
3150 enum machine_mode mode;
/* Look through a SUBREG before testing for the stack pointer and its
   eliminable aliases.  */
3153 if (GET_CODE (t) == SUBREG)
3155 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3158 return register_operand (op, mode);
/* True when OP is an MMX register.  */
3162 mmx_reg_operand (op, mode)
3164 enum machine_mode mode ATTRIBUTE_UNUSED;
3166 return MMX_REG_P (op);
3169 /* Return false if this is any eliminable register. Otherwise
3173 general_no_elim_operand (op, mode)
3175 enum machine_mode mode;
/* Reject the frame/arg pointers and virtual registers, which may be
   eliminated into stack-pointer expressions.  */
3178 if (GET_CODE (t) == SUBREG)
3180 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3181 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3182 || t == virtual_stack_dynamic_rtx)
3185 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3186 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3189 return general_operand (op, mode);
3192 /* Return false if this is any eliminable register. Otherwise
3193 register_operand or const_int. */
3196 nonmemory_no_elim_operand (op, mode)
3198 enum machine_mode mode;
/* Same eliminable-register rejection as general_no_elim_operand, but
   the fallback accepts only registers and const_ints.  */
3201 if (GET_CODE (t) == SUBREG)
3203 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3204 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3205 || t == virtual_stack_dynamic_rtx)
3208 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3211 /* Return true if op is a Q_REGS class register. */
3214 q_regs_operand (op, mode)
3216 enum machine_mode mode;
3218 if (mode != VOIDmode && GET_MODE (op) != mode)
/* Strip a SUBREG so the register class test sees the real reg.  */
3220 if (GET_CODE (op) == SUBREG)
3221 op = SUBREG_REG (op);
3222 return ANY_QI_REG_P (op);
3225 /* Return true if op is a NON_Q_REGS class register. */
3228 non_q_regs_operand (op, mode)
3230 enum machine_mode mode;
3232 if (mode != VOIDmode && GET_MODE (op) != mode)
/* Strip a SUBREG so the register class test sees the real reg.  */
3234 if (GET_CODE (op) == SUBREG)
3235 op = SUBREG_REG (op);
3236 return NON_QI_REG_P (op);
3239 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3242 sse_comparison_operator (op, mode)
3244 enum machine_mode mode ATTRIBUTE_UNUSED;
3246 enum rtx_code code = GET_CODE (op);
/* NOTE(review): sampled listing -- the switch and its case labels are
   not visible between the comments below.  */
3249 /* Operations supported directly. */
3259 /* These are equivalent to ones above in non-IEEE comparisons. */
3266 return !TARGET_IEEE_FP;
3271 /* Return 1 if OP is a valid comparison operator in valid mode. */
3273 ix86_comparison_operator (op, mode)
3275 enum machine_mode mode;
3277 enum machine_mode inmode;
3278 enum rtx_code code = GET_CODE (op);
3279 if (mode != VOIDmode && GET_MODE (op) != mode)
3281 if (GET_RTX_CLASS (code) != '<')
3283 inmode = GET_MODE (XEXP (op, 0));
/* FP compares: valid only when no splitting into bypass/second
   comparisons is required.  */
3285 if (inmode == CCFPmode || inmode == CCFPUmode)
3287 enum rtx_code second_code, bypass_code;
3288 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3289 return (bypass_code == NIL && second_code == NIL);
/* Integer compares: which codes are valid depends on the CC mode that
   produced the flags.  (Case labels partly missing from listing.)  */
3296 if (inmode == CCmode || inmode == CCGCmode
3297 || inmode == CCGOCmode || inmode == CCNOmode)
3300 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3301 if (inmode == CCmode)
3305 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3313 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3316 fcmov_comparison_operator (op, mode)
3318 enum machine_mode mode;
3320 enum machine_mode inmode;
3321 enum rtx_code code = GET_CODE (op);
3322 if (mode != VOIDmode && GET_MODE (op) != mode)
3324 if (GET_RTX_CLASS (code) != '<')
3326 inmode = GET_MODE (XEXP (op, 0));
/* FP compares must map to a single integer condition code.  */
3327 if (inmode == CCFPmode || inmode == CCFPUmode)
3329 enum rtx_code second_code, bypass_code;
3330 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3331 if (bypass_code != NIL || second_code != NIL)
3333 code = ix86_fp_compare_code_to_integer (code);
3335 /* i387 supports just limited amount of conditional codes. */
3338 case LTU: case GTU: case LEU: case GEU:
3339 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3342 case ORDERED: case UNORDERED:
3350 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3353 promotable_binary_operator (op, mode)
3355 enum machine_mode mode ATTRIBUTE_UNUSED;
3357 switch (GET_CODE (op))
/* (Sampled listing: the case labels, presumably MULT and the logic
   ops, are not visible here.)  */
3360 /* Modern CPUs have same latency for HImode and SImode multiply,
3361 but 386 and 486 do HImode multiply faster. */
3362 return ix86_cpu > PROCESSOR_I486;
3374 /* Nearly general operand, but accept any const_double, since we wish
3375 to be able to drop them into memory rather than have them get pulled
3379 cmp_fp_expander_operand (op, mode)
3381 enum machine_mode mode;
3383 if (mode != VOIDmode && mode != GET_MODE (op))
/* Accept any CONST_DOUBLE so it can later be spilled to memory.  */
3385 if (GET_CODE (op) == CONST_DOUBLE)
3387 return general_operand (op, mode);
3390 /* Match an SI or HImode register for a zero_extract. */
3393 ext_register_operand (op, mode)
3395 enum machine_mode mode ATTRIBUTE_UNUSED;
3398 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3399 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3402 if (!register_operand (op, VOIDmode))
3405 /* Be careful to accept only registers having upper parts. */
3406 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3407 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3410 /* Return 1 if this is a valid binary floating-point operation.
3411 OP is the expression matched, and MODE is its mode. */
3414 binary_fp_operator (op, mode)
3416 enum machine_mode mode;
3418 if (mode != VOIDmode && mode != GET_MODE (op))
3421 switch (GET_CODE (op))
/* (Case labels -- presumably PLUS/MINUS/MULT/DIV -- are not visible
   in this sampled listing.)  */
3427 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
/* Match a MULT rtx.  */
3435 mult_operator (op, mode)
3437 enum machine_mode mode ATTRIBUTE_UNUSED;
3439 return GET_CODE (op) == MULT;
/* Match a DIV rtx.  */
3443 div_operator (op, mode)
3445 enum machine_mode mode ATTRIBUTE_UNUSED;
3447 return GET_CODE (op) == DIV;
/* Match any commutative ('c') or ordinary binary ('2') operator.  */
3451 arith_or_logical_operator (op, mode)
3453 enum machine_mode mode;
3455 return ((mode == VOIDmode || GET_MODE (op) == mode)
3456 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3457 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3460 /* Returns 1 if OP is memory operand with a displacement. */
3463 memory_displacement_operand (op, mode)
3465 enum machine_mode mode;
3467 struct ix86_address parts;
3469 if (! memory_operand (op, mode))
/* Decompose the address and check for a displacement part.  */
3472 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3475 return parts.disp != NULL_RTX;
3478 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3479 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3481 ??? It seems likely that this will only work because cmpsi is an
3482 expander, and no actual insns use this. */
3485 cmpsi_operand (op, mode)
3487 enum machine_mode mode;
3489 if (nonimmediate_operand (op, mode))
/* Also accept the (and (zero_extract ... 8 8) const_int) form emitted
   by testqi_ext_ccno_0, so jump re-emission can re-recognize it.  */
3492 if (GET_CODE (op) == AND
3493 && GET_MODE (op) == SImode
3494 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3495 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3496 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3497 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3498 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3499 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3505 /* Returns 1 if OP is memory operand that can not be represented by the
3509 long_memory_operand (op, mode)
3511 enum machine_mode mode;
3513 if (! memory_operand (op, mode))
/* Nonzero address length means a displacement/SIB byte is needed.  */
3516 return memory_address_length (op) != 0;
3519 /* Return nonzero if the rtx is known aligned. */
3522 aligned_operand (op, mode)
3524 enum machine_mode mode;
3526 struct ix86_address parts;
3528 if (!general_operand (op, mode))
3531 /* Registers and immediate operands are always "aligned". */
3532 if (GET_CODE (op) != MEM)
3535 /* Don't even try to do any aligned optimizations with volatiles. */
3536 if (MEM_VOLATILE_P (op))
3541 /* Pushes and pops are only valid on the stack pointer. */
3542 if (GET_CODE (op) == PRE_DEC
3543 || GET_CODE (op) == POST_INC)
3546 /* Decode the address. */
3547 if (! ix86_decompose_address (op, &parts))
3550 if (parts.base && GET_CODE (parts.base) == SUBREG)
3551 parts.base = SUBREG_REG (parts.base);
3552 if (parts.index && GET_CODE (parts.index) == SUBREG)
3553 parts.index = SUBREG_REG (parts.index);
3555 /* Look for some component that isn't known to be aligned. */
/* Index and base registers must carry >= 32-bit pointer alignment,
   and any displacement must be a multiple of 4.  */
3559 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3564 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3569 if (GET_CODE (parts.disp) != CONST_INT
3570 || (INTVAL (parts.disp) & 3) != 0)
3574 /* Didn't find one -- this must be an aligned address. */
3578 /* Return true if the constant is something that can be loaded with
3579 a special instruction. Only handle 0.0 and 1.0; others are less
/* Recognizes FP CONST_DOUBLEs loadable via fldz/fld1-style insns.
   NOTE(review): extracted text has gaps (the distinct return values for
   the 0.0 and 1.0 cases are not visible); code kept verbatim.  */
3583 standard_80387_constant_p (x)
3586 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3588 /* Note that on the 80387, other constants, such as pi, that we should support
3589 too. On some machines, these are much slower to load as standard constant,
3590 than to load from doubles in memory. */
3591 if (x == CONST0_RTX (GET_MODE (x)))
3593 if (x == CONST1_RTX (GET_MODE (x)))
3598 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
/* Only the all-zeros constant qualifies (loadable with xorps/pxor).
   NOTE(review): extracted text has gaps; code kept verbatim.  */
3601 standard_sse_constant_p (x)
3604 if (GET_CODE (x) != CONST_DOUBLE)
3606 return (x == CONST0_RTX (GET_MODE (x)));
3609 /* Returns 1 if OP contains a symbol reference */
/* Recursive walk over OP's rtx format string: direct SYMBOL_REF/LABEL_REF
   hits return immediately, 'E' (vector) and 'e' (expression) operands are
   searched recursively.  NOTE(review): extracted text has gaps; code kept
   verbatim.  */
3612 symbolic_reference_mentioned_p (op)
3615 register const char *fmt;
3618 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3621 fmt = GET_RTX_FORMAT (GET_CODE (op));
3622 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3628 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3629 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3633 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3640 /* Return 1 if it is appropriate to emit `ret' instructions in the
3641 body of a function. Do this only if the epilogue is simple, needing a
3642 couple of insns. Prior to reloading, we can't tell how many registers
3643 must be saved, so return 0 then. Return 0 if there is no frame
3644 marker to de-allocate.
3646 If NON_SAVING_SETJMP is defined and true, then it is not possible
3647 for the epilogue to be simple, so return 0. This is a special case
3648 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3649 until final, but jump_optimize may need to know sooner if a
/* NOTE(review): extracted text has gaps (`return 0;` lines after the
   guards are not visible); code kept verbatim.  */
3653 ix86_can_use_return_insn_p ()
3655 struct ix86_frame frame;
3657 #ifdef NON_SAVING_SETJMP
3658 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3662 if (! reload_completed || frame_pointer_needed)
3665 /* Don't allow more than 32 pop, since that's all we can do
3666 with one instruction. */
3667 if (current_function_pops_args
3668 && current_function_args_size >= 32768)
/* Simple `ret' is OK only when there is nothing left to deallocate:
   no local frame and no saved registers.  */
3671 ix86_compute_frame_layout (&frame);
3672 return frame.to_allocate == 0 && frame.nregs == 0;
3675 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
/* Used for x86-64 instructions whose 32-bit immediate is sign-extended
   to 64 bits.  Handles CONST_INT directly, and SYMBOL_REF/LABEL_REF/CONST
   according to the active code model (ix86_cmodel).
   NOTE(review): extracted text has gaps — the `case' labels of the switch
   and several return statements are not visible; code kept verbatim.  */
3677 x86_64_sign_extended_value (value)
3680 switch (GET_CODE (value))
3682 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3683 to be at least 32 and this all acceptable constants are
3684 represented as CONST_INT. */
3686 if (HOST_BITS_PER_WIDE_INT == 32)
3690 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3691 return trunc_int_for_mode (val, SImode) == val;
3695 /* For certain code models, the symbolic references are known to fit. */
3697 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3699 /* For certain code models, the code is near as well. */
3701 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3703 /* We also may accept the offsetted memory references in certain special
3706 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3707 && XINT (XEXP (value, 0), 1) == UNSPEC_GOTPCREL)
3709 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3711 rtx op1 = XEXP (XEXP (value, 0), 0);
3712 rtx op2 = XEXP (XEXP (value, 0), 1);
3713 HOST_WIDE_INT offset;
3715 if (ix86_cmodel == CM_LARGE)
3717 if (GET_CODE (op2) != CONST_INT)
3719 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3720 switch (GET_CODE (op1))
3723 /* For CM_SMALL assume that latest object is 1MB before
3724 end of 31bits boundary. We may also accept pretty
3725 large negative constants knowing that all objects are
3726 in the positive half of address space. */
3727 if (ix86_cmodel == CM_SMALL
3728 && offset < 1024*1024*1024
3729 && trunc_int_for_mode (offset, SImode) == offset)
3731 /* For CM_KERNEL we know that all object resist in the
3732 negative half of 32bits address space. We may not
3733 accept negative offsets, since they may be just off
3734 and we may accept pretty large positive ones. */
3735 if (ix86_cmodel == CM_KERNEL
3737 && trunc_int_for_mode (offset, SImode) == offset)
3741 /* These conditions are similar to SYMBOL_REF ones, just the
3742 constraints for code models differ. */
3743 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3744 && offset < 1024*1024*1024
3745 && trunc_int_for_mode (offset, SImode) == offset)
3747 if (ix86_cmodel == CM_KERNEL
3749 && trunc_int_for_mode (offset, SImode) == offset)
3762 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
/* Companion of x86_64_sign_extended_value for the 32-bit zero-extended
   immediate form: CONST_INT must fit in unsigned 32 bits, symbolic
   operands are accepted per code model.
   NOTE(review): extracted text has gaps — `case' labels and several
   returns are not visible; code kept verbatim.  */
3764 x86_64_zero_extended_value (value)
3767 switch (GET_CODE (value))
3770 if (HOST_BITS_PER_WIDE_INT == 32)
3771 return (GET_MODE (value) == VOIDmode
3772 && !CONST_DOUBLE_HIGH (value));
3776 if (HOST_BITS_PER_WIDE_INT == 32)
3777 return INTVAL (value) >= 0;
3779 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3782 /* For certain code models, the symbolic references are known to fit. */
3784 return ix86_cmodel == CM_SMALL;
3786 /* For certain code models, the code is near as well. */
3788 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3790 /* We also may accept the offsetted memory references in certain special
3793 if (GET_CODE (XEXP (value, 0)) == PLUS)
3795 rtx op1 = XEXP (XEXP (value, 0), 0);
3796 rtx op2 = XEXP (XEXP (value, 0), 1);
3798 if (ix86_cmodel == CM_LARGE)
3800 switch (GET_CODE (op1))
3804 /* For small code model we may accept pretty large positive
3805 offsets, since one bit is available for free. Negative
3806 offsets are limited by the size of NULL pointer area
3807 specified by the ABI. */
3808 if (ix86_cmodel == CM_SMALL
3809 && GET_CODE (op2) == CONST_INT
3810 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3811 && (trunc_int_for_mode (INTVAL (op2), SImode)
3814 /* ??? For the kernel, we may accept adjustment of
3815 -0x10000000, since we know that it will just convert
3816 negative address space to positive, but perhaps this
3817 is not worthwhile. */
3820 /* These conditions are similar to SYMBOL_REF ones, just the
3821 constraints for code models differ. */
3822 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3823 && GET_CODE (op2) == CONST_INT
3824 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3825 && (trunc_int_for_mode (INTVAL (op2), SImode)
3839 /* Value should be nonzero if functions must have frame pointers.
3840 Zero means the frame pointer need not be set up (and parms may
3841 be accessed via the stack pointer) in functions that seem suitable. */
/* NOTE(review): extracted text has gaps (the `return 1;`/`return 0;`
   lines after each test are not visible); code kept verbatim.  */
3844 ix86_frame_pointer_required ()
3846 /* If we accessed previous frames, then the generated code expects
3847 to be able to access the saved ebp value in our frame. */
3848 if (cfun->machine->accesses_prev_frame)
3851 /* Several x86 os'es need a frame pointer for other reasons,
3852 usually pertaining to setjmp. */
3853 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3856 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3857 the frame pointer by default. Turn it back on now if we've not
3858 got a leaf function. */
3859 if (TARGET_OMIT_LEAF_FRAME_POINTER
3860 && (!current_function_is_leaf || current_function_profile))
3866 /* Record that the current function accesses previous call frames. */
/* Sets the per-function flag consulted by ix86_frame_pointer_required.  */
3869 ix86_setup_frame_addresses ()
3871 cfun->machine->accesses_prev_frame = 1;
/* Hidden/linkonce pc-thunk emission is only possible when gas supports
   .hidden and the target supports one-only (COMDAT) sections.  */
3874 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3875 # define USE_HIDDEN_LINKONCE 1
3877 # define USE_HIDDEN_LINKONCE 0
/* Bitmask of register numbers whose pc-thunk label has been referenced;
   ix86_asm_file_end emits a thunk body for each set bit.  */
3880 static int pic_labels_used;
3882 /* Fills in the label name that should be used for a pc thunk for
3883 the given register. */
/* NAME receives either the public "__i686.get_pc_thunk.<reg>" symbol
   (hidden-linkonce case) or a file-local "LPR<regno>" label.  */
3886 get_pc_thunk_name (name, regno)
3890 if (USE_HIDDEN_LINKONCE)
3891 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3893 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3897 /* This function generates code for -fpic that loads %ebx with
3898 the return address of the caller and then returns. */
/* Emits, at end of assembly output, one pc-thunk body per register
   recorded in pic_labels_used.  Each thunk loads the return address
   (top of stack) into its register and returns.
   NOTE(review): extracted text has gaps (variable declarations and some
   output lines not visible); code kept verbatim.  */
3901 ix86_asm_file_end (file)
3907 for (regno = 0; regno < 8; ++regno)
3911 if (! ((pic_labels_used >> regno) & 1))
3914 get_pc_thunk_name (name, regno);
3916 if (USE_HIDDEN_LINKONCE)
/* Build a FUNCTION_DECL so the thunk can go in its own one-only
   section and be emitted as a proper hidden global function.  */
3920 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3922 TREE_PUBLIC (decl) = 1;
3923 TREE_STATIC (decl) = 1;
3924 DECL_ONE_ONLY (decl) = 1;
3926 (*targetm.asm_out.unique_section) (decl, 0);
3927 named_section (decl, NULL, 0);
3929 ASM_GLOBALIZE_LABEL (file, name);
3930 fputs ("\t.hidden\t", file);
3931 assemble_name (file, name);
3933 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
3938 ASM_OUTPUT_LABEL (file, name);
/* Thunk body: mov (%esp), %reg ; ret.  */
3941 xops[0] = gen_rtx_REG (SImode, regno);
3942 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3943 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3944 output_asm_insn ("ret", xops);
3948 /* Emit code for the SET_GOT patterns. */
/* Outputs assembly that loads the GOT base into DEST, either via the
   classic call/pop-label sequence or, with deep-branch-prediction
   targets, via a call to the per-register pc thunk.
   NOTE(review): extracted text has gaps (declarations, some branches);
   code kept verbatim.  */
3951 output_set_got (dest)
3957 xops[1] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3959 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
3961 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ())
3964 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3966 output_asm_insn ("call\t%a2", xops);
3969 /* Output the "canonical" label name ("Lxx$pb") here too. This
3970 is what will be referred to by the Mach-O PIC subsystem. */
3971 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
3973 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
3974 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
3977 output_asm_insn ("pop{l}\t%0", xops);
/* Deep-branch-prediction path: call the pc thunk and record that its
   body must be emitted at end of file.  */
3982 get_pc_thunk_name (name, REGNO (dest));
3983 pic_labels_used |= 1 << REGNO (dest);
3985 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
3986 xops[2] = gen_rtx_MEM (QImode, xops[2]);
3987 output_asm_insn ("call\t%X2", xops);
3990 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
3991 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
3992 else if (!TARGET_MACHO)
3993 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
3998 /* Generate an "push" pattern for input ARG. */
/* Builds (set (mem (pre_dec sp)) arg) — the RTL form of a push.
   NOTE(review): extracted text has gaps; code kept verbatim.  */
4004 return gen_rtx_SET (VOIDmode,
4006 gen_rtx_PRE_DEC (Pmode,
4007 stack_pointer_rtx)),
4011 /* Return >= 0 if there is an unused call-clobbered register available
4012 for the entire function. */
/* In a non-profiled leaf function, scans eax/ecx/edx (regnos 2..0) for
   one never used, so the PIC pointer need not live in ebx.
   Returns INVALID_REGNUM when none qualifies.  */
4015 ix86_select_alt_pic_regnum ()
4017 if (current_function_is_leaf && !current_function_profile)
4020 for (i = 2; i >= 0; --i)
4021 if (!regs_ever_live[i])
4025 return INVALID_REGNUM;
4028 /* Return 1 if we need to save REGNO. */
/* The PIC register is saved when it is live (or profiling / eh_return
   forces it) unless an alternate call-clobbered register can hold it.
   With MAYBE_EH_RETURN set, the EH data registers also count as saved.
   NOTE(review): extracted text has gaps (loop header and returns around
   the EH scan not visible); code kept verbatim.  */
4030 ix86_save_reg (regno, maybe_eh_return)
4032 int maybe_eh_return;
4034 if (pic_offset_table_rtx
4035 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4036 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4037 || current_function_profile
4038 || current_function_calls_eh_return))
4040 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4045 if (current_function_calls_eh_return && maybe_eh_return)
4050 unsigned test = EH_RETURN_DATA_REGNO (i);
4051 if (test == INVALID_REGNUM)
/* General case: live, call-saved, not fixed, and not the frame
   pointer when one is in use (it is saved by the prologue itself).  */
4058 return (regs_ever_live[regno]
4059 && !call_used_regs[regno]
4060 && !fixed_regs[regno]
4061 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4064 /* Return number of registers to be saved on the stack. */
/* Counts registers for which ix86_save_reg (regno, true) holds.
   NOTE(review): the declaration and counter lines were dropped in
   extraction; code kept verbatim.  */
4072 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4073 if (ix86_save_reg (regno, true))
4078 /* Return the offset between two registers, one to be eliminated, and the other
4079 its replacement, at the start of a routine. */
/* All answers are derived from the computed ix86_frame layout; the
   remaining combinations (arg/frame pointer to stack pointer) fall
   through to the stack_pointer_offset expressions below.
   NOTE(review): extracted text has gaps (abort branches not visible);
   code kept verbatim.  */
4082 ix86_initial_elimination_offset (from, to)
4086 struct ix86_frame frame;
4087 ix86_compute_frame_layout (&frame);
4089 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4090 return frame.hard_frame_pointer_offset;
4091 else if (from == FRAME_POINTER_REGNUM
4092 && to == HARD_FRAME_POINTER_REGNUM)
4093 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4096 if (to != STACK_POINTER_REGNUM)
4098 else if (from == ARG_POINTER_REGNUM)
4099 return frame.stack_pointer_offset;
4100 else if (from != FRAME_POINTER_REGNUM)
4103 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4107 /* Fill structure ix86_frame about frame of currently computed function. */
/* Computes every offset/size field of *FRAME in one pass, walking the
   frame from the return address downward: saved regs, vararg save area,
   alignment padding, locals, outgoing args, final padding, red zone.
   NOTE(review): extracted text has gaps (abort() calls after the sanity
   checks, some assignments, and the debug-output guard are not visible);
   code kept verbatim.  */
4110 ix86_compute_frame_layout (frame)
4111 struct ix86_frame *frame;
4113 HOST_WIDE_INT total_size;
4114 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4116 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4117 HOST_WIDE_INT size = get_frame_size ();
4119 frame->nregs = ix86_nsaved_regs ();
4122 /* Skip return address and saved base pointer. */
4123 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4125 frame->hard_frame_pointer_offset = offset;
4127 /* Do some sanity checking of stack_alignment_needed and
4128 preferred_alignment, since i386 port is the only using those features
4129 that may break easily. */
4131 if (size && !stack_alignment_needed)
4133 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4135 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4137 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4140 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4141 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4143 /* Register save area */
4144 offset += frame->nregs * UNITS_PER_WORD;
4147 if (ix86_save_varrargs_registers)
4149 offset += X86_64_VARARGS_SIZE;
4150 frame->va_arg_size = X86_64_VARARGS_SIZE;
4153 frame->va_arg_size = 0;
4155 /* Align start of frame for local function. */
4156 frame->padding1 = ((offset + stack_alignment_needed - 1)
4157 & -stack_alignment_needed) - offset;
4159 offset += frame->padding1;
4161 /* Frame pointer points here. */
4162 frame->frame_pointer_offset = offset;
4166 /* Add outgoing arguments area. Can be skipped if we eliminated
4167 all the function calls as dead code. */
4168 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4170 offset += current_function_outgoing_args_size;
4171 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4174 frame->outgoing_arguments_size = 0;
4176 /* Align stack boundary. Only needed if we're calling another function
4178 if (!current_function_is_leaf || current_function_calls_alloca)
4179 frame->padding2 = ((offset + preferred_alignment - 1)
4180 & -preferred_alignment) - offset;
4182 frame->padding2 = 0;
4184 offset += frame->padding2;
4186 /* We've reached end of stack frame. */
4187 frame->stack_pointer_offset = offset;
4189 /* Size prologue needs to allocate. */
4190 frame->to_allocate =
4191 (size + frame->padding1 + frame->padding2
4192 + frame->outgoing_arguments_size + frame->va_arg_size);
/* Leaf functions on x86-64 may use the red zone below the stack
   pointer instead of explicitly allocating, up to its reserved limit.  */
4194 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4195 && current_function_is_leaf)
4197 frame->red_zone_size = frame->to_allocate;
4198 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4199 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4202 frame->red_zone_size = 0;
4203 frame->to_allocate -= frame->red_zone_size;
4204 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout (presumably under a debug flag
   whose guard line was dropped in extraction — TODO confirm).  */
4206 fprintf (stderr, "nregs: %i\n", frame->nregs);
4207 fprintf (stderr, "size: %i\n", size);
4208 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4209 fprintf (stderr, "padding1: %i\n", frame->padding1);
4210 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4211 fprintf (stderr, "padding2: %i\n", frame->padding2);
4212 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4213 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4214 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4215 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4216 frame->hard_frame_pointer_offset);
4217 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4221 /* Emit code to save registers in the prologue. */
/* Pushes each register that ix86_save_reg reports, highest regno first,
   marking each push frame-related for unwind info.  */
4224 ix86_emit_save_regs ()
4229 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4230 if (ix86_save_reg (regno, true))
4232 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4233 RTX_FRAME_RELATED_P (insn) = 1;
4237 /* Emit code to save registers using MOV insns. First register
4238 is restored from POINTER + OFFSET. */
/* Stores each to-be-saved register to successive words at
   POINTER + OFFSET, marking each store frame-related.  */
4240 ix86_emit_save_regs_using_mov (pointer, offset)
4242 HOST_WIDE_INT offset;
4247 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4248 if (ix86_save_reg (regno, true))
4250 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4252 gen_rtx_REG (Pmode, regno));
4253 RTX_FRAME_RELATED_P (insn) = 1;
4254 offset += UNITS_PER_WORD;
4258 /* Expand the prologue into a bunch of separate insns. */
/* Orchestrates: optional ebp setup, register saves (push- or mov-based),
   stack allocation (sub, or an _alloca call when stack probing is on),
   SUBTARGET_PROLOGUE hook, and PIC/got register setup.
   NOTE(review): extracted text has gaps (declarations, some guards and
   else-branches are not visible); code kept verbatim.  */
4261 ix86_expand_prologue ()
4265 struct ix86_frame frame;
4267 HOST_WIDE_INT allocate;
4271 use_fast_prologue_epilogue
4272 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4273 if (TARGET_PROLOGUE_USING_MOVE)
4274 use_mov = use_fast_prologue_epilogue;
4276 ix86_compute_frame_layout (&frame);
4278 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4279 slower on all targets. Also sdb doesn't like it. */
4281 if (frame_pointer_needed)
4283 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4284 RTX_FRAME_RELATED_P (insn) = 1;
4286 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4287 RTX_FRAME_RELATED_P (insn) = 1;
4290 allocate = frame.to_allocate;
4291 /* In case we are dealing only with single register and empty frame,
4292 push is equivalent of the mov+add sequence. */
4293 if (allocate == 0 && frame.nregs <= 1)
4297 ix86_emit_save_regs ();
4299 allocate += frame.nregs * UNITS_PER_WORD;
4303 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4305 insn = emit_insn (gen_pro_epilogue_adjust_stack
4306 (stack_pointer_rtx, stack_pointer_rtx,
4307 GEN_INT (-allocate)));
4308 RTX_FRAME_RELATED_P (insn) = 1;
4312 /* ??? Is this only valid for Win32? */
/* Stack-probing path: the allocation size goes in eax and _alloca
   both probes and adjusts the stack.  */
4319 arg0 = gen_rtx_REG (SImode, 0);
4320 emit_move_insn (arg0, GEN_INT (allocate));
4322 sym = gen_rtx_MEM (FUNCTION_MODE,
4323 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4324 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4326 CALL_INSN_FUNCTION_USAGE (insn)
4327 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4328 CALL_INSN_FUNCTION_USAGE (insn));
4332 if (!frame_pointer_needed || !frame.to_allocate)
4333 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4335 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4336 -frame.nregs * UNITS_PER_WORD);
4339 #ifdef SUBTARGET_PROLOGUE
4343 pic_reg_used = false;
4344 if (pic_offset_table_rtx
4345 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4346 || current_function_profile))
4348 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4350 if (alt_pic_reg_used != INVALID_REGNUM)
4351 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4353 pic_reg_used = true;
4358 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4360 /* Even with accurate pre-reload life analysis, we can wind up
4361 deleting all references to the pic register after reload.
4362 Consider if cross-jumping unifies two sides of a branch
4363 controled by a comparison vs the only read from a global.
4364 In which case, allow the set_got to be deleted, though we're
4365 too late to do anything about the ebx save in the prologue. */
4366 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4369 /* Prevent function calls from be scheduled before the call to mcount.
4370 In the pic_reg_used case, make sure that the got load isn't deleted. */
4371 if (current_function_profile)
4372 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4375 /* Emit code to restore saved registers using MOV insns. First register
4376 is restored from POINTER + OFFSET. */
/* Mirror of ix86_emit_save_regs_using_mov: loads each saved register
   from successive words at POINTER + OFFSET.  MAYBE_EH_RETURN is passed
   through to ix86_save_reg so the EH data registers are included or not.  */
4378 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4381 int maybe_eh_return;
4385 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4386 if (ix86_save_reg (regno, maybe_eh_return))
4388 emit_move_insn (gen_rtx_REG (Pmode, regno),
4389 adjust_address (gen_rtx_MEM (Pmode, pointer),
4391 offset += UNITS_PER_WORD;
4395 /* Restore function stack, frame, and registers. */
/* STYLE appears to select sibcall (0?), normal (1?) and eh_return (2)
   epilogues — TODO confirm against callers; SOURCE only shows tests
   against 2.  Two main strategies: mov-based restores (with leave or an
   sp adjustment), or pop-based restores after deallocating the frame.
   NOTE(review): extracted text has gaps (declarations, some guards,
   else-arms and GEN_INT operands are missing); code kept verbatim.  */
4398 ix86_expand_epilogue (style)
4402 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4403 struct ix86_frame frame;
4404 HOST_WIDE_INT offset;
4406 ix86_compute_frame_layout (&frame);
4408 /* Calculate start of saved registers relative to ebp. Special care
4409 must be taken for the normal return case of a function using
4410 eh_return: the eax and edx registers are marked as saved, but not
4411 restored along this path. */
4412 offset = frame.nregs;
4413 if (current_function_calls_eh_return && style != 2)
4415 offset *= -UNITS_PER_WORD;
4417 /* If we're only restoring one register and sp is not valid then
4418 using a move instruction to restore the register since it's
4419 less work than reloading sp and popping the register.
4421 The default code result in stack adjustment using add/lea instruction,
4422 while this code results in LEAVE instruction (or discrete equivalent),
4423 so it is profitable in some other cases as well. Especially when there
4424 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4425 and there is exactly one register to pop. This heruistic may need some
4426 tuning in future. */
4427 if ((!sp_valid && frame.nregs <= 1)
4428 || (TARGET_EPILOGUE_USING_MOVE
4429 && use_fast_prologue_epilogue
4430 && (frame.nregs > 1 || frame.to_allocate))
4431 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4432 || (frame_pointer_needed && TARGET_USE_LEAVE
4433 && use_fast_prologue_epilogue && frame.nregs == 1)
4434 || current_function_calls_eh_return)
4436 /* Restore registers. We can use ebp or esp to address the memory
4437 locations. If both are available, default to ebp, since offsets
4438 are known to be small. Only exception is esp pointing directly to the
4439 end of block of saved registers, where we may simplify addressing
4442 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4443 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4444 frame.to_allocate, style == 2);
4446 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4447 offset, style == 2);
4449 /* eh_return epilogues need %ecx added to the stack pointer. */
4452 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4454 if (frame_pointer_needed)
/* With a frame pointer: recompute the stack-adjust value relative
   to ebp, reload saved ebp, then set sp from the adjusted value.  */
4456 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4457 tmp = plus_constant (tmp, UNITS_PER_WORD);
4458 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4460 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4461 emit_move_insn (hard_frame_pointer_rtx, tmp);
4463 emit_insn (gen_pro_epilogue_adjust_stack
4464 (stack_pointer_rtx, sa, const0_rtx));
4468 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4469 tmp = plus_constant (tmp, (frame.to_allocate
4470 + frame.nregs * UNITS_PER_WORD));
4471 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4474 else if (!frame_pointer_needed)
4475 emit_insn (gen_pro_epilogue_adjust_stack
4476 (stack_pointer_rtx, stack_pointer_rtx,
4477 GEN_INT (frame.to_allocate
4478 + frame.nregs * UNITS_PER_WORD)));
4479 /* If not an i386, mov & pop is faster than "leave". */
4480 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4481 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4484 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4485 hard_frame_pointer_rtx,
4488 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4490 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4495 /* First step is to deallocate the stack frame so that we can
4496 pop the registers. */
4499 if (!frame_pointer_needed)
4501 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4502 hard_frame_pointer_rtx,
4505 else if (frame.to_allocate)
4506 emit_insn (gen_pro_epilogue_adjust_stack
4507 (stack_pointer_rtx, stack_pointer_rtx,
4508 GEN_INT (frame.to_allocate)));
4510 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4511 if (ix86_save_reg (regno, false))
4514 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4516 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4518 if (frame_pointer_needed)
4520 /* Leave results in shorter dependency chains on CPUs that are
4521 able to grok it fast. */
4522 if (TARGET_USE_LEAVE)
4523 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4524 else if (TARGET_64BIT)
4525 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4527 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4531 /* Sibcall epilogues don't want a return instruction. */
4535 if (current_function_pops_args && current_function_args_size)
4537 rtx popc = GEN_INT (current_function_pops_args);
4539 /* i386 can only pop 64K bytes. If asked to pop more, pop
4540 return address, do explicit add, and jump indirectly to the
4543 if (current_function_pops_args >= 65536)
4545 rtx ecx = gen_rtx_REG (SImode, 2);
4547 /* There are is no "pascal" calling convention in 64bit ABI. */
4551 emit_insn (gen_popsi1 (ecx));
4552 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4553 emit_jump_insn (gen_return_indirect_internal (ecx));
4556 emit_jump_insn (gen_return_pop_internal (popc));
4559 emit_jump_insn (gen_return_internal ());
4562 /* Reset from the function's potential modifications. */
/* Undoes any per-function renumbering of the PIC register done in
   ix86_expand_prologue (ix86_select_alt_pic_regnum).  */
4565 ix86_output_function_epilogue (file, size)
4566 FILE *file ATTRIBUTE_UNUSED;
4567 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
4569 if (pic_offset_table_rtx)
4570 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4573 /* Extract the parts of an RTL expression that is a valid memory address
4574 for an instruction. Return 0 if the structure of the address is
4575 grossly off. Return -1 if the address contains ASHIFT, so it is not
4576 strictly valid, but still used for computing length of lea instruction.
/* Splits ADDR into base + index*scale + disp (struct ix86_address),
   then applies x86 encoding fixups (ebp-without-disp, K6 [%esi],
   reg*2 -> reg+reg, scale-without-base).
   NOTE(review): extracted text has gaps (several else-arms, return
   statements and the final stores into *OUT are not visible); code
   kept verbatim.  */
4580 ix86_decompose_address (addr, out)
4582 struct ix86_address *out;
4584 rtx base = NULL_RTX;
4585 rtx index = NULL_RTX;
4586 rtx disp = NULL_RTX;
4587 HOST_WIDE_INT scale = 1;
4588 rtx scale_rtx = NULL_RTX;
4591 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4593 else if (GET_CODE (addr) == PLUS)
4595 rtx op0 = XEXP (addr, 0);
4596 rtx op1 = XEXP (addr, 1);
4597 enum rtx_code code0 = GET_CODE (op0);
4598 enum rtx_code code1 = GET_CODE (op1);
4600 if (code0 == REG || code0 == SUBREG)
4602 if (code1 == REG || code1 == SUBREG)
4603 index = op0, base = op1; /* index + base */
4605 base = op0, disp = op1; /* base + displacement */
4607 else if (code0 == MULT)
4609 index = XEXP (op0, 0);
4610 scale_rtx = XEXP (op0, 1);
4611 if (code1 == REG || code1 == SUBREG)
4612 base = op1; /* index*scale + base */
4614 disp = op1; /* index*scale + disp */
4616 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4618 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4619 scale_rtx = XEXP (XEXP (op0, 0), 1);
4620 base = XEXP (op0, 1);
4623 else if (code0 == PLUS)
4625 index = XEXP (op0, 0); /* index + base + disp */
4626 base = XEXP (op0, 1);
4632 else if (GET_CODE (addr) == MULT)
4634 index = XEXP (addr, 0); /* index*scale */
4635 scale_rtx = XEXP (addr, 1);
4637 else if (GET_CODE (addr) == ASHIFT)
4641 /* We're called for lea too, which implements ashift on occasion. */
4642 index = XEXP (addr, 0);
4643 tmp = XEXP (addr, 1);
4644 if (GET_CODE (tmp) != CONST_INT)
4646 scale = INTVAL (tmp);
4647 if ((unsigned HOST_WIDE_INT) scale > 3)
4653 disp = addr; /* displacement */
4655 /* Extract the integral value of scale. */
4658 if (GET_CODE (scale_rtx) != CONST_INT)
4660 scale = INTVAL (scale_rtx);
4663 /* Allow arg pointer and stack pointer as index if there is not scaling */
4664 if (base && index && scale == 1
4665 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4666 || index == stack_pointer_rtx))
4673 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4674 if ((base == hard_frame_pointer_rtx
4675 || base == frame_pointer_rtx
4676 || base == arg_pointer_rtx) && !disp)
4679 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4680 Avoid this by transforming to [%esi+0]. */
4681 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4682 && base && !index && !disp
4684 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4687 /* Special case: encode reg+reg instead of reg*2. */
4688 if (!base && index && scale && scale == 2)
4689 base = index, scale = 1;
4691 /* Special case: scaling cannot be encoded without base or displacement. */
4692 if (!base && !disp && index && scale != 1)
4703 /* Return cost of the memory address x.
4704 For i386, it is better to use a complex address than let gcc copy
4705 the address into a reg and make a new pseudo. But not if the address
4706 requires to two regs - that would mean more pseudos with longer
/* Decomposes X and scores it: complex displacements are cheaper,
   multiple hard registers are penalized, and K6 ModR/M 00_xxx_100b
   forms get an extra penalty.
   NOTE(review): extracted text has gaps (the `cost' accumulator and
   several condition heads are not visible); code kept verbatim.  */
4709 ix86_address_cost (x)
4712 struct ix86_address parts;
4715 if (!ix86_decompose_address (x, &parts))
4718 if (parts.base && GET_CODE (parts.base) == SUBREG)
4719 parts.base = SUBREG_REG (parts.base);
4720 if (parts.index && GET_CODE (parts.index) == SUBREG)
4721 parts.index = SUBREG_REG (parts.index);
4723 /* More complex memory references are better. */
4724 if (parts.disp && parts.disp != const0_rtx)
4727 /* Attempt to minimize number of registers in the address. */
4729 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4731 && (!REG_P (parts.index)
4732 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4736 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4738 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4739 && parts.base != parts.index)
4742 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
4743 since it's predecode logic can't detect the length of instructions
4744 and it degenerates to vector decoded. Increase cost of such
4745 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4746 to split such addresses or even refuse such addresses at all.
4748 Following addressing modes are affected:
4753 The first and last case may be avoidable by explicitly coding the zero in
4754 memory address, but I don't have AMD-K6 machine handy to check this
4758 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4759 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4760 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4766 /* If X is a machine specific address (i.e. a symbol or label being
4767 referenced as a displacement from the GOT implemented using an
4768 UNSPEC), then return the base term. Otherwise return X. */
/* Two recognizers: a (const (unspec ... GOTPCREL)) form (64-bit,
   possibly offset), and a (plus pic_reg (const (unspec ... GOTOFF)))
   form; both unwrap to the underlying SYMBOL_REF/LABEL_REF.
   NOTE(review): extracted text has gaps (`return x;`/`return term;`
   lines and the 64-bit guard are not visible); code kept verbatim.  */
4771 ix86_find_base_term (x)
4778 if (GET_CODE (x) != CONST)
4781 if (GET_CODE (term) == PLUS
4782 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4783 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4784 term = XEXP (term, 0);
4785 if (GET_CODE (term) != UNSPEC
4786 || XINT (term, 1) != UNSPEC_GOTPCREL)
4789 term = XVECEXP (term, 0, 0);
4791 if (GET_CODE (term) != SYMBOL_REF
4792 && GET_CODE (term) != LABEL_REF)
4798 if (GET_CODE (x) != PLUS
4799 || XEXP (x, 0) != pic_offset_table_rtx
4800 || GET_CODE (XEXP (x, 1)) != CONST)
4803 term = XEXP (XEXP (x, 1), 0);
4805 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4806 term = XEXP (term, 0);
4808 if (GET_CODE (term) != UNSPEC
4809 || XINT (term, 1) != UNSPEC_GOTOFF)
4812 term = XVECEXP (term, 0, 0);
4814 if (GET_CODE (term) != SYMBOL_REF
4815 && GET_CODE (term) != LABEL_REF)
/* NOTE(review): elided listing -- leading numbers are original i386.c
   line numbers; case labels, returns and braces are missing between
   the visible lines.  Comments only added.  */
4821 /* Determine if a given RTX is a valid constant. We already know this
4822 satisfies CONSTANT_P. */
4825 legitimate_constant_p (x)
4830 switch (GET_CODE (x))
/* SYMBOL_REF case (label elided): TLS symbols must not be treated as
   ordinary constants, since their value is thread-relative.  */
4833 /* TLS symbols are not constant. */
4834 if (tls_symbolic_operand (x, Pmode))
/* CONST case (label elided): look inside the wrapper.  */
4839 inner = XEXP (x, 0);
4841 /* Offsets of TLS symbols are never valid.
4842 Discourage CSE from creating them. */
4843 if (GET_CODE (inner) == PLUS
4844 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
4847 /* Only some unspecs are valid as "constants". */
4848 if (GET_CODE (inner) == UNSPEC)
4849 switch (XINT (inner, 1))
/* Presumably the UNSPEC_TPOFF/NTPOFF arm -- the case label is elided;
   verify against the full source.  */
4852 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4862 /* Otherwise we handle everything else in the move patterns. */
/* NOTE(review): elided listing; comments only.  */
4866 /* Determine if a given RTX is a valid constant address. */
4869 constant_address_p (x)
4872 switch (GET_CODE (x))
/* One arm (label elided) is valid only in 64-bit mode -- likely the
   CONST_DOUBLE/label arm; TODO confirm against full source.  */
4879 return TARGET_64BIT;
4882 /* For Mach-O, really believe the CONST. */
4885 /* Otherwise fall through. */
/* Default arm: absolute constant addresses are only usable without PIC,
   and only if the constant itself is legitimate.  */
4887 return !flag_pic && legitimate_constant_p (x);
/* NOTE(review): elided listing; comments only.  */
4894 /* Nonzero if the constant value X is a legitimate general operand
4895 when generating PIC code. It is given that flag_pic is on and
4896 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4899 legitimate_pic_operand_p (x)
4904 switch (GET_CODE (x))
/* CONST case (label elided): only specific unspecs wrapped in CONST
   are acceptable as PIC "constants".  */
4907 inner = XEXP (x, 0);
4909 /* Only some unspecs are valid as "constants". */
4910 if (GET_CODE (inner) == UNSPEC)
4911 switch (XINT (inner, 1))
/* Presumably the local-exec TLS unspec arm (case label elided).  */
4914 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
/* SYMBOL_REF/LABEL_REF arm (labels elided): defer to the displacement
   legitimacy check shared with memory addresses.  */
4922 return legitimate_pic_address_disp_p (x);
/* NOTE(review): elided listing; comments only.  Many case labels and
   returns between the visible lines are missing.  */
4929 /* Determine if a given CONST RTX is a valid memory displacement
4933 legitimate_pic_address_disp_p (disp)
4938 /* In 64bit mode we can allow direct addresses of symbols and labels
4939 when they are not dynamic symbols. */
/* 64-bit small-PIC model: a symbol plus a bounded offset (within
   +/- 1GB) or a bare local symbol is directly addressable.  */
4943 if (GET_CODE (disp) == CONST)
4945 /* ??? Handle PIC code models */
4946 if (GET_CODE (x) == PLUS
4947 && (GET_CODE (XEXP (x, 1)) == CONST_INT
4948 && ix86_cmodel == CM_SMALL_PIC
4949 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4950 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4952 if (local_symbolic_operand (x, Pmode))
4955 if (GET_CODE (disp) != CONST)
4957 disp = XEXP (disp, 0);
/* 64-bit path (guard elided): only a bare @GOTPCREL of a symbol or
   label is allowed; a PLUS here would be an offset into the GOT.  */
4961 /* We are unsafe to allow PLUS expressions. This limit allowed distance
4962 of GOT tables. We should not need these anyway. */
4963 if (GET_CODE (disp) != UNSPEC
4964 || XINT (disp, 1) != UNSPEC_GOTPCREL)
4967 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4968 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit path: strip an integer offset from the unspec, then accept
   the known PIC/TLS unspecs below.  */
4974 if (GET_CODE (disp) == PLUS)
4976 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4978 disp = XEXP (disp, 0);
4982 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
4983 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
4985 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
4986 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
4987 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
4989 const char *sym_name = XSTR (XEXP (disp, 1), 0);
/* Mach-O pic-base symbols are conventionally named "*$pb".  */
4990 if (strstr (sym_name, "$pb") != 0)
4995 if (GET_CODE (disp) != UNSPEC)
4998 switch (XINT (disp, 1))
/* @GOT: any symbol; @GOTOFF: only link-time-local symbols.  The case
   labels for these two arms are elided.  */
5003 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5005 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5006 case UNSPEC_GOTTPOFF:
5009 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5011 /* ??? Could support offset here. */
5014 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5016 /* ??? Could support offset here. */
5019 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
/* NOTE(review): elided listing; comments only.  The error-exit labels,
   braces and several returns between the visible lines are missing.  */
5025 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5026 memory address for an instruction. The MODE argument is the machine mode
5027 for the MEM expression that wants to use this address.
5029 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5030 convert common non-canonical forms to canonical form so that they will
5034 legitimate_address_p (mode, addr, strict)
5035 enum machine_mode mode;
5039 struct ix86_address parts;
5040 rtx base, index, disp;
5041 HOST_WIDE_INT scale;
/* REASON/REASON_RTX feed the debug dump at the (elided) error exit.  */
5042 const char *reason = NULL;
5043 rtx reason_rtx = NULL_RTX;
5045 if (TARGET_DEBUG_ADDR)
5048 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5049 GET_MODE_NAME (mode), strict);
/* A bare thread-pointer unspec is always a legitimate address.  */
5053 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5055 if (TARGET_DEBUG_ADDR)
5056 fprintf (stderr, "Success.\n");
5060 if (ix86_decompose_address (addr, &parts) <= 0)
5062 reason = "decomposition failed";
5067 index = parts.index;
5069 scale = parts.scale;
5071 /* Validate base register.
5073 Don't allow SUBREG's here, it can lead to spill failures when the base
5074 is one word out of a two word structure, which is represented internally
5082 if (GET_CODE (base) == SUBREG)
5083 reg = SUBREG_REG (base);
5087 if (GET_CODE (reg) != REG)
5089 reason = "base is not a register";
5093 if (GET_MODE (base) != Pmode)
5095 reason = "base is not in Pmode";
/* Strict checking (after reload) requires a hard register valid as a
   base; non-strict also accepts pseudos.  */
5099 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5100 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5102 reason = "base is not valid";
5107 /* Validate index register.
5109 Don't allow SUBREG's here, it can lead to spill failures when the index
5110 is one word out of a two word structure, which is represented internally
5118 if (GET_CODE (index) == SUBREG)
5119 reg = SUBREG_REG (index);
5123 if (GET_CODE (reg) != REG)
5125 reason = "index is not a register";
5129 if (GET_MODE (index) != Pmode)
5131 reason = "index is not in Pmode";
5135 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5136 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5138 reason = "index is not valid";
5143 /* Validate scale factor. */
5146 reason_rtx = GEN_INT (scale);
5149 reason = "scale without index";
/* x86 addressing only supports scale factors 1, 2, 4 and 8 (the
   scale != 1 guard is in an elided line).  */
5153 if (scale != 2 && scale != 4 && scale != 8)
5155 reason = "scale is not a valid multiplier";
5160 /* Validate displacement. */
/* 64-bit: displacement must fit in a sign-extended 32-bit immediate.  */
5167 if (!x86_64_sign_extended_value (disp))
5169 reason = "displacement is out of range";
5175 if (GET_CODE (disp) == CONST_DOUBLE)
5177 reason = "displacement is a const_double";
5182 if (GET_CODE (disp) == CONST
5183 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5184 switch (XINT (XEXP (disp, 0), 1))
5188 case UNSPEC_GOTPCREL:
5191 goto is_legitimate_pic;
5193 case UNSPEC_GOTTPOFF:
5199 reason = "invalid address unspec";
5203 else if (flag_pic && (SYMBOLIC_CONST (disp)
5205 && !machopic_operand_p (disp)
/* is_legitimate_pic label is elided; this section validates symbolic
   displacements under PIC.  */
5210 if (TARGET_64BIT && (index || base))
5212 reason = "non-constant pic memory reference";
5215 if (! legitimate_pic_address_disp_p (disp))
5217 reason = "displacement is an invalid pic construct";
5221 /* This code used to verify that a symbolic pic displacement
5222 includes the pic_offset_table_rtx register.
5224 While this is good idea, unfortunately these constructs may
5225 be created by "adds using lea" optimization for incorrect
5234 This code is nonsensical, but results in addressing
5235 GOT table with pic_offset_table_rtx base. We can't
5236 just refuse it easily, since it gets matched by
5237 "addsi3" pattern, that later gets split to lea in the
5238 case output register differs from input. While this
5239 can be handled by separate addsi pattern for this case
5240 that never results in lea, this seems to be easier and
5241 correct fix for crash to disable this test. */
5243 else if (!CONSTANT_ADDRESS_P (disp))
5245 reason = "displacement is not constant";
5250 /* Everything looks valid. */
5251 if (TARGET_DEBUG_ADDR)
5252 fprintf (stderr, "Success.\n");
/* Shared error exit (label elided): dump the reason when debugging.  */
5256 if (TARGET_DEBUG_ADDR)
5258 fprintf (stderr, "Error: %s\n", reason);
5259 debug_rtx (reason_rtx);
/* NOTE(review): elided listing; comments only.  */
5264 /* Return an unique alias set for the GOT. */
5266 static HOST_WIDE_INT
5267 ix86_GOT_alias_set ()
/* Lazily allocate the alias set on first use; -1 marks "not yet
   created" (the guard test around new_alias_set is elided).  */
5269 static HOST_WIDE_INT set = -1;
5271 set = new_alias_set ();
/* NOTE(review): elided listing; comments only.  Branch guards
   (TARGET_MACHO, TARGET_64BIT, reg != 0, etc.) sit on missing lines.  */
5275 /* Return a legitimate reference for ORIG (an address) using the
5276 register REG. If REG is 0, a new pseudo is generated.
5278 There are two types of references that must be handled:
5280 1. Global data references must load the address from the GOT, via
5281 the PIC reg. An insn is emitted to do this load, and the reg is
5284 2. Static data references, constant pool addresses, and code labels
5285 compute the address as an offset from the GOT, whose base is in
5286 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5287 differentiate them from global data objects. The returned
5288 address is the PIC reg + an unspec constant.
5290 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5291 reg also appears in the address. */
5294 legitimize_pic_address (orig, reg)
5304 reg = gen_reg_rtx (Pmode);
5305 /* Use the generic Mach-O PIC machinery. */
5306 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5309 if (local_symbolic_operand (addr, Pmode))
5311 /* In 64bit mode we can address such objects directly. */
5316 /* This symbol may be referenced via a displacement from the PIC
5317 base address (@GOTOFF). */
/* During reload no new pseudos may be created; mark the PIC register
   live instead so the prologue sets it up.  */
5319 if (reload_in_progress)
5320 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5321 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5322 new = gen_rtx_CONST (Pmode, new);
5323 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5327 emit_move_insn (reg, new);
5332 else if (GET_CODE (addr) == SYMBOL_REF)
/* 64-bit global symbol: load through a RIP-relative @GOTPCREL slot.
   The GOT entry itself never changes, hence RTX_UNCHANGING_P.  */
5336 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5337 new = gen_rtx_CONST (Pmode, new);
5338 new = gen_rtx_MEM (Pmode, new);
5339 RTX_UNCHANGING_P (new) = 1;
5340 set_mem_alias_set (new, ix86_GOT_alias_set ());
5343 reg = gen_reg_rtx (Pmode);
5344 /* Use directly gen_movsi, otherwise the address is loaded
5345 into register for CSE. We don't want to CSE this addresses,
5346 instead we CSE addresses from the GOT table, so skip this. */
5347 emit_insn (gen_movsi (reg, new));
5352 /* This symbol must be referenced via a load from the
5353 Global Offset Table (@GOT). */
5355 if (reload_in_progress)
5356 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5357 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5358 new = gen_rtx_CONST (Pmode, new);
5359 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5360 new = gen_rtx_MEM (Pmode, new);
5361 RTX_UNCHANGING_P (new) = 1;
5362 set_mem_alias_set (new, ix86_GOT_alias_set ());
5365 reg = gen_reg_rtx (Pmode);
5366 emit_move_insn (reg, new);
/* Fallback: ADDR is not a plain symbol; unwrap CONST and handle
   PLUS expressions recursively.  */
5372 if (GET_CODE (addr) == CONST)
5374 addr = XEXP (addr, 0);
5376 /* We must match stuff we generate before. Assume the only
5377 unspecs that can get here are ours. Not that we could do
5378 anything with them anyway... */
5379 if (GET_CODE (addr) == UNSPEC
5380 || (GET_CODE (addr) == PLUS
5381 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5383 if (GET_CODE (addr) != PLUS)
5386 if (GET_CODE (addr) == PLUS)
5388 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5390 /* Check first to see if this is a constant offset from a @GOTOFF
5391 symbol reference. */
5392 if (local_symbolic_operand (op0, Pmode)
5393 && GET_CODE (op1) == CONST_INT)
5397 if (reload_in_progress)
5398 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5399 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5401 new = gen_rtx_PLUS (Pmode, new, op1);
5402 new = gen_rtx_CONST (Pmode, new);
5403 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5407 emit_move_insn (reg, new);
5413 /* ??? We need to limit offsets here. */
/* General PLUS: legitimize each operand separately, then recombine,
   folding a constant result back into the base.  */
5418 base = legitimize_pic_address (XEXP (addr, 0), reg);
5419 new = legitimize_pic_address (XEXP (addr, 1),
5420 base == reg ? NULL_RTX : reg);
5422 if (GET_CODE (new) == CONST_INT)
5423 new = plus_constant (base, INTVAL (new));
5426 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5428 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5429 new = XEXP (new, 1);
5431 new = gen_rtx_PLUS (Pmode, base, new);
/* NOTE(review): elided listing; comments only.  The function header
   comment and several guards are on missing lines.  */
5440 ix86_encode_section_info (decl, first)
5442 int first ATTRIBUTE_UNUSED;
5444 bool local_p = (*targetm.binds_local_p) (decl);
5447 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5448 if (GET_CODE (rtl) != MEM)
5450 symbol = XEXP (rtl, 0);
5451 if (GET_CODE (symbol) != SYMBOL_REF)
5454 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5455 symbol so that we may access it directly in the GOT. */
5458 SYMBOL_REF_FLAG (symbol) = local_p;
5460 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5461 "local dynamic", "initial exec" or "local exec" TLS models
5464 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5466 const char *symbol_str;
5469 enum tls_model kind;
/* Pick the cheapest TLS model the symbol's visibility allows; the
   conditions selecting local-exec vs initial-exec are elided.  */
5474 kind = TLS_MODEL_LOCAL_EXEC;
5476 kind = TLS_MODEL_INITIAL_EXEC;
5478 /* Local dynamic is inefficient when we're not combining the
5479 parts of the address. */
5480 else if (optimize && local_p)
5481 kind = TLS_MODEL_LOCAL_DYNAMIC;
5483 kind = TLS_MODEL_GLOBAL_DYNAMIC;
/* Never choose a stronger model than the -ftls-model default.  */
5484 if (kind < flag_tls_default)
5485 kind = flag_tls_default;
5487 symbol_str = XSTR (symbol, 0);
/* If already encoded with the same model, nothing to do; re-encoding
   with a different model is handled on elided lines.  */
5489 if (symbol_str[0] == '%')
5491 if (symbol_str[1] == tls_model_chars[kind])
/* Prepend the "%<model-char>" prefix to the symbol name.  */
5495 len = strlen (symbol_str) + 1;
5496 newstr = alloca (len + 2);
5499 newstr[1] = tls_model_chars[kind];
5500 memcpy (newstr + 2, symbol_str, len);
5502 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
/* NOTE(review): only the header of this function is visible; its body
   (skipping the "%<model>" prefix) is entirely on elided lines.  */
5506 /* Undo the above when printing symbol names. */
5509 ix86_strip_name_encoding (str)
/* NOTE(review): elided listing; comments only.  */
5519 /* Load the thread pointer into a register. */
5522 get_thread_pointer ()
/* The thread pointer is modeled as a load from an UNSPEC_TP "address";
   it never changes for a thread, hence RTX_UNCHANGING_P.  */
5526 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5527 tp = gen_rtx_MEM (Pmode, tp);
5528 RTX_UNCHANGING_P (tp) = 1;
5529 set_mem_alias_set (tp, ix86_GOT_alias_set ());
5530 tp = force_reg (Pmode, tp);
/* NOTE(review): elided listing; comments only.  Returns, braces and
   several guards between the visible lines are missing.  */
5535 /* Try machine-dependent ways of modifying an illegitimate address
5536 to be legitimate. If we find one, return the new, valid address.
5537 This macro is used in only one place: `memory_address' in explow.c.
5539 OLDX is the address as it was before break_out_memory_refs was called.
5540 In some cases it is useful to look at this to decide what needs to be done.
5542 MODE and WIN are passed so that this macro can use
5543 GO_IF_LEGITIMATE_ADDRESS.
5545 It is always safe for this macro to do nothing. It exists to recognize
5546 opportunities to optimize the output.
5548 For the 80386, we handle X+REG by loading X into a register R and
5549 using R+REG. R will go in a general reg and indexing will be used.
5550 However, if REG is a broken-out memory address or multiplication,
5551 nothing needs to be done because REG can certainly go in a general reg.
5553 When -fpic is used, special handling is needed for symbolic references.
5554 See comments by legitimize_pic_address in i386.c for details. */
5557 legitimize_address (x, oldx, mode)
5559 register rtx oldx ATTRIBUTE_UNUSED;
5560 enum machine_mode mode;
5565 if (TARGET_DEBUG_ADDR)
5567 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5568 GET_MODE_NAME (mode));
/* TLS symbols get model-specific address sequences first.  */
5572 log = tls_symbolic_operand (x, mode);
5575 rtx dest, base, off, pic;
5579 case TLS_MODEL_GLOBAL_DYNAMIC:
5580 dest = gen_reg_rtx (Pmode);
5581 emit_insn (gen_tls_global_dynamic (dest, x));
5584 case TLS_MODEL_LOCAL_DYNAMIC:
5585 base = gen_reg_rtx (Pmode);
5586 emit_insn (gen_tls_local_dynamic_base (base));
5588 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5589 off = gen_rtx_CONST (Pmode, off);
5591 return gen_rtx_PLUS (Pmode, base, off);
5593 case TLS_MODEL_INITIAL_EXEC:
/* Need the PIC register to address the GOT entry holding the TP
   offset; during reload just mark it live.  */
5596 if (reload_in_progress)
5597 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5598 pic = pic_offset_table_rtx;
5602 pic = gen_reg_rtx (Pmode);
5603 emit_insn (gen_set_got (pic));
5606 base = get_thread_pointer ();
5608 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_GOTTPOFF);
5609 off = gen_rtx_CONST (Pmode, off);
5610 off = gen_rtx_PLUS (Pmode, pic, off);
5611 off = gen_rtx_MEM (Pmode, off);
5612 RTX_UNCHANGING_P (off) = 1;
5613 set_mem_alias_set (off, ix86_GOT_alias_set ());
5615 /* Damn Sun for specifing a set of dynamic relocations without
5616 considering the two-operand nature of the architecture!
5617 We'd be much better off with a "GOTNTPOFF" relocation that
5618 already contained the negated constant. */
5619 /* ??? Using negl and reg+reg addressing appears to be a lose
5620 size-wise. The negl is two bytes, just like the extra movl
5621 incurred by the two-operand subl, but reg+reg addressing
5622 uses the two-byte modrm form, unlike plain reg. */
5624 dest = gen_reg_rtx (Pmode);
5625 emit_insn (gen_subsi3 (dest, base, off));
5628 case TLS_MODEL_LOCAL_EXEC:
5629 base = get_thread_pointer ();
5631 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5632 TARGET_GNU_TLS ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5633 off = gen_rtx_CONST (Pmode, off);
5636 return gen_rtx_PLUS (Pmode, base, off);
5639 dest = gen_reg_rtx (Pmode);
5640 emit_insn (gen_subsi3 (dest, base, off));
/* Non-TLS symbolic addresses under PIC.  */
5651 if (flag_pic && SYMBOLIC_CONST (x))
5652 return legitimize_pic_address (x, 0);
5654 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5655 if (GET_CODE (x) == ASHIFT
5656 && GET_CODE (XEXP (x, 1)) == CONST_INT
5657 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5660 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5661 GEN_INT (1 << log));
5664 if (GET_CODE (x) == PLUS)
5666 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5668 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5669 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5670 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5673 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5674 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5675 GEN_INT (1 << log));
5678 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5679 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5680 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5683 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5684 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5685 GEN_INT (1 << log));
5688 /* Put multiply first if it isn't already. */
5689 if (GET_CODE (XEXP (x, 1)) == MULT)
5691 rtx tmp = XEXP (x, 0);
5692 XEXP (x, 0) = XEXP (x, 1);
5697 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5698 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5699 created by virtual register instantiation, register elimination, and
5700 similar optimizations. */
5701 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5704 x = gen_rtx_PLUS (Pmode,
5705 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5706 XEXP (XEXP (x, 1), 0)),
5707 XEXP (XEXP (x, 1), 1));
5711 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5712 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5713 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5714 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5715 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5716 && CONSTANT_P (XEXP (x, 1)))
5719 rtx other = NULL_RTX;
5721 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5723 constant = XEXP (x, 1);
5724 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5726 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5728 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5729 other = XEXP (x, 1);
5737 x = gen_rtx_PLUS (Pmode,
5738 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5739 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5740 plus_constant (other, INTVAL (constant)));
/* If the rewrites above produced a legitimate address, stop early.  */
5744 if (changed && legitimate_address_p (mode, x, FALSE))
5747 if (GET_CODE (XEXP (x, 0)) == MULT)
5750 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5753 if (GET_CODE (XEXP (x, 1)) == MULT)
5756 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5760 && GET_CODE (XEXP (x, 1)) == REG
5761 && GET_CODE (XEXP (x, 0)) == REG)
5764 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5767 x = legitimize_pic_address (x, 0);
5770 if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: force the non-register operand of the PLUS into a
   register so the address becomes reg+reg.  */
5773 if (GET_CODE (XEXP (x, 0)) == REG)
5775 register rtx temp = gen_reg_rtx (Pmode);
5776 register rtx val = force_operand (XEXP (x, 1), temp);
5778 emit_move_insn (temp, val);
5784 else if (GET_CODE (XEXP (x, 1)) == REG)
5786 register rtx temp = gen_reg_rtx (Pmode);
5787 register rtx val = force_operand (XEXP (x, 0), temp);
5789 emit_move_insn (temp, val);
/* NOTE(review): elided listing; comments only.  Most case labels and
   break statements are on missing lines.  */
5799 /* Print an integer constant expression in assembler syntax. Addition
5800 and subtraction are the only arithmetic that may appear in these
5801 expressions. FILE is the stdio stream to write to, X is the rtx, and
5802 CODE is the operand print code from the output string. */
5805 output_pic_addr_const (file, x, code)
5812 switch (GET_CODE (x))
/* SYMBOL_REF arm (label elided): emit the name, with @PLT for
   non-local calls when printing with the 'P' code.  */
5822 assemble_name (file, XSTR (x, 0));
5823 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
5824 fputs ("@PLT", file);
/* CODE_LABEL / LABEL_REF arm (labels elided).  */
5831 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5832 assemble_name (asm_out_file, buf);
5836 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5840 /* This used to output parentheses around the expression,
5841 but that does not work on the 386 (either ATT or BSD assembler). */
5842 output_pic_addr_const (file, XEXP (x, 0), code);
5846 if (GET_MODE (x) == VOIDmode)
5848 /* We can use %d if the number is <32 bits and positive. */
5849 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5850 fprintf (file, "0x%lx%08lx",
5851 (unsigned long) CONST_DOUBLE_HIGH (x),
5852 (unsigned long) CONST_DOUBLE_LOW (x));
5854 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5857 /* We can't handle floating point constants;
5858 PRINT_OPERAND must handle them. */
5859 output_operand_lossage ("floating constant misused");
5863 /* Some assemblers need integer constants to appear first. */
5864 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5866 output_pic_addr_const (file, XEXP (x, 0), code);
5868 output_pic_addr_const (file, XEXP (x, 1), code);
5870 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5872 output_pic_addr_const (file, XEXP (x, 1), code);
5874 output_pic_addr_const (file, XEXP (x, 0), code);
/* MINUS arm (label elided): bracket the difference for the assembler;
   Intel dialect uses parentheses, AT&T uses square brackets.  */
5882 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5883 output_pic_addr_const (file, XEXP (x, 0), code);
5885 output_pic_addr_const (file, XEXP (x, 1), code);
5887 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC arm: print the operand followed by its relocation suffix.  */
5891 if (XVECLEN (x, 0) != 1)
5893 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5894 switch (XINT (x, 1))
5897 fputs ("@GOT", file);
5900 fputs ("@GOTOFF", file);
5902 case UNSPEC_GOTPCREL:
5903 fputs ("@GOTPCREL(%rip)", file);
5905 case UNSPEC_GOTTPOFF:
5906 fputs ("@GOTTPOFF", file);
5909 fputs ("@TPOFF", file);
5912 fputs ("@NTPOFF", file);
5915 fputs ("@DTPOFF", file);
5918 output_operand_lossage ("invalid UNSPEC as operand");
5924 output_operand_lossage ("invalid expression as operand");
/* NOTE(review): elided listing; comments only.  */
5928 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5929 We need to handle our special PIC relocations. */
5932 i386_dwarf_output_addr_const (file, x)
/* Emit the pseudo-op for a pointer-sized datum (quad on 64-bit).  */
5937 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
5941 fprintf (file, "%s", ASM_LONG);
/* PIC addresses need the relocation-suffix printer; otherwise the
   generic constant printer suffices.  */
5944 output_pic_addr_const (file, x, '\0');
5946 output_addr_const (file, x);
/* NOTE(review): elided listing; comments only.  */
5950 /* In the name of slightly smaller debug output, and to cater to
5951 general assembler losage, recognize PIC+GOTOFF and turn it back
5952 into a direct symbol reference. */
5955 i386_simplify_dwarf_addr (orig_x)
5960 if (GET_CODE (x) == MEM)
/* 64-bit: a MEM of a @GOTPCREL CONST collapses to the bare symbol.  */
5965 if (GET_CODE (x) != CONST
5966 || GET_CODE (XEXP (x, 0)) != UNSPEC
5967 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
5968 || GET_CODE (orig_x) != MEM)
5970 return XVECEXP (XEXP (x, 0), 0, 0);
5973 if (GET_CODE (x) != PLUS
5974 || GET_CODE (XEXP (x, 1)) != CONST)
/* Identify the non-PIC part Y of "%ebx + ..." so it can be re-added
   to the recovered symbol below.  */
5977 if (GET_CODE (XEXP (x, 0)) == REG
5978 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5979 /* %ebx + GOT/GOTOFF */
5981 else if (GET_CODE (XEXP (x, 0)) == PLUS)
5983 /* %ebx + %reg * scale + GOT/GOTOFF */
5985 if (GET_CODE (XEXP (y, 0)) == REG
5986 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5988 else if (GET_CODE (XEXP (y, 1)) == REG
5989 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5993 if (GET_CODE (y) != REG
5994 && GET_CODE (y) != MULT
5995 && GET_CODE (y) != ASHIFT)
/* @GOT references only make sense inside a MEM; @GOTOFF only
   outside one.  */
6001 x = XEXP (XEXP (x, 1), 0);
6002 if (GET_CODE (x) == UNSPEC
6003 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6004 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6007 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6008 return XVECEXP (x, 0, 0);
/* Same recovery when an integer offset rides along with the unspec.  */
6011 if (GET_CODE (x) == PLUS
6012 && GET_CODE (XEXP (x, 0)) == UNSPEC
6013 && GET_CODE (XEXP (x, 1)) == CONST_INT
6014 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6015 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6016 && GET_CODE (orig_x) != MEM)))
6018 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6020 return gen_rtx_PLUS (Pmode, y, x);
/* NOTE(review): elided listing; comments only.  The case labels for
   the rtx codes (EQ, GT, LTU, ...) are mostly on missing lines.  */
6028 put_condition_code (code, mode, reverse, fp, file)
6030 enum machine_mode mode;
/* For FP compares, map the rtx comparison onto the integer condition
   produced by fcom/fcomi; combinations needing two branches must have
   been split earlier (hence the elided abort when bypass/second
   codes are needed).  */
6036 if (mode == CCFPmode || mode == CCFPUmode)
6038 enum rtx_code second_code, bypass_code;
6039 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6040 if (bypass_code != NIL || second_code != NIL)
6042 code = ix86_fp_compare_code_to_integer (code);
6046 code = reverse_condition (code);
6057 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6062 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6063 Those same assemblers have the same but opposite losage on cmov. */
6066 suffix = fp ? "nbe" : "a";
/* Suffix choice depends on which flags the CC mode guarantees; the
   suffix assignments themselves sit on elided lines.  */
6069 if (mode == CCNOmode || mode == CCGOCmode)
6071 else if (mode == CCmode || mode == CCGCmode)
6082 if (mode == CCNOmode || mode == CCGOCmode)
6084 else if (mode == CCmode || mode == CCGCmode)
6093 suffix = fp ? "nb" : "ae";
6096 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6106 suffix = fp ? "u" : "p";
6109 suffix = fp ? "nu" : "np";
6114 fputs (suffix, file);
/* NOTE(review): elided listing; comments only.  */
6118 print_reg (x, code, file)
/* Fake/internal registers must never reach the assembler output.  */
6123 if (REGNO (x) == ARG_POINTER_REGNUM
6124 || REGNO (x) == FRAME_POINTER_REGNUM
6125 || REGNO (x) == FLAGS_REG
6126 || REGNO (x) == FPSR_REG)
/* AT&T dialect prefixes registers with '%' (putc elided).  */
6129 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
/* Map the operand-code letter to an operand size in bytes; with no
   code letter, fall back to the mode's size.  */
6132 if (code == 'w' || MMX_REG_P (x))
6134 else if (code == 'b')
6136 else if (code == 'k')
6138 else if (code == 'q')
6140 else if (code == 'y')
6142 else if (code == 'h')
6145 code = GET_MODE_SIZE (GET_MODE (x));
6147 /* Irritatingly, AMD extended registers use different naming convention
6148 from the normal registers. */
6149 if (REX_INT_REG_P (x))
6156 error ("extended registers have no high halves");
6159 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6162 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6165 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6168 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6171 error ("unsupported operand size for extended register");
6179 if (STACK_TOP_P (x))
6181 fputs ("st(0)", file);
/* Classic registers: prefix with 'e' (or 'r' for 8-byte in 64-bit
   mode) and pick the hi/qi/qi-high name table by size.  */
6188 if (! ANY_FP_REG_P (x))
6189 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6193 fputs (hi_reg_name[REGNO (x)], file);
6196 fputs (qi_reg_name[REGNO (x)], file);
6199 fputs (qi_high_reg_name[REGNO (x)], file);
/* NOTE(review): elided listing; comments only.  */
6206 /* Locate some local-dynamic symbol still in use by this function
6207 so that we can print its name in some tls_local_dynamic_base
6211 get_some_local_dynamic_name ()
/* Cache the answer per function; scan the insn stream only once.  */
6215 if (cfun->machine->some_ld_name)
6216 return cfun->machine->some_ld_name;
6218 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6220 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6221 return cfun->machine->some_ld_name;
/* NOTE(review): elided listing; comments only.  Callback for
   for_each_rtx used by get_some_local_dynamic_name: records the first
   local-dynamic TLS symbol found and (presumably) returns nonzero to
   stop the walk -- the return statements are elided.  */
6227 get_some_local_dynamic_name_1 (px, data)
6229 void *data ATTRIBUTE_UNUSED;
6233 if (GET_CODE (x) == SYMBOL_REF
6234 && local_dynamic_symbolic_operand (x, Pmode))
6236 cfun->machine->some_ld_name = XSTR (x, 0);
/* NOTE(review): elided listing; comments only.  Many case labels,
   breaks and putc calls between the visible lines are missing.  */
6244 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6245 C -- print opcode suffix for set/cmov insn.
6246 c -- like C, but print reversed condition
6247 F,f -- likewise, but for floating-point.
6248 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6250 R -- print the prefix for register names.
6251 z -- print the opcode suffix for the size of the current operand.
6252 * -- print a star (in certain assembler syntax)
6253 A -- print an absolute memory reference.
6254 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6255 s -- print a shift double count, followed by the assemblers argument
6257 b -- print the QImode name of the register for the indicated operand.
6258 %b0 would print %al if operands[0] is reg 0.
6259 w -- likewise, print the HImode name of the register.
6260 k -- likewise, print the SImode name of the register.
6261 q -- likewise, print the DImode name of the register.
6262 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6263 y -- print "st(0)" instead of "st" as a register.
6264 D -- print condition for SSE cmp instruction.
6265 P -- if PIC, print an @PLT suffix.
6266 X -- don't print any sort of PIC '@' suffix for a symbol.
6267 & -- print some in-use local-dynamic symbol name.
6271 print_operand (file, x, code)
/* '*' code (case label elided): AT&T syntax uses a star for
   indirect jumps/calls.  */
6281 if (ASSEMBLER_DIALECT == ASM_ATT)
/* '&' code: emit a live local-dynamic TLS symbol name.  */
6286 assemble_name (file, get_some_local_dynamic_name ());
/* 'A' code: absolute memory reference.  */
6290 if (ASSEMBLER_DIALECT == ASM_ATT)
6292 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6294 /* Intel syntax. For absolute addresses, registers should not
6295 be surrounded by braces. */
6296 if (GET_CODE (x) != REG)
6299 PRINT_OPERAND (file, x, 0);
6307 PRINT_OPERAND (file, x, 0);
/* L/W/B/Q/S/T size-suffix codes: only AT&T dialect prints suffixes
   (the putc calls are on elided lines).  */
6312 if (ASSEMBLER_DIALECT == ASM_ATT)
6317 if (ASSEMBLER_DIALECT == ASM_ATT)
6322 if (ASSEMBLER_DIALECT == ASM_ATT)
6327 if (ASSEMBLER_DIALECT == ASM_ATT)
6332 if (ASSEMBLER_DIALECT == ASM_ATT)
6337 if (ASSEMBLER_DIALECT == ASM_ATT)
/* 'z' code: derive the opcode suffix from the operand's mode size.  */
6342 /* 387 opcodes don't get size suffixes if the operands are
6344 if (STACK_REG_P (x))
6347 /* Likewise if using Intel opcodes. */
6348 if (ASSEMBLER_DIALECT == ASM_INTEL)
6351 /* This is the size of op from size of operand. */
6352 switch (GET_MODE_SIZE (GET_MODE (x)))
6355 #ifdef HAVE_GAS_FILDS_FISTS
6361 if (GET_MODE (x) == SFmode)
6376 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6378 #ifdef GAS_MNEMONICS
/* 's' code: shift-double count (omitted by some assemblers when the
   count is in %cl).  */
6404 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6406 PRINT_OPERAND (file, x, 0);
6412 /* Little bit of braindamage here. The SSE compare instructions
6413 does use completely different names for the comparisons that the
6414 fp conditional moves. */
6415 switch (GET_CODE (x))
6430 fputs ("unord", file);
6434 fputs ("neq", file);
6438 fputs ("nlt", file);
6442 fputs ("nle", file);
6445 fputs ("ord", file);
/* 'O' and 'C'/'F' codes: Sun-as cmov suffix and condition suffix.  */
6453 #ifdef CMOV_SUN_AS_SYNTAX
6454 if (ASSEMBLER_DIALECT == ASM_ATT)
6456 switch (GET_MODE (x))
6458 case HImode: putc ('w', file); break;
6460 case SFmode: putc ('l', file); break;
6462 case DFmode: putc ('q', file); break;
6470 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6473 #ifdef CMOV_SUN_AS_SYNTAX
6474 if (ASSEMBLER_DIALECT == ASM_ATT)
6477 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6480 /* Like above, but reverse condition */
6482 /* Check to see if argument to %c is really a constant
6483 and not a condition code which needs to be reversed. */
6484 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
6486 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6489 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6492 #ifdef CMOV_SUN_AS_SYNTAX
6493 if (ASSEMBLER_DIALECT == ASM_ATT)
6496 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* Branch-prediction-hint code: emit DS/CS prefixes only when the
   predicted direction disagrees with the CPU's static prediction.  */
6502 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6505 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6508 int pred_val = INTVAL (XEXP (x, 0));
6510 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6511 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6513 int taken = pred_val > REG_BR_PROB_BASE / 2;
6514 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6516 /* Emit hints only in the case default branch prediction
6517 heruistics would fail. */
6518 if (taken != cputaken)
6520 /* We use 3e (DS) prefix for taken branches and
6521 2e (CS) prefix for not taken branches. */
6523 fputs ("ds ; ", file);
6525 fputs ("cs ; ", file);
6532 output_operand_lossage ("invalid operand code `%c'", code);
/* After the code-letter switch: print the operand itself.  */
6536 if (GET_CODE (x) == REG)
6538 PRINT_REG (x, code, file);
6541 else if (GET_CODE (x) == MEM)
6543 /* No `byte ptr' prefix for call instructions. */
6544 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6547 switch (GET_MODE_SIZE (GET_MODE (x)))
6549 case 1: size = "BYTE"; break;
6550 case 2: size = "WORD"; break;
6551 case 4: size = "DWORD"; break;
6552 case 8: size = "QWORD"; break;
6553 case 12: size = "XWORD"; break;
6554 case 16: size = "XMMWORD"; break;
6559 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6562 else if (code == 'w')
6564 else if (code == 'k')
6568 fputs (" PTR ", file);
6572 if (flag_pic && CONSTANT_ADDRESS_P (x))
6573 output_pic_addr_const (file, x, code);
6574 /* Avoid (%rip) for call operands. */
6575 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
6576 && GET_CODE (x) != CONST_INT)
6577 output_addr_const (file, x);
6578 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6579 output_operand_lossage ("invalid constraints for operand");
/* SFmode immediates are emitted as their 32-bit bit pattern.  */
6584 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6589 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6590 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6592 if (ASSEMBLER_DIALECT == ASM_ATT)
6594 fprintf (file, "0x%lx", l);
6597 /* These float cases don't actually occur as immediate operands. */
6598 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6603 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6604 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6605 fprintf (file, "%s", dstr);
6608 else if (GET_CODE (x) == CONST_DOUBLE
6609 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
6614 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6615 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6616 fprintf (file, "%s", dstr);
/* Default: immediates get '$' in AT&T dialect, symbolic constants get
   "OFFSET FLAT:" in Intel dialect.  */
6623 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6625 if (ASSEMBLER_DIALECT == ASM_ATT)
6628 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6629 || GET_CODE (x) == LABEL_REF)
6631 if (ASSEMBLER_DIALECT == ASM_ATT)
6634 fputs ("OFFSET FLAT:", file);
6637 if (GET_CODE (x) == CONST_INT)
6638 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6640 output_pic_addr_const (file, x, code);
6642 output_addr_const (file, x);
6646 /* Print a memory operand whose address is ADDR. */
/* FILE is the assembler output stream.  Emits ADDR in the current
   assembler dialect (AT&T or Intel): handles the %gs-relative TLS
   thread-pointer UNSPEC, a bare displacement, and the general
   base + index*scale + displacement form.
   NOTE(review): intermediate source lines (braces, else-arms, the
   TARGET_64BIT test before the RIP-relative case) are elided in this
   extract; comments describe only the visible code.  */
6649 print_operand_address (file, addr)
6653 struct ix86_address parts;
6654 rtx base, index, disp;
/* Thread-pointer reference: printed as a %gs-segment access.  */
6657 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
6659 if (ASSEMBLER_DIALECT == ASM_INTEL)
6660 fputs ("DWORD PTR ", file);
6661 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6663 fputs ("gs:0", file);
6667 if (! ix86_decompose_address (addr, &parts))
6671 index = parts.index;
6673 scale = parts.scale;
/* Case 1: no base and no index register — displacement only.  */
6675 if (!base && !index)
6677 /* Displacement only requires special attention. */
6679 if (GET_CODE (disp) == CONST_INT)
6681 if (ASSEMBLER_DIALECT == ASM_INTEL)
6683 if (USER_LABEL_PREFIX[0] == 0)
6685 fputs ("ds:", file);
/* Here ADDR is the CONST_INT displacement itself, so INTVAL (addr)
   is the constant being printed.  */
6687 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6690 output_pic_addr_const (file, addr, 0);
6692 output_addr_const (file, addr);
6694 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6696 && (GET_CODE (addr) == SYMBOL_REF
6697 || GET_CODE (addr) == LABEL_REF
6698 || (GET_CODE (addr) == CONST
6699 && GET_CODE (XEXP (addr, 0)) == PLUS
6700 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
6701 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
6702 fputs ("(%rip)", file);
/* Case 2 (AT&T syntax): disp(base,index,scale).  */
6706 if (ASSEMBLER_DIALECT == ASM_ATT)
6711 output_pic_addr_const (file, disp, 0);
6712 else if (GET_CODE (disp) == LABEL_REF)
6713 output_asm_label (disp);
6715 output_addr_const (file, disp);
6720 PRINT_REG (base, 0, file);
6724 PRINT_REG (index, 0, file);
6726 fprintf (file, ",%d", scale);
/* Case 3 (Intel syntax): [base + index*scale + disp].  */
6732 rtx offset = NULL_RTX;
6736 /* Pull out the offset of a symbol; print any symbol itself. */
6737 if (GET_CODE (disp) == CONST
6738 && GET_CODE (XEXP (disp, 0)) == PLUS
6739 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6741 offset = XEXP (XEXP (disp, 0), 1);
6742 disp = gen_rtx_CONST (VOIDmode,
6743 XEXP (XEXP (disp, 0), 0));
6747 output_pic_addr_const (file, disp, 0);
6748 else if (GET_CODE (disp) == LABEL_REF)
6749 output_asm_label (disp);
6750 else if (GET_CODE (disp) == CONST_INT)
6753 output_addr_const (file, disp);
6759 PRINT_REG (base, 0, file);
/* Print the symbol's integer offset with an explicit sign.  */
6762 if (INTVAL (offset) >= 0)
6764 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6768 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6775 PRINT_REG (index, 0, file);
6777 fprintf (file, "*%d", scale);
/* Target hook: print the TLS relocation suffix for an UNSPEC address.
   Emits the wrapped symbol followed by @GOTTPOFF/@TPOFF/@NTPOFF/@DTPOFF
   according to the UNSPEC code; presumably returns false for anything
   that is not a recognized UNSPEC so generic code handles it.
   NOTE(review): all case labels except UNSPEC_GOTTPOFF, plus braces and
   return statements, are elided in this extract.  */
6785 output_addr_const_extra (file, x)
6791 if (GET_CODE (x) != UNSPEC)
6794 op = XVECEXP (x, 0, 0);
6795 switch (XINT (x, 1))
6797 case UNSPEC_GOTTPOFF:
6798 output_addr_const (file, op);
6799 fputs ("@GOTTPOFF", file);
6802 output_addr_const (file, op);
6803 fputs ("@TPOFF", file);
6806 output_addr_const (file, op);
6807 fputs ("@NTPOFF", file);
6810 output_addr_const (file, op);
6811 fputs ("@DTPOFF", file);
6821 /* Split one or more DImode RTL references into pairs of SImode
6822 references. The RTL can be REG, offsettable MEM, integer constant, or
6823 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6824 split and "num" is its length. lo_half and hi_half are output arrays
6825 that parallel "operands". */
/* NOTE(review): the loop header iterating over "num" is elided in this
   extract; the body below references operands[num] per iteration.  */
6828 split_di (operands, num, lo_half, hi_half)
6831 rtx lo_half[], hi_half[];
6835 rtx op = operands[num];
6837 /* simplify_subreg refuses to split volatile memory addresses,
6838 but we still have to handle it. */
6839 if (GET_CODE (op) == MEM)
/* MEM: low word at offset 0, high word at offset 4 (little-endian).  */
6841 lo_half[num] = adjust_address (op, SImode, 0);
6842 hi_half[num] = adjust_address (op, SImode, 4);
/* Non-MEM: use subregs; VOIDmode constants are treated as DImode.  */
6846 lo_half[num] = simplify_gen_subreg (SImode, op,
6847 GET_MODE (op) == VOIDmode
6848 ? DImode : GET_MODE (op), 0);
6849 hi_half[num] = simplify_gen_subreg (SImode, op,
6850 GET_MODE (op) == VOIDmode
6851 ? DImode : GET_MODE (op), 4);
6855 /* Split one or more TImode RTL references into pairs of SImode
6856 references. The RTL can be REG, offsettable MEM, integer constant, or
6857 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6858 split and "num" is its length. lo_half and hi_half are output arrays
6859 that parallel "operands". */
/* NOTE(review): despite the copied comment saying "SImode", the code
   visibly splits TImode into DImode halves (offsets 0 and 8).  The loop
   header over "num" is elided in this extract.  */
6862 split_ti (operands, num, lo_half, hi_half)
6865 rtx lo_half[], hi_half[];
6869 rtx op = operands[num];
6871 /* simplify_subreg refuses to split volatile memory addresses, but we
6872 still have to handle it. */
6873 if (GET_CODE (op) == MEM)
/* MEM: low double-word at offset 0, high at offset 8.  */
6875 lo_half[num] = adjust_address (op, DImode, 0);
6876 hi_half[num] = adjust_address (op, DImode, 8);
6880 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6881 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6886 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6887 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6888 is the expression of the binary operation. The output may either be
6889 emitted here, or returned to the caller, like all output_* functions.
6891 There is no guarantee that the operands are the same mode, as they
6892 might be within FLOAT or FLOAT_EXTEND expressions. */
6894 #ifndef SYSV386_COMPAT
6895 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6896 wants to fix the assemblers because that causes incompatibility
6897 with gcc. No-one wants to fix gcc because that causes
6898 incompatibility with assemblers... You can use the option of
6899 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6900 #define SYSV386_COMPAT 1
/* NOTE(review): many lines (braces, case labels, the strcpy of the
   mnemonic into buf, and the final strcat/return) are elided in this
   extract; comments below cover only the visible code.  */
6904 output_387_binary_op (insn, operands)
6908 static char buf[30];
/* Any operand living in an SSE register selects the scalar SSE form
   (addss/addsd etc.) instead of the x87 stack form.  */
6911 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
6913 #ifdef ENABLE_CHECKING
6914 /* Even if we do not want to check the inputs, this documents input
6915 constraints. Which helps in understanding the following code. */
6916 if (STACK_REG_P (operands[0])
6917 && ((REG_P (operands[1])
6918 && REGNO (operands[0]) == REGNO (operands[1])
6919 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6920 || (REG_P (operands[2])
6921 && REGNO (operands[0]) == REGNO (operands[2])
6922 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6923 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
/* Pick the base mnemonic; an integer-mode operand selects the
   fi* (integer operand) variants.  */
6929 switch (GET_CODE (operands[3]))
6932 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6933 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6941 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6942 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6950 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6951 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6959 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6960 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
/* SSE path: append the ss/sd suffix and two-operand template.  */
6974 if (GET_MODE (operands[0]) == SFmode)
6975 strcat (buf, "ss\t{%2, %0|%0, %2}");
6977 strcat (buf, "sd\t{%2, %0|%0, %2}");
6982 switch (GET_CODE (operands[3]))
/* Commutative ops: canonicalize so operands[0] matches operands[1].  */
6986 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6988 rtx temp = operands[2];
6989 operands[2] = operands[1];
6993 /* know operands[0] == operands[1]. */
6995 if (GET_CODE (operands[2]) == MEM)
7001 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7003 if (STACK_TOP_P (operands[0]))
7004 /* How is it that we are storing to a dead operands[2]?
7005 Well, presumably operands[1] is dead too. We can't
7006 store the result to st(0) as st(0) gets popped on this
7007 instruction. Instead store to operands[2] (which I
7008 think has to be st(1)). st(1) will be popped later.
7009 gcc <= 2.8.1 didn't have this check and generated
7010 assembly code that the Unixware assembler rejected. */
7011 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7013 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7017 if (STACK_TOP_P (operands[0]))
7018 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7020 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (MINUS/DIV): memory-operand forms.  */
7025 if (GET_CODE (operands[1]) == MEM)
7031 if (GET_CODE (operands[2]) == MEM)
7037 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7040 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7041 derived assemblers, confusingly reverse the direction of
7042 the operation for fsub{r} and fdiv{r} when the
7043 destination register is not st(0). The Intel assembler
7044 doesn't have this brain damage. Read !SYSV386_COMPAT to
7045 figure out what the hardware really does. */
7046 if (STACK_TOP_P (operands[0]))
7047 p = "{p\t%0, %2|rp\t%2, %0}";
7049 p = "{rp\t%2, %0|p\t%0, %2}";
7051 if (STACK_TOP_P (operands[0]))
7052 /* As above for fmul/fadd, we can't store to st(0). */
7053 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7055 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7060 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7063 if (STACK_TOP_P (operands[0]))
7064 p = "{rp\t%0, %1|p\t%1, %0}";
7066 p = "{p\t%1, %0|rp\t%0, %1}";
7068 if (STACK_TOP_P (operands[0]))
7069 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7071 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7076 if (STACK_TOP_P (operands[0]))
7078 if (STACK_TOP_P (operands[1]))
7079 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7081 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7084 else if (STACK_TOP_P (operands[1]))
7087 p = "{\t%1, %0|r\t%0, %1}";
7089 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7095 p = "{r\t%2, %0|\t%0, %2}";
7097 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7110 /* Output code to initialize control word copies used by
7111 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7112 is set to control word rounding downwards. */
/* Reads the live x87 control word with fnstcw into NORMAL, then forms
   ROUND_DOWN by setting the RC bits (0x0c00 = round toward zero).  */
7114 emit_i387_cw_initialization (normal, round_down)
7115 rtx normal, round_down;
7117 rtx reg = gen_reg_rtx (HImode);
7119 emit_insn (gen_x86_fnstcw_1 (normal));
7120 emit_move_insn (reg, normal);
/* Prefer an insv of 0xc into the RC field unless partial-register
   stalls make the iorhi3 form cheaper; condition line is elided.  */
7121 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7123 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7125 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7126 emit_move_insn (round_down, reg);
7129 /* Output code for INSN to convert a float to a signed int. OPERANDS
7130 are the insn operands. The output may be [HSD]Imode and the input
7131 operand may be [SDX]Fmode. */
7134 output_fix_trunc (insn, operands)
/* True when st(0) dies in this insn, allowing the popping fistp.  */
7138 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7139 int dimode_p = GET_MODE (operands[0]) == DImode;
7141 /* Jump through a hoop or two for DImode, since the hardware has no
7142 non-popping instruction. We used to do this a different way, but
7143 that was somewhat fragile and broke with post-reload splitters. */
7144 if (dimode_p && !stack_top_dies)
7145 output_asm_insn ("fld\t%y1", operands);
7147 if (!STACK_TOP_P (operands[1]))
7150 if (GET_CODE (operands[0]) != MEM)
/* Switch to the round-toward-zero control word (operands[3]), store
   the integer, then restore the original control word (operands[2]).  */
7153 output_asm_insn ("fldcw\t%3", operands);
7154 if (stack_top_dies || dimode_p)
7155 output_asm_insn ("fistp%z0\t%0", operands);
7157 output_asm_insn ("fist%z0\t%0", operands);
7158 output_asm_insn ("fldcw\t%2", operands);
7163 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7164 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7165 when fucom should be used. */
/* NOTE(review): lines are elided here (braces, the eflags_p == 2
   rewrite of cmp_op0/cmp_op1, and the final return of alt[mask]).  */
7168 output_fp_compare (insn, operands, eflags_p, unordered_p)
7171 int eflags_p, unordered_p;
7174 rtx cmp_op0 = operands[0];
7175 rtx cmp_op1 = operands[1];
7176 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7181 cmp_op1 = operands[2];
/* SSE comparisons: [u]comiss / [u]comisd set EFLAGS directly.
   NOTE(review): the "%y" in the ordered templates below looks like it
   was meant to be "%1" (as in the unordered forms) — confirm against
   a later GCC revision before relying on these strings.  */
7185 if (GET_MODE (operands[0]) == SFmode)
7187 return "ucomiss\t{%1, %0|%0, %1}";
7189 return "comiss\t{%1, %0|%0, %y}";
7192 return "ucomisd\t{%1, %0|%0, %1}";
7194 return "comisd\t{%1, %0|%0, %y}";
7197 if (! STACK_TOP_P (cmp_op0))
7200 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7202 if (STACK_REG_P (cmp_op1)
7204 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7205 && REGNO (cmp_op1) != FIRST_STACK_REG)
7207 /* If both the top of the 387 stack dies, and the other operand
7208 is also a stack register that dies, then this must be a
7209 `fcompp' float compare */
7213 /* There is no double popping fcomi variant. Fortunately,
7214 eflags is immune from the fstp's cc clobbering. */
7216 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7218 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7226 return "fucompp\n\tfnstsw\t%0";
7228 return "fcompp\n\tfnstsw\t%0";
7241 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7243 static const char * const alt[24] =
7255 "fcomi\t{%y1, %0|%0, %y1}",
7256 "fcomip\t{%y1, %0|%0, %y1}",
7257 "fucomi\t{%y1, %0|%0, %y1}",
7258 "fucomip\t{%y1, %0|%0, %y1}",
7265 "fcom%z2\t%y2\n\tfnstsw\t%0",
7266 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7267 "fucom%z2\t%y2\n\tfnstsw\t%0",
7268 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7270 "ficom%z2\t%y2\n\tfnstsw\t%0",
7271 "ficomp%z2\t%y2\n\tfnstsw\t%0",
/* Build the 4-bit index into alt[] from the encoding described above.  */
7279 mask = eflags_p << 3;
7280 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7281 mask |= unordered_p << 1;
7282 mask |= stack_top_dies;
/* Emit one element of a jump-table (addr_vec): a .long (or .quad for
   64-bit targets; the enabling test is elided) referencing local label
   LPREFIX<VALUE>.  */
7295 ix86_output_addr_vec_elt (file, value)
7299 const char *directive = ASM_LONG;
7304 directive = ASM_QUAD;
7310 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one element of an addr_diff_vec (PIC-relative jump table):
   either label-minus-label, a @GOTOFF reference, the Mach-O
   picbase-relative form, or a _GLOBAL_OFFSET_TABLE_-relative
   expression, depending on target/assembler capabilities (the
   selecting conditions are partly elided).  */
7314 ix86_output_addr_diff_elt (file, value, rel)
7319 fprintf (file, "%s%s%d-%s%d\n",
7320 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7321 else if (HAVE_AS_GOTOFF_IN_DATA)
7322 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7324 else if (TARGET_MACHO)
7325 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7326 machopic_function_base_name () + 1);
7329 asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
7330 ASM_LONG, LPREFIX, value);
7333 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* ... for clearing DEST.  The xor form clobbers the flags, so it is
   wrapped in a PARALLEL with a flags-register CLOBBER.  */
7337 ix86_expand_clear (dest)
7342 /* We play register width games, which are only valid after reload. */
7343 if (!reload_completed)
7346 /* Avoid HImode and its attendant prefix byte. */
7347 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7348 dest = gen_rtx_REG (SImode, REGNO (dest));
7350 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7352 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7353 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
/* Register 17 is the flags register on this port.  */
7355 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7356 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7362 /* X is an unchanging MEM. If it is a constant pool reference, return
7363 the constant pool rtx, else NULL. */
/* Peels a PIC-register-plus-GOTOFF wrapper first, then checks for a
   direct constant-pool SYMBOL_REF.  NOTE(review): the lines extracting
   the address from the MEM and stepping into the CONST/UNSPEC are
   elided here.  */
7366 maybe_get_pool_constant (x)
7373 if (GET_CODE (x) != PLUS)
7375 if (XEXP (x, 0) != pic_offset_table_rtx)
7378 if (GET_CODE (x) != CONST)
7381 if (GET_CODE (x) != UNSPEC)
7383 if (XINT (x, 1) != UNSPEC_GOTOFF)
7385 x = XVECEXP (x, 0, 0);
7388 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7389 return get_pool_constant (x);
/* Expand a move of mode MODE between operands[0] (dest) and
   operands[1] (src), legitimizing TLS and PIC addresses, forcing
   mem-to-mem moves through a register, and handling FP constants.
   NOTE(review): several lines (operand extraction, emit of the final
   insn in some arms) are elided in this extract.  */
7395 ix86_expand_move (mode, operands)
7396 enum machine_mode mode;
7399 int strict = (reload_in_progress || reload_completed);
7400 rtx insn, op0, op1, tmp;
7405 /* ??? We have a slight problem. We need to say that tls symbols are
7406 not legitimate constants so that reload does not helpfully reload
7407 these constants from a REG_EQUIV, which we cannot handle. (Recall
7408 that general- and local-dynamic address resolution requires a
7411 However, if we say that tls symbols are not legitimate constants,
7412 then emit_move_insn helpfully drop them into the constant pool.
7414 It is far easier to work around emit_move_insn than reload. Recognize
7415 the MEM that we would have created and extract the symbol_ref. */
7418 && GET_CODE (op1) == MEM
7419 && RTX_UNCHANGING_P (op1)
7421 tmp = maybe_get_pool_constant (op1);
7422 /* Note that we only care about symbolic constants here, which
7423 unlike CONST_INT will always have a proper mode. */
7424 if (tmp && GET_MODE (tmp) == Pmode)
/* TLS symbol: legitimize, routing through a scratch reg when the
   destination is memory.  */
7428 if (tls_symbolic_operand (op1, Pmode))
7430 op1 = legitimize_address (op1, op1, VOIDmode);
7431 if (GET_CODE (op0) == MEM)
7433 tmp = gen_reg_rtx (mode);
7434 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
/* PIC symbolic operand: Darwin (machopic) or ELF legitimization.  */
7438 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7443 rtx temp = ((reload_in_progress
7444 || ((op0 && GET_CODE (op0) == REG)
7446 ? op0 : gen_reg_rtx (Pmode));
7447 op1 = machopic_indirect_data_reference (op1, temp);
7448 op1 = machopic_legitimize_pic_address (op1, mode,
7449 temp == op1 ? 0 : temp);
7453 if (MACHOPIC_INDIRECT)
7454 op1 = machopic_indirect_data_reference (op1, 0);
7458 insn = gen_rtx_SET (VOIDmode, op0, op1);
7462 #endif /* TARGET_MACHO */
7463 if (GET_CODE (op0) == MEM)
7464 op1 = force_reg (Pmode, op1);
7468 if (GET_CODE (temp) != REG)
7469 temp = gen_reg_rtx (Pmode);
7470 temp = legitimize_pic_address (op1, temp);
/* Non-PIC path: no mem-to-mem moves (x86 has none except push).  */
7478 if (GET_CODE (op0) == MEM
7479 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7480 || !push_operand (op0, mode))
7481 && GET_CODE (op1) == MEM)
7482 op1 = force_reg (mode, op1);
7484 if (push_operand (op0, mode)
7485 && ! general_no_elim_operand (op1, mode))
7486 op1 = copy_to_mode_reg (mode, op1);
7488 /* Force large constants in 64bit compilation into register
7489 to get them CSEed. */
7490 if (TARGET_64BIT && mode == DImode
7491 && immediate_operand (op1, mode)
7492 && !x86_64_zero_extended_value (op1)
7493 && !register_operand (op0, mode)
7494 && optimize && !reload_completed && !reload_in_progress)
7495 op1 = copy_to_mode_reg (mode, op1);
7497 if (FLOAT_MODE_P (mode))
7499 /* If we are loading a floating point constant to a register,
7500 force the value to memory now, since we'll get better code
7501 out the back end. */
7505 else if (GET_CODE (op1) == CONST_DOUBLE
7506 && register_operand (op0, mode))
7507 op1 = validize_mem (force_const_mem (mode, op1));
7511 insn = gen_rtx_SET (VOIDmode, op0, op1);
/* Expand a vector-mode move.  Non-zero vector constants are spilled to
   the constant pool, and mem-to-mem moves go through a register.  */
7517 ix86_expand_vector_move (mode, operands)
7518 enum machine_mode mode;
7521 /* Force constants other than zero into memory. We do not know how
7522 the instructions used to build constants modify the upper 64 bits
7523 of the register, once we have that information we may be able
7524 to handle some of them more efficiently. */
7525 if ((reload_in_progress | reload_completed) == 0
7526 && register_operand (operands[0], mode)
7527 && CONSTANT_P (operands[1]))
7529 rtx addr = gen_reg_rtx (Pmode);
7530 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
7531 operands[1] = gen_rtx_MEM (mode, addr);
7534 /* Make operand1 a register if it isn't already. */
7535 if ((reload_in_progress | reload_completed) == 0
7536 && !register_operand (operands[0], mode)
7537 && !register_operand (operands[1], mode)
7538 && operands[1] != CONST0_RTX (mode))
7540 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
/* The early emit here makes the final SET below the no-op case;
   control flow (return/else) around it is elided in this extract.  */
7541 emit_move_insn (operands[0], temp);
7545 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
7548 /* Attempt to expand a binary operator. Make the expansion closer to the
7549 actual machine, then just general_operand, which will allow 3 separate
7550 memory references (one output, two input) in a single insn. */
/* CODE is the rtx_code of the operation, MODE its machine mode,
   OPERANDS = { dest, src1, src2 }.  Emits the insn (with a flags
   CLOBBER outside reload) and copies to operands[0] if a temporary
   destination was used.  */
7553 ix86_expand_binary_operator (code, mode, operands)
7555 enum machine_mode mode;
7558 int matching_memory;
7559 rtx src1, src2, dst, op, clob;
7565 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7566 if (GET_RTX_CLASS (code) == 'c'
7567 && (rtx_equal_p (dst, src2)
7568 || immediate_operand (src1, mode)))
7575 /* If the destination is memory, and we do not have matching source
7576 operands, do things in registers. */
7577 matching_memory = 0;
7578 if (GET_CODE (dst) == MEM)
7580 if (rtx_equal_p (dst, src1))
7581 matching_memory = 1;
7582 else if (GET_RTX_CLASS (code) == 'c'
7583 && rtx_equal_p (dst, src2))
7584 matching_memory = 2;
7586 dst = gen_reg_rtx (mode);
7589 /* Both source operands cannot be in memory. */
7590 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7592 if (matching_memory != 2)
7593 src2 = force_reg (mode, src2);
7595 src1 = force_reg (mode, src1);
7598 /* If the operation is not commutable, source 1 cannot be a constant
7599 or non-matching memory. */
7600 if ((CONSTANT_P (src1)
7601 || (!matching_memory && GET_CODE (src1) == MEM))
7602 && GET_RTX_CLASS (code) != 'c')
7603 src1 = force_reg (mode, src1);
7605 /* If optimizing, copy to regs to improve CSE */
7606 if (optimize && ! no_new_pseudos)
7608 if (GET_CODE (dst) == MEM)
7609 dst = gen_reg_rtx (mode);
7610 if (GET_CODE (src1) == MEM)
7611 src1 = force_reg (mode, src1);
7612 if (GET_CODE (src2) == MEM)
7613 src2 = force_reg (mode, src2);
7616 /* Emit the instruction. */
7618 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7619 if (reload_in_progress)
7621 /* Reload doesn't know about the flags register, and doesn't know that
7622 it doesn't want to clobber it. We can only do this with PLUS. */
7629 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7630 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7633 /* Fix up the destination if needed. */
7634 if (dst != operands[0])
7635 emit_move_insn (operands[0], dst);
7638 /* Return TRUE or FALSE depending on whether the binary operator meets the
7639 appropriate constraints. */
/* Mirrors the operand fix-ups done by ix86_expand_binary_operator;
   the early-return/TRUE lines between the checks are elided here.  */
7642 ix86_binary_operator_ok (code, mode, operands)
7644 enum machine_mode mode ATTRIBUTE_UNUSED;
7647 /* Both source operands cannot be in memory. */
7648 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7650 /* If the operation is not commutable, source 1 cannot be a constant. */
7651 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
7653 /* If the destination is memory, we must have a matching source operand. */
7654 if (GET_CODE (operands[0]) == MEM
7655 && ! (rtx_equal_p (operands[0], operands[1])
7656 || (GET_RTX_CLASS (code) == 'c'
7657 && rtx_equal_p (operands[0], operands[2]))))
7659 /* If the operation is not commutable and the source 1 is memory, we must
7660 have a matching destination. */
7661 if (GET_CODE (operands[1]) == MEM
7662 && GET_RTX_CLASS (code) != 'c'
7663 && ! rtx_equal_p (operands[0], operands[1]))
7668 /* Attempt to expand a unary operator. Make the expansion closer to the
7669 actual machine, then just general_operand, which will allow 2 separate
7670 memory references (one output, one input) in a single insn. */
/* CODE is the rtx_code (e.g. NEG, NOT), MODE its machine mode,
   OPERANDS = { dest, src }.  Mirrors ix86_expand_binary_operator.  */
7673 ix86_expand_unary_operator (code, mode, operands)
7675 enum machine_mode mode;
7678 int matching_memory;
7679 rtx src, dst, op, clob;
7684 /* If the destination is memory, and we do not have matching source
7685 operands, do things in registers. */
7686 matching_memory = 0;
7687 if (GET_CODE (dst) == MEM)
7689 if (rtx_equal_p (dst, src))
7690 matching_memory = 1;
7692 dst = gen_reg_rtx (mode);
7695 /* When source operand is memory, destination must match. */
7696 if (!matching_memory && GET_CODE (src) == MEM)
7697 src = force_reg (mode, src);
7699 /* If optimizing, copy to regs to improve CSE */
7700 if (optimize && ! no_new_pseudos)
7702 if (GET_CODE (dst) == MEM)
7703 dst = gen_reg_rtx (mode);
7704 if (GET_CODE (src) == MEM)
7705 src = force_reg (mode, src);
7708 /* Emit the instruction. */
7710 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
/* NOT does not clobber flags, so no CLOBBER is attached for it.  */
7711 if (reload_in_progress || code == NOT)
7713 /* Reload doesn't know about the flags register, and doesn't know that
7714 it doesn't want to clobber it. */
7721 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7722 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7725 /* Fix up the destination if needed. */
7726 if (dst != operands[0])
7727 emit_move_insn (operands[0], dst);
7730 /* Return TRUE or FALSE depending on whether the unary operator meets the
7731 appropriate constraints. */
7734 ix86_unary_operator_ok (code, mode, operands)
7735 enum rtx_code code ATTRIBUTE_UNUSED;
7736 enum machine_mode mode ATTRIBUTE_UNUSED;
7737 rtx operands[2] ATTRIBUTE_UNUSED;
7739 /* If one of operands is memory, source and destination must match. */
7740 if ((GET_CODE (operands[0]) == MEM
7741 || GET_CODE (operands[1]) == MEM)
7742 && ! rtx_equal_p (operands[0], operands[1]))
7747 /* Return TRUE or FALSE depending on whether the first SET in INSN
7748 has source and destination with matching CC modes, and that the
7749 CC mode is at least as constrained as REQ_MODE. */
/* The per-mode acceptance cases (which set_mode values each req_mode
   tolerates) are partly elided in this extract.  */
7752 ix86_match_ccmode (insn, req_mode)
7754 enum machine_mode req_mode;
7757 enum machine_mode set_mode;
7759 set = PATTERN (insn);
7760 if (GET_CODE (set) == PARALLEL)
7761 set = XVECEXP (set, 0, 0);
7762 if (GET_CODE (set) != SET)
7764 if (GET_CODE (SET_SRC (set)) != COMPARE)
7767 set_mode = GET_MODE (SET_DEST (set));
7771 if (req_mode != CCNOmode
7772 && (req_mode != CCmode
7773 || XEXP (SET_SRC (set), 1) != const0_rtx))
7777 if (req_mode == CCGCmode)
7781 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7785 if (req_mode == CCZmode)
7795 return (GET_MODE (SET_SRC (set)) == set_mode);
7798 /* Generate insn patterns to do an integer compare of OPERANDS. */
/* Emits flags = COMPARE (op0, op1) in the CC mode selected for CODE,
   and returns the CODE test of those flags against zero.  */
7801 ix86_expand_int_compare (code, op0, op1)
7805 enum machine_mode cmpmode;
7808 cmpmode = SELECT_CC_MODE (code, op0, op1);
7809 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7811 /* This is very simple, but making the interface the same as in the
7812 FP case makes the rest of the code easier. */
7813 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7814 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7816 /* Return the test that should be put into the flags user, i.e.
7817 the bcc, scc, or cmov instruction. */
7818 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7821 /* Figure out whether to use ordered or unordered fp comparisons.
7822 Return the appropriate mode to use. */
7825 ix86_fp_compare_mode (code)
7826 enum rtx_code code ATTRIBUTE_UNUSED;
7828 /* ??? In order to make all comparisons reversible, we do all comparisons
7829 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7830 all forms trapping and nontrapping comparisons, we can make inequality
7831 comparisons trapping again, since it results in better code when using
7832 FCOM based compares. */
7833 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Return the CC machine mode to use when comparing OP0 and OP1 with
   rtx code CODE.  Chooses the least constrained mode that still
   captures the flags the comparison needs.  NOTE(review): the return
   statements for each case group are elided in this extract.  */
7837 ix86_cc_mode (code, op0, op1)
7841 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7842 return ix86_fp_compare_mode (code);
7845 /* Only zero flag is needed. */
7847 case NE: /* ZF!=0 */
7849 /* Codes needing carry flag. */
7850 case GEU: /* CF=0 */
7851 case GTU: /* CF=0 & ZF=0 */
7852 case LTU: /* CF=1 */
7853 case LEU: /* CF=1 | ZF=1 */
7855 /* Codes possibly doable only with sign flag when
7856 comparing against zero. */
7857 case GE: /* SF=OF or SF=0 */
7858 case LT: /* SF<>OF or SF=1 */
7859 if (op1 == const0_rtx)
7862 /* For other cases Carry flag is not required. */
7864 /* Codes doable only with sign flag when comparing
7865 against zero, but we miss jump instruction for it
7866 so we need to use relational tests against overflow
7867 that thus needs to be zero. */
7868 case GT: /* ZF=0 & SF=OF */
7869 case LE: /* ZF=1 | SF<>OF */
7870 if (op1 == const0_rtx)
7874 /* strcmp pattern do (use flags) and combine may ask us for proper
7883 /* Return true if we should use an FCOMI instruction for this fp comparison. */
/* True when the fcomi sequence is as cheap as the best alternative for
   either CODE or its swapped form.  */
7886 ix86_use_fcomi_compare (code)
7887 enum rtx_code code ATTRIBUTE_UNUSED;
7889 enum rtx_code swapped_code = swap_condition (code);
7890 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7891 || (ix86_fp_comparison_cost (swapped_code)
7892 == ix86_fp_comparison_fcomi_cost (swapped_code)));
7895 /* Swap, force into registers, or otherwise massage the two operands
7896 to a fp comparison. The operands are updated in place; the new
7897 comparison code is returned. */
7899 static enum rtx_code
7900 ix86_prepare_fp_compare_args (code, pop0, pop1)
7904 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7905 rtx op0 = *pop0, op1 = *pop1;
7906 enum machine_mode op_mode = GET_MODE (op0);
7907 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7909 /* All of the unordered compare instructions only work on registers.
7910 The same is true of the XFmode compare instructions. The same is
7911 true of the fcomi compare instructions. */
/* (Condition on !is_sse is elided here.)  */
7914 && (fpcmp_mode == CCFPUmode
7915 || op_mode == XFmode
7916 || op_mode == TFmode
7917 || ix86_use_fcomi_compare (code)))
7919 op0 = force_reg (op_mode, op0);
7920 op1 = force_reg (op_mode, op1);
7924 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7925 things around if they appear profitable, otherwise force op0
7928 if (standard_80387_constant_p (op0) == 0
7929 || (GET_CODE (op0) == MEM
7930 && ! (standard_80387_constant_p (op1) == 0
7931 || GET_CODE (op1) == MEM)))
7934 tmp = op0, op0 = op1, op1 = tmp;
7935 code = swap_condition (code);
7938 if (GET_CODE (op0) != REG)
7939 op0 = force_reg (op_mode, op0);
7941 if (CONSTANT_P (op1))
/* Loadable 387 constants (0.0, 1.0) go in a register; everything
   else is spilled to the constant pool.  */
7943 if (standard_80387_constant_p (op1))
7944 op1 = force_reg (op_mode, op1);
7946 op1 = validize_mem (force_const_mem (op_mode, op1));
7950 /* Try to rearrange the comparison to make it cheaper. */
7951 if (ix86_fp_comparison_cost (code)
7952 > ix86_fp_comparison_cost (swap_condition (code))
7953 && (GET_CODE (op1) == REG || !no_new_pseudos))
7956 tmp = op0, op0 = op1, op1 = tmp;
7957 code = swap_condition (code);
7958 if (GET_CODE (op0) != REG)
7959 op0 = force_reg (op_mode, op0);
7967 /* Convert comparison codes we use to represent FP comparison to integer
7968 code that will result in proper branch. Return UNKNOWN if no such code
/* NOTE(review): the entire switch body of this function (original
   lines ~7972-8000) is elided in this extract.  */
7970 static enum rtx_code
7971 ix86_fp_compare_code_to_integer (code)
8001 /* Split comparison code CODE into comparisons we can do using branch
8002 instructions. BYPASS_CODE is comparison code for branch that will
8003 branch around FIRST_CODE and SECOND_CODE. If some of branches
8004 is not required, set value to NIL.
8005 We never require more than two branches. */
/* NOTE(review): the assignments of *first_code for each case and the
   default/TARGET_IEEE_FP fallback bodies are elided in this extract;
   the annotations on each case describe the fcomi flag results.  */
8007 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8008 enum rtx_code code, *bypass_code, *first_code, *second_code;
8014 /* The fcomi comparison sets flags as follows:
8024 case GT: /* GTU - CF=0 & ZF=0 */
8025 case GE: /* GEU - CF=0 */
8026 case ORDERED: /* PF=0 */
8027 case UNORDERED: /* PF=1 */
8028 case UNEQ: /* EQ - ZF=1 */
8029 case UNLT: /* LTU - CF=1 */
8030 case UNLE: /* LEU - CF=1 | ZF=1 */
8031 case LTGT: /* EQ - ZF=0 */
8033 case LT: /* LTU - CF=1 - fails on unordered */
8035 *bypass_code = UNORDERED;
8037 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8039 *bypass_code = UNORDERED;
8041 case EQ: /* EQ - ZF=1 - fails on unordered */
8043 *bypass_code = UNORDERED;
8045 case NE: /* NE - ZF=0 - fails on unordered */
8047 *second_code = UNORDERED;
8049 case UNGE: /* GEU - CF=0 - fails on unordered */
8051 *second_code = UNORDERED;
8053 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8055 *second_code = UNORDERED;
8060 if (!TARGET_IEEE_FP)
8067 /* Return cost of comparison done using fcom + arithmetic operations on AX.
8068 All following functions use the number of instructions as a cost metric.
8069 In future this should be tweaked to compute bytes for optimize_size and
8070 take into account performance of various instructions on various CPUs. */
8072 ix86_fp_comparison_arithmetics_cost (code)
8075 if (!TARGET_IEEE_FP)
8077 /* The cost of code output by ix86_expand_fp_compare. */
/* NOTE(review): the per-code cost table that follows is elided in this
   excerpt.  */
8105 /* Return cost of comparison done using fcomi operation.
8106 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8108 ix86_fp_comparison_fcomi_cost (code)
8111 enum rtx_code bypass_code, first_code, second_code;
8112 /* Return arbitrarily high cost when instruction is not supported - this
8113 prevents gcc from using it. */
8116 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Base cost of 2, plus 1 when an extra bypass or second branch is
   needed for this comparison code.  */
8117 return (bypass_code != NIL || second_code != NIL) + 2;
8120 /* Return cost of comparison done using sahf operation.
8121 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8123 ix86_fp_comparison_sahf_cost (code)
8126 enum rtx_code bypass_code, first_code, second_code;
8127 /* Return arbitrarily high cost when instruction is not preferred - this
8128 prevents gcc from using it. */
8129 if (!TARGET_USE_SAHF && !optimize_size)
8131 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Base cost of 3 (fnstsw + sahf + jcc), plus 1 when an extra bypass or
   second branch is needed.  */
8132 return (bypass_code != NIL || second_code != NIL) + 3;
8135 /* Compute cost of the comparison done using any method.
8136 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8138 ix86_fp_comparison_cost (code)
8141 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8144 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8145 sahf_cost = ix86_fp_comparison_sahf_cost (code);
/* Take the minimum of the three strategies' costs.  */
8147 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8148 if (min > sahf_cost)
8150 if (min > fcomi_cost)
8155 /* Generate insn patterns to do a floating point compare of OPERANDS.
   Returns the rtx comparison to put into the flags user (bcc/scc/cmov);
   may set *SECOND_TEST and *BYPASS_TEST when two branches are needed. */
8158 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8160 rtx op0, op1, scratch;
8164 enum machine_mode fpcmp_mode, intcmp_mode;
8166 int cost = ix86_fp_comparison_cost (code);
8167 enum rtx_code bypass_code, first_code, second_code;
8169 fpcmp_mode = ix86_fp_compare_mode (code);
8170 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8173 *second_test = NULL_RTX;
8175 *bypass_test = NULL_RTX;
8177 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8179 /* Do fcomi/sahf based test when profitable. */
8180 if ((bypass_code == NIL || bypass_test)
8181 && (second_code == NIL || second_test)
8182 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8186 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8187 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8193 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8194 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8196 scratch = gen_reg_rtx (HImode);
8197 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8198 emit_insn (gen_x86_sahf_1 (scratch));
8201 /* The FP codes work out to act like unsigned. */
8202 intcmp_mode = fpcmp_mode;
8204 if (bypass_code != NIL)
8205 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8206 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8208 if (second_code != NIL)
8209 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8210 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8215 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8216 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8217 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8219 scratch = gen_reg_rtx (HImode);
8220 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8222 /* In the unordered case, we have to check C2 for NaN's, which
8223 doesn't happen to work out to anything nice combination-wise.
8224 So do some bit twiddling on the value we've got in AH to come
8225 up with an appropriate set of condition codes. */
/* NOTE(review): the masks 0x01/0x04/0x40/0x45 below appear to select
   the x87 status-word condition bits C0/C2/C3 as stored in AH by
   fnstsw -- confirm against the x87 status-word layout.  */
8227 intcmp_mode = CCNOmode;
8232 if (code == GT || !TARGET_IEEE_FP)
8234 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8239 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8240 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8241 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8242 intcmp_mode = CCmode;
8248 if (code == LT && TARGET_IEEE_FP)
8250 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8251 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8252 intcmp_mode = CCmode;
8257 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8263 if (code == GE || !TARGET_IEEE_FP)
8265 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8270 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8271 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8278 if (code == LE && TARGET_IEEE_FP)
8280 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8281 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8282 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8283 intcmp_mode = CCmode;
8288 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8294 if (code == EQ && TARGET_IEEE_FP)
8296 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8297 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8298 intcmp_mode = CCmode;
8303 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8310 if (code == NE && TARGET_IEEE_FP)
8312 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8313 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8319 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8325 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8329 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8338 /* Return the test that should be put into the flags user, i.e.
8339 the bcc, scc, or cmov instruction. */
8340 return gen_rtx_fmt_ee (code, VOIDmode,
8341 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand the comparison CODE of the global ix86_compare_op0/op1 into a
   condition rtx, dispatching to the FP or integer compare expander
   based on the mode of op0.  */
8346 ix86_expand_compare (code, second_test, bypass_test)
8348 rtx *second_test, *bypass_test;
8351 op0 = ix86_compare_op0;
8352 op1 = ix86_compare_op1;
8355 *second_test = NULL_RTX;
8357 *bypass_test = NULL_RTX;
8359 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8360 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8361 second_test, bypass_test);
8363 ret = ix86_expand_int_compare (code, op0, op1);
8368 /* Return true if the CODE will result in nontrivial jump sequence,
   i.e. when a bypass or second branch would be required.  */
8370 ix86_fp_jump_nontrivial_p (code)
8373 enum rtx_code bypass_code, first_code, second_code;
8376 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8377 return bypass_code != NIL || second_code != NIL;
/* Emit a conditional jump to LABEL on comparison CODE of the globals
   ix86_compare_op0/op1, dispatching on the operands' machine mode.
   FP modes may need compound sequences; DImode (on 32-bit) is split
   into multiple SImode compare+branch steps.  */
8381 ix86_expand_branch (code, label)
8387 switch (GET_MODE (ix86_compare_op0))
8393 tmp = ix86_expand_compare (code, NULL, NULL);
8394 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8395 gen_rtx_LABEL_REF (VOIDmode, label),
8397 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8407 enum rtx_code bypass_code, first_code, second_code;
8409 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8412 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8414 /* Check whether we will use the natural sequence with one jump. If
8415 so, we can expand jump early. Otherwise delay expansion by
8416 creating compound insn to not confuse optimizers. */
8417 if (bypass_code == NIL && second_code == NIL
8420 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8421 gen_rtx_LABEL_REF (VOIDmode, label),
8426 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8427 ix86_compare_op0, ix86_compare_op1);
8428 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8429 gen_rtx_LABEL_REF (VOIDmode, label),
8431 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8433 use_fcomi = ix86_use_fcomi_compare (code);
8434 vec = rtvec_alloc (3 + !use_fcomi);
8435 RTVEC_ELT (vec, 0) = tmp;
8437 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8439 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8442 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8444 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8452 /* Expand DImode branch into multiple compare+branch. */
8454 rtx lo[2], hi[2], label2;
8455 enum rtx_code code1, code2, code3;
/* Canonicalize: keep any constant as the second operand.  */
8457 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8459 tmp = ix86_compare_op0;
8460 ix86_compare_op0 = ix86_compare_op1;
8461 ix86_compare_op1 = tmp;
8462 code = swap_condition (code);
8464 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8465 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8467 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8468 avoid two branches. This costs one extra insn, so disable when
8469 optimizing for size. */
8471 if ((code == EQ || code == NE)
8473 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8478 if (hi[1] != const0_rtx)
8479 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8480 NULL_RTX, 0, OPTAB_WIDEN);
8483 if (lo[1] != const0_rtx)
8484 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8485 NULL_RTX, 0, OPTAB_WIDEN);
8487 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8488 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse on the single-word OR-of-XORs result against zero.  */
8490 ix86_compare_op0 = tmp;
8491 ix86_compare_op1 = const0_rtx;
8492 ix86_expand_branch (code, label);
8496 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8497 op1 is a constant and the low word is zero, then we can just
8498 examine the high word. */
8500 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8503 case LT: case LTU: case GE: case GEU:
8504 ix86_compare_op0 = hi[0];
8505 ix86_compare_op1 = hi[1];
8506 ix86_expand_branch (code, label);
8512 /* Otherwise, we need two or three jumps. */
8514 label2 = gen_label_rtx ();
8517 code2 = swap_condition (code);
8518 code3 = unsigned_condition (code);
8522 case LT: case GT: case LTU: case GTU:
8525 case LE: code1 = LT; code2 = GT; break;
8526 case GE: code1 = GT; code2 = LT; break;
8527 case LEU: code1 = LTU; code2 = GTU; break;
8528 case GEU: code1 = GTU; code2 = LTU; break;
8530 case EQ: code1 = NIL; code2 = NE; break;
8531 case NE: code2 = NIL; break;
8539 * if (hi(a) < hi(b)) goto true;
8540 * if (hi(a) > hi(b)) goto false;
8541 * if (lo(a) < lo(b)) goto true;
8545 ix86_compare_op0 = hi[0];
8546 ix86_compare_op1 = hi[1];
8549 ix86_expand_branch (code1, label);
8551 ix86_expand_branch (code2, label2);
8553 ix86_compare_op0 = lo[0];
8554 ix86_compare_op1 = lo[1];
8555 ix86_expand_branch (code3, label);
8558 emit_label (label2);
8567 /* Split branch based on floating point condition.  Emits up to three
   conditional jumps (bypass, main, second) selecting between TARGET1
   and TARGET2, attaching REG_BR_PROB notes when probabilities are
   known.  */
8569 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
8571 rtx op1, op2, target1, target2, tmp;
8574 rtx label = NULL_RTX;
8576 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so that TARGET2 is the fallthrough (pc_rtx).  */
8579 if (target2 != pc_rtx)
8582 code = reverse_condition_maybe_unordered (code);
8587 condition = ix86_expand_fp_compare (code, op1, op2,
8588 tmp, &second, &bypass);
8590 if (split_branch_probability >= 0)
8592 /* Distribute the probabilities across the jumps.
8593 Assume the BYPASS and SECOND to be always test
8595 probability = split_branch_probability;
8597 /* Value of 1 is low enough to make no need for probability
8598 to be updated. Later we may run some experiments and see
8599 if unordered values are more frequent in practice. */
8601 bypass_probability = 1;
8603 second_probability = 1;
8605 if (bypass != NULL_RTX)
8607 label = gen_label_rtx ();
8608 i = emit_jump_insn (gen_rtx_SET
8610 gen_rtx_IF_THEN_ELSE (VOIDmode,
8612 gen_rtx_LABEL_REF (VOIDmode,
8615 if (bypass_probability >= 0)
8617 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8618 GEN_INT (bypass_probability),
8621 i = emit_jump_insn (gen_rtx_SET
8623 gen_rtx_IF_THEN_ELSE (VOIDmode,
8624 condition, target1, target2)));
8625 if (probability >= 0)
8627 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8628 GEN_INT (probability),
8630 if (second != NULL_RTX)
8632 i = emit_jump_insn (gen_rtx_SET
8634 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8636 if (second_probability >= 0)
8638 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8639 GEN_INT (second_probability),
8642 if (label != NULL_RTX)
/* Expand a setcc of comparison CODE into QImode DEST.  Returns 1 on
   success ("DONE"), 0 when the comparison cannot be handled ("FAIL").
   When the compare needs a bypass or second test, the partial results
   are combined with and/or.  */
8647 ix86_expand_setcc (code, dest)
8651 rtx ret, tmp, tmpreg;
8652 rtx second_test, bypass_test;
8654 if (GET_MODE (ix86_compare_op0) == DImode
8656 return 0; /* FAIL */
8658 if (GET_MODE (dest) != QImode)
8661 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8662 PUT_MODE (ret, QImode);
8667 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
8668 if (bypass_test || second_test)
8670 rtx test = second_test;
8672 rtx tmp2 = gen_reg_rtx (QImode);
/* A bypass test is combined inverted (see the andqi3 below).  */
8679 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8681 PUT_MODE (test, QImode);
8682 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8685 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8687 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8690 return 1; /* DONE */
/* Expand an integer conditional move.  operands[0] is the destination,
   operands[1] the comparison, and operands[2]/operands[3] appear to be
   the true/false values (ct/cf below) -- confirm against the movcc
   expander in i386.md.  Returns 1 on success ("DONE"), 0 on failure
   ("FAIL").  Tries several branchless sbb/setcc/lea based sequences
   before falling back to a cmov-style IF_THEN_ELSE.  */
8694 ix86_expand_int_movcc (operands)
8697 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8698 rtx compare_seq, compare_op;
8699 rtx second_test, bypass_test;
8700 enum machine_mode mode = GET_MODE (operands[0]);
8702 /* When the compare code is not LTU or GEU, we can not use sbbl case.
8703 In case comparison is done with immediate, we can convert it to LTU or
8704 GEU by altering the integer. */
8706 if ((code == LEU || code == GTU)
8707 && GET_CODE (ix86_compare_op1) == CONST_INT
8709 && INTVAL (ix86_compare_op1) != -1
8710 /* For x86-64, the immediate field in the instruction is 32-bit
8711 signed, so we can't increment a DImode value above 0x7fffffff. */
8713 || GET_MODE (ix86_compare_op0) != DImode
8714 || INTVAL (ix86_compare_op1) != 0x7fffffff)
8715 && GET_CODE (operands[2]) == CONST_INT
8716 && GET_CODE (operands[3]) == CONST_INT)
8722 ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8723 GET_MODE (ix86_compare_op0));
8727 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8728 compare_seq = get_insns ();
8731 compare_code = GET_CODE (compare_op);
8733 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8734 HImode insns, we'd be swallowed in word prefix ops. */
8737 && (mode != DImode || TARGET_64BIT)
8738 && GET_CODE (operands[2]) == CONST_INT
8739 && GET_CODE (operands[3]) == CONST_INT)
8741 rtx out = operands[0];
8742 HOST_WIDE_INT ct = INTVAL (operands[2]);
8743 HOST_WIDE_INT cf = INTVAL (operands[3]);
8746 if ((compare_code == LTU || compare_code == GEU)
8747 && !second_test && !bypass_test)
8749 /* Detect overlap between destination and compare sources. */
8752 /* To simplify rest of code, restrict to the GEU case. */
8753 if (compare_code == LTU)
8758 compare_code = reverse_condition (compare_code);
8759 code = reverse_condition (code);
8763 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8764 || reg_overlap_mentioned_p (out, ix86_compare_op1))
8765 tmp = gen_reg_rtx (mode);
8767 emit_insn (compare_seq);
/* sbb-based: materialize 0/-1 from the carry flag.  */
8769 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8771 emit_insn (gen_x86_movsicc_0_m1 (tmp));
8783 tmp = expand_simple_binop (mode, PLUS,
8785 tmp, 1, OPTAB_DIRECT);
8796 tmp = expand_simple_binop (mode, IOR,
8798 tmp, 1, OPTAB_DIRECT);
8800 else if (diff == -1 && ct)
8810 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8812 tmp = expand_simple_binop (mode, PLUS,
8814 tmp, 1, OPTAB_DIRECT);
8822 * andl cf - ct, dest
8832 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8835 tmp = expand_simple_binop (mode, AND,
8837 gen_int_mode (cf - ct, mode),
8838 tmp, 1, OPTAB_DIRECT);
8840 tmp = expand_simple_binop (mode, PLUS,
8842 tmp, 1, OPTAB_DIRECT);
8846 emit_move_insn (out, tmp);
8848 return 1; /* DONE */
8855 tmp = ct, ct = cf, cf = tmp;
8857 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8859 /* We may be reversing unordered compare to normal compare, that
8860 is not valid in general (we may convert non-trapping condition
8861 to trapping one), however on i386 we currently emit all
8862 comparisons unordered. */
8863 compare_code = reverse_condition_maybe_unordered (compare_code);
8864 code = reverse_condition_maybe_unordered (code);
8868 compare_code = reverse_condition (compare_code);
8869 code = reverse_condition (code);
8874 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
8875 && GET_CODE (ix86_compare_op1) == CONST_INT)
8877 if (ix86_compare_op1 == const0_rtx
8878 && (code == LT || code == GE))
8879 compare_code = code;
8880 else if (ix86_compare_op1 == constm1_rtx)
8884 else if (code == GT)
8889 /* Optimize dest = (op0 < 0) ? -1 : cf. */
8890 if (compare_code != NIL
8891 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
8892 && (cf == -1 || ct == -1))
8894 /* If lea code below could be used, only optimize
8895 if it results in a 2 insn sequence. */
8897 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
8898 || diff == 3 || diff == 5 || diff == 9)
8899 || (compare_code == LT && ct == -1)
8900 || (compare_code == GE && cf == -1))
8903 * notl op1 (if necessary)
8911 code = reverse_condition (code);
8914 out = emit_store_flag (out, code, ix86_compare_op0,
8915 ix86_compare_op1, VOIDmode, 0, -1);
8917 out = expand_simple_binop (mode, IOR,
8919 out, 1, OPTAB_DIRECT);
8920 if (out != operands[0])
8921 emit_move_insn (operands[0], out);
8923 return 1; /* DONE */
/* diff values 1,2,3,4,5,8,9 are exactly those expressible with an
   lea scale (with or without an added base).  */
8927 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
8928 || diff == 3 || diff == 5 || diff == 9)
8929 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
8935 * lea cf(dest*(ct-cf)),dest
8939 * This also catches the degenerate setcc-only case.
8945 out = emit_store_flag (out, code, ix86_compare_op0,
8946 ix86_compare_op1, VOIDmode, 0, 1);
8949 /* On x86_64 the lea instruction operates on Pmode, so we need
8950 to get arithmetics done in proper mode to match. */
8957 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
8961 tmp = gen_rtx_PLUS (mode, tmp, out1);
8967 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
8971 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
8977 clob = gen_rtx_REG (CCmode, FLAGS_REG);
8978 clob = gen_rtx_CLOBBER (VOIDmode, clob);
8980 tmp = gen_rtx_SET (VOIDmode, out, tmp);
8981 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8985 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8987 if (out != operands[0])
8988 emit_move_insn (operands[0], out);
8990 return 1; /* DONE */
8994 * General case: Jumpful:
8995 * xorl dest,dest cmpl op1, op2
8996 * cmpl op1, op2 movl ct, dest
8998 * decl dest movl cf, dest
8999 * andl (cf-ct),dest 1:
9004 * This is reasonably steep, but branch mispredict costs are
9005 * high on modern cpus, so consider failing only if optimizing
9008 * %%% Parameterize branch_cost on the tuning architecture, then
9009 * use that. The 80386 couldn't care less about mispredicts.
9012 if (!optimize_size && !TARGET_CMOVE)
9018 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9019 /* We may be reversing unordered compare to normal compare,
9020 that is not valid in general (we may convert non-trapping
9021 condition to trapping one), however on i386 we currently
9022 emit all comparisons unordered. */
9023 code = reverse_condition_maybe_unordered (code);
9026 code = reverse_condition (code);
9027 if (compare_code != NIL)
9028 compare_code = reverse_condition (compare_code);
9032 if (compare_code != NIL)
9034 /* notl op1 (if needed)
9039 For x < 0 (resp. x <= -1) there will be no notl,
9040 so if possible swap the constants to get rid of the
9042 True/false will be -1/0 while code below (store flag
9043 followed by decrement) is 0/-1, so the constants need
9044 to be exchanged once more. */
9046 if (compare_code == GE || !cf)
9048 code = reverse_condition (code);
9053 HOST_WIDE_INT tmp = cf;
9058 out = emit_store_flag (out, code, ix86_compare_op0,
9059 ix86_compare_op1, VOIDmode, 0, -1);
9063 out = emit_store_flag (out, code, ix86_compare_op0,
9064 ix86_compare_op1, VOIDmode, 0, 1);
9066 out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
9067 out, 1, OPTAB_DIRECT);
9070 out = expand_simple_binop (mode, AND, out,
9071 gen_int_mode (cf - ct, mode),
9072 out, 1, OPTAB_DIRECT);
9074 out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
9075 out, 1, OPTAB_DIRECT);
9076 if (out != operands[0])
9077 emit_move_insn (operands[0], out);
9079 return 1; /* DONE */
9085 /* Try a few things more with specific constants and a variable. */
9088 rtx var, orig_out, out, tmp;
9091 return 0; /* FAIL */
9093 /* If one of the two operands is an interesting constant, load a
9094 constant with the above and mask it in with a logical operation. */
9096 if (GET_CODE (operands[2]) == CONST_INT)
9099 if (INTVAL (operands[2]) == 0)
9100 operands[3] = constm1_rtx, op = and_optab;
9101 else if (INTVAL (operands[2]) == -1)
9102 operands[3] = const0_rtx, op = ior_optab;
9104 return 0; /* FAIL */
9106 else if (GET_CODE (operands[3]) == CONST_INT)
9109 if (INTVAL (operands[3]) == 0)
9110 operands[2] = constm1_rtx, op = and_optab;
9111 else if (INTVAL (operands[3]) == -1)
9112 operands[2] = const0_rtx, op = ior_optab;
9114 return 0; /* FAIL */
9117 return 0; /* FAIL */
9119 orig_out = operands[0];
9120 tmp = gen_reg_rtx (mode);
9123 /* Recurse to get the constant loaded. */
9124 if (ix86_expand_int_movcc (operands) == 0)
9125 return 0; /* FAIL */
9127 /* Mask in the interesting variable. */
9128 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9130 if (out != orig_out)
9131 emit_move_insn (orig_out, out);
9133 return 1; /* DONE */
9137 * For comparison with above,
9147 if (! nonimmediate_operand (operands[2], mode))
9148 operands[2] = force_reg (mode, operands[2]);
9149 if (! nonimmediate_operand (operands[3], mode))
9150 operands[3] = force_reg (mode, operands[3]);
/* cmov clobbers its destination before reading the flags tests, so
   copy an overlapping source out of the way first.  */
9152 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9154 rtx tmp = gen_reg_rtx (mode);
9155 emit_move_insn (tmp, operands[3]);
9158 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9160 rtx tmp = gen_reg_rtx (mode);
9161 emit_move_insn (tmp, operands[2]);
9164 if (! register_operand (operands[2], VOIDmode)
9165 && ! register_operand (operands[3], VOIDmode))
9166 operands[2] = force_reg (mode, operands[2]);
9168 emit_insn (compare_seq);
9169 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9170 gen_rtx_IF_THEN_ELSE (mode,
9171 compare_op, operands[2],
9174 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9175 gen_rtx_IF_THEN_ELSE (mode,
9180 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9181 gen_rtx_IF_THEN_ELSE (mode,
9186 return 1; /* DONE */
/* Expand a floating point conditional move.  Prefers SSE min/max or
   sse_movcc patterns when the target and modes allow it; otherwise
   emits fcmov-style IF_THEN_ELSE sets (with extra moves when a bypass
   or second test is required).  */
9190 ix86_expand_fp_movcc (operands)
9195 rtx compare_op, second_test, bypass_test;
9197 /* For SF/DFmode conditional moves based on comparisons
9198 in same mode, we may want to use SSE min/max instructions. */
9199 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9200 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9201 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9202 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9204 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9205 /* We may be called from the post-reload splitter. */
9206 && (!REG_P (operands[0])
9207 || SSE_REG_P (operands[0])
9208 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9210 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9211 code = GET_CODE (operands[1]);
9213 /* See if we have (cross) match between comparison operands and
9214 conditional move operands. */
9215 if (rtx_equal_p (operands[2], op1))
9220 code = reverse_condition_maybe_unordered (code);
9222 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9224 /* Check for min operation. */
9227 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9228 if (memory_operand (op0, VOIDmode))
9229 op0 = force_reg (GET_MODE (operands[0]), op0);
9230 if (GET_MODE (operands[0]) == SFmode)
9231 emit_insn (gen_minsf3 (operands[0], op0, op1));
9233 emit_insn (gen_mindf3 (operands[0], op0, op1));
9236 /* Check for max operation. */
9239 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9240 if (memory_operand (op0, VOIDmode))
9241 op0 = force_reg (GET_MODE (operands[0]), op0);
9242 if (GET_MODE (operands[0]) == SFmode)
9243 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9245 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9249 /* Manage condition to be sse_comparison_operator. In case we are
9250 in non-ieee mode, try to canonicalize the destination operand
9251 to be first in the comparison - this helps reload to avoid extra
9253 if (!sse_comparison_operator (operands[1], VOIDmode)
9254 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9256 rtx tmp = ix86_compare_op0;
9257 ix86_compare_op0 = ix86_compare_op1;
9258 ix86_compare_op1 = tmp;
9259 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9260 VOIDmode, ix86_compare_op0,
9263 /* Similarly try to manage result to be first operand of conditional
9264 move. We also don't support the NE comparison on SSE, so try to
9266 if ((rtx_equal_p (operands[0], operands[3])
9267 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9268 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9270 rtx tmp = operands[2];
9271 operands[2] = operands[3];
9273 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9274 (GET_CODE (operands[1])),
9275 VOIDmode, ix86_compare_op0,
9278 if (GET_MODE (operands[0]) == SFmode)
9279 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9280 operands[2], operands[3],
9281 ix86_compare_op0, ix86_compare_op1));
9283 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9284 operands[2], operands[3],
9285 ix86_compare_op0, ix86_compare_op1));
9289 /* The floating point conditional move instructions don't directly
9290 support conditions resulting from a signed integer comparison. */
9292 code = GET_CODE (operands[1]);
9293 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9295 /* The floating point conditional move instructions don't directly
9296 support signed integer comparisons. */
9298 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9300 if (second_test != NULL || bypass_test != NULL)
/* Reduce the comparison to a setcc result tested against zero.  */
9302 tmp = gen_reg_rtx (QImode);
9303 ix86_expand_setcc (code, tmp);
9305 ix86_compare_op0 = tmp;
9306 ix86_compare_op1 = const0_rtx;
9307 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9309 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9311 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9312 emit_move_insn (tmp, operands[3]);
9315 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9317 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9318 emit_move_insn (tmp, operands[2]);
9322 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9323 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9328 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9329 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9334 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9335 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9343 /* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
9344 works for floating point parameters and nonoffsettable memories.
9345 For pushes, it returns just stack offsets; the values will be saved
9346 in the right order. Maximally three parts are generated. */
9349 ix86_split_to_parts (operand, parts, mode)
9352 enum machine_mode mode;
/* Number of word-sized parts: 32-bit target splits into SImode words
   (TFmode takes 3), 64-bit target into DImode words.  */
9357 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9359 size = (GET_MODE_SIZE (mode) + 4) / 8;
9361 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9363 if (size < 2 || size > 3)
9366 /* Optimize constant pool reference to immediates. This is used by fp
9367 moves, that force all constants to memory to allow combining. */
9368 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9370 rtx tmp = maybe_get_pool_constant (operand);
9375 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9377 /* The only non-offsettable memories we handle are pushes. */
9378 if (! push_operand (operand, VOIDmode))
9381 operand = copy_rtx (operand);
9382 PUT_MODE (operand, Pmode);
9383 parts[0] = parts[1] = parts[2] = operand;
9385 else if (!TARGET_64BIT)
9388 split_di (&operand, 1, &parts[0], &parts[1]);
9391 if (REG_P (operand))
9393 if (!reload_completed)
9395 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9396 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9398 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9400 else if (offsettable_memref_p (operand))
9402 operand = adjust_address (operand, SImode, 0);
9404 parts[1] = adjust_address (operand, SImode, 4);
9406 parts[2] = adjust_address (operand, SImode, 8);
9408 else if (GET_CODE (operand) == CONST_DOUBLE)
9413 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9418 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9419 parts[2] = gen_int_mode (l[2], SImode);
9422 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9427 parts[1] = gen_int_mode (l[1], SImode);
9428 parts[0] = gen_int_mode (l[0], SImode);
9437 split_ti (&operand, 1, &parts[0], &parts[1]);
9438 if (mode == XFmode || mode == TFmode)
9440 if (REG_P (operand))
9442 if (!reload_completed)
9444 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9445 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9447 else if (offsettable_memref_p (operand))
9449 operand = adjust_address (operand, DImode, 0);
9451 parts[1] = adjust_address (operand, SImode, 8);
9453 else if (GET_CODE (operand) == CONST_DOUBLE)
9458 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9459 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9460 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9461 if (HOST_BITS_PER_WIDE_INT >= 64)
9464 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9465 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9468 parts[0] = immed_double_const (l[0], l[1], DImode);
9469 parts[1] = gen_int_mode (l[2], SImode);
9479 /* Emit insns to perform a move or push of DI, DF, and XF values.
9480 Return false when normal moves are needed; true when all required
9481 insns have been emitted. Operands 2-4 contain the input values
9482 in the correct order; operands 5-7 contain the output values. */
/* NOTE(review): this extract appears to have interior lines elided and
   a stray numeral prepended to each surviving line; the comments below
   describe only the code that is visible here.  */
9485 ix86_split_long_move (operands)
9492 enum machine_mode mode = GET_MODE (operands[0]);
9494 /* The DFmode expanders may ask us to move double.
9495 For 64bit target this is single move. By hiding the fact
9496 here we simplify i386.md splitters. */
9497 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9499 /* Optimize constant pool reference to immediates. This is used by
9500 fp moves, that force all constants to memory to allow combining. */
9502 if (GET_CODE (operands[1]) == MEM
9503 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9504 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9505 operands[1] = get_pool_constant (XEXP (operands[1], 0));
/* A push destination is retyped to Pmode so a single wide push
   can be emitted on 64-bit targets.  */
9506 if (push_operand (operands[0], VOIDmode))
9508 operands[0] = copy_rtx (operands[0]);
9509 PUT_MODE (operands[0], Pmode);
9512 operands[0] = gen_lowpart (DImode, operands[0]);
9513 operands[1] = gen_lowpart (DImode, operands[1]);
9514 emit_move_insn (operands[0], operands[1]);
9518 /* The only non-offsettable memory we handle is push. */
9519 if (push_operand (operands[0], VOIDmode))
9521 else if (GET_CODE (operands[0]) == MEM
9522 && ! offsettable_memref_p (operands[0]))
/* Split both operands into word-sized parts; part[1] is the source,
   part[0] the destination.  */
9525 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9526 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9528 /* When emitting push, take care for source operands on the stack. */
9529 if (push && GET_CODE (operands[1]) == MEM
9530 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9533 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9534 XEXP (part[1][2], 0));
9535 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9536 XEXP (part[1][1], 0));
9539 /* We need to do copy in the right order in case an address register
9540 of the source overlaps the destination. */
9541 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9543 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9545 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9548 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9551 /* Collision in the middle part can be handled by reordering. */
9552 if (collisions == 1 && nparts == 3
9553 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9556 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9557 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9560 /* If there are more collisions, we can't handle it by reordering.
9561 Do an lea to the last part and use only one colliding move. */
9562 else if (collisions > 1)
9565 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
9566 XEXP (part[1][0], 0)));
9567 part[1][0] = change_address (part[1][0],
9568 TARGET_64BIT ? DImode : SImode,
9569 part[0][nparts - 1]);
9570 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
9572 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
9582 /* We use only first 12 bytes of TFmode value, but for pushing we
9583 are required to adjust stack as if we were pushing real 16byte
9585 if (mode == TFmode && !TARGET_64BIT)
9586 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
9588 emit_move_insn (part[0][2], part[1][2]);
9593 /* In 64bit mode we don't have 32bit push available. In case this is
9594 register, it is OK - we will just use larger counterpart. We also
9595 retype memory - this comes from an attempt to avoid REX prefix on
9596 moving of second half of TFmode value. */
9597 if (GET_MODE (part[1][1]) == SImode)
9599 if (GET_CODE (part[1][1]) == MEM)
9600 part[1][1] = adjust_address (part[1][1], DImode, 0);
9601 else if (REG_P (part[1][1]))
9602 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9605 if (GET_MODE (part[1][0]) == SImode)
9606 part[1][0] = part[1][1];
9609 emit_move_insn (part[0][1], part[1][1]);
9610 emit_move_insn (part[0][0], part[1][0]);
9614 /* Choose correct order to not overwrite the source before it is copied. */
9615 if ((REG_P (part[0][0])
9616 && REG_P (part[1][1])
9617 && (REGNO (part[0][0]) == REGNO (part[1][1])
9619 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9621 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Reversed order: copy the high part first so the overlapping low
   source parts are still intact when read.  */
9625 operands[2] = part[0][2];
9626 operands[3] = part[0][1];
9627 operands[4] = part[0][0];
9628 operands[5] = part[1][2];
9629 operands[6] = part[1][1];
9630 operands[7] = part[1][0];
9634 operands[2] = part[0][1];
9635 operands[3] = part[0][0];
9636 operands[5] = part[1][1];
9637 operands[6] = part[1][0];
/* Forward order: no overlap detected, copy low part first.  */
9644 operands[2] = part[0][0];
9645 operands[3] = part[0][1];
9646 operands[4] = part[0][2];
9647 operands[5] = part[1][0];
9648 operands[6] = part[1][1];
9649 operands[7] = part[1][2];
9653 operands[2] = part[0][0];
9654 operands[3] = part[0][1];
9655 operands[5] = part[1][0];
9656 operands[6] = part[1][1];
9659 emit_move_insn (operands[2], operands[5]);
9660 emit_move_insn (operands[3], operands[6]);
9662 emit_move_insn (operands[4], operands[7]);
/* Split a DImode arithmetic left shift into SImode operations on a
   32-bit target.  SCRATCH, when non-NULL, is an SImode register the
   fixup code may clobber; otherwise a fresh pseudo is created when
   allowed.  NOTE(review): interior lines appear elided from this
   extract; comments describe only the visible code.  */
9668 ix86_split_ashldi (operands, scratch)
9669 rtx *operands, scratch;
9671 rtx low[2], high[2];
9674 if (GET_CODE (operands[2]) == CONST_INT)
9676 split_di (operands, 2, low, high);
9677 count = INTVAL (operands[2]) & 63;
/* Constant count >= 32: low word moves up to the high word and the
   low word becomes zero; any remainder is shifted in place.  */
9681 emit_move_insn (high[0], low[1]);
9682 emit_move_insn (low[0], const0_rtx);
9685 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
/* Constant count < 32: SHLD feeds the low word's top bits into the
   high word, then the low word is shifted.  */
9689 if (!rtx_equal_p (operands[0], operands[1]))
9690 emit_move_insn (operands[0], operands[1]);
9691 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9692 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
/* Variable count: perform the 0..31 shift unconditionally, then fix
   up for counts of 32+ — via cmove when available, otherwise with
   the jump-based adjustment pattern.  */
9697 if (!rtx_equal_p (operands[0], operands[1]))
9698 emit_move_insn (operands[0], operands[1]);
9700 split_di (operands, 1, low, high);
9702 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9703 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
9705 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9707 if (! no_new_pseudos)
9708 scratch = force_reg (SImode, const0_rtx);
9710 emit_move_insn (scratch, const0_rtx);
9712 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9716 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a DImode arithmetic right shift into SImode operations on a
   32-bit target.  The sign must be propagated into the high word.
   SCRATCH has the same role as in ix86_split_ashldi.
   NOTE(review): interior lines appear elided from this extract.  */
9721 ix86_split_ashrdi (operands, scratch)
9722 rtx *operands, scratch;
9724 rtx low[2], high[2];
9727 if (GET_CODE (operands[2]) == CONST_INT)
9729 split_di (operands, 2, low, high);
9730 count = INTVAL (operands[2]) & 63;
/* Constant count >= 32: high word moves down to the low word and the
   high word is filled with sign bits (ashr by 31).  */
9734 emit_move_insn (low[0], high[1]);
9736 if (! reload_completed)
9737 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
9740 emit_move_insn (high[0], low[0]);
9741 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9745 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
/* Constant count < 32: SHRD feeds high-word bits into the low word,
   then the high word is arithmetically shifted.  */
9749 if (!rtx_equal_p (operands[0], operands[1]))
9750 emit_move_insn (operands[0], operands[1]);
9751 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9752 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count: shift as if count < 32, then fix up.  The scratch
   register receives a copy of the sign word (high >> 31) for the
   cmove-based adjustment.  */
9757 if (!rtx_equal_p (operands[0], operands[1]))
9758 emit_move_insn (operands[0], operands[1]);
9760 split_di (operands, 1, low, high);
9762 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9763 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9765 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9767 if (! no_new_pseudos)
9768 scratch = gen_reg_rtx (SImode);
9769 emit_move_insn (scratch, high[0]);
9770 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9771 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9775 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a DImode logical right shift into SImode operations on a
   32-bit target.  Like the arithmetic variant but zero-fills the
   high word.  NOTE(review): interior lines appear elided from this
   extract; comments describe only the visible code.  */
9780 ix86_split_lshrdi (operands, scratch)
9781 rtx *operands, scratch;
9783 rtx low[2], high[2];
9786 if (GET_CODE (operands[2]) == CONST_INT)
9788 split_di (operands, 2, low, high);
9789 count = INTVAL (operands[2]) & 63;
/* Constant count >= 32: high word moves down, high word becomes 0.  */
9793 emit_move_insn (low[0], high[1]);
9794 emit_move_insn (high[0], const0_rtx);
9797 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
/* Constant count < 32: SHRD plus a logical shift of the high word.  */
9801 if (!rtx_equal_p (operands[0], operands[1]))
9802 emit_move_insn (operands[0], operands[1]);
9803 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9804 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count: shift as if count < 32, then adjust.  */
9809 if (!rtx_equal_p (operands[0], operands[1]))
9810 emit_move_insn (operands[0], operands[1]);
9812 split_di (operands, 1, low, high);
9814 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9815 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
9817 /* Heh. By reversing the arguments, we can reuse this pattern. */
9818 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9820 if (! no_new_pseudos)
9821 scratch = force_reg (SImode, const0_rtx);
9823 emit_move_insn (scratch, const0_rtx);
9825 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9829 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
9833 /* Helper function for the string operations below. Test VARIABLE whether
9834 it is aligned to VALUE bytes. If true, jump to the label. */
/* Returns the label RTX; the caller emits the slow-path code after the
   call and then emits the label itself.  The test ANDs VARIABLE with
   VALUE and jumps when the result is zero (i.e. bits clear).  */
9836 ix86_expand_aligntest (variable, value)
9840 rtx label = gen_label_rtx ();
9841 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
/* Use the AND pattern matching VARIABLE's mode.  */
9842 if (GET_MODE (variable) == DImode)
9843 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
9845 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
9846 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
9851 /* Adjust COUNTER by the VALUE. */
/* Decrements COUNTREG by VALUE: emits an add of -VALUE in the
   register's own mode (DImode or SImode).  */
9853 ix86_adjust_counter (countreg, value)
9855 HOST_WIDE_INT value;
9857 if (GET_MODE (countreg) == DImode)
9858 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
9860 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
9863 /* Zero extend possibly SImode EXP to Pmode register. */
/* Returns a fresh Pmode register holding EXP.  VOIDmode (constant)
   operands are forced into a register directly; operands already in
   Pmode are copied; SImode values get an explicit zero-extension.  */
9865 ix86_zero_extend_to_Pmode (exp)
9869 if (GET_MODE (exp) == VOIDmode)
9870 return force_reg (Pmode, exp);
9871 if (GET_MODE (exp) == Pmode)
9872 return copy_to_mode_reg (Pmode, exp);
9873 r = gen_reg_rtx (Pmode);
9874 emit_insn (gen_zero_extendsidi2 (r, exp));
9878 /* Expand string move (memcpy) operation. Use i386 string operations when
9879 profitable. expand_clrstr contains similar code. */
/* NOTE(review): this extract appears to have interior lines elided;
   comments below describe only the visible code.  */
9881 ix86_expand_movstr (dst, src, count_exp, align_exp)
9882 rtx dst, src, count_exp, align_exp;
9884 rtx srcreg, destreg, countreg;
9885 enum machine_mode counter_mode;
9886 HOST_WIDE_INT align = 0;
9887 unsigned HOST_WIDE_INT count = 0;
9892 if (GET_CODE (align_exp) == CONST_INT)
9893 align = INTVAL (align_exp);
9895 /* This simple hack avoids all inlining code and simplifies code below. */
9896 if (!TARGET_ALIGN_STRINGOPS)
9899 if (GET_CODE (count_exp) == CONST_INT)
9900 count = INTVAL (count_exp);
9902 /* Figure out proper mode for counter. For 32bits it is always SImode,
9903 for 64bits use SImode when possible, otherwise DImode.
9904 Set count to number of bytes copied when known at compile time. */
9905 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9906 || x86_64_zero_extended_value (count_exp))
9907 counter_mode = SImode;
9909 counter_mode = DImode;
9911 if (counter_mode != SImode && counter_mode != DImode)
/* Materialize the destination and source addresses in registers.  */
9914 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9915 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9917 emit_insn (gen_cld ());
9919 /* When optimizing for size emit simple rep ; movsb instruction for
9920 counts not divisible by 4. */
9922 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9924 countreg = ix86_zero_extend_to_Pmode (count_exp);
9926 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
9927 destreg, srcreg, countreg));
9929 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
9930 destreg, srcreg, countreg));
9933 /* For constant aligned (or small unaligned) copies use rep movsl
9934 followed by code copying the rest. For PentiumPro ensure 8 byte
9935 alignment to allow rep movsl acceleration. */
9939 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9940 || optimize_size || count < (unsigned int) 64))
9942 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9943 if (count & ~(size - 1))
/* Word count = byte count / size, masked to 30 bits on 32-bit hosts.  */
9945 countreg = copy_to_mode_reg (counter_mode,
9946 GEN_INT ((count >> (size == 4 ? 2 : 3))
9947 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9948 countreg = ix86_zero_extend_to_Pmode (countreg);
9952 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
9953 destreg, srcreg, countreg));
9955 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
9956 destreg, srcreg, countreg));
9959 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
9960 destreg, srcreg, countreg));
/* Copy the remaining 1-7 tail bytes with single moves.  */
9962 if (size == 8 && (count & 0x04))
9963 emit_insn (gen_strmovsi (destreg, srcreg));
9965 emit_insn (gen_strmovhi (destreg, srcreg));
9967 emit_insn (gen_strmovqi (destreg, srcreg));
9969 /* The generic code based on the glibc implementation:
9970 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9971 allowing accelerated copying there)
9972 - copy the data using rep movsl
9978 int desired_alignment = (TARGET_PENTIUMPRO
9979 && (count == 0 || count >= (unsigned int) 260)
9980 ? 8 : UNITS_PER_WORD);
9982 /* In case we don't know anything about the alignment, default to
9983 library version, since it is usually equally fast and result in
9985 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9991 if (TARGET_SINGLE_STRINGOP)
9992 emit_insn (gen_cld ());
9994 countreg2 = gen_reg_rtx (Pmode);
9995 countreg = copy_to_mode_reg (counter_mode, count_exp);
9997 /* We don't use loops to align destination and to copy parts smaller
9998 than 4 bytes, because gcc is able to optimize such code better (in
9999 the case the destination or the count really is aligned, gcc is often
10000 able to predict the branches) and also it is friendlier to the
10001 hardware branch prediction.
10003 Using loops is beneficial for generic case, because we can
10004 handle small counts using the loops. Many CPUs (such as Athlon)
10005 have large REP prefix setup costs.
10007 This is quite costly. Maybe we can revisit this decision later or
10008 add some customizability to this code. */
/* Skip the alignment prologue entirely for small variable counts.  */
10010 if (count == 0 && align < desired_alignment)
10012 label = gen_label_rtx ();
10013 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10014 LEU, 0, counter_mode, 1, label);
/* Align the destination one byte/halfword/word at a time; each test
   is skipped when the destination already has that alignment.  */
10018 rtx label = ix86_expand_aligntest (destreg, 1);
10019 emit_insn (gen_strmovqi (destreg, srcreg));
10020 ix86_adjust_counter (countreg, 1);
10021 emit_label (label);
10022 LABEL_NUSES (label) = 1;
10026 rtx label = ix86_expand_aligntest (destreg, 2);
10027 emit_insn (gen_strmovhi (destreg, srcreg));
10028 ix86_adjust_counter (countreg, 2);
10029 emit_label (label);
10030 LABEL_NUSES (label) = 1;
10032 if (align <= 4 && desired_alignment > 4)
10034 rtx label = ix86_expand_aligntest (destreg, 4);
10035 emit_insn (gen_strmovsi (destreg, srcreg));
10036 ix86_adjust_counter (countreg, 4);
10037 emit_label (label);
10038 LABEL_NUSES (label) = 1;
10041 if (label && desired_alignment > 4 && !TARGET_64BIT)
10043 emit_label (label);
10044 LABEL_NUSES (label) = 1;
10047 if (!TARGET_SINGLE_STRINGOP)
10048 emit_insn (gen_cld ());
/* Bulk copy: shift the byte count down to a word count and rep-move.  */
10051 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10053 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10054 destreg, srcreg, countreg2));
10058 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10059 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10060 destreg, srcreg, countreg2));
10065 emit_label (label);
10066 LABEL_NUSES (label) = 1;
/* Epilogue: copy any remaining 4/2/1-byte tail, using runtime tests
   of the counter when the count is not known at compile time.  */
10068 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10069 emit_insn (gen_strmovsi (destreg, srcreg));
10070 if ((align <= 4 || count == 0) && TARGET_64BIT)
10072 rtx label = ix86_expand_aligntest (countreg, 4);
10073 emit_insn (gen_strmovsi (destreg, srcreg));
10074 emit_label (label);
10075 LABEL_NUSES (label) = 1;
10077 if (align > 2 && count != 0 && (count & 2))
10078 emit_insn (gen_strmovhi (destreg, srcreg));
10079 if (align <= 2 || count == 0)
10081 rtx label = ix86_expand_aligntest (countreg, 2);
10082 emit_insn (gen_strmovhi (destreg, srcreg));
10083 emit_label (label);
10084 LABEL_NUSES (label) = 1;
10086 if (align > 1 && count != 0 && (count & 1))
10087 emit_insn (gen_strmovqi (destreg, srcreg));
10088 if (align <= 1 || count == 0)
10090 rtx label = ix86_expand_aligntest (countreg, 1);
10091 emit_insn (gen_strmovqi (destreg, srcreg));
10092 emit_label (label);
10093 LABEL_NUSES (label) = 1;
10097 insns = get_insns ();
10100 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10105 /* Expand string clear operation (bzero). Use i386 string operations when
10106 profitable. expand_movstr contains similar code. */
/* NOTE(review): this extract appears to have interior lines elided;
   comments below describe only the visible code.  The structure
   mirrors ix86_expand_movstr with stores of zero instead of copies.  */
10108 ix86_expand_clrstr (src, count_exp, align_exp)
10109 rtx src, count_exp, align_exp;
10111 rtx destreg, zeroreg, countreg;
10112 enum machine_mode counter_mode;
10113 HOST_WIDE_INT align = 0;
10114 unsigned HOST_WIDE_INT count = 0;
10116 if (GET_CODE (align_exp) == CONST_INT)
10117 align = INTVAL (align_exp);
10119 /* This simple hack avoids all inlining code and simplifies code below. */
10120 if (!TARGET_ALIGN_STRINGOPS)
10123 if (GET_CODE (count_exp) == CONST_INT)
10124 count = INTVAL (count_exp);
10125 /* Figure out proper mode for counter. For 32bits it is always SImode,
10126 for 64bits use SImode when possible, otherwise DImode.
10127 Set count to number of bytes copied when known at compile time. */
10128 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10129 || x86_64_zero_extended_value (count_exp))
10130 counter_mode = SImode;
10132 counter_mode = DImode;
10134 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10136 emit_insn (gen_cld ());
10138 /* When optimizing for size emit simple rep ; movsb instruction for
10139 counts not divisible by 4. */
10141 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10143 countreg = ix86_zero_extend_to_Pmode (count_exp);
10144 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10146 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10147 destreg, countreg));
10149 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10150 destreg, countreg));
/* Constant-count path: rep stos in word units plus a scalar tail.  */
10152 else if (count != 0
10154 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10155 || optimize_size || count < (unsigned int) 64))
10157 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10158 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10159 if (count & ~(size - 1))
10161 countreg = copy_to_mode_reg (counter_mode,
10162 GEN_INT ((count >> (size == 4 ? 2 : 3))
10163 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10164 countreg = ix86_zero_extend_to_Pmode (countreg);
10168 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10169 destreg, countreg));
10171 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10172 destreg, countreg));
10175 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10176 destreg, countreg));
/* Store the remaining tail bytes via narrow subregs of ZEROREG.  */
10178 if (size == 8 && (count & 0x04))
10179 emit_insn (gen_strsetsi (destreg,
10180 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10182 emit_insn (gen_strsethi (destreg,
10183 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10185 emit_insn (gen_strsetqi (destreg,
10186 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10192 /* Compute desired alignment of the string operation. */
10193 int desired_alignment = (TARGET_PENTIUMPRO
10194 && (count == 0 || count >= (unsigned int) 260)
10195 ? 8 : UNITS_PER_WORD);
10197 /* In case we don't know anything about the alignment, default to
10198 library version, since it is usually equally fast and result in
10200 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10203 if (TARGET_SINGLE_STRINGOP)
10204 emit_insn (gen_cld ());
10206 countreg2 = gen_reg_rtx (Pmode);
10207 countreg = copy_to_mode_reg (counter_mode, count_exp);
10208 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
/* Alignment prologue, mirroring ix86_expand_movstr.  */
10210 if (count == 0 && align < desired_alignment)
10212 label = gen_label_rtx ();
10213 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10214 LEU, 0, counter_mode, 1, label);
10218 rtx label = ix86_expand_aligntest (destreg, 1);
10219 emit_insn (gen_strsetqi (destreg,
10220 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10221 ix86_adjust_counter (countreg, 1);
10222 emit_label (label);
10223 LABEL_NUSES (label) = 1;
10227 rtx label = ix86_expand_aligntest (destreg, 2);
10228 emit_insn (gen_strsethi (destreg,
10229 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10230 ix86_adjust_counter (countreg, 2);
10231 emit_label (label);
10232 LABEL_NUSES (label) = 1;
10234 if (align <= 4 && desired_alignment > 4)
10236 rtx label = ix86_expand_aligntest (destreg, 4);
10237 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10238 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10240 ix86_adjust_counter (countreg, 4);
10241 emit_label (label);
10242 LABEL_NUSES (label) = 1;
10245 if (label && desired_alignment > 4 && !TARGET_64BIT)
10247 emit_label (label);
10248 LABEL_NUSES (label) = 1;
10252 if (!TARGET_SINGLE_STRINGOP)
10253 emit_insn (gen_cld ());
/* Bulk clear: byte count shifted down to a word count, then rep stos.  */
10256 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10258 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10259 destreg, countreg2));
10263 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10264 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10265 destreg, countreg2));
10269 emit_label (label);
10270 LABEL_NUSES (label) = 1;
/* Epilogue: clear the remaining 4/2/1-byte tail.  */
10273 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10274 emit_insn (gen_strsetsi (destreg,
10275 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10276 if (TARGET_64BIT && (align <= 4 || count == 0))
10278 rtx label = ix86_expand_aligntest (countreg, 4);
10279 emit_insn (gen_strsetsi (destreg,
10280 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10281 emit_label (label);
10282 LABEL_NUSES (label) = 1;
10284 if (align > 2 && count != 0 && (count & 2))
10285 emit_insn (gen_strsethi (destreg,
10286 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10287 if (align <= 2 || count == 0)
10289 rtx label = ix86_expand_aligntest (countreg, 2);
10290 emit_insn (gen_strsethi (destreg,
10291 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10292 emit_label (label);
10293 LABEL_NUSES (label) = 1;
10295 if (align > 1 && count != 0 && (count & 1))
10296 emit_insn (gen_strsetqi (destreg,
10297 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10298 if (align <= 1 || count == 0)
10300 rtx label = ix86_expand_aligntest (countreg, 1);
10301 emit_insn (gen_strsetqi (destreg,
10302 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10303 emit_label (label);
10304 LABEL_NUSES (label) = 1;
10309 /* Expand strlen. */
/* OUT receives the length; SRC is the string MEM; EOSCHAR is the
   terminator (const0_rtx for ordinary strlen); ALIGN is the known
   alignment.  Chooses between the unrolled SImode scanner and a
   repne-scasb style sequence.  NOTE(review): interior lines appear
   elided from this extract.  */
10311 ix86_expand_strlen (out, src, eoschar, align)
10312 rtx out, src, eoschar, align;
10314 rtx addr, scratch1, scratch2, scratch3, scratch4;
10316 /* The generic case of strlen expander is long. Avoid its
10317 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
10319 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10320 && !TARGET_INLINE_ALL_STRINGOPS
10322 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10325 addr = force_reg (Pmode, XEXP (src, 0));
10326 scratch1 = gen_reg_rtx (Pmode);
10328 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10331 /* Well it seems that some optimizer does not combine a call like
10332 foo(strlen(bar), strlen(bar));
10333 when the move and the subtraction is done here. It does calculate
10334 the length just once when these instructions are done inside of
10335 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10336 often used and I use one fewer register for the lifetime of
10337 output_strlen_unroll() this is better. */
10339 emit_move_insn (out, addr);
10341 ix86_expand_strlensi_unroll_1 (out, align);
10343 /* strlensi_unroll_1 returns the address of the zero at the end of
10344 the string, like memchr(), so compute the length by subtracting
10345 the start address. */
10347 emit_insn (gen_subdi3 (out, out, addr));
10349 emit_insn (gen_subsi3 (out, out, addr));
/* Fallback path: scan with the strlenqi pattern (scratch4 = -1 count),
   then complement and add -1 to convert the result to a length.  */
10353 scratch2 = gen_reg_rtx (Pmode);
10354 scratch3 = gen_reg_rtx (Pmode);
10355 scratch4 = force_reg (Pmode, constm1_rtx);
10357 emit_move_insn (scratch3, addr);
10358 eoschar = force_reg (QImode, eoschar);
10360 emit_insn (gen_cld ());
10363 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10364 align, scratch4, scratch3));
10365 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10366 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10370 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10371 align, scratch4, scratch3));
10372 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10373 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10379 /* Expand the appropriate insns for doing strlen if not just doing
10382 out = result, initialized with the start address
10383 align_rtx = alignment of the address.
10384 scratch = scratch register, initialized with the startaddress when
10385 not aligned, otherwise undefined
10387 This is just the body. It needs the initialisations mentioned above and
10388 some address computing at the end. These things are done in i386.md. */
/* NOTE(review): interior lines appear elided from this extract;
   comments describe only the visible code.  On return OUT holds the
   address of the terminating zero byte (memchr-style), which the
   caller converts to a length.  */
10391 ix86_expand_strlensi_unroll_1 (out, align_rtx)
10392 rtx out, align_rtx;
10396 rtx align_2_label = NULL_RTX;
10397 rtx align_3_label = NULL_RTX;
10398 rtx align_4_label = gen_label_rtx ();
10399 rtx end_0_label = gen_label_rtx ();
10401 rtx tmpreg = gen_reg_rtx (SImode);
10402 rtx scratch = gen_reg_rtx (SImode);
10405 if (GET_CODE (align_rtx) == CONST_INT)
10406 align = INTVAL (align_rtx);
10408 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10410 /* Is there a known alignment and is it less than 4? */
10413 rtx scratch1 = gen_reg_rtx (Pmode);
10414 emit_move_insn (scratch1, out);
10415 /* Is there a known alignment and is it not 2? */
10418 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10419 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10421 /* Leave just the 3 lower bits. */
10422 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10423 NULL_RTX, 0, OPTAB_WIDEN);
10425 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10426 Pmode, 1, align_4_label);
10427 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
10428 Pmode, 1, align_2_label);
10429 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
10430 Pmode, 1, align_3_label);
10434 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10435 check if is aligned to 4 - byte. */
10437 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
10438 NULL_RTX, 0, OPTAB_WIDEN);
10440 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10441 Pmode, 1, align_4_label);
10444 mem = gen_rtx_MEM (QImode, out);
10446 /* Now compare the bytes. */
10448 /* Compare the first n unaligned byte on a byte per byte basis. */
10449 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
10450 QImode, 1, end_0_label);
10452 /* Increment the address. */
10454 emit_insn (gen_adddi3 (out, out, const1_rtx));
10456 emit_insn (gen_addsi3 (out, out, const1_rtx));
10458 /* Not needed with an alignment of 2 */
10461 emit_label (align_2_label);
10463 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10467 emit_insn (gen_adddi3 (out, out, const1_rtx));
10469 emit_insn (gen_addsi3 (out, out, const1_rtx));
10471 emit_label (align_3_label);
10474 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10478 emit_insn (gen_adddi3 (out, out, const1_rtx));
10480 emit_insn (gen_addsi3 (out, out, const1_rtx));
10483 /* Generate loop to check 4 bytes at a time. It is not a good idea to
10484 align this loop. It gives only huge programs, but does not help to
10486 emit_label (align_4_label);
10488 mem = gen_rtx_MEM (SImode, out);
10489 emit_move_insn (scratch, mem);
10491 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10493 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
10495 /* This formula yields a nonzero result iff one of the bytes is zero.
10496 This saves three branches inside loop and many cycles. */
10498 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10499 emit_insn (gen_one_cmplsi2 (scratch, scratch));
10500 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
10501 emit_insn (gen_andsi3 (tmpreg, tmpreg,
10502 gen_int_mode (0x80808080, SImode)));
10503 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* Epilogue (cmove variant): locate which of the four bytes was the
   zero, stepping OUT by two when the zero is not in the low half.  */
10508 rtx reg = gen_reg_rtx (SImode);
10509 rtx reg2 = gen_reg_rtx (Pmode);
10510 emit_move_insn (reg, tmpreg);
10511 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10513 /* If zero is not in the first two bytes, move two bytes forward. */
10514 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10515 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10516 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10517 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10518 gen_rtx_IF_THEN_ELSE (SImode, tmp,
10521 /* Emit lea manually to avoid clobbering of flags. */
/* NOTE(review): gen_rtx_SET is conventionally passed VOIDmode as its
   mode argument; SImode here looks suspicious — confirm against the
   upstream sources before relying on it.  */
10522 emit_insn (gen_rtx_SET (SImode, reg2,
10523 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
10525 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10526 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10527 emit_insn (gen_rtx_SET (VOIDmode, out,
10528 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* Epilogue (branch variant) for targets without cmove.  */
10535 rtx end_2_label = gen_label_rtx ();
10536 /* Is zero in the first two bytes? */
10538 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10539 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10540 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10541 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10542 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10544 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10545 JUMP_LABEL (tmp) = end_2_label;
10547 /* Not in the first two. Move two bytes forward. */
10548 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
10550 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
10552 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
10554 emit_label (end_2_label);
10558 /* Avoid branch in fixing the byte. */
10559 tmpreg = gen_lowpart (QImode, tmpreg);
10560 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
10562 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
10564 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
10566 emit_label (end_0_label);
/* Emit a call to FNADDR, returning RETVAL if non-NULL.  CALLARG1/2
   are the extra operands of the call patterns; POP is the number of
   bytes the callee pops (const0_rtx meaning none).
   NOTE(review): interior lines appear elided from this extract.  */
10570 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
10571 rtx retval, fnaddr, callarg1, callarg2, pop;
10573 rtx use = NULL, call;
10575 if (pop == const0_rtx)
10577 if (TARGET_64BIT && pop)
/* Darwin PIC: route the call through the machopic indirection.  */
10581 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
10582 fnaddr = machopic_indirect_call_target (fnaddr);
10584 /* Static functions and indirect calls don't need the pic register. */
10585 if (! TARGET_64BIT && flag_pic
10586 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
10587 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
10588 use_reg (&use, pic_offset_table_rtx)
10620 /* Clear stack slot assignments remembered from previous functions.
10621 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocates a zeroed, GC-managed machine_function record.  */
10624 static struct machine_function *
10625 ix86_init_machine_status ()
10627 return ggc_alloc_cleared (sizeof (struct machine_function));
10630 /* Return a MEM corresponding to a stack slot with mode MODE.
10631 Allocate a new slot if necessary.
10633 The RTL for a function can have several slots available: N is
10634 which slot to use. */
10637 assign_386_stack_local (mode, n)
10638 enum machine_mode mode;
/* N is bounds-checked against MAX_386_STACK_LOCALS.  */
10641 if (n < 0 || n >= MAX_386_STACK_LOCALS)
/* Slots are cached per (mode, n) in ix86_stack_locals and created
   lazily on first request.  */
10644 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
10645 ix86_stack_locals[(int) mode][n]
10646 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10648 return ix86_stack_locals[(int) mode][n];
10651 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10653 static GTY(()) rtx ix86_tls_symbol;
/* Lazily builds and caches the SYMBOL_REF; the GNU TLS variant uses
   the triple-underscore name (note the string literals differ only by
   one leading underscore — do not "normalize" them).  */
10655 ix86_tls_get_addr ()
10658 if (!ix86_tls_symbol)
10660 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, (TARGET_GNU_TLS
10661 ? "___tls_get_addr"
10662 : "__tls_get_addr"));
10665 return ix86_tls_symbol;
10668 /* Calculate the length of the memory address in the instruction
10669 encoding. Does not include the one-byte modrm, opcode, or prefix. */
/* NOTE(review): interior lines appear elided from this extract.  */
10672 memory_address_length (addr)
10675 struct ix86_address parts;
10676 rtx base, index, disp;
/* Auto-inc/dec addresses use no extra encoding bytes.  */
10679 if (GET_CODE (addr) == PRE_DEC
10680 || GET_CODE (addr) == POST_INC
10681 || GET_CODE (addr) == PRE_MODIFY
10682 || GET_CODE (addr) == POST_MODIFY)
10685 if (! ix86_decompose_address (addr, &parts))
10689 index = parts.index;
10693 /* Register Indirect. */
10694 if (base && !index && !disp)
10696 /* Special cases: ebp and esp need the two-byte modrm form. */
10697 if (addr == stack_pointer_rtx
10698 || addr == arg_pointer_rtx
10699 || addr == frame_pointer_rtx
10700 || addr == hard_frame_pointer_rtx)
10704 /* Direct Addressing. */
10705 else if (disp && !base && !index)
10710 /* Find the length of the displacement constant. */
/* Letter 'K' constraint = value fits in a signed 8-bit displacement.  */
10713 if (GET_CODE (disp) == CONST_INT
10714 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
10720 /* An index requires the two-byte modrm form. */
10728 /* Compute default value for "length_immediate" attribute. When SHORTFORM
10729 is set, expect that the insn has an 8-bit immediate alternative. */
10731 ix86_attr_length_immediate_default (insn, shortform)
10737 extract_insn_cached (insn);
/* Scan the operands for the (first) constant operand.  */
10738 for (i = recog_data.n_operands - 1; i >= 0; --i)
10739 if (CONSTANT_P (recog_data.operand[i]))
/* 'K' constraint: the constant fits the sign-extended 8-bit form.  */
10744 && GET_CODE (recog_data.operand[i]) == CONST_INT
10745 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
10749 switch (get_attr_mode (insn))
10760 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
10765 fatal_insn ("unknown insn mode", insn);
10771 /* Compute default value for "length_address" attribute.  */
/* Returns the encoded length of the first MEM operand's address,
   via memory_address_length; the no-MEM fallthrough is elided in
   this listing.  */
10773 ix86_attr_length_address_default (insn)
10777 extract_insn_cached (insn);
10778 for (i = recog_data.n_operands - 1; i >= 0; --i)
10779 if (GET_CODE (recog_data.operand[i]) == MEM)
10781 return memory_address_length (XEXP (recog_data.operand[i], 0));
10787 /* Return the maximum number of instructions a cpu can issue.  */
/* NOTE(review): the function signature, the switch head and the
   per-processor return values are elided in this listing; only the
   case labels survive.  Kept verbatim.  */
10794 case PROCESSOR_PENTIUM:
10798 case PROCESSOR_PENTIUMPRO:
10799 case PROCESSOR_PENTIUM4:
10800 case PROCESSOR_ATHLON:
10808 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
10809 by DEP_INSN and nothing set by DEP_INSN. */
10812 ix86_flags_dependant (insn, dep_insn, insn_type)
10813 rtx insn, dep_insn;
10814 enum attr_type insn_type;
10818 /* Simplify the test for uninteresting insns. */
10819 if (insn_type != TYPE_SETCC
10820 && insn_type != TYPE_ICMOV
10821 && insn_type != TYPE_FCMOV
10822 && insn_type != TYPE_IBR)
10825 if ((set = single_set (dep_insn)) != 0)
10827 set = SET_DEST (set);
10830 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
10831 && XVECLEN (PATTERN (dep_insn), 0) == 2
10832 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
10833 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
10835 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10836 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10841 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
10844 /* This test is true if the dependent insn reads the flags but
10845 not any other potentially set register. */
10846 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
10849 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
10855 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
10856 address with operands set by DEP_INSN.  */
/* For an LEA, the "address" is the SET_SRC of the pattern itself;
   otherwise the first MEM operand's address is taken.  The answer is
   whether DEP_INSN modifies anything mentioned in that address.
   NOTE(review): the braces, an early-return and the no-MEM fallthrough
   are elided in this listing; code kept verbatim.  */
10859 ix86_agi_dependant (insn, dep_insn, insn_type)
10860 rtx insn, dep_insn;
10861 enum attr_type insn_type;
10865 if (insn_type == TYPE_LEA
10868 addr = PATTERN (insn);
10869 if (GET_CODE (addr) == SET)
10871 else if (GET_CODE (addr) == PARALLEL
10872 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
10873 addr = XVECEXP (addr, 0, 0);
10876 addr = SET_SRC (addr);
10881 extract_insn_cached (insn);
10882 for (i = recog_data.n_operands - 1; i >= 0; --i)
10883 if (GET_CODE (recog_data.operand[i]) == MEM)
10885 addr = XEXP (recog_data.operand[i], 0);
10892 return modified_in_p (addr, dep_insn);
/* Scheduler hook: adjust the latency COST of the dependency LINK from
   DEP_INSN to INSN, per-processor.  Returns the adjusted cost.
   NOTE(review): this listing elides the switch head, braces and many
   cost assignments; code kept verbatim, comments only are edited.  */
10896 ix86_adjust_cost (insn, link, dep_insn, cost)
10897 rtx insn, link, dep_insn;
10900 enum attr_type insn_type, dep_insn_type;
10901 enum attr_memory memory, dep_memory;
10903 int dep_insn_code_number;
10905 /* Anti and output dependencies have zero cost on all CPUs.  */
10906 if (REG_NOTE_KIND (link) != 0)
10909 dep_insn_code_number = recog_memoized (dep_insn);
10911 /* If we can't recognize the insns, we can't really do anything.  */
10912 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
10915 insn_type = get_attr_type (insn);
10916 dep_insn_type = get_attr_type (dep_insn);
10920 case PROCESSOR_PENTIUM:
10921 /* Address Generation Interlock adds a cycle of latency.  */
10922 if (ix86_agi_dependant (insn, dep_insn, insn_type))
10925 /* ??? Compares pair with jump/setcc.  */
10926 if (ix86_flags_dependant (insn, dep_insn, insn_type))
10929 /* Floating point stores require value to be ready one cycle earlier.  */
10930 if (insn_type == TYPE_FMOV
10931 && get_attr_memory (insn) == MEMORY_STORE
10932 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10936 case PROCESSOR_PENTIUMPRO:
10937 memory = get_attr_memory (insn);
10938 dep_memory = get_attr_memory (dep_insn);
10940 /* Since we can't represent delayed latencies of load+operation,
10941 increase the cost here for non-imov insns.  */
10942 if (dep_insn_type != TYPE_IMOV
10943 && dep_insn_type != TYPE_FMOV
10944 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
10947 /* INT->FP conversion is expensive.  */
10948 if (get_attr_fp_int_src (dep_insn))
10951 /* There is one cycle extra latency between an FP op and a store.  */
10952 if (insn_type == TYPE_FMOV
10953 && (set = single_set (dep_insn)) != NULL_RTX
10954 && (set2 = single_set (insn)) != NULL_RTX
10955 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
10956 && GET_CODE (SET_DEST (set2)) == MEM)
10959 /* Show ability of reorder buffer to hide latency of load by executing
10960 in parallel with previous instruction in case
10961 previous instruction is not needed to compute the address.  */
10962 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10963 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10965 /* Claim moves to take one cycle, as core can issue one load
10966 at time and the next load can start cycle later.  */
10967 if (dep_insn_type == TYPE_IMOV
10968 || dep_insn_type == TYPE_FMOV)
/* NOTE(review): the processor case label between here and the next
   block (presumably K6) is elided in this listing.  */
10976 memory = get_attr_memory (insn);
10977 dep_memory = get_attr_memory (dep_insn);
10978 /* The esp dependency is resolved before the instruction is really
10980 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
10981 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
10984 /* Since we can't represent delayed latencies of load+operation,
10985 increase the cost here for non-imov insns.  */
10986 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10987 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
10989 /* INT->FP conversion is expensive.  */
10990 if (get_attr_fp_int_src (dep_insn))
10993 /* Show ability of reorder buffer to hide latency of load by executing
10994 in parallel with previous instruction in case
10995 previous instruction is not needed to compute the address.  */
10996 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10997 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10999 /* Claim moves to take one cycle, as core can issue one load
11000 at time and the next load can start cycle later.  */
11001 if (dep_insn_type == TYPE_IMOV
11002 || dep_insn_type == TYPE_FMOV)
11011 case PROCESSOR_ATHLON:
11012 memory = get_attr_memory (insn);
11013 dep_memory = get_attr_memory (dep_insn);
11015 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11017 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
11022 /* Show ability of reorder buffer to hide latency of load by executing
11023 in parallel with previous instruction in case
11024 previous instruction is not needed to compute the address.  */
11025 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11026 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11028 /* Claim moves to take one cycle, as core can issue one load
11029 at time and the next load can start cycle later.  */
11030 if (dep_insn_type == TYPE_IMOV
11031 || dep_insn_type == TYPE_FMOV)
11033 else if (cost >= 3)
/* Per-cycle scheduling state for the PentiumPro reorder pass.
   NOTE(review): the decode-slot member(s) used elsewhere as
   .ppro.decode[0..2] are elided in this listing.  */
11048 struct ppro_sched_data
11051 int issued_this_cycle;
/* Return INSN's ppro_uops attribute if the insn is recognizable,
   else conservatively report PPRO_UOPS_MANY.  */
11055 static enum attr_ppro_uops
11056 ix86_safe_ppro_uops (insn)
11059 if (recog_memoized (insn) >= 0)
11060 return get_attr_ppro_uops (insn);
11062 return PPRO_UOPS_MANY;
/* Print the UIDs of the insns currently occupying the three PPro
   decode slots to DUMP, if slot 0 is occupied.  Debug aid only.  */
11066 ix86_dump_ppro_packet (dump)
11069 if (ix86_sched_data.ppro.decode[0])
11071 fprintf (dump, "PPRO packet: %d",
11072 INSN_UID (ix86_sched_data.ppro.decode[0]));
11073 if (ix86_sched_data.ppro.decode[1])
11074 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11075 if (ix86_sched_data.ppro.decode[2])
11076 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11077 fputc ('\n', dump);
11081 /* We're beginning a new block.  Initialize data structures as necessary.  */
/* Scheduler hook; simply clears the per-block ix86_sched_data state.
   All three parameters are unused.  */
11084 ix86_sched_init (dump, sched_verbose, veclen)
11085 FILE *dump ATTRIBUTE_UNUSED;
11086 int sched_verbose ATTRIBUTE_UNUSED;
11087 int veclen ATTRIBUTE_UNUSED;
11089 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11092 /* Shift INSN to SLOT, and shift everything else down.  */
/* NOTE(review): the insn save/restore around this do-while is elided
   in this listing; code kept verbatim.  */
11095 ix86_reorder_insn (insnp, slot)
11102 insnp[0] = insnp[1];
11103 while (++insnp != slot);
/* Reorder the ready queue READY..E_READY to better fill the three
   PPro-style decoders, then record how many insns were slotted in
   ix86_sched_data.ppro.issued_this_cycle.
   NOTE(review): braces and several statements are elided in this
   listing; code kept verbatim, comments only are edited.  */
11109 ix86_sched_reorder_ppro (ready, e_ready)
11114 enum attr_ppro_uops cur_uops;
11115 int issued_this_cycle;
11119 /* At this point .ppro.decode contains the state of the three
11120 decoders from last "cycle".  That is, those insns that were
11121 actually independent.  But here we're scheduling for the
11122 decoder, and we may find things that are decodable in the
11125 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11126 issued_this_cycle = 0;
11129 cur_uops = ix86_safe_ppro_uops (*insnp);
11131 /* If the decoders are empty, and we've a complex insn at the
11132 head of the priority queue, let it issue without complaint.  */
11133 if (decode[0] == NULL)
11135 if (cur_uops == PPRO_UOPS_MANY)
11137 decode[0] = *insnp;
11141 /* Otherwise, search for a 2-4 uop insn to issue.  */
11142 while (cur_uops != PPRO_UOPS_FEW)
11144 if (insnp == ready)
11146 cur_uops = ix86_safe_ppro_uops (*--insnp);
11149 /* If so, move it to the head of the line.  */
11150 if (cur_uops == PPRO_UOPS_FEW)
11151 ix86_reorder_insn (insnp, e_ready);
11153 /* Issue the head of the queue.  */
11154 issued_this_cycle = 1;
11155 decode[0] = *e_ready--;
11158 /* Look for simple insns to fill in the other two slots.  */
11159 for (i = 1; i < 3; ++i)
11160 if (decode[i] == NULL)
11162 if (ready > e_ready)
11166 cur_uops = ix86_safe_ppro_uops (*insnp);
11167 while (cur_uops != PPRO_UOPS_ONE)
11169 if (insnp == ready)
11171 cur_uops = ix86_safe_ppro_uops (*--insnp);
11174 /* Found one.  Move it to the head of the queue and issue it.  */
11175 if (cur_uops == PPRO_UOPS_ONE)
11177 ix86_reorder_insn (insnp, e_ready);
11178 decode[i] = *e_ready--;
11179 issued_this_cycle++;
11183 /* ??? Didn't find one.  Ideally, here we would do a lazy split
11184 of 2-uop insns, issue one and queue the other.  */
/* Claim at least one issue so the caller's countdown in
   ix86_variable_issue never starts at zero.  */
11188 if (issued_this_cycle == 0)
11189 issued_this_cycle = 1;
11190 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11193 /* We are about to begin issuing insns for this clock cycle.
11194 Override the default sort algorithm to better slot instructions.  */
/* Scheduler hook: dispatches to the PPro-specific reorder routine for
   PROCESSOR_PENTIUMPRO; returns the processor's issue rate.
   NOTE(review): the switch head, the n_ready <= 1 early-out and the
   default case are elided in this listing; code kept verbatim.  */
11196 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11197 FILE *dump ATTRIBUTE_UNUSED;
11198 int sched_verbose ATTRIBUTE_UNUSED;
11201 int clock_var ATTRIBUTE_UNUSED;
11203 int n_ready = *n_readyp;
11204 rtx *e_ready = ready + n_ready - 1;
11206 /* Make sure to go ahead and initialize key items in
11207 ix86_sched_data if we are not going to bother trying to
11208 reorder the ready queue.  */
11211 ix86_sched_data.ppro.issued_this_cycle = 1;
11220 case PROCESSOR_PENTIUMPRO:
11221 ix86_sched_reorder_ppro (ready, e_ready);
11226 return ix86_issue_rate ();
11229 /* We are about to issue INSN.  Return the number of insns left on the
11230 ready queue that can be issued this cycle.  */
/* For PPro, also maintains the three decode slots: a MANY-uop insn
   flushes the packet, a FEW-uop insn starts a fresh one, and a ONE-uop
   insn fills the first free slot (flushing when all three are full).
   NOTE(review): the switch head, braces and default case are elided in
   this listing; code kept verbatim.  */
11233 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11237 int can_issue_more;
11243 return can_issue_more - 1;
11245 case PROCESSOR_PENTIUMPRO:
11247 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
11249 if (uops == PPRO_UOPS_MANY)
11252 ix86_dump_ppro_packet (dump);
11253 ix86_sched_data.ppro.decode[0] = insn;
11254 ix86_sched_data.ppro.decode[1] = NULL;
11255 ix86_sched_data.ppro.decode[2] = NULL;
11257 ix86_dump_ppro_packet (dump);
11258 ix86_sched_data.ppro.decode[0] = NULL;
11260 else if (uops == PPRO_UOPS_FEW)
11263 ix86_dump_ppro_packet (dump);
11264 ix86_sched_data.ppro.decode[0] = insn;
11265 ix86_sched_data.ppro.decode[1] = NULL;
11266 ix86_sched_data.ppro.decode[2] = NULL;
11270 for (i = 0; i < 3; ++i)
11271 if (ix86_sched_data.ppro.decode[i] == NULL)
11273 ix86_sched_data.ppro.decode[i] = insn;
11281 ix86_dump_ppro_packet (dump);
11282 ix86_sched_data.ppro.decode[0] = NULL;
11283 ix86_sched_data.ppro.decode[1] = NULL;
11284 ix86_sched_data.ppro.decode[2] = NULL;
11288 return --ix86_sched_data.ppro.issued_this_cycle;
/* Target hook: whether to use the DFA pipeline description for the
   current CPU.  NOTE(review): the return statements are elided in
   this listing; code kept verbatim.  */
11293 ia32_use_dfa_pipeline_interface ()
11295 if (ix86_cpu == PROCESSOR_PENTIUM)
11300 /* How many alternative schedules to try.  This should be as wide as the
11301 scheduling freedom in the DFA, but no wider.  Making this value too
11302 large results extra work for the scheduler.  */
/* NOTE(review): the return statements are elided in this listing;
   code kept verbatim.  */
11305 ia32_multipass_dfa_lookahead ()
11307 if (ix86_cpu == PROCESSOR_PENTIUM)
11314 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11315 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
/* Iterates over the insn chain, delegating the per-pattern rewriting
   to ix86_set_move_mem_attrs_1.  NOTE(review): an INSN_P filter inside
   the loop appears to be elided in this listing.  */
11319 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11321 rtx dstref, srcref, dstreg, srcreg;
11325 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11327 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11331 /* Subroutine of above to actually do the updating by recursively walking
/* Copies the MEM attributes of DSTREF/SRCREF onto any MEM in X whose
   address is exactly DSTREG/SRCREG (pointer equality), then recurses
   into 'e' and 'E' sub-rtxes per the RTX format string.  */
11335 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11337 rtx dstref, srcref, dstreg, srcreg;
11339 enum rtx_code code = GET_CODE (x);
11340 const char *format_ptr = GET_RTX_FORMAT (code);
11343 if (code == MEM && XEXP (x, 0) == dstreg)
11344 MEM_COPY_ATTRIBUTES (x, dstref);
11345 else if (code == MEM && XEXP (x, 0) == srcreg)
11346 MEM_COPY_ATTRIBUTES (x, srcref);
11348 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11350 if (*format_ptr == 'e')
11351 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11353 else if (*format_ptr == 'E')
11354 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11355 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
11360 /* Compute the alignment given to a constant that is being placed in memory.
11361 EXP is the constant and ALIGN is the alignment that the object would
11363 The value of this function is used instead of that alignment to align
/* Boosts DFmode reals to 64 bits, 128-bit-mode reals to 128 bits, and
   long-ish string constants (>= 31 chars); returns ALIGN otherwise.
   NOTE(review): return values are elided in this listing.  */
11367 ix86_constant_alignment (exp, align)
11371 if (TREE_CODE (exp) == REAL_CST)
11373 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11375 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11378 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
11385 /* Compute the alignment for a static variable.
11386 TYPE is the data type, and ALIGN is the alignment that
11387 the object would ordinarily have.  The value of this function is used
11388 instead of that alignment to align the object.  */
/* Boosts alignment for large aggregates, arrays/complex/records/scalars
   of wide FP modes, mirroring ix86_local_alignment with larger
   thresholds.  NOTE(review): return values are elided in this
   listing.  */
11391 ix86_data_alignment (type, align)
/* 256-bit alignment for aggregates of 256 bits or more.  */
11395 if (AGGREGATE_TYPE_P (type)
11396 && TYPE_SIZE (type)
11397 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11398 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11399 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11402 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11403 to 16byte boundary.  */
11406 if (AGGREGATE_TYPE_P (type)
11407 && TYPE_SIZE (type)
11408 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11409 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11410 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11414 if (TREE_CODE (type) == ARRAY_TYPE)
11416 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11418 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11421 else if (TREE_CODE (type) == COMPLEX_TYPE)
11424 if (TYPE_MODE (type) == DCmode && align < 64)
11426 if (TYPE_MODE (type) == XCmode && align < 128)
11429 else if ((TREE_CODE (type) == RECORD_TYPE
11430 || TREE_CODE (type) == UNION_TYPE
11431 || TREE_CODE (type) == QUAL_UNION_TYPE)
11432 && TYPE_FIELDS (type))
11434 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11436 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11439 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11440 || TREE_CODE (type) == INTEGER_TYPE)
11442 if (TYPE_MODE (type) == DFmode && align < 64)
11444 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11451 /* Compute the alignment for a local variable.
11452 TYPE is the data type, and ALIGN is the alignment that
11453 the object would ordinarily have.  The value of this macro is used
11454 instead of that alignment to align the object.  */
/* Same shape as ix86_data_alignment but with the lower 16-byte
   aggregate threshold appropriate for stack objects.
   NOTE(review): return values are elided in this listing.  */
11457 ix86_local_alignment (type, align)
11461 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11462 to 16byte boundary.  */
11465 if (AGGREGATE_TYPE_P (type)
11466 && TYPE_SIZE (type)
11467 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11468 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11469 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11472 if (TREE_CODE (type) == ARRAY_TYPE)
11474 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11476 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11479 else if (TREE_CODE (type) == COMPLEX_TYPE)
11481 if (TYPE_MODE (type) == DCmode && align < 64)
11483 if (TYPE_MODE (type) == XCmode && align < 128)
11486 else if ((TREE_CODE (type) == RECORD_TYPE
11487 || TREE_CODE (type) == UNION_TYPE
11488 || TREE_CODE (type) == QUAL_UNION_TYPE)
11489 && TYPE_FIELDS (type))
11491 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11493 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11496 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11497 || TREE_CODE (type) == INTEGER_TYPE)
11500 if (TYPE_MODE (type) == DFmode && align < 64)
11502 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11508 /* Emit RTL insns to initialize the variable parts of a trampoline.
11509 FNADDR is an RTX for the address of the function's pure code.
11510 CXT is an RTX for the static chain value for the function.  */
/* 32-bit form emits "movl $cxt, %ecx" (0xb9) followed by a relative
   "jmp" (0xe9).  The 64-bit form loads FNADDR into r11 (movl if it is
   zero-extendable, else movabs), CXT into r10 via movabs, then jumps
   through r11 (0x49 0xff 0xe3).  NOTE(review): the 64-bit branch head,
   offset bookkeeping and the final abort are elided in this listing.  */
11512 x86_initialize_trampoline (tramp, fnaddr, cxt)
11513 rtx tramp, fnaddr, cxt;
11517 /* Compute offset from the end of the jmp to the target function.  */
11518 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11519 plus_constant (tramp, 10),
11520 NULL_RTX, 1, OPTAB_DIRECT);
11521 emit_move_insn (gen_rtx_MEM (QImode, tramp),
11522 gen_int_mode (0xb9, QImode));
11523 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11524 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
11525 gen_int_mode (0xe9, QImode));
11526 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11531 /* Try to load address using shorter movl instead of movabs.
11532 We may want to support movq for kernel mode, but kernel does not use
11533 trampolines at the moment.  */
11534 if (x86_64_zero_extended_value (fnaddr))
11536 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11537 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11538 gen_int_mode (0xbb41, HImode));
11539 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11540 gen_lowpart (SImode, fnaddr));
11545 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11546 gen_int_mode (0xbb49, HImode));
11547 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11551 /* Load static chain using movabs to r10.  */
11552 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11553 gen_int_mode (0xba49, HImode));
11554 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11557 /* Jump to the r11 */
11558 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11559 gen_int_mode (0xff49, HImode));
11560 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
11561 gen_int_mode (0xe3, QImode));
/* Sanity check that we did not overrun the trampoline buffer.  */
11563 if (offset > TRAMPOLINE_SIZE)
/* Register builtin NAME of type TYPE with function code CODE, but only
   when the target flags in MASK are enabled for this compilation.  */
11568 #define def_builtin(MASK, NAME, TYPE, CODE) \
11570 if ((MASK) & target_flags) \
11571 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11572 NULL, NULL_TREE); \
/* Table-entry type describing one IA-32 builtin: the target_flags mask
   that enables it, the insn pattern to expand to, the user-visible
   name, the builtin code, and (for comparisons) the rtx comparison
   code plus a flag (used by the comparison expanders; appears to
   request operand swapping for GT/GE forms -- see bdesc_comi).  */
11575 struct builtin_description
11577 const unsigned int mask;
11578 const enum insn_code icode;
11579 const char *const name;
11580 const enum ix86_builtins code;
11581 const enum rtx_code comparison;
11582 const unsigned int flag;
11585 /* Used for builtins that are enabled both by -msse and -msse2.  */
11586 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
/* COMISS/UCOMISS (SSE) and COMISD/UCOMISD (SSE2) scalar compare
   builtins.  Note GT/GE entries reuse the LT/LE comparison codes with
   flag == 1 -- presumably expanded with swapped operands; confirm
   against the comi expander.  */
11588 static const struct builtin_description bdesc_comi[] =
11590 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
11591 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
11592 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
11593 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
11594 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
11595 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
11596 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
11597 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
11598 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
11599 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
11600 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
11601 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
11602 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
11603 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
11604 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
11605 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
11606 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
11607 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
11608 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
11609 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
11610 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
11611 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
11612 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
11613 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
11616 static const struct builtin_description bdesc_2arg[] =
11619 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11620 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11621 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11622 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11623 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11624 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11625 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11626 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11628 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11629 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11630 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11631 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11632 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11633 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11634 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11635 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11636 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11637 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11638 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11639 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11640 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11641 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11642 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11643 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
11644 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
11645 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11646 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11647 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11648 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11649 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
11650 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
11651 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11653 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11654 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11655 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11656 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11658 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11659 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11660 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11661 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11662 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
11665 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11666 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11667 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11668 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11669 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11670 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11672 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11673 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11674 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11675 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11676 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11677 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11678 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11679 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11681 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11682 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
11683 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
11685 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11686 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11687 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11688 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11690 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11691 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
11693 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11694 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11695 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11696 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11697 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11698 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11700 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11701 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11702 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11703 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
11705 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11706 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11707 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11708 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
/* Continuation of the bdesc_2arg table: builtins taking two operands.
   Row layout (inferred from the registration loop below -- confirm against
   the struct builtin_description definition, which is outside this chunk):
   { option mask, insn code, C-level name (0 = registered by hand later),
     builtin enum code, comparison code, swap-operands flag }.  */
11709 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11710 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
11713 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11714 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11715 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11717 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11718 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
/* MMX shifts: each op has a shift-by-register variant (PSLLW etc.) and a
   shift-by-immediate variant (PSLLWI etc.) sharing the same insn pattern.  */
11720 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11721 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11722 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11723 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11724 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
11725 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
11727 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
11728 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
11729 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
11730 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
11731 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
11732 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
11734 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
11735 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
11736 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
11737 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
11739 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
11740 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
/* SSE2 packed/scalar double-precision arithmetic.  */
11743 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
11744 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
11745 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
11746 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
11747 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
11748 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
11749 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
11750 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
11752 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
11753 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
11754 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
/* GT/GE are encoded as LT/LE with the swap-operands flag set; the CMPPD
   hardware predicate set only has the "less" direction.  */
11755 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
11756 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
11757 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
11758 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
11759 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
11760 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
11761 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
11762 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
11763 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
11764 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
11765 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
11766 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
11767 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 },
11768 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 },
11769 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
11770 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
11771 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
11772 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
11773 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 },
11774 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 },
11775 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
11777 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
11778 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
11779 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
11780 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
11782 { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
11783 { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
11784 { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
11785 { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
11787 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
11788 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
11789 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
/* SSE2 128-bit integer arithmetic.
   NOTE(review): paddq128/psubq128 below reuse CODE_FOR_addv4si3 and
   CODE_FOR_subv4si3 -- the same insn codes as the paddd128/psubd128 rows
   next to them.  A 64-bit-element PADDQ/PSUBQ would be expected to use a
   v2di add/sub pattern; this looks like a copy-paste slip.  Confirm
   against the addv2di3/subv2di3 patterns in the machine description.  */
11792 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
11793 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
11794 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
11795 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
11796 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
11797 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
11798 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
11799 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
/* NOTE(review): the eight saturating 128-bit add/sub rows below carry
   MASK_MMX, while every surrounding 128-bit (xmm) row uses MASK_SSE2.
   These are SSE2 instructions, so MASK_SSE2 appears intended -- confirm.  */
11801 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
11802 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
11803 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
11804 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
11805 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
11806 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
11807 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
11808 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
11810 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
11811 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
11812 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
11813 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
11815 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
11816 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
11817 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
11818 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
11820 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
11821 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
11823 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
11824 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
11825 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
11826 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
11827 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
11828 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
11830 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
11831 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
11832 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
11833 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
11835 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
11836 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
11837 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
11838 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
11839 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
11840 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
11842 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
11843 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
11844 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
11846 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
11847 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
/* SSE2 shifts: the _ti insn codes are the shift-by-xmm-register forms, the
   plain codes the shift-by-immediate (...I128) forms -- presumably the count
   operand modes differ; confirm in the machine description.  */
11849 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
11850 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
11851 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
11852 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
11853 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
11854 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
11856 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
11857 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
11858 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
11859 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
11860 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
11861 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
11863 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
11864 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
11865 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
11866 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
11868 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
11870 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
11871 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
11872 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
/* Builtins taking exactly one operand; rows follow the bdesc_2arg layout.
   Every row here has a zero name: these builtins are registered explicitly
   (with hand-picked function types) in ix86_init_mmx_sse_builtins.  */
11875 static const struct builtin_description bdesc_1arg[] =
11877 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
11878 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
11880 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
11881 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
11882 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
11884 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
11885 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
11886 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
11887 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
11889 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
11890 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
11891 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
11893 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
11895 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
11896 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
11898 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
11899 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
11900 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
11901 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
11902 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
11904 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
11906 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
11907 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
11909 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
11910 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
11911 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
/* Target hook for builtin setup; as visible here it simply delegates to the
   MMX/SSE initializer below.  */
11915 ix86_init_builtins ()
11918 ix86_init_mmx_sse_builtins ();
11921 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
11922    is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
11925 ix86_init_mmx_sse_builtins ()
11927 const struct builtin_description * d;
/* Pointer types used by the load/store builtins.  */
11930 tree pchar_type_node = build_pointer_type (char_type_node);
11931 tree pfloat_type_node = build_pointer_type (float_type_node);
11932 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
11933 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
11934 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
/* Function types, named RESULT_ftype_ARGS; each is built once here and
   shared by all builtins with that signature.  */
11937 tree int_ftype_v4sf_v4sf
11938 = build_function_type_list (integer_type_node,
11939 V4SF_type_node, V4SF_type_node, NULL_TREE);
11940 tree v4si_ftype_v4sf_v4sf
11941 = build_function_type_list (V4SI_type_node,
11942 V4SF_type_node, V4SF_type_node, NULL_TREE);
11943 /* MMX/SSE/integer conversions.  */
11944 tree int_ftype_v4sf
11945 = build_function_type_list (integer_type_node,
11946 V4SF_type_node, NULL_TREE);
11947 tree int_ftype_v8qi
11948 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
11949 tree v4sf_ftype_v4sf_int
11950 = build_function_type_list (V4SF_type_node,
11951 V4SF_type_node, integer_type_node, NULL_TREE);
11952 tree v4sf_ftype_v4sf_v2si
11953 = build_function_type_list (V4SF_type_node,
11954 V4SF_type_node, V2SI_type_node, NULL_TREE);
11955 tree int_ftype_v4hi_int
11956 = build_function_type_list (integer_type_node,
11957 V4HI_type_node, integer_type_node, NULL_TREE);
11958 tree v4hi_ftype_v4hi_int_int
11959 = build_function_type_list (V4HI_type_node, V4HI_type_node,
11960 integer_type_node, integer_type_node,
11962 /* Miscellaneous.  */
11963 tree v8qi_ftype_v4hi_v4hi
11964 = build_function_type_list (V8QI_type_node,
11965 V4HI_type_node, V4HI_type_node, NULL_TREE);
11966 tree v4hi_ftype_v2si_v2si
11967 = build_function_type_list (V4HI_type_node,
11968 V2SI_type_node, V2SI_type_node, NULL_TREE);
11969 tree v4sf_ftype_v4sf_v4sf_int
11970 = build_function_type_list (V4SF_type_node,
11971 V4SF_type_node, V4SF_type_node,
11972 integer_type_node, NULL_TREE);
11973 tree v2si_ftype_v4hi_v4hi
11974 = build_function_type_list (V2SI_type_node,
11975 V4HI_type_node, V4HI_type_node, NULL_TREE);
11976 tree v4hi_ftype_v4hi_int
11977 = build_function_type_list (V4HI_type_node,
11978 V4HI_type_node, integer_type_node, NULL_TREE);
11979 tree v4hi_ftype_v4hi_di
11980 = build_function_type_list (V4HI_type_node,
11981 V4HI_type_node, long_long_unsigned_type_node,
11983 tree v2si_ftype_v2si_di
11984 = build_function_type_list (V2SI_type_node,
11985 V2SI_type_node, long_long_unsigned_type_node,
11987 tree void_ftype_void
11988 = build_function_type (void_type_node, void_list_node);
11989 tree void_ftype_unsigned
11990 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
11991 tree unsigned_ftype_void
11992 = build_function_type (unsigned_type_node, void_list_node);
11994 = build_function_type (long_long_unsigned_type_node, void_list_node);
11995 tree v4sf_ftype_void
11996 = build_function_type (V4SF_type_node, void_list_node);
11997 tree v2si_ftype_v4sf
11998 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
11999 /* Loads/stores.  */
12000 tree void_ftype_v8qi_v8qi_pchar
12001 = build_function_type_list (void_type_node,
12002 V8QI_type_node, V8QI_type_node,
12003 pchar_type_node, NULL_TREE);
12004 tree v4sf_ftype_pfloat
12005 = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
12006 /* @@@ the type is bogus */
/* NOTE(review): despite the _pv2si names, the next two types are built with
   pv2di_type_node, while the analogous v2df types further down use
   pv2si_type_node.  This looks like a typo for pv2si_type_node -- confirm
   against the loadhps/loadlps/storehps/storelps patterns.  */
12007 tree v4sf_ftype_v4sf_pv2si
12008 = build_function_type_list (V4SF_type_node,
12009 V4SF_type_node, pv2di_type_node, NULL_TREE);
12010 tree void_ftype_pv2si_v4sf
12011 = build_function_type_list (void_type_node,
12012 pv2di_type_node, V4SF_type_node, NULL_TREE);
12013 tree void_ftype_pfloat_v4sf
12014 = build_function_type_list (void_type_node,
12015 pfloat_type_node, V4SF_type_node, NULL_TREE);
12016 tree void_ftype_pdi_di
12017 = build_function_type_list (void_type_node,
12018 pdi_type_node, long_long_unsigned_type_node,
12020 tree void_ftype_pv2di_v2di
12021 = build_function_type_list (void_type_node,
12022 pv2di_type_node, V2DI_type_node, NULL_TREE);
12023 /* Normal vector unops.  */
12024 tree v4sf_ftype_v4sf
12025 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12027 /* Normal vector binops.  */
12028 tree v4sf_ftype_v4sf_v4sf
12029 = build_function_type_list (V4SF_type_node,
12030 V4SF_type_node, V4SF_type_node, NULL_TREE);
12031 tree v8qi_ftype_v8qi_v8qi
12032 = build_function_type_list (V8QI_type_node,
12033 V8QI_type_node, V8QI_type_node, NULL_TREE);
12034 tree v4hi_ftype_v4hi_v4hi
12035 = build_function_type_list (V4HI_type_node,
12036 V4HI_type_node, V4HI_type_node, NULL_TREE);
12037 tree v2si_ftype_v2si_v2si
12038 = build_function_type_list (V2SI_type_node,
12039 V2SI_type_node, V2SI_type_node, NULL_TREE);
12040 tree di_ftype_di_di
12041 = build_function_type_list (long_long_unsigned_type_node,
12042 long_long_unsigned_type_node,
12043 long_long_unsigned_type_node, NULL_TREE);
/* V2SF types used by the 3DNow! builtins below.  */
12045 tree v2si_ftype_v2sf
12046 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12047 tree v2sf_ftype_v2si
12048 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12049 tree v2si_ftype_v2si
12050 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12051 tree v2sf_ftype_v2sf
12052 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12053 tree v2sf_ftype_v2sf_v2sf
12054 = build_function_type_list (V2SF_type_node,
12055 V2SF_type_node, V2SF_type_node, NULL_TREE);
12056 tree v2si_ftype_v2sf_v2sf
12057 = build_function_type_list (V2SI_type_node,
12058 V2SF_type_node, V2SF_type_node, NULL_TREE);
/* Types used by the SSE2 builtins.  */
12059 tree pint_type_node = build_pointer_type (integer_type_node);
12060 tree pdouble_type_node = build_pointer_type (double_type_node);
12061 tree int_ftype_v2df_v2df
12062 = build_function_type_list (integer_type_node,
12063 V2DF_type_node, V2DF_type_node, NULL_TREE);
12066 = build_function_type (intTI_type_node, void_list_node);
12067 tree ti_ftype_ti_ti
12068 = build_function_type_list (intTI_type_node,
12069 intTI_type_node, intTI_type_node, NULL_TREE);
12070 tree void_ftype_pvoid
12071 = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
12073 = build_function_type_list (V2DI_type_node,
12074 long_long_unsigned_type_node, NULL_TREE);
12075 tree v4sf_ftype_v4si
12076 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12077 tree v4si_ftype_v4sf
12078 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12079 tree v2df_ftype_v4si
12080 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12081 tree v4si_ftype_v2df
12082 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12083 tree v2si_ftype_v2df
12084 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12085 tree v4sf_ftype_v2df
12086 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12087 tree v2df_ftype_v2si
12088 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12089 tree v2df_ftype_v4sf
12090 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12091 tree int_ftype_v2df
12092 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12093 tree v2df_ftype_v2df_int
12094 = build_function_type_list (V2DF_type_node,
12095 V2DF_type_node, integer_type_node, NULL_TREE);
12096 tree v4sf_ftype_v4sf_v2df
12097 = build_function_type_list (V4SF_type_node,
12098 V4SF_type_node, V2DF_type_node, NULL_TREE);
12099 tree v2df_ftype_v2df_v4sf
12100 = build_function_type_list (V2DF_type_node,
12101 V2DF_type_node, V4SF_type_node, NULL_TREE);
12102 tree v2df_ftype_v2df_v2df_int
12103 = build_function_type_list (V2DF_type_node,
12104 V2DF_type_node, V2DF_type_node,
12107 tree v2df_ftype_v2df_pv2si
12108 = build_function_type_list (V2DF_type_node,
12109 V2DF_type_node, pv2si_type_node, NULL_TREE);
12110 tree void_ftype_pv2si_v2df
12111 = build_function_type_list (void_type_node,
12112 pv2si_type_node, V2DF_type_node, NULL_TREE);
12113 tree void_ftype_pdouble_v2df
12114 = build_function_type_list (void_type_node,
12115 pdouble_type_node, V2DF_type_node, NULL_TREE);
12116 tree void_ftype_pint_int
12117 = build_function_type_list (void_type_node,
12118 pint_type_node, integer_type_node, NULL_TREE);
12119 tree void_ftype_v16qi_v16qi_pchar
12120 = build_function_type_list (void_type_node,
12121 V16QI_type_node, V16QI_type_node,
12122 pchar_type_node, NULL_TREE);
12123 tree v2df_ftype_pdouble
12124 = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
12125 tree v2df_ftype_v2df_v2df
12126 = build_function_type_list (V2DF_type_node,
12127 V2DF_type_node, V2DF_type_node, NULL_TREE);
12128 tree v16qi_ftype_v16qi_v16qi
12129 = build_function_type_list (V16QI_type_node,
12130 V16QI_type_node, V16QI_type_node, NULL_TREE);
12131 tree v8hi_ftype_v8hi_v8hi
12132 = build_function_type_list (V8HI_type_node,
12133 V8HI_type_node, V8HI_type_node, NULL_TREE);
12134 tree v4si_ftype_v4si_v4si
12135 = build_function_type_list (V4SI_type_node,
12136 V4SI_type_node, V4SI_type_node, NULL_TREE);
12137 tree v2di_ftype_v2di_v2di
12138 = build_function_type_list (V2DI_type_node,
12139 V2DI_type_node, V2DI_type_node, NULL_TREE);
12140 tree v2di_ftype_v2df_v2df
12141 = build_function_type_list (V2DI_type_node,
12142 V2DF_type_node, V2DF_type_node, NULL_TREE);
12143 tree v2df_ftype_v2df
12144 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12145 tree v2df_ftype_double
12146 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12147 tree v2df_ftype_double_double
12148 = build_function_type_list (V2DF_type_node,
12149 double_type_node, double_type_node, NULL_TREE);
12150 tree int_ftype_v8hi_int
12151 = build_function_type_list (integer_type_node,
12152 V8HI_type_node, integer_type_node, NULL_TREE);
12153 tree v8hi_ftype_v8hi_int_int
12154 = build_function_type_list (V8HI_type_node,
12155 V8HI_type_node, integer_type_node,
12156 integer_type_node, NULL_TREE);
12157 tree v2di_ftype_v2di_int
12158 = build_function_type_list (V2DI_type_node,
12159 V2DI_type_node, integer_type_node, NULL_TREE);
12160 tree v4si_ftype_v4si_int
12161 = build_function_type_list (V4SI_type_node,
12162 V4SI_type_node, integer_type_node, NULL_TREE);
12163 tree v8hi_ftype_v8hi_int
12164 = build_function_type_list (V8HI_type_node,
12165 V8HI_type_node, integer_type_node, NULL_TREE);
12166 tree v8hi_ftype_v8hi_v2di
12167 = build_function_type_list (V8HI_type_node,
12168 V8HI_type_node, V2DI_type_node, NULL_TREE);
12169 tree v4si_ftype_v4si_v2di
12170 = build_function_type_list (V4SI_type_node,
12171 V4SI_type_node, V2DI_type_node, NULL_TREE);
12172 tree v4si_ftype_v8hi_v8hi
12173 = build_function_type_list (V4SI_type_node,
12174 V8HI_type_node, V8HI_type_node, NULL_TREE);
12175 tree di_ftype_v8qi_v8qi
12176 = build_function_type_list (long_long_unsigned_type_node,
12177 V8QI_type_node, V8QI_type_node, NULL_TREE);
12178 tree v2di_ftype_v16qi_v16qi
12179 = build_function_type_list (V2DI_type_node,
12180 V16QI_type_node, V16QI_type_node, NULL_TREE);
12181 tree int_ftype_v16qi
12182 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12184 /* Add all builtins that are more or less simple operations on two
12186 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12188 /* Use one of the operands; the target can have a different mode for
12189 mask-generating compares.  */
12190 enum machine_mode mode;
12195 mode = insn_data[d->icode].operand[1].mode;
/* Select the function type matching the operand mode.  */
12200 type = v16qi_ftype_v16qi_v16qi;
12203 type = v8hi_ftype_v8hi_v8hi;
12206 type = v4si_ftype_v4si_v4si;
12209 type = v2di_ftype_v2di_v2di;
12212 type = v2df_ftype_v2df_v2df;
12215 type = ti_ftype_ti_ti;
12218 type = v4sf_ftype_v4sf_v4sf;
12221 type = v8qi_ftype_v8qi_v8qi;
12224 type = v4hi_ftype_v4hi_v4hi;
12227 type = v2si_ftype_v2si_v2si;
12230 type = di_ftype_di_di;
12237 /* Override for comparisons.  */
12238 if (d->icode == CODE_FOR_maskcmpv4sf3
12239 || d->icode == CODE_FOR_maskncmpv4sf3
12240 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12241 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12242 type = v4si_ftype_v4sf_v4sf;
12244 if (d->icode == CODE_FOR_maskcmpv2df3
12245 || d->icode == CODE_FOR_maskncmpv2df3
12246 || d->icode == CODE_FOR_vmmaskcmpv2df3
12247 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12248 type = v2di_ftype_v2df_v2df;
12250 def_builtin (d->mask, d->name, type, d->code);
12253 /* Add the remaining MMX insns with somewhat more complicated types.  */
12254 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12255 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
/* NOTE(review): ldmxcsr/stmxcsr manipulate the SSE control/status register
   but are registered under MASK_MMX here; MASK_SSE1 may be intended --
   confirm.  */
12256 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12257 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12258 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12259 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12260 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12262 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12263 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12264 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12266 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12267 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12269 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12270 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12272 /* comi/ucomi insns.  */
12273 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12274 if (d->mask == MASK_SSE2)
12275 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12277 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12279 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12280 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12281 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12283 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12284 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12285 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12286 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12287 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12288 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12290 def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
12291 def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
12292 def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
12293 def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
12295 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12296 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12298 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12300 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12301 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12302 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12303 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12304 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12305 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12307 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12308 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12309 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12310 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12312 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12313 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12314 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12315 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12317 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12319 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12321 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12322 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12323 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12324 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12325 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12326 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12328 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12330 /* Original 3DNow!  */
12331 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12332 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12333 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12334 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12335 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12336 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12337 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12338 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12339 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12340 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12341 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12342 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12343 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12344 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12345 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12346 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12347 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12348 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12349 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12350 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12352 /* 3DNow! extension as used in the Athlon CPU.  */
12353 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12354 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12355 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12356 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12357 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12358 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12360 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12363 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12364 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12366 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12367 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12369 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
12370 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
12371 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
12372 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12373 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12374 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12376 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12377 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12378 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12379 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12381 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12382 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12383 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12384 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12385 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12387 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12388 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12389 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12390 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12392 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12393 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12395 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12397 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12398 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12400 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12401 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12402 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12403 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12404 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12406 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12408 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12409 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12411 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12412 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12413 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12415 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12416 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12417 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12419 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12420 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12421 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12422 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
12423 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
12424 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12425 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12427 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
12428 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12429 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
12431 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12432 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12433 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12435 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12436 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12437 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12439 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12440 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12442 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12443 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12444 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12446 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12447 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12448 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12450 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12451 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12453 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
12456 /* Errors in the source file can cause expand_expr to return const0_rtx
12457 where we expect a vector. To avoid crashing, use one of the vector
12458 clear instructions. */
/* Return X unchanged when it is already a real vector value; otherwise
   replace the bogus const0_rtx with a freshly cleared register of MODE.
   NOTE(review): the "static rtx" return-type line, the braces and the final
   "return x;" appear to be elided from this view of the file.  */
12460 safe_vector_operand (x, mode)
12462 enum machine_mode mode;
/* Only a const0_rtx placeholder needs rescuing; any other rtx is fine.  */
12464 if (x != const0_rtx)
12466 x = gen_reg_rtx (mode);
/* MMX/3DNow! modes are cleared through a DImode view of the register...  */
12468 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
12469 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12470 : gen_rtx_SUBREG (DImode, x, 0)));
/* ...while SSE modes are cleared through a V4SFmode view.  */
12472 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12473 : gen_rtx_SUBREG (V4SFmode, x, 0)));
12477 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expand a two-operand builtin through insn pattern ICODE: evaluate both
   arguments from ARGLIST, coerce each operand to the pattern's declared
   mode/predicate, emit the insn and return the result register.
   NOTE(review): several lines (declarations, braces, the emit/return tail)
   are elided from this view of the file.  */
12480 ix86_expand_binop_builtin (icode, arglist, target)
12481 enum insn_code icode;
12486 tree arg0 = TREE_VALUE (arglist);
12487 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12488 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12489 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12490 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12491 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12492 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Rescue const0_rtx placeholders produced by front-end errors.  */
12494 if (VECTOR_MODE_P (mode0))
12495 op0 = safe_vector_operand (op0, mode0);
12496 if (VECTOR_MODE_P (mode1))
12497 op1 = safe_vector_operand (op1, mode1);
/* Reuse TARGET only when it matches the destination mode and predicate
   (the leading "if (! target" line is elided from this view).  */
12500 || GET_MODE (target) != tmode
12501 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12502 target = gen_reg_rtx (tmode);
12504 /* In case the insn wants input operands in modes different from
12505 the result, abort. */
12506 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
/* Force each operand into a register of the expected mode if the
   pattern's predicate rejects it as-is.  */
12509 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12510 op0 = copy_to_mode_reg (mode0, op0);
12511 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12512 op1 = copy_to_mode_reg (mode1, op1);
12514 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12515 yet one of the two must not be a memory. This is normally enforced
12516 by expanders, but we didn't bother to create one here. */
12517 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12518 op0 = copy_to_mode_reg (mode0, op0);
12520 pat = GEN_FCN (icode) (target, op0, op1);
12527 /* In type_for_mode we restrict the ability to create TImode types
12528 to hosts with 64-bit H_W_I. So we've defined the SSE logicals
12529 to have a V4SFmode signature. Convert them in-place to TImode. */
/* Expand an SSE logical builtin (and/andn/or/xor): both V4SFmode arguments
   are reinterpreted as TImode, the TImode insn is emitted, and the result
   is handed back re-typed as V4SFmode.  */
12532 ix86_expand_timode_binop_builtin (icode, arglist, target)
12533 enum insn_code icode;
12538 tree arg0 = TREE_VALUE (arglist);
12539 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12540 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12541 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
/* Reinterpret the V4SF operands as TImode without moving data.  */
12543 op0 = gen_lowpart (TImode, op0);
12544 op1 = gen_lowpart (TImode, op1);
/* NOTE(review): the incoming TARGET is discarded and a fresh TImode
   register is always used -- presumably because TARGET, if any, would
   have V4SFmode; confirm against callers.  */
12545 target = gen_reg_rtx (TImode);
12547 if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
12548 op0 = copy_to_mode_reg (TImode, op0);
12549 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
12550 op1 = copy_to_mode_reg (TImode, op1);
12552 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12553 yet one of the two must not be a memory. This is normally enforced
12554 by expanders, but we didn't bother to create one here. */
12555 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12556 op0 = copy_to_mode_reg (TImode, op0);
12558 pat = GEN_FCN (icode) (target, op0, op1);
/* Re-type the TImode result back to the V4SFmode the caller expects.  */
12563 return gen_lowpart (V4SFmode, target);
12566 /* Subroutine of ix86_expand_builtin to take care of stores. */
/* ARG0 is the destination pointer and ARG1 the value to store; the store
   insn ICODE is emitted with a MEM destination built from ARG0.
   NOTE(review): declarations, braces and the emit/return tail are elided
   from this view of the file.  */
12569 ix86_expand_store_builtin (icode, arglist)
12570 enum insn_code icode;
12574 tree arg0 = TREE_VALUE (arglist);
12575 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12576 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12577 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12578 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12579 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
/* Rescue a const0_rtx placeholder produced by front-end errors.  */
12581 if (VECTOR_MODE_P (mode1))
12582 op1 = safe_vector_operand (op1, mode1);
/* Turn the pointer argument into a MEM of the store's destination mode.  */
12584 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12586 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12587 op1 = copy_to_mode_reg (mode1, op1);
12589 pat = GEN_FCN (icode) (op0, op1);
12595 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
/* Expand a one-operand builtin through ICODE.  When DO_LOAD is nonzero,
   ARG0 is a pointer and op0 is wrapped in a MEM so the insn performs the
   load itself; otherwise ARG0 is the value operand.
   NOTE(review): the conditional guarding the MEM-wrapping and the
   emit/return tail are elided from this view of the file.  */
12598 ix86_expand_unop_builtin (icode, arglist, target, do_load)
12599 enum insn_code icode;
12605 tree arg0 = TREE_VALUE (arglist);
12606 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12607 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12608 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Reuse TARGET only when it matches the destination mode and predicate
   (the leading "if (! target" line is elided from this view).  */
12611 || GET_MODE (target) != tmode
12612 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12613 target = gen_reg_rtx (tmode);
12615 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12618 if (VECTOR_MODE_P (mode0))
12619 op0 = safe_vector_operand (op0, mode0);
12621 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12622 op0 = copy_to_mode_reg (mode0, op0);
12625 pat = GEN_FCN (icode) (target, op0);
12632 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12633 sqrtss, rsqrtss, rcpss. */
/* These scalar SSE insns take the source twice: once as the element the
   operation applies to and once to supply the pass-through upper elements.
   NOTE(review): the "op1 = op0;" assignment that duplicates the source
   appears to be elided from this view -- op1 would otherwise be used
   uninitialized below; confirm against the full file.  */
12636 ix86_expand_unop1_builtin (icode, arglist, target)
12637 enum insn_code icode;
12642 tree arg0 = TREE_VALUE (arglist);
12643 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12644 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12645 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Reuse TARGET only when it matches the destination mode and predicate
   (the leading "if (! target" line is elided from this view).  */
12648 || GET_MODE (target) != tmode
12649 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12650 target = gen_reg_rtx (tmode);
12652 if (VECTOR_MODE_P (mode0))
12653 op0 = safe_vector_operand (op0, mode0);
12655 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12656 op0 = copy_to_mode_reg (mode0, op0);
/* Both input operands share mode0; the pattern's second input is the
   duplicated source.  */
12659 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12660 op1 = copy_to_mode_reg (mode0, op1);
12662 pat = GEN_FCN (icode) (target, op0, op1);
12669 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* D describes the builtin: the insn code to emit and the rtx comparison
   code.  Emits a vector compare whose per-element all-ones/all-zeros mask
   lands in TARGET.  */
12672 ix86_expand_sse_compare (d, arglist, target)
12673 const struct builtin_description *d;
12678 tree arg0 = TREE_VALUE (arglist);
12679 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12680 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12681 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12683 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12684 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12685 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12686 enum rtx_code comparison = d->comparison;
/* Rescue const0_rtx placeholders produced by front-end errors.  */
12688 if (VECTOR_MODE_P (mode0))
12689 op0 = safe_vector_operand (op0, mode0);
12690 if (VECTOR_MODE_P (mode1))
12691 op1 = safe_vector_operand (op1, mode1);
12693 /* Swap operands if we have a comparison that isn't available in
/* NOTE(review): the operand-swap branch is partially elided here; op1 is
   copied into a fresh register, presumably so the operands can be
   exchanged safely -- confirm against the full file.  */
12697 rtx tmp = gen_reg_rtx (mode1);
12698 emit_move_insn (tmp, op1);
/* Reuse TARGET only when it matches the destination mode and predicate
   (the leading "if (! target" line is elided from this view).  */
12704 || GET_MODE (target) != tmode
12705 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
12706 target = gen_reg_rtx (tmode);
12708 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12709 op0 = copy_to_mode_reg (mode0, op0);
12710 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12711 op1 = copy_to_mode_reg (mode1, op1);
/* Operand 3 of the compare pattern is the comparison rtx itself.  */
12713 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12714 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12721 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Emits a scalar comi/ucomi flag-setting compare and materializes the
   outcome as a 0/1 value in an SImode register, built through a QImode
   subreg so only the low byte is written by the setcc.  */
12724 ix86_expand_sse_comi (d, arglist, target)
12725 const struct builtin_description *d;
12730 tree arg0 = TREE_VALUE (arglist);
12731 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12732 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12733 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12735 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
12736 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
12737 enum rtx_code comparison = d->comparison;
/* Rescue const0_rtx placeholders produced by front-end errors.  */
12739 if (VECTOR_MODE_P (mode0))
12740 op0 = safe_vector_operand (op0, mode0);
12741 if (VECTOR_MODE_P (mode1))
12742 op1 = safe_vector_operand (op1, mode1);
12744 /* Swap operands if we have a comparison that isn't available in
/* Zero the full SImode result first, then point TARGET at its QImode
   subreg so the setcc below writes only the low byte and the upper bytes
   stay well-defined.  */
12753 target = gen_reg_rtx (SImode);
12754 emit_move_insn (target, const0_rtx);
12755 target = gen_rtx_SUBREG (QImode, target, 0);
12757 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12758 op0 = copy_to_mode_reg (mode0, op0);
12759 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12760 op1 = copy_to_mode_reg (mode1, op1);
12762 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12763 pat = GEN_FCN (d->icode) (op0, op1, op2);
/* Convert the flags register into a 0/1 byte via a SET of the
   strict-low-part of TARGET.  */
12767 emit_insn (gen_rtx_SET (VOIDmode,
12768 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
12769 gen_rtx_fmt_ee (comparison, QImode,
12770 gen_rtx_REG (CCmode, FLAGS_REG),
/* Hand back the containing SImode register, not the QImode subreg.  */
12773 return SUBREG_REG (target);
12776 /* Expand an expression EXP that calls a built-in function,
12777 with result going to TARGET if that's convenient
12778 (and in mode MODE if that's convenient).
12779 SUBTARGET may be used as the target for computing one of EXP's operands.
12780 IGNORE is nonzero if the value is to be ignored. */
12783 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
12786 rtx subtarget ATTRIBUTE_UNUSED;
12787 enum machine_mode mode ATTRIBUTE_UNUSED;
12788 int ignore ATTRIBUTE_UNUSED;
12790 const struct builtin_description *d;
12792 enum insn_code icode;
12793 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12794 tree arglist = TREE_OPERAND (exp, 1);
12795 tree arg0, arg1, arg2;
12796 rtx op0, op1, op2, pat;
12797 enum machine_mode tmode, mode0, mode1, mode2;
12798 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12802 case IX86_BUILTIN_EMMS:
12803 emit_insn (gen_emms ());
12806 case IX86_BUILTIN_SFENCE:
12807 emit_insn (gen_sfence ());
12810 case IX86_BUILTIN_PEXTRW:
12811 case IX86_BUILTIN_PEXTRW128:
12812 icode = (fcode == IX86_BUILTIN_PEXTRW
12813 ? CODE_FOR_mmx_pextrw
12814 : CODE_FOR_sse2_pextrw);
12815 arg0 = TREE_VALUE (arglist);
12816 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12817 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12818 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12819 tmode = insn_data[icode].operand[0].mode;
12820 mode0 = insn_data[icode].operand[1].mode;
12821 mode1 = insn_data[icode].operand[2].mode;
12823 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12824 op0 = copy_to_mode_reg (mode0, op0);
12825 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12827 /* @@@ better error message */
12828 error ("selector must be an immediate");
12829 return gen_reg_rtx (tmode);
12832 || GET_MODE (target) != tmode
12833 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12834 target = gen_reg_rtx (tmode);
12835 pat = GEN_FCN (icode) (target, op0, op1);
12841 case IX86_BUILTIN_PINSRW:
12842 case IX86_BUILTIN_PINSRW128:
12843 icode = (fcode == IX86_BUILTIN_PINSRW
12844 ? CODE_FOR_mmx_pinsrw
12845 : CODE_FOR_sse2_pinsrw);
12846 arg0 = TREE_VALUE (arglist);
12847 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12848 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12849 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12850 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12851 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12852 tmode = insn_data[icode].operand[0].mode;
12853 mode0 = insn_data[icode].operand[1].mode;
12854 mode1 = insn_data[icode].operand[2].mode;
12855 mode2 = insn_data[icode].operand[3].mode;
12857 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12858 op0 = copy_to_mode_reg (mode0, op0);
12859 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12860 op1 = copy_to_mode_reg (mode1, op1);
12861 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12863 /* @@@ better error message */
12864 error ("selector must be an immediate");
12868 || GET_MODE (target) != tmode
12869 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12870 target = gen_reg_rtx (tmode);
12871 pat = GEN_FCN (icode) (target, op0, op1, op2);
12877 case IX86_BUILTIN_MASKMOVQ:
12878 icode = (fcode == IX86_BUILTIN_MASKMOVQ
12879 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
12880 : CODE_FOR_sse2_maskmovdqu);
12881 /* Note the arg order is different from the operand order. */
12882 arg1 = TREE_VALUE (arglist);
12883 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
12884 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12885 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12886 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12887 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12888 mode0 = insn_data[icode].operand[0].mode;
12889 mode1 = insn_data[icode].operand[1].mode;
12890 mode2 = insn_data[icode].operand[2].mode;
12892 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12893 op0 = copy_to_mode_reg (mode0, op0);
12894 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12895 op1 = copy_to_mode_reg (mode1, op1);
12896 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
12897 op2 = copy_to_mode_reg (mode2, op2);
12898 pat = GEN_FCN (icode) (op0, op1, op2);
12904 case IX86_BUILTIN_SQRTSS:
12905 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
12906 case IX86_BUILTIN_RSQRTSS:
12907 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
12908 case IX86_BUILTIN_RCPSS:
12909 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
12911 case IX86_BUILTIN_ANDPS:
12912 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
12914 case IX86_BUILTIN_ANDNPS:
12915 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
12917 case IX86_BUILTIN_ORPS:
12918 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
12920 case IX86_BUILTIN_XORPS:
12921 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
12924 case IX86_BUILTIN_LOADAPS:
12925 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
12927 case IX86_BUILTIN_LOADUPS:
12928 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
12930 case IX86_BUILTIN_STOREAPS:
12931 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
12932 case IX86_BUILTIN_STOREUPS:
12933 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
12935 case IX86_BUILTIN_LOADSS:
12936 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
12938 case IX86_BUILTIN_STORESS:
12939 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
12941 case IX86_BUILTIN_LOADHPS:
12942 case IX86_BUILTIN_LOADLPS:
12943 case IX86_BUILTIN_LOADHPD:
12944 case IX86_BUILTIN_LOADLPD:
12945 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
12946 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
12947 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
12948 : CODE_FOR_sse2_movlpd);
12949 arg0 = TREE_VALUE (arglist);
12950 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12951 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12952 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12953 tmode = insn_data[icode].operand[0].mode;
12954 mode0 = insn_data[icode].operand[1].mode;
12955 mode1 = insn_data[icode].operand[2].mode;
12957 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12958 op0 = copy_to_mode_reg (mode0, op0);
12959 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
12961 || GET_MODE (target) != tmode
12962 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12963 target = gen_reg_rtx (tmode);
12964 pat = GEN_FCN (icode) (target, op0, op1);
12970 case IX86_BUILTIN_STOREHPS:
12971 case IX86_BUILTIN_STORELPS:
12972 case IX86_BUILTIN_STOREHPD:
12973 case IX86_BUILTIN_STORELPD:
12974 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
12975 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
12976 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
12977 : CODE_FOR_sse2_movlpd);
12978 arg0 = TREE_VALUE (arglist);
12979 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12980 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12981 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12982 mode0 = insn_data[icode].operand[1].mode;
12983 mode1 = insn_data[icode].operand[2].mode;
12985 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12986 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12987 op1 = copy_to_mode_reg (mode1, op1);
12989 pat = GEN_FCN (icode) (op0, op0, op1);
12995 case IX86_BUILTIN_MOVNTPS:
12996 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
12997 case IX86_BUILTIN_MOVNTQ:
12998 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13000 case IX86_BUILTIN_LDMXCSR:
13001 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13002 target = assign_386_stack_local (SImode, 0);
13003 emit_move_insn (target, op0);
13004 emit_insn (gen_ldmxcsr (target));
13007 case IX86_BUILTIN_STMXCSR:
13008 target = assign_386_stack_local (SImode, 0);
13009 emit_insn (gen_stmxcsr (target));
13010 return copy_to_mode_reg (SImode, target);
13012 case IX86_BUILTIN_SHUFPS:
13013 case IX86_BUILTIN_SHUFPD:
13014 icode = (fcode == IX86_BUILTIN_SHUFPS
13015 ? CODE_FOR_sse_shufps
13016 : CODE_FOR_sse2_shufpd);
13017 arg0 = TREE_VALUE (arglist);
13018 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13019 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13020 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13021 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13022 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13023 tmode = insn_data[icode].operand[0].mode;
13024 mode0 = insn_data[icode].operand[1].mode;
13025 mode1 = insn_data[icode].operand[2].mode;
13026 mode2 = insn_data[icode].operand[3].mode;
13028 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13029 op0 = copy_to_mode_reg (mode0, op0);
13030 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13031 op1 = copy_to_mode_reg (mode1, op1);
13032 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13034 /* @@@ better error message */
13035 error ("mask must be an immediate");
13036 return gen_reg_rtx (tmode);
13039 || GET_MODE (target) != tmode
13040 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13041 target = gen_reg_rtx (tmode);
13042 pat = GEN_FCN (icode) (target, op0, op1, op2);
13048 case IX86_BUILTIN_PSHUFW:
13049 case IX86_BUILTIN_PSHUFD:
13050 case IX86_BUILTIN_PSHUFHW:
13051 case IX86_BUILTIN_PSHUFLW:
13052 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13053 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13054 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13055 : CODE_FOR_mmx_pshufw);
13056 arg0 = TREE_VALUE (arglist);
13057 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13058 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13059 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13060 tmode = insn_data[icode].operand[0].mode;
13061 mode1 = insn_data[icode].operand[1].mode;
13062 mode2 = insn_data[icode].operand[2].mode;
13064 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13065 op0 = copy_to_mode_reg (mode1, op0);
13066 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13068 /* @@@ better error message */
13069 error ("mask must be an immediate");
13073 || GET_MODE (target) != tmode
13074 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13075 target = gen_reg_rtx (tmode);
13076 pat = GEN_FCN (icode) (target, op0, op1);
13082 case IX86_BUILTIN_FEMMS:
13083 emit_insn (gen_femms ());
13086 case IX86_BUILTIN_PAVGUSB:
13087 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13089 case IX86_BUILTIN_PF2ID:
13090 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13092 case IX86_BUILTIN_PFACC:
13093 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13095 case IX86_BUILTIN_PFADD:
13096 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13098 case IX86_BUILTIN_PFCMPEQ:
13099 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13101 case IX86_BUILTIN_PFCMPGE:
13102 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13104 case IX86_BUILTIN_PFCMPGT:
13105 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13107 case IX86_BUILTIN_PFMAX:
13108 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13110 case IX86_BUILTIN_PFMIN:
13111 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13113 case IX86_BUILTIN_PFMUL:
13114 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13116 case IX86_BUILTIN_PFRCP:
13117 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13119 case IX86_BUILTIN_PFRCPIT1:
13120 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13122 case IX86_BUILTIN_PFRCPIT2:
13123 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13125 case IX86_BUILTIN_PFRSQIT1:
13126 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13128 case IX86_BUILTIN_PFRSQRT:
13129 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13131 case IX86_BUILTIN_PFSUB:
13132 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13134 case IX86_BUILTIN_PFSUBR:
13135 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13137 case IX86_BUILTIN_PI2FD:
13138 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13140 case IX86_BUILTIN_PMULHRW:
13141 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13143 case IX86_BUILTIN_PF2IW:
13144 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13146 case IX86_BUILTIN_PFNACC:
13147 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13149 case IX86_BUILTIN_PFPNACC:
13150 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13152 case IX86_BUILTIN_PI2FW:
13153 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13155 case IX86_BUILTIN_PSWAPDSI:
13156 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13158 case IX86_BUILTIN_PSWAPDSF:
13159 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13161 case IX86_BUILTIN_SSE_ZERO:
13162 target = gen_reg_rtx (V4SFmode);
13163 emit_insn (gen_sse_clrv4sf (target));
13166 case IX86_BUILTIN_MMX_ZERO:
13167 target = gen_reg_rtx (DImode);
13168 emit_insn (gen_mmx_clrdi (target));
13171 case IX86_BUILTIN_SQRTSD:
13172 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13173 case IX86_BUILTIN_LOADAPD:
13174 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13175 case IX86_BUILTIN_LOADUPD:
13176 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13178 case IX86_BUILTIN_STOREAPD:
13179 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13180 case IX86_BUILTIN_STOREUPD:
13181 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13183 case IX86_BUILTIN_LOADSD:
13184 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13186 case IX86_BUILTIN_STORESD:
13187 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13189 case IX86_BUILTIN_SETPD1:
13190 target = assign_386_stack_local (DFmode, 0);
13191 arg0 = TREE_VALUE (arglist);
13192 emit_move_insn (adjust_address (target, DFmode, 0),
13193 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13194 op0 = gen_reg_rtx (V2DFmode);
13195 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13196 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13199 case IX86_BUILTIN_SETPD:
13200 target = assign_386_stack_local (V2DFmode, 0);
13201 arg0 = TREE_VALUE (arglist);
13202 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13203 emit_move_insn (adjust_address (target, DFmode, 0),
13204 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13205 emit_move_insn (adjust_address (target, DFmode, 8),
13206 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13207 op0 = gen_reg_rtx (V2DFmode);
13208 emit_insn (gen_sse2_movapd (op0, target));
13211 case IX86_BUILTIN_LOADRPD:
13212 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13213 gen_reg_rtx (V2DFmode), 1);
13214 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13217 case IX86_BUILTIN_LOADPD1:
13218 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13219 gen_reg_rtx (V2DFmode), 1);
13220 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13223 case IX86_BUILTIN_STOREPD1:
13224 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13225 case IX86_BUILTIN_STORERPD:
13226 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13228 case IX86_BUILTIN_MFENCE:
13229 emit_insn (gen_sse2_mfence ());
13231 case IX86_BUILTIN_LFENCE:
13232 emit_insn (gen_sse2_lfence ());
13235 case IX86_BUILTIN_CLFLUSH:
13236 arg0 = TREE_VALUE (arglist);
13237 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13238 icode = CODE_FOR_sse2_clflush;
13239 mode0 = insn_data[icode].operand[0].mode;
13240 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13241 op0 = copy_to_mode_reg (mode0, op0);
13243 emit_insn (gen_sse2_clflush (op0));
13246 case IX86_BUILTIN_MOVNTPD:
13247 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13248 case IX86_BUILTIN_MOVNTDQ:
13249 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13250 case IX86_BUILTIN_MOVNTI:
13251 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13257 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13258 if (d->code == fcode)
13260 /* Compares are treated specially. */
13261 if (d->icode == CODE_FOR_maskcmpv4sf3
13262 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13263 || d->icode == CODE_FOR_maskncmpv4sf3
13264 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13265 || d->icode == CODE_FOR_maskcmpv2df3
13266 || d->icode == CODE_FOR_vmmaskcmpv2df3
13267 || d->icode == CODE_FOR_maskncmpv2df3
13268 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13269 return ix86_expand_sse_compare (d, arglist, target);
13271 return ix86_expand_binop_builtin (d->icode, arglist, target);
13274 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13275 if (d->code == fcode)
13276 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13278 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13279 if (d->code == fcode)
13280 return ix86_expand_sse_comi (d, arglist, target);
13282 /* @@@ Should really do something sensible here. */
13286 /* Store OPERAND to the memory after reload is completed. This means
13287 that we can't easily use assign_stack_local. */
/* Emits code that spills OPERAND (of machine mode MODE) to scratch stack
   memory and returns a MEM rtx addressing the spilled value.  On 64-bit
   targets with a red zone the slot lives below the stack pointer (no
   adjustment needed); otherwise the value is pushed via PRE_DEC of the
   stack pointer.  The matching deallocation is ix86_free_from_memory.
   NOTE(review): several interior lines of this function are not visible
   in this excerpt; comments describe only the visible statements.  */
13289 ix86_force_to_memory (mode, operand)
13290 enum machine_mode mode;
/* Only legal after reload: assign_stack_local can't be used this late.  */
13294 if (!reload_completed)
13296 if (TARGET_64BIT && TARGET_RED_ZONE)
/* Red zone: address the slot at sp - RED_ZONE_SIZE, store directly.  */
13298 result = gen_rtx_MEM (mode,
13299 gen_rtx_PLUS (Pmode,
13301 GEN_INT (-RED_ZONE_SIZE)));
13302 emit_move_insn (result, operand);
/* No red zone on 64-bit: push the value as a DImode word.  */
13304 else if (TARGET_64BIT && !TARGET_RED_ZONE)
13310 operand = gen_lowpart (DImode, operand);
13314 gen_rtx_SET (VOIDmode,
13315 gen_rtx_MEM (DImode,
13316 gen_rtx_PRE_DEC (DImode,
13317 stack_pointer_rtx)),
13323 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* Presumably the 32-bit DImode path: split into two SImode pushes,
   high word first — TODO confirm against the full source.  */
13332 split_di (&operand, 1, operands, operands + 1);
13334 gen_rtx_SET (VOIDmode,
13335 gen_rtx_MEM (SImode,
13336 gen_rtx_PRE_DEC (Pmode,
13337 stack_pointer_rtx)),
13340 gen_rtx_SET (VOIDmode,
13341 gen_rtx_MEM (SImode,
13342 gen_rtx_PRE_DEC (Pmode,
13343 stack_pointer_rtx)),
13348 /* It is better to store HImodes as SImodes. */
13349 if (!TARGET_PARTIAL_REG_STALL)
13350 operand = gen_lowpart (SImode, operand);
13354 gen_rtx_SET (VOIDmode,
13355 gen_rtx_MEM (GET_MODE (operand),
13356 gen_rtx_PRE_DEC (SImode,
13357 stack_pointer_rtx)),
13363 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13368 /* Free operand from the memory. */
/* Releases the stack space allocated by ix86_force_to_memory for a value
   of machine mode MODE.  When the 64-bit red zone was used, nothing was
   pushed, so nothing needs deallocating (hence the guard below).
   NOTE(review): the size computation between the visible conditionals is
   not shown in this excerpt.  */
13370 ix86_free_from_memory (mode)
13371 enum machine_mode mode;
/* Only the push paths (no red zone, or 32-bit) adjusted the stack.  */
13373 if (!TARGET_64BIT || !TARGET_RED_ZONE)
/* DImode and any 64-bit push occupy a full 8-byte slot; HImode kept
   its narrow size only when partial-register stalls prevented the
   widening done at spill time.  */
13377 if (mode == DImode || TARGET_64BIT)
13379 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13383 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13384 to pop or add instruction if registers are available. */
13385 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13386 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13391 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13392 QImode must go into class Q_REGS.
13393 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
13394 movdf to do mem-to-mem moves through integer regs. */
/* Implements PREFERRED_RELOAD_CLASS: given value X being reloaded into
   CLASS, return the class actually preferred (possibly NO_REGS to force
   the constant pool).  NOTE(review): the return statements on the paths
   marked below are not visible in this excerpt.  */
13396 ix86_preferred_reload_class (x, class)
13398 enum reg_class class;
/* Non-zero floating-point constants need special treatment.  */
13400 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13402 /* SSE can't load any constant directly yet. */
13403 if (SSE_CLASS_P (class))
13405 /* Floats can load 0 and 1. */
13406 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13408 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13409 if (MAYBE_SSE_CLASS_P (class))
13410 return (reg_class_subset_p (class, GENERAL_REGS)
13411 ? GENERAL_REGS : FLOAT_REGS);
13415 /* General regs can load everything. */
13416 if (reg_class_subset_p (class, GENERAL_REGS))
13417 return GENERAL_REGS;
13418 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13419 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
/* MMX registers cannot hold arbitrary constants either.  */
13422 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
/* QImode values must land in a class of byte-addressable registers.  */
13424 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13429 /* If we are copying between general and FP registers, we need a memory
13430 location. The same is true for SSE and MMX registers.
13432 The macro can't work reliably when one of the CLASSES is class containing
13433 registers from multiple units (SSE, MMX, integer). We avoid this by never
13434 combining those units in single alternative in the machine description.
13435 Ensure that this constraint holds to avoid unexpected surprises.
13437 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13438 enforce these sanity checks. */
/* Implements SECONDARY_MEMORY_NEEDED: nonzero when a MODE-sized value
   moving between CLASS1 and CLASS2 must go through memory.  */
13440 ix86_secondary_memory_needed (class1, class2, mode, strict)
13441 enum reg_class class1, class2;
13442 enum machine_mode mode;
/* Sanity check: a "maybe" class that is not a pure class mixes register
   units, which this predicate cannot answer reliably (see comment above).
   NOTE(review): the statement taken when this check fires is not visible
   in this excerpt.  */
13445 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13446 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13447 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13448 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13449 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13450 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
/* x87<->integer always needs memory; SSE<->integer and MMX<->integer
   need it except for SImode (movd can move 32 bits directly).  */
13457 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13458 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13459 && (mode) != SImode)
13460 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13461 && (mode) != SImode));
13463 /* Return the cost of moving data from a register in class CLASS1 to
13464 one in class CLASS2.
13466 It is not required that the cost always equal 2 when FROM is the same as TO;
13467 on some machines it is expensive to move between registers if they are not
13468 general registers. */
/* Implements REGISTER_MOVE_COST; costs come from the active ix86_cost
   table.  NOTE(review): the final return for the general-register case
   is not visible in this excerpt.  */
13470 ix86_register_move_cost (mode, class1, class2)
13471 enum machine_mode mode;
13472 enum reg_class class1, class2;
13474 /* In case we require secondary memory, compute cost of the store followed
13475 by load. In case of copying from general_purpose_register we may emit
13476 multiple stores followed by single load causing memory size mismatch
13477 stall. Count this as arbitarily high cost of 20. */
/* strict==0: REGISTER_MOVE_COST callers must not trip the sanity abort.  */
13478 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
/* Extra penalty when the source needs more hard regs than the target
   (store-forwarding size mismatch) — presumably add_cost covers it;
   TODO confirm the hidden lines that set add_cost.  */
13481 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
13483 return (MEMORY_MOVE_COST (mode, class1, 0)
13484 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
13486 /* Moves between SSE/MMX and integer unit are expensive. */
13487 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13488 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
13489 return ix86_cost->mmxsse_to_integer;
13490 if (MAYBE_FLOAT_CLASS_P (class1))
13491 return ix86_cost->fp_move;
13492 if (MAYBE_SSE_CLASS_P (class1))
13493 return ix86_cost->sse_move;
13494 if (MAYBE_MMX_CLASS_P (class1))
13495 return ix86_cost->mmx_move;
13499 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* Implements HARD_REGNO_MODE_OK; decision is made per register unit
   (flags, x87, SSE, MMX, general), with special care for QImode in
   non-byte-addressable general registers.  */
13501 ix86_hard_regno_mode_ok (regno, mode)
13503 enum machine_mode mode;
13505 /* Flags and only flags can only hold CCmode values. */
13506 if (CC_REGNO_P (regno))
13507 return GET_MODE_CLASS (mode) == MODE_CC;
/* Conversely, CC/random/partial-int modes never live in other regs.
   NOTE(review): the returned value on this path is not visible here.  */
13508 if (GET_MODE_CLASS (mode) == MODE_CC
13509 || GET_MODE_CLASS (mode) == MODE_RANDOM
13510 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
13512 if (FP_REGNO_P (regno))
13513 return VALID_FP_MODE_P (mode);
13514 if (SSE_REGNO_P (regno))
13515 return VALID_SSE_REG_MODE (mode);
13516 if (MMX_REGNO_P (regno))
13517 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
13518 /* We handle both integer and floats in the general purpose registers.
13519 In future we should be able to handle vector modes as well. */
13520 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13522 /* Take care for QImode values - they can be in non-QI regs, but then
13523 they do cause partial register stalls. */
/* regno < 4 are the byte-addressable eax..ebx; all regs are
   byte-addressable in 64-bit mode.  */
13524 if (regno < 4 || mode != QImode || TARGET_64BIT)
/* Otherwise allow it only when the partial-register stall is harmless
   or unavoidable (during/after reload).  */
13526 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13529 /* Return the cost of moving data of mode M between a
13530 register and memory. A value of 2 is the default; this cost is
13531 relative to those in `REGISTER_MOVE_COST'.
13533 If moving between registers and memory is more expensive than
13534 between two registers, you should define this macro to express the
13537 Model also increased moving costs of QImode registers in non
/* Implements MEMORY_MOVE_COST: IN nonzero means load (memory->reg),
   zero means store.  Costs are indexed from the active ix86_cost table
   by register class and mode size.  NOTE(review): the switch bodies
   computing `index' are largely not visible in this excerpt.  */
13541 ix86_memory_move_cost (mode, class, in)
13542 enum machine_mode mode;
13543 enum reg_class class;
13546 if (FLOAT_CLASS_P (class))
13564 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
13566 if (SSE_CLASS_P (class))
13569 switch (GET_MODE_SIZE (mode))
13583 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
13585 if (MMX_CLASS_P (class))
13588 switch (GET_MODE_SIZE (mode))
13599 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer classes: dispatch on size; QImode in a non-Q class pays the
   movzbl-load / extra-store penalty.  */
13601 switch (GET_MODE_SIZE (mode))
13605 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13606 : ix86_cost->movzbl_load);
13608 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13609 : ix86_cost->int_store[0] + 4);
13612 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13614 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
13615 if (mode == TFmode)
/* Wide modes: cost of one SImode move times the number of 32-bit words.  */
13617 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
13618 * (int) GET_MODE_SIZE (mode) / 4);
13622 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
/* SVR3 constructor hook: emits "pushl $<symbol>" into the init section
   so the startup code's DO_GLOBAL_CTORS_BODY can pop and call it.
   PRIORITY is accepted for the target-hook signature but unused.  */
13624 ix86_svr3_asm_out_constructor (symbol, priority)
13626 int priority ATTRIBUTE_UNUSED;
13629 fputs ("\tpushl $", asm_out_file);
13630 assemble_name (asm_out_file, XSTR (symbol, 0));
13631 fputc ('\n', asm_out_file);
/* Counter used to generate unique local labels (LPC$n / Ln$lz) for
   Mach-O lazy-binding stubs.  */
13637 static int current_machopic_label_num;
13639 /* Given a symbol name and its associated stub, write out the
13640 definition of the stub. */
/* Emits a Darwin/Mach-O lazy symbol stub to FILE: the stub jumps through
   a lazy pointer which initially points at a binder that calls
   dyld_stub_binding_helper.  A PIC and a non-PIC variant are emitted
   depending on conditions not visible in this excerpt (presumably
   MACHOPIC_PURE — TODO confirm).  */
13643 machopic_output_stub (file, symb, stub)
13645 const char *symb, *stub;
13647 unsigned int length;
13648 char *binder_name, *symbol_name, lazy_ptr_name[32];
13649 int label = ++current_machopic_label_num;
13651 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
13652 symb = (*targetm.strip_name_encoding) (symb);
13654 length = strlen (stub);
13655 binder_name = alloca (length + 32);
13656 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
13658 length = strlen (symb);
13659 symbol_name = alloca (length + 32);
13660 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
13662 sprintf (lazy_ptr_name, "L%d$lz", label);
13665 machopic_picsymbol_stub_section ();
13667 machopic_symbol_stub_section ();
13669 fprintf (file, "%s:\n", stub);
13670 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC stub: materialize the PC in %eax via call/pop, then jump through
   the lazy pointer addressed PC-relatively.  */
13674 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
13675 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
13676 fprintf (file, "\tjmp %%edx\n");
/* Non-PIC stub: indirect jump straight through the lazy pointer.  */
13679 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
/* Binder: push the lazy pointer's address and enter dyld's helper.  */
13681 fprintf (file, "%s:\n", binder_name);
13685 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
13686 fprintf (file, "\tpushl %%eax\n");
13689 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
13691 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
/* The lazy pointer itself, initialized to point at the binder.  */
13693 machopic_lazy_symbol_ptr_section ();
13694 fprintf (file, "%s:\n", lazy_ptr_name);
13695 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
13696 fprintf (file, "\t.long %s\n", binder_name);
13698 #endif /* TARGET_MACHO */
13700 /* Order the registers for register allocator. */
/* Fills reg_alloc_order[]: call-clobbered general regs first, then
   call-saved general regs, then x87 before SSE when doing x87 FP math
   (after SSE otherwise), then MMX; remaining slots are zero-filled for
   registers that are never allocated.  */
13703 x86_order_regs_for_local_alloc ()
13708 /* First allocate the local general purpose registers. */
13709 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13710 if (GENERAL_REGNO_P (i) && call_used_regs[i])
13711 reg_alloc_order [pos++] = i;
13713 /* Global general purpose registers. */
13714 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13715 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
13716 reg_alloc_order [pos++] = i;
13718 /* x87 registers come first in case we are doing FP math
/* Prefer the x87 stack when SSE math is disabled.  */
13720 if (!TARGET_SSE_MATH)
13721 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13722 reg_alloc_order [pos++] = i;
13724 /* SSE registers. */
13725 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
13726 reg_alloc_order [pos++] = i;
13727 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
13728 reg_alloc_order [pos++] = i;
13730 /* x87 registerts. */
/* With SSE math the x87 stack is a last resort among FP registers.  */
13731 if (TARGET_SSE_MATH)
13732 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13733 reg_alloc_order [pos++] = i;
13735 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
13736 reg_alloc_order [pos++] = i;
13738 /* Initialize the rest of array as we do not allocate some registers
13740 while (pos < FIRST_PSEUDO_REGISTER)
13741 reg_alloc_order [pos++] = 0;
/* Emits the body of a "this"-adjusting MI thunk for FUNCTION: add DELTA
   to the incoming `this' (in a register or on the stack depending on
   ABI and regparm), then tail-jump to FUNCTION, via the GOT/PLT when
   generating PIC code.  NOTE(review): the surrounding conditionals for
   64-bit vs 32-bit and PIC vs non-PIC are only partially visible in
   this excerpt.  */
13745 x86_output_mi_thunk (file, delta, function)
/* With -mregparm, `this' may be passed in a register; scan the arg list
   (purpose of the scan result is not visible here — TODO confirm).  */
13753 if (ix86_regparm > 0)
13754 parm = TYPE_ARG_TYPES (TREE_TYPE (function));
13757 for (; parm; parm = TREE_CHAIN (parm))
13758 if (TREE_VALUE (parm) == void_type_node)
13761 xops[0] = GEN_INT (delta);
/* 64-bit: `this' is the first integer argument, shifted by one slot if
   the function returns an aggregate via hidden pointer.  */
13764 int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
13765 xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
13766 output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
/* 64-bit PIC tail call through the GOT.  */
13769 fprintf (file, "\tjmp *");
13770 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13771 fprintf (file, "@GOTPCREL(%%rip)\n");
13775 fprintf (file, "\tjmp ");
13776 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13777 fprintf (file, "\n");
/* 32-bit: `this' is in %eax (regparm) or on the stack, one extra slot
   deep when an aggregate-return pointer precedes it.  */
13783 xops[1] = gen_rtx_REG (SImode, 0);
13784 else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
13785 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
13787 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
13788 output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);
/* 32-bit PIC: materialize the GOT pointer in %ebx (saving/restoring it
   around the sequence), fetch FUNCTION's GOT entry into %ecx, jump.  */
13792 xops[0] = pic_offset_table_rtx;
13793 xops[1] = gen_label_rtx ();
13794 xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
13796 if (ix86_regparm > 2)
13798 output_asm_insn ("push{l}\t%0", xops);
13799 output_asm_insn ("call\t%P1", xops);
13800 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
13801 output_asm_insn ("pop{l}\t%0", xops);
13803 ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
13804 xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
13806 ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
13807 asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
13808 asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
/* Non-PIC 32-bit: direct tail jump.  */
13812 fprintf (file, "\tjmp ");
13813 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13814 fprintf (file, "\n");
/* Implements ADJUST_FIELD_ALIGN: caps the alignment of struct FIELD at
   32 bits for double/double-complex and integer-class modes on 32-bit
   targets without -malign-double, matching the traditional ia32 ABI.
   COMPUTED is the alignment otherwise chosen.  NOTE(review): the
   returned value on the early-out paths is not visible here.  */
13820 x86_field_alignment (field, computed)
13824 enum machine_mode mode;
13825 tree type = TREE_TYPE (field);
/* 64-bit and -malign-double keep the natural (computed) alignment.  */
13827 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
/* For arrays, the element type's mode decides.  */
13829 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
13830 ? get_inner_array_type (type) : type);
13831 if (mode == DFmode || mode == DCmode
13832 || GET_MODE_CLASS (mode) == MODE_INT
13833 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
13834 return MIN (32, computed);
13838 /* Implement machine specific optimizations.
13839 At the moment we implement single transformation: AMD Athlon works faster
13840 when RET is not destination of conditional jump or directly preceeded
13841 by other jump instruction. We avoid the penalty by inserting NOP just
13842 before the RET instructions in such cases. */
13844 x86_machine_dependent_reorg (first)
13845 rtx first ATTRIBUTE_UNUSED;
/* Athlon-only tuning; skip when not optimizing or optimizing for size
   (the NOP costs a byte).  */
13849 if (!TARGET_ATHLON || !optimize || optimize_size)
/* Walk every predecessor edge of the exit block — each one ends in a
   potential return.  */
13851 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
13853 basic_block bb = e->src;
13856 bool insert = false;
/* Only patch hot blocks that really end in a return.  */
13858 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
/* Case 1: the RET is a label reached by a (conditional) jump — check
   whether any non-fallthru edge with nonzero frequency targets it.  */
13860 prev = prev_nonnote_insn (ret);
13861 if (prev && GET_CODE (prev) == CODE_LABEL)
13864 for (e = bb->pred; e; e = e->pred_next)
13865 if (EDGE_FREQUENCY (e) && e->src->index > 0
13866 && !(e->flags & EDGE_FALLTHRU))
/* Case 2: the RET directly follows a conditional jump.  */
13871 prev = prev_real_insn (ret);
13872 if (prev && GET_CODE (prev) == JUMP_INSN
13873 && any_condjump_p (prev))
/* Separate the RET from the jump/label with a NOP.  */
13877 emit_insn_before (gen_nop (), ret);
13881 #include "gt-i386.h"