1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
28 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "basic-block.h"
44 #include "target-def.h"
45 #include "langhooks.h"
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
51 /* Processor costs (relative to an add) */
53 struct processor_costs size_cost = { /* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of storing fp registers */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
88 /* Processor costs (relative to an add) */
90 struct processor_costs i386_cost = { /* 386 specific costs */
91 1, /* cost of an add instruction */
92 1, /* cost of a lea instruction */
93 3, /* variable shift costs */
94 2, /* constant shift costs */
95 6, /* cost of starting a multiply */
96 1, /* cost of multiply per each bit set */
97 23, /* cost of a divide/mod */
98 3, /* cost of movsx */
99 2, /* cost of movzx */
100 15, /* "large" insn */
102 4, /* cost for loading QImode using movzbl */
103 {2, 4, 2}, /* cost of loading integer registers
104 in QImode, HImode and SImode.
105 Relative to reg-reg move (2). */
106 {2, 4, 2}, /* cost of storing integer registers */
107 2, /* cost of reg,reg fld/fst */
108 {8, 8, 8}, /* cost of loading fp registers
109 in SFmode, DFmode and XFmode */
110 {8, 8, 8}, /* cost of storing fp registers */
111 2, /* cost of moving MMX register */
112 {4, 8}, /* cost of loading MMX registers
113 in SImode and DImode */
114 {4, 8}, /* cost of storing MMX registers
115 in SImode and DImode */
116 2, /* cost of moving SSE register */
117 {4, 8, 16}, /* cost of loading SSE registers
118 in SImode, DImode and TImode */
119 {4, 8, 16}, /* cost of storing SSE registers
120 in SImode, DImode and TImode */
121 3, /* MMX or SSE register to integer */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
127 struct processor_costs i486_cost = { /* 486 specific costs */
128 1, /* cost of an add instruction */
129 1, /* cost of a lea instruction */
130 3, /* variable shift costs */
131 2, /* constant shift costs */
132 12, /* cost of starting a multiply */
133 1, /* cost of multiply per each bit set */
134 40, /* cost of a divide/mod */
135 3, /* cost of movsx */
136 2, /* cost of movzx */
137 15, /* "large" insn */
139 4, /* cost for loading QImode using movzbl */
140 {2, 4, 2}, /* cost of loading integer registers
141 in QImode, HImode and SImode.
142 Relative to reg-reg move (2). */
143 {2, 4, 2}, /* cost of storing integer registers */
144 2, /* cost of reg,reg fld/fst */
145 {8, 8, 8}, /* cost of loading fp registers
146 in SFmode, DFmode and XFmode */
147 {8, 8, 8}, /* cost of storing fp registers */
148 2, /* cost of moving MMX register */
149 {4, 8}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {4, 8}, /* cost of storing MMX registers
152 in SImode and DImode */
153 2, /* cost of moving SSE register */
154 {4, 8, 16}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {4, 8, 16}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of prefetch block */
160 0, /* number of parallel prefetches */
164 struct processor_costs pentium_cost = {
165 1, /* cost of an add instruction */
166 1, /* cost of a lea instruction */
167 4, /* variable shift costs */
168 1, /* constant shift costs */
169 11, /* cost of starting a multiply */
170 0, /* cost of multiply per each bit set */
171 25, /* cost of a divide/mod */
172 3, /* cost of movsx */
173 2, /* cost of movzx */
174 8, /* "large" insn */
176 6, /* cost for loading QImode using movzbl */
177 {2, 4, 2}, /* cost of loading integer registers
178 in QImode, HImode and SImode.
179 Relative to reg-reg move (2). */
180 {2, 4, 2}, /* cost of storing integer registers */
181 2, /* cost of reg,reg fld/fst */
182 {2, 2, 6}, /* cost of loading fp registers
183 in SFmode, DFmode and XFmode */
184 {4, 4, 6}, /* cost of storing fp registers */
185 8, /* cost of moving MMX register */
186 {8, 8}, /* cost of loading MMX registers
187 in SImode and DImode */
188 {8, 8}, /* cost of storing MMX registers
189 in SImode and DImode */
190 2, /* cost of moving SSE register */
191 {4, 8, 16}, /* cost of loading SSE registers
192 in SImode, DImode and TImode */
193 {4, 8, 16}, /* cost of storing SSE registers
194 in SImode, DImode and TImode */
195 3, /* MMX or SSE register to integer */
196 0, /* size of prefetch block */
197 0, /* number of parallel prefetches */
201 struct processor_costs pentiumpro_cost = {
202 1, /* cost of an add instruction */
203 1, /* cost of a lea instruction */
204 1, /* variable shift costs */
205 1, /* constant shift costs */
206 4, /* cost of starting a multiply */
207 0, /* cost of multiply per each bit set */
208 17, /* cost of a divide/mod */
209 1, /* cost of movsx */
210 1, /* cost of movzx */
211 8, /* "large" insn */
213 2, /* cost for loading QImode using movzbl */
214 {4, 4, 4}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 2, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {2, 2, 6}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
221 {4, 4, 6}, /* cost of storing fp registers */
222 2, /* cost of moving MMX register */
223 {2, 2}, /* cost of loading MMX registers
224 in SImode and DImode */
225 {2, 2}, /* cost of storing MMX registers
226 in SImode and DImode */
227 2, /* cost of moving SSE register */
228 {2, 2, 8}, /* cost of loading SSE registers
229 in SImode, DImode and TImode */
230 {2, 2, 8}, /* cost of storing SSE registers
231 in SImode, DImode and TImode */
232 3, /* MMX or SSE register to integer */
233 32, /* size of prefetch block */
234 6, /* number of parallel prefetches */
238 struct processor_costs k6_cost = {
239 1, /* cost of an add instruction */
240 2, /* cost of a lea instruction */
241 1, /* variable shift costs */
242 1, /* constant shift costs */
243 3, /* cost of starting a multiply */
244 0, /* cost of multiply per each bit set */
245 18, /* cost of a divide/mod */
246 2, /* cost of movsx */
247 2, /* cost of movzx */
248 8, /* "large" insn */
250 3, /* cost for loading QImode using movzbl */
251 {4, 5, 4}, /* cost of loading integer registers
252 in QImode, HImode and SImode.
253 Relative to reg-reg move (2). */
254 {2, 3, 2}, /* cost of storing integer registers */
255 4, /* cost of reg,reg fld/fst */
256 {6, 6, 6}, /* cost of loading fp registers
257 in SFmode, DFmode and XFmode */
258 {4, 4, 4}, /* cost of storing fp registers */
259 2, /* cost of moving MMX register */
260 {2, 2}, /* cost of loading MMX registers
261 in SImode and DImode */
262 {2, 2}, /* cost of storing MMX registers
263 in SImode and DImode */
264 2, /* cost of moving SSE register */
265 {2, 2, 8}, /* cost of loading SSE registers
266 in SImode, DImode and TImode */
267 {2, 2, 8}, /* cost of storing SSE registers
268 in SImode, DImode and TImode */
269 6, /* MMX or SSE register to integer */
270 32, /* size of prefetch block */
271 1, /* number of parallel prefetches */
275 struct processor_costs athlon_cost = {
276 1, /* cost of an add instruction */
277 2, /* cost of a lea instruction */
278 1, /* variable shift costs */
279 1, /* constant shift costs */
280 5, /* cost of starting a multiply */
281 0, /* cost of multiply per each bit set */
282 42, /* cost of a divide/mod */
283 1, /* cost of movsx */
284 1, /* cost of movzx */
285 8, /* "large" insn */
287 4, /* cost for loading QImode using movzbl */
288 {4, 5, 4}, /* cost of loading integer registers
289 in QImode, HImode and SImode.
290 Relative to reg-reg move (2). */
291 {2, 3, 2}, /* cost of storing integer registers */
292 4, /* cost of reg,reg fld/fst */
293 {6, 6, 20}, /* cost of loading fp registers
294 in SFmode, DFmode and XFmode */
295 {4, 4, 16}, /* cost of storing fp registers */
296 2, /* cost of moving MMX register */
297 {2, 2}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {2, 2}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {2, 2, 8}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {2, 2, 8}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
306 6, /* MMX or SSE register to integer */
307 64, /* size of prefetch block */
308 6, /* number of parallel prefetches */
312 struct processor_costs pentium4_cost = {
313 1, /* cost of an add instruction */
314 1, /* cost of a lea instruction */
315 8, /* variable shift costs */
316 8, /* constant shift costs */
317 30, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 112, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 16, /* "large" insn */
324 2, /* cost for loading QImode using movzbl */
325 {4, 5, 4}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {2, 3, 2}, /* cost of storing integer registers */
329 2, /* cost of reg,reg fld/fst */
330 {2, 2, 6}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {4, 4, 6}, /* cost of storing fp registers */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 12, /* cost of moving SSE register */
339 {12, 12, 12}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 10, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
348 const struct processor_costs *ix86_cost = &pentium_cost; /* Active cost table; defaults to Pentium.  */
350 /* Processor feature/optimization bitmasks. */
351 #define m_386 (1<<PROCESSOR_I386)
352 #define m_486 (1<<PROCESSOR_I486)
353 #define m_PENT (1<<PROCESSOR_PENTIUM)
354 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
355 #define m_K6 (1<<PROCESSOR_K6)
356 #define m_ATHLON (1<<PROCESSOR_ATHLON)
357 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
/* Each constant below is a bitmask over the m_* processor bits above:
   a bit is set for every PROCESSOR_* on which the named tuning applies.  */
359 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
360 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
361 const int x86_zero_extend_with_and = m_486 | m_PENT;
362 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
363 const int x86_double_with_add = ~m_386;
364 const int x86_use_bit_test = m_386;
365 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
366 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
367 const int x86_3dnow_a = m_ATHLON;
368 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
369 const int x86_branch_hints = m_PENT4;
370 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
371 const int x86_partial_reg_stall = m_PPRO;
372 const int x86_use_loop = m_K6;
373 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
374 const int x86_use_mov0 = m_K6;
375 const int x86_use_cltd = ~(m_PENT | m_K6);
376 const int x86_read_modify_write = ~m_PENT;
377 const int x86_read_modify = ~(m_PENT | m_PPRO);
378 const int x86_split_long_moves = m_PPRO;
379 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
380 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
381 const int x86_single_stringop = m_386 | m_PENT4;
382 const int x86_qimode_math = ~(0);
383 const int x86_promote_qi_regs = 0;
384 const int x86_himode_math = ~(m_PPRO);
385 const int x86_promote_hi_regs = m_PPRO;
386 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
387 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
388 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
389 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
390 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
391 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
392 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
393 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
394 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
395 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
396 const int x86_decompose_lea = m_PENT4;
397 const int x86_shift1 = ~m_486;
398 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
400 /* In case the average insn count for single function invocation is
401 lower than this constant, emit fast (but longer) prologue and
403 #define FAST_PROLOGUE_INSN_COUNT 30
405 /* Set by prologue expander and used by epilogue expander to determine
407 static int use_fast_prologue_epilogue;
409 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
410 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
411 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
412 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
414 /* Array of the smallest class containing reg number REGNO, indexed by
415 REGNO. Used by REGNO_REG_CLASS in i386.h. */
417 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
420 AREG, DREG, CREG, BREG,
422 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
424 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
425 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
428 /* flags, fpsr, dirflag, frame */
429 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
430 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
432 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
434 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
435 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
436 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
440 /* The "default" register map used in 32bit mode. */
442 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
444 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
445 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
446 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
447 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
448 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
449 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
450 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
453 static int const x86_64_int_parameter_registers[6] =
455 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
456 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
459 static int const x86_64_int_return_registers[4] =
461 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
464 /* The "default" register map used in 64bit mode. */
465 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
467 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
468 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
469 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
470 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
471 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
472 8,9,10,11,12,13,14,15, /* extended integer registers */
473 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
476 /* Define the register numbers to be used in Dwarf debugging information.
477 The SVR4 reference port C compiler uses the following register numbers
478 in its Dwarf output code:
479 0 for %eax (gcc regno = 0)
480 1 for %ecx (gcc regno = 2)
481 2 for %edx (gcc regno = 1)
482 3 for %ebx (gcc regno = 3)
483 4 for %esp (gcc regno = 7)
484 5 for %ebp (gcc regno = 6)
485 6 for %esi (gcc regno = 4)
486 7 for %edi (gcc regno = 5)
487 The following three DWARF register numbers are never generated by
488 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
489 believes these numbers have these meanings.
490 8 for %eip (no gcc equivalent)
491 9 for %eflags (gcc regno = 17)
492 10 for %trapno (no gcc equivalent)
493 It is not at all clear how we should number the FP stack registers
494 for the x86 architecture. If the version of SDB on x86/svr4 were
495 a bit less brain dead with respect to floating-point then we would
496 have a precedent to follow with respect to DWARF register numbers
497 for x86 FP registers, but the SDB on x86/svr4 is so completely
498 broken with respect to FP registers that it is hardly worth thinking
499 of it as something to strive for compatibility with.
500 The version of x86/svr4 SDB I have at the moment does (partially)
501 seem to believe that DWARF register number 11 is associated with
502 the x86 register %st(0), but that's about all. Higher DWARF
503 register numbers don't seem to be associated with anything in
504 particular, and even for DWARF regno 11, SDB only seems to under-
505 stand that it should say that a variable lives in %st(0) (when
506 asked via an `=' command) if we said it was in DWARF regno 11,
507 but SDB still prints garbage when asked for the value of the
508 variable in question (via a `/' command).
509 (Also note that the labels SDB prints for various FP stack regs
510 when doing an `x' command are all wrong.)
511 Note that these problems generally don't affect the native SVR4
512 C compiler because it doesn't allow the use of -O with -g and
513 because when it is *not* optimizing, it allocates a memory
514 location for each floating-point variable, and the memory
515 location is what gets described in the DWARF AT_location
516 attribute for the variable in question.
517 Regardless of the severe mental illness of the x86/svr4 SDB, we
518 do something sensible here and we use the following DWARF
519 register numbers. Note that these are all stack-top-relative
521 11 for %st(0) (gcc regno = 8)
522 12 for %st(1) (gcc regno = 9)
523 13 for %st(2) (gcc regno = 10)
524 14 for %st(3) (gcc regno = 11)
525 15 for %st(4) (gcc regno = 12)
526 16 for %st(5) (gcc regno = 13)
527 17 for %st(6) (gcc regno = 14)
528 18 for %st(7) (gcc regno = 15)
530 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
532 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
533 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
534 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
535 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
536 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
537 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
538 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
541 /* Test and compare insns in i386.md store the information needed to
542 generate branch and scc insns here. */
544 rtx ix86_compare_op0 = NULL_RTX;
545 rtx ix86_compare_op1 = NULL_RTX;
547 /* The encoding characters for the four TLS models present in ELF. */
549 static char const tls_model_chars[] = " GLil";
551 #define MAX_386_STACK_LOCALS 3
552 /* Size of the register save area. */
553 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
555 /* Define the structure for the machine field in struct function. */
556 struct machine_function GTY(())
558 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
559 const char *some_ld_name;
560 int save_varrargs_registers;
561 int accesses_prev_frame;
564 #define ix86_stack_locals (cfun->machine->stack_locals)
565 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
567 /* Structure describing stack frame layout.
568 Stack grows downward:
574 saved frame pointer if frame_pointer_needed
575 <- HARD_FRAME_POINTER
581 > to_allocate <- FRAME_POINTER
593 int outgoing_arguments_size;
596 HOST_WIDE_INT to_allocate;
597 /* The offsets relative to ARG_POINTER. */
598 HOST_WIDE_INT frame_pointer_offset;
599 HOST_WIDE_INT hard_frame_pointer_offset;
600 HOST_WIDE_INT stack_pointer_offset;
603 /* Used to enable/disable debugging features. */
604 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
605 /* Code model option as passed by user. */
606 const char *ix86_cmodel_string;
608 enum cmodel ix86_cmodel;
610 const char *ix86_asm_string;
611 enum asm_dialect ix86_asm_dialect = ASM_ATT;
613 const char *ix86_tls_dialect_string;
614 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
616 /* Which unit we are generating floating point math for. */
617 enum fpmath_unit ix86_fpmath;
619 /* Which cpu are we scheduling for. */
620 enum processor_type ix86_cpu;
621 /* Which instruction set architecture to use. */
622 enum processor_type ix86_arch;
624 /* Strings to hold which cpu and instruction set architecture to use. */
625 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
626 const char *ix86_arch_string; /* for -march=<xxx> */
627 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
629 /* # of registers to use to pass arguments. */
630 const char *ix86_regparm_string;
632 /* true if sse prefetch instruction is not NOOP. */
633 int x86_prefetch_sse;
635 /* ix86_regparm_string as a number */
638 /* Alignment to use for loops and jumps: */
640 /* Power of two alignment for loops. */
641 const char *ix86_align_loops_string;
643 /* Power of two alignment for non-loop jumps. */
644 const char *ix86_align_jumps_string;
646 /* Power of two alignment for stack boundary in bytes. */
647 const char *ix86_preferred_stack_boundary_string;
649 /* Preferred alignment for stack boundary in bits. */
650 int ix86_preferred_stack_boundary;
652 /* Values 1-5: see jump.c */
653 int ix86_branch_cost;
654 const char *ix86_branch_cost_string;
656 /* Power of two alignment for functions. */
657 const char *ix86_align_funcs_string;
659 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
660 static char internal_label_prefix[16];
661 static int internal_label_prefix_len;
663 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
664 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
665 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
666 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
668 static const char *get_some_local_dynamic_name PARAMS ((void));
669 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
670 static rtx maybe_get_pool_constant PARAMS ((rtx));
671 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
672 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
674 static rtx get_thread_pointer PARAMS ((void));
675 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
676 static rtx gen_push PARAMS ((rtx));
677 static int memory_address_length PARAMS ((rtx addr));
678 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
679 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
680 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
681 static void ix86_dump_ppro_packet PARAMS ((FILE *));
682 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
683 static struct machine_function * ix86_init_machine_status PARAMS ((void));
684 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
685 static int ix86_nsaved_regs PARAMS ((void));
686 static void ix86_emit_save_regs PARAMS ((void));
687 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
688 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
689 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
690 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
691 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
692 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
693 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
694 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
695 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
696 static int ix86_issue_rate PARAMS ((void));
697 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
698 static void ix86_sched_init PARAMS ((FILE *, int, int));
699 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
700 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
701 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
702 static int ia32_multipass_dfa_lookahead PARAMS ((void));
703 static void ix86_init_mmx_sse_builtins PARAMS ((void));
707 rtx base, index, disp;
711 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
713 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
714 static const char *ix86_strip_name_encoding PARAMS ((const char *))
717 struct builtin_description;
718 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
720 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
722 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
723 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
724 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
725 static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
727 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
728 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
729 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
730 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
734 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
736 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
737 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
738 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
739 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
740 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
741 static int ix86_save_reg PARAMS ((unsigned int, int));
742 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
743 static int ix86_comp_type_attributes PARAMS ((tree, tree));
744 const struct attribute_spec ix86_attribute_table[];
745 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
746 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
747 static int ix86_value_regno PARAMS ((enum machine_mode));
749 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
750 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
753 /* Register class used for passing given 64bit part of the argument.
754 These represent classes as documented by the PS ABI, with the exception
755 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
756 use SF or DFmode move instead of DImode to avoid reformatting penalties.
758 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
759 whenever possible (upper half does contain padding).
761 enum x86_64_reg_class
764 X86_64_INTEGER_CLASS,
765 X86_64_INTEGERSI_CLASS,
774 static const char * const x86_64_reg_class_name[] =
775 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
777 #define MAX_CLASSES 4
778 static int classify_argument PARAMS ((enum machine_mode, tree,
779 enum x86_64_reg_class [MAX_CLASSES],
781 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
783 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
785 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
786 enum x86_64_reg_class));
788 /* Initialize the GCC target structure. */
789 #undef TARGET_ATTRIBUTE_TABLE
790 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
791 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
792 # undef TARGET_MERGE_DECL_ATTRIBUTES
793 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
796 #undef TARGET_COMP_TYPE_ATTRIBUTES
797 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
799 #undef TARGET_INIT_BUILTINS
800 #define TARGET_INIT_BUILTINS ix86_init_builtins
802 #undef TARGET_EXPAND_BUILTIN
803 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
805 #undef TARGET_ASM_FUNCTION_EPILOGUE
806 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
808 #undef TARGET_ASM_OPEN_PAREN
809 #define TARGET_ASM_OPEN_PAREN ""
810 #undef TARGET_ASM_CLOSE_PAREN
811 #define TARGET_ASM_CLOSE_PAREN ""
813 #undef TARGET_ASM_ALIGNED_HI_OP
814 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
815 #undef TARGET_ASM_ALIGNED_SI_OP
816 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
818 #undef TARGET_ASM_ALIGNED_DI_OP
819 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
822 #undef TARGET_ASM_UNALIGNED_HI_OP
823 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
824 #undef TARGET_ASM_UNALIGNED_SI_OP
825 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
826 #undef TARGET_ASM_UNALIGNED_DI_OP
827 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
829 #undef TARGET_SCHED_ADJUST_COST
830 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
831 #undef TARGET_SCHED_ISSUE_RATE
832 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
833 #undef TARGET_SCHED_VARIABLE_ISSUE
834 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
835 #undef TARGET_SCHED_INIT
836 #define TARGET_SCHED_INIT ix86_sched_init
837 #undef TARGET_SCHED_REORDER
838 #define TARGET_SCHED_REORDER ix86_sched_reorder
839 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
840 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
841 ia32_use_dfa_pipeline_interface
842 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
843 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
844 ia32_multipass_dfa_lookahead
847 #undef TARGET_HAVE_TLS
848 #define TARGET_HAVE_TLS true
851 struct gcc_target targetm = TARGET_INITIALIZER;
853 /* Sometimes certain combinations of command options do not make
854 sense on a particular target machine. You can define a macro
855 `OVERRIDE_OPTIONS' to take account of this. This macro, if
856 defined, is executed once just after all the command options have
859 Don't use this macro to turn on various extra optimizations for
860 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
866 /* Comes from final.c -- no real reason to change it. */
867 #define MAX_CODE_ALIGN 16
/* Per-processor code generation parameters; field order matches the
   initializer rows below.  The table is indexed by processor type
   (PROCESSOR_*), as the [PROCESSOR_max] bound shows.  */
871 const struct processor_costs *cost; /* Processor costs */
872 const int target_enable; /* Target flags to enable. */
873 const int target_disable; /* Target flags to disable. */
874 const int align_loop; /* Default alignments. */
875 const int align_loop_max_skip;
876 const int align_jump;
877 const int align_jump_max_skip;
878 const int align_func;
879 const int branch_cost;
/* Rows (judging by the cost tables referenced): i386, i486, pentium,
   pentiumpro, k6, athlon, pentium4.  */
881 const processor_target_table[PROCESSOR_max] =
883 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
884 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
885 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
886 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
887 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
888 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
889 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
892 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
/* Map of -march=/-mcpu= names to processor types, plus the
   ISA-extension flags (PTA_*) each name implies.  */
895 const char *const name; /* processor name or nickname. */
896 const enum processor_type processor;
902 PTA_PREFETCH_SSE = 8,
907 const processor_alias_table[] =
909 {"i386", PROCESSOR_I386, 0},
910 {"i486", PROCESSOR_I486, 0},
911 {"i586", PROCESSOR_PENTIUM, 0},
912 {"pentium", PROCESSOR_PENTIUM, 0},
913 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
/* Non-Intel parts are mapped onto the closest Intel scheduling model.  */
914 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
915 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
916 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
917 {"i686", PROCESSOR_PENTIUMPRO, 0},
918 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
919 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
920 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
921 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
922 PTA_MMX | PTA_PREFETCH_SSE},
923 {"k6", PROCESSOR_K6, PTA_MMX},
924 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
925 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
926 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
928 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
929 | PTA_3DNOW | PTA_3DNOW_A},
930 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
931 | PTA_3DNOW_A | PTA_SSE},
932 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
933 | PTA_3DNOW_A | PTA_SSE},
934 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
935 | PTA_3DNOW_A | PTA_SSE},
/* Number of entries, used by the lookup loops in override_options.  */
938 int const pta_size = ARRAY_SIZE (processor_alias_table);
/* OVERRIDE_OPTIONS body (part 1): validate the ix86_* command-line
   strings, resolve -march=/-mcpu= via the alias table, and derive the
   dependent target flags.  NOTE(review): several control-flow lines
   (braces, else arms) are not visible in this chunk.  */
940 /* By default our XFmode is the 80-bit extended format. If we have
941 use TFmode instead, it's also the 80-bit format, but with padding. */
942 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
943 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
945 #ifdef SUBTARGET_OVERRIDE_OPTIONS
946 SUBTARGET_OVERRIDE_OPTIONS;
/* Tuning defaults: -mcpu falls back to -march, then to the configured
   default; -march falls back by word size.  */
949 if (!ix86_cpu_string && ix86_arch_string)
950 ix86_cpu_string = ix86_arch_string;
951 if (!ix86_cpu_string)
952 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
953 if (!ix86_arch_string)
954 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
/* Parse -mcmodel=.  Only the small model is valid under -fpic.  */
956 if (ix86_cmodel_string != 0)
958 if (!strcmp (ix86_cmodel_string, "small"))
959 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
961 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
962 else if (!strcmp (ix86_cmodel_string, "32"))
964 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
965 ix86_cmodel = CM_KERNEL;
966 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
967 ix86_cmodel = CM_MEDIUM;
968 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
969 ix86_cmodel = CM_LARGE;
971 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
977 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
/* Parse -masm= assembler dialect.  */
979 if (ix86_asm_string != 0)
981 if (!strcmp (ix86_asm_string, "intel"))
982 ix86_asm_dialect = ASM_INTEL;
983 else if (!strcmp (ix86_asm_string, "att"))
984 ix86_asm_dialect = ASM_ATT;
986 error ("bad value (%s) for -masm= switch", ix86_asm_string);
/* Sanity-check code model against the selected word size.  */
988 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
989 error ("code model `%s' not supported in the %s bit mode",
990 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
991 if (ix86_cmodel == CM_LARGE)
992 sorry ("code model `large' not supported yet");
993 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
994 sorry ("%i-bit mode not compiled in",
995 (target_flags & MASK_64BIT) ? 64 : 32);
/* Resolve -march=; this also sets the default tuning and turns on the
   implied ISA flags unless the user set them explicitly.  */
997 for (i = 0; i < pta_size; i++)
998 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1000 ix86_arch = processor_alias_table[i].processor;
1001 /* Default cpu tuning to the architecture. */
1002 ix86_cpu = ix86_arch;
1003 if (processor_alias_table[i].flags & PTA_MMX
1004 && !(target_flags_explicit & MASK_MMX))
1005 target_flags |= MASK_MMX;
1006 if (processor_alias_table[i].flags & PTA_3DNOW
1007 && !(target_flags_explicit & MASK_3DNOW))
1008 target_flags |= MASK_3DNOW;
1009 if (processor_alias_table[i].flags & PTA_3DNOW_A
1010 && !(target_flags_explicit & MASK_3DNOW_A))
1011 target_flags |= MASK_3DNOW_A;
1012 if (processor_alias_table[i].flags & PTA_SSE
1013 && !(target_flags_explicit & MASK_SSE))
1014 target_flags |= MASK_SSE;
1015 if (processor_alias_table[i].flags & PTA_SSE2
1016 && !(target_flags_explicit & MASK_SSE2))
1017 target_flags |= MASK_SSE2;
1018 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1019 x86_prefetch_sse = true;
1024 error ("bad value (%s) for -march= switch", ix86_arch_string);
/* Resolve -mcpu= (tuning only; does not enable ISA flags).  */
1026 for (i = 0; i < pta_size; i++)
1027 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1029 ix86_cpu = processor_alias_table[i].processor;
1032 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1033 x86_prefetch_sse = true;
1035 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
/* Select cost tables and per-processor default flags.  */
1038 ix86_cost = &size_cost;
1040 ix86_cost = processor_target_table[ix86_cpu].cost;
1041 target_flags |= processor_target_table[ix86_cpu].target_enable;
1042 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1044 /* Arrange to set up i386_stack_locals for all functions. */
1045 init_machine_status = ix86_init_machine_status;
1047 /* Validate -mregparm= value. */
1048 if (ix86_regparm_string)
1050 i = atoi (ix86_regparm_string);
1051 if (i < 0 || i > REGPARM_MAX)
1052 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1058 ix86_regparm = REGPARM_MAX;
1060 /* If the user has provided any of the -malign-* options,
1061 warn and use that value only if -falign-* is not set.
1062 Remove this code in GCC 3.2 or later. */
1063 if (ix86_align_loops_string)
1065 warning ("-malign-loops is obsolete, use -falign-loops");
1066 if (align_loops == 0)
1068 i = atoi (ix86_align_loops_string);
1069 if (i < 0 || i > MAX_CODE_ALIGN)
1070 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1072 align_loops = 1 << i;
1076 if (ix86_align_jumps_string)
1078 warning ("-malign-jumps is obsolete, use -falign-jumps");
1079 if (align_jumps == 0)
1081 i = atoi (ix86_align_jumps_string);
1082 if (i < 0 || i > MAX_CODE_ALIGN)
1083 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1085 align_jumps = 1 << i;
1089 if (ix86_align_funcs_string)
1091 warning ("-malign-functions is obsolete, use -falign-functions");
1092 if (align_functions == 0)
1094 i = atoi (ix86_align_funcs_string);
1095 if (i < 0 || i > MAX_CODE_ALIGN)
1096 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1098 align_functions = 1 << i;
/* OVERRIDE_OPTIONS body (part 2): alignment defaults, stack boundary,
   branch cost, TLS dialect, frame-pointer and fpmath policy.
   NOTE(review): several control-flow lines are not visible here.  */
1102 /* Default align_* from the processor table. */
1103 if (align_loops == 0)
1105 align_loops = processor_target_table[ix86_cpu].align_loop;
1106 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1108 if (align_jumps == 0)
1110 align_jumps = processor_target_table[ix86_cpu].align_jump;
1111 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1113 if (align_functions == 0)
1115 align_functions = processor_target_table[ix86_cpu].align_func;
1118 /* Validate -mpreferred-stack-boundary= value, or provide default.
1119 The default of 128 bits is for Pentium III's SSE __m128, but we
1120 don't want additional code to keep the stack aligned when
1121 optimizing for code size. */
1122 ix86_preferred_stack_boundary = (optimize_size
1123 ? TARGET_64BIT ? 128 : 32
/* User override: value is an exponent, converted to bits below.  */
1125 if (ix86_preferred_stack_boundary_string)
1127 i = atoi (ix86_preferred_stack_boundary_string);
1128 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1129 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1130 TARGET_64BIT ? 4 : 2);
1132 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1135 /* Validate -mbranch-cost= value, or provide default. */
1136 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1137 if (ix86_branch_cost_string)
1139 i = atoi (ix86_branch_cost_string);
1141 error ("-mbranch-cost=%d is not between 0 and 5", i);
1143 ix86_branch_cost = i;
/* Parse -mtls-dialect=.  */
1146 if (ix86_tls_dialect_string)
1148 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1149 ix86_tls_dialect = TLS_DIALECT_GNU;
1150 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1151 ix86_tls_dialect = TLS_DIALECT_SUN;
1153 error ("bad value (%s) for -mtls-dialect= switch",
1154 ix86_tls_dialect_string);
1158 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1160 /* Keep nonleaf frame pointers. */
1161 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1162 flag_omit_frame_pointer = 1;
1164 /* If we're doing fast math, we don't care about comparison order
1165 wrt NaNs. This lets us use a shorter comparison sequence. */
1166 if (flag_unsafe_math_optimizations)
1167 target_flags &= ~MASK_IEEE_FP;
1169 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1170 since the insns won't need emulation. */
1171 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1172 target_flags &= ~MASK_NO_FANCY_MATH_387;
/* 64-bit-only option checks and defaults.  */
1176 if (TARGET_ALIGN_DOUBLE)
1177 error ("-malign-double makes no sense in the 64bit mode");
1179 error ("-mrtd calling convention not supported in the 64bit mode");
1180 /* Enable by default the SSE and MMX builtins. */
1181 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1182 ix86_fpmath = FPMATH_SSE;
1185 ix86_fpmath = FPMATH_387;
/* Parse -mfpmath=: 387, sse, or both; fall back to 387 when the
   requested unit is not enabled.  */
1187 if (ix86_fpmath_string != 0)
1189 if (! strcmp (ix86_fpmath_string, "387"))
1190 ix86_fpmath = FPMATH_387;
1191 else if (! strcmp (ix86_fpmath_string, "sse"))
1195 warning ("SSE instruction set disabled, using 387 arithmetics");
1196 ix86_fpmath = FPMATH_387;
1199 ix86_fpmath = FPMATH_SSE;
1201 else if (! strcmp (ix86_fpmath_string, "387,sse")
1202 || ! strcmp (ix86_fpmath_string, "sse,387"))
1206 warning ("SSE instruction set disabled, using 387 arithmetics");
1207 ix86_fpmath = FPMATH_387;
1209 else if (!TARGET_80387)
1211 warning ("387 instruction set disabled, using SSE arithmetics");
1212 ix86_fpmath = FPMATH_SSE;
1215 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1218 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1221 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1225 target_flags |= MASK_MMX;
1226 x86_prefetch_sse = true;
1229 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1232 target_flags |= MASK_MMX;
1233 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1234 extensions it adds. */
1235 if (x86_3dnow_a & (1 << ix86_arch))
1236 target_flags |= MASK_3DNOW_A;
1238 if ((x86_accumulate_outgoing_args & CPUMASK)
1239 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1241 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1243 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1246 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1247 p = strchr (internal_label_prefix, 'X');
1248 internal_label_prefix_len = p - internal_label_prefix;
/* OPTIMIZATION_OPTIONS hook: adjust flag_* defaults per optimization
   LEVEL before command-line options are processed.  SIZE (the -Os
   flag) is unused here.  */
1254 optimization_options (level, size)
1256 int size ATTRIBUTE_UNUSED;
1258 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1259 make the problem with not enough registers even worse. */
1260 #ifdef INSN_SCHEDULING
1262 flag_schedule_insns = 0;
1264 if (TARGET_64BIT && optimize >= 1)
1265 flag_omit_frame_pointer = 1;
/* 64-bit defaults: no PCC struct return, async unwind tables on.  */
1268 flag_pcc_struct_return = 0;
1269 flag_asynchronous_unwind_tables = 1;
1272 flag_omit_frame_pointer = 0;
1275 /* Table of valid machine attributes. */
1276 const struct attribute_spec ix86_attribute_table[] =
1278 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1279 /* Stdcall attribute says callee is responsible for popping arguments
1280 if they are not variable. */
1281 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1282 /* Cdecl attribute says the callee is a normal C declaration */
1283 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1284 /* Regparm attribute specifies how many integer arguments are to be
1285 passed in registers. */
1286 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1287 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Windows DLL import/export attributes, only on subtargets that
   define TARGET_DLLIMPORT_DECL_ATTRIBUTES.  */
1288 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1289 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1290 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
/* Sentinel terminating the table.  */
1292 { NULL, 0, 0, false, false, false, NULL }
1295 /* Handle a "cdecl" or "stdcall" attribute;
1296 arguments as in struct attribute_spec.handler. */
1298 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1301 tree args ATTRIBUTE_UNUSED;
1302 int flags ATTRIBUTE_UNUSED;
/* Reject the attribute on anything that is not a function type (or a
   decl that can carry one); warn and suppress the attribute.  */
1305 if (TREE_CODE (*node) != FUNCTION_TYPE
1306 && TREE_CODE (*node) != METHOD_TYPE
1307 && TREE_CODE (*node) != FIELD_DECL
1308 && TREE_CODE (*node) != TYPE_DECL)
1310 warning ("`%s' attribute only applies to functions",
1311 IDENTIFIER_POINTER (name));
1312 *no_add_attrs = true;
/* Second warning path: attribute accepted syntactically but ignored
   (condition line not visible in this chunk).  */
1317 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1318 *no_add_attrs = true;
1324 /* Handle a "regparm" attribute;
1325 arguments as in struct attribute_spec.handler. */
1327 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1331 int flags ATTRIBUTE_UNUSED;
/* Only valid on function types (or decls carrying one).  */
1334 if (TREE_CODE (*node) != FUNCTION_TYPE
1335 && TREE_CODE (*node) != METHOD_TYPE
1336 && TREE_CODE (*node) != FIELD_DECL
1337 && TREE_CODE (*node) != TYPE_DECL)
1339 warning ("`%s' attribute only applies to functions",
1340 IDENTIFIER_POINTER (name));
1341 *no_add_attrs = true;
/* Validate the single argument: an integer constant no larger than
   REGPARM_MAX.  */
1347 cst = TREE_VALUE (args);
1348 if (TREE_CODE (cst) != INTEGER_CST)
1350 warning ("`%s' attribute requires an integer constant argument",
1351 IDENTIFIER_POINTER (name));
1352 *no_add_attrs = true;
1354 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1356 warning ("argument to `%s' attribute larger than %d",
1357 IDENTIFIER_POINTER (name), REGPARM_MAX);
1358 *no_add_attrs = true;
1365 /* Return 0 if the attributes for two types are incompatible, 1 if they
1366 are compatible, and 2 if they are nearly compatible (which causes a
1367 warning to be generated). */
1370 ix86_comp_type_attributes (type1, type2)
1374 /* Check for mismatch of non-default calling convention. */
/* Under -mrtd the default convention is stdcall, so "cdecl" is the
   attribute that marks a deviation, and vice versa.  */
1375 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1377 if (TREE_CODE (type1) != FUNCTION_TYPE)
1380 /* Check for mismatched return types (cdecl vs stdcall). */
/* The `!'s normalize lookup results to booleans before comparing, so
   this tests presence-of-attribute inequality (an XOR).  */
1381 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1382 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1387 /* Value is the number of bytes of arguments automatically
1388 popped when returning from a subroutine call.
1389 FUNDECL is the declaration node of the function (as a tree),
1390 FUNTYPE is the data type of the function (as a tree),
1391 or for a library call it is an identifier node for the subroutine name.
1392 SIZE is the number of bytes of arguments passed on the stack.
1394 On the 80386, the RTD insn may be used to pop them if the number
1395 of args is fixed, but if the number is variable then the caller
1396 must pop them all. RTD can't be used for library calls now
1397 because the library is compiled with the Unix compiler.
1398 Use of RTD is a selectable option, since it is incompatible with
1399 standard Unix calling sequences. If the option is not selected,
1400 the caller must always pop the args.
1402 The attribute stdcall is equivalent to RTD on a per module basis. */
1405 ix86_return_pops_args (fundecl, funtype, size)
/* -mrtd applies only to real functions, not library-call identifiers.  */
1410 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1412 /* Cdecl functions override -mrtd, and never pop the stack. */
1413 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1415 /* Stdcall functions will pop the stack if not variable args. */
1416 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
/* Pop only when the argument list is fixed (ends in void).  */
1420 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1421 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1422 == void_type_node)))
1426 /* Lose any fake structure return argument if it is passed on the stack. */
1427 if (aggregate_value_p (TREE_TYPE (funtype))
1430 int nregs = ix86_regparm;
/* A regparm attribute on the function overrides the global value.  */
1434 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));
1437 nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
/* Pop the hidden struct-return pointer (one word).  */
1441 return GET_MODE_SIZE (Pmode);
1447 /* Argument support functions. */
1449 /* Return true when register may be used to pass function parameters. */
1451 ix86_function_arg_regno_p (regno)
/* 32-bit path: low registers up to REGPARM_MAX, plus SSE registers
   when SSE is enabled and the register is not fixed.  */
1456 return (regno < REGPARM_MAX
1457 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
/* 64-bit path below.  */
1458 if (SSE_REGNO_P (regno) && TARGET_SSE)
1460 /* RAX is used as hidden argument to va_arg functions. */
1463 for (i = 0; i < REGPARM_MAX; i++)
1464 if (regno == x86_64_int_parameter_registers[i])
1469 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1470 for a call to a function whose data type is FNTYPE.
1471 For a library call, FNTYPE is 0. */
1474 init_cumulative_args (cum, fntype, libname)
1475 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1476 tree fntype; /* tree ptr for function decl */
1477 rtx libname; /* SYMBOL_REF of library name or 0 */
/* zero_cum is used to zero *CUM (static, so all-bits-zero).  */
1479 static CUMULATIVE_ARGS zero_cum;
1480 tree param, next_param;
1482 if (TARGET_DEBUG_ARG)
1484 fprintf (stderr, "\ninit_cumulative_args (");
1486 fprintf (stderr, "fntype code = %s, ret code = %s",
1487 tree_code_name[(int) TREE_CODE (fntype)],
1488 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1490 fprintf (stderr, "no fntype");
1493 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1498 /* Set up the number of registers to use for passing arguments. */
1499 cum->nregs = ix86_regparm;
1500 cum->sse_nregs = SSE_REGPARM_MAX;
/* A regparm attribute on the called type overrides the default
   (32-bit only).  */
1501 if (fntype && !TARGET_64BIT)
1503 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1506 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1508 cum->maybe_vaarg = false;
1510 /* Determine if this function has variable arguments. This is
1511 indicated by the last argument being 'void_type_mode' if there
1512 are no variable arguments. If there are variable arguments, then
1513 we won't pass anything in registers */
1517 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1518 param != 0; param = next_param)
1520 next_param = TREE_CHAIN (param);
1521 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1525 cum->maybe_vaarg = true;
/* No prototype information at all also means "maybe varargs".  */
1529 if ((!fntype && !libname)
1530 || (fntype && !TYPE_ARG_TYPES (fntype)))
1531 cum->maybe_vaarg = 1;
1533 if (TARGET_DEBUG_ARG)
1534 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1539 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1540 of this code is to classify each 8bytes of incoming argument by the register
1541 class and assign registers accordingly. */
1543 /* Return the union class of CLASS1 and CLASS2.
1544 See the x86-64 PS ABI for details. */
1546 static enum x86_64_reg_class
1547 merge_classes (class1, class2)
1548 enum x86_64_reg_class class1, class2;
1550 /* Rule #1: If both classes are equal, this is the resulting class. */
1551 if (class1 == class2)
1554 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1556 if (class1 == X86_64_NO_CLASS)
1558 if (class2 == X86_64_NO_CLASS)
1561 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1562 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1563 return X86_64_MEMORY_CLASS;
1565 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI + SSESF stays INTEGERSI (both halves fit in 32 bits).  */
1566 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1567 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1568 return X86_64_INTEGERSI_CLASS;
1569 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1570 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1571 return X86_64_INTEGER_CLASS;
1573 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1574 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1575 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1576 return X86_64_MEMORY_CLASS;
1578 /* Rule #6: Otherwise class SSE is used. */
1579 return X86_64_SSE_CLASS;
1582 /* Classify the argument of type TYPE and mode MODE.
1583 CLASSES will be filled by the register class used to pass each word
1584 of the operand. The number of words is returned. In case the parameter
1585 should be passed in memory, 0 is returned. As a special case for zero
1586 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1588 BIT_OFFSET is used internally for handling records and specifies offset
1589 of the offset in bits modulo 256 to avoid overflow cases.
1591 See the x86-64 PS ABI for details.
/* Recursive worker implementing the psABI classification algorithm:
   aggregates are classified field-by-field (merging with
   merge_classes), scalars by machine mode.  NOTE(review): many
   control-flow lines (braces, else arms, switch labels) are not
   visible in this chunk.  */
1595 classify_argument (mode, type, classes, bit_offset)
1596 enum machine_mode mode;
1598 enum x86_64_reg_class classes[MAX_CLASSES];
1602 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1603 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1605 /* Variable sized entities are always passed/returned in memory. */
1609 if (type && AGGREGATE_TYPE_P (type))
1613 enum x86_64_reg_class subclasses[MAX_CLASSES];
1615 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1619 for (i = 0; i < words; i++)
1620 classes[i] = X86_64_NO_CLASS;
1622 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1623 signalize memory class, so handle it as special case. */
1626 classes[0] = X86_64_NO_CLASS;
1630 /* Classify each field of record and merge classes. */
1631 if (TREE_CODE (type) == RECORD_TYPE)
1633 /* For classes first merge in the field of the subclasses. */
1634 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1636 tree bases = TYPE_BINFO_BASETYPES (type);
1637 int n_bases = TREE_VEC_LENGTH (bases);
1640 for (i = 0; i < n_bases; ++i)
1642 tree binfo = TREE_VEC_ELT (bases, i);
1644 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1645 tree type = BINFO_TYPE (binfo);
1647 num = classify_argument (TYPE_MODE (type),
1649 (offset + bit_offset) % 256);
1652 for (i = 0; i < num; i++)
1654 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1656 merge_classes (subclasses[i], classes[i + pos]);
1660 /* And now merge the fields of structure. */
1661 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1663 if (TREE_CODE (field) == FIELD_DECL)
1667 /* Bitfields are always classified as integer. Handle them
1668 early, since later code would consider them to be
1669 misaligned integers. */
1670 if (DECL_BIT_FIELD (field))
1672 for (i = int_bit_position (field) / 8 / 8;
1673 i < (int_bit_position (field)
1674 + tree_low_cst (DECL_SIZE (field), 0)
1677 merge_classes (X86_64_INTEGER_CLASS,
1682 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1683 TREE_TYPE (field), subclasses,
1684 (int_bit_position (field)
1685 + bit_offset) % 256);
1688 for (i = 0; i < num; i++)
1691 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1693 merge_classes (subclasses[i], classes[i + pos]);
1699 /* Arrays are handled as small records. */
1700 else if (TREE_CODE (type) == ARRAY_TYPE)
1703 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1704 TREE_TYPE (type), subclasses, bit_offset);
1708 /* The partial classes are now full classes. */
1709 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1710 subclasses[0] = X86_64_SSE_CLASS;
1711 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1712 subclasses[0] = X86_64_INTEGER_CLASS;
/* Replicate the element classification across all words.  */
1714 for (i = 0; i < words; i++)
1715 classes[i] = subclasses[i % num];
1717 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1718 else if (TREE_CODE (type) == UNION_TYPE
1719 || TREE_CODE (type) == QUAL_UNION_TYPE)
1721 /* For classes first merge in the field of the subclasses. */
1722 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1724 tree bases = TYPE_BINFO_BASETYPES (type);
1725 int n_bases = TREE_VEC_LENGTH (bases);
1728 for (i = 0; i < n_bases; ++i)
1730 tree binfo = TREE_VEC_ELT (bases, i);
1732 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1733 tree type = BINFO_TYPE (binfo);
1735 num = classify_argument (TYPE_MODE (type),
1737 (offset + (bit_offset % 64)) % 256);
1740 for (i = 0; i < num; i++)
1742 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1744 merge_classes (subclasses[i], classes[i + pos]);
1748 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1750 if (TREE_CODE (field) == FIELD_DECL)
1753 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1754 TREE_TYPE (field), subclasses,
1758 for (i = 0; i < num; i++)
1759 classes[i] = merge_classes (subclasses[i], classes[i]);
1766 /* Final merger cleanup. */
1767 for (i = 0; i < words; i++)
1769 /* If one class is MEMORY, everything should be passed in
1771 if (classes[i] == X86_64_MEMORY_CLASS)
1774 /* The X86_64_SSEUP_CLASS should be always preceded by
1775 X86_64_SSE_CLASS. */
1776 if (classes[i] == X86_64_SSEUP_CLASS
1777 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1778 classes[i] = X86_64_SSE_CLASS;
1780 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1781 if (classes[i] == X86_64_X87UP_CLASS
1782 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1783 classes[i] = X86_64_SSE_CLASS;
1788 /* Compute alignment needed. We align all types to natural boundaries with
1789 exception of XFmode that is aligned to 64bits. */
1790 if (mode != VOIDmode && mode != BLKmode)
1792 int mode_alignment = GET_MODE_BITSIZE (mode);
1795 mode_alignment = 128;
1796 else if (mode == XCmode)
1797 mode_alignment = 256;
1798 /* Misaligned fields are always returned in memory. */
1799 if (bit_offset % mode_alignment)
1803 /* Classification of atomic types. */
/* Scalar integers: INTEGERSI when the value fits entirely within the
   low 32 bits of an eightbyte, INTEGER otherwise.  */
1813 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1814 classes[0] = X86_64_INTEGERSI_CLASS;
1816 classes[0] = X86_64_INTEGER_CLASS;
1820 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1823 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1824 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1827 if (!(bit_offset % 64))
1828 classes[0] = X86_64_SSESF_CLASS;
1830 classes[0] = X86_64_SSE_CLASS;
1833 classes[0] = X86_64_SSEDF_CLASS;
/* 80-bit extended floats occupy an X87/X87UP eightbyte pair.  */
1836 classes[0] = X86_64_X87_CLASS;
1837 classes[1] = X86_64_X87UP_CLASS;
1840 classes[0] = X86_64_X87_CLASS;
1841 classes[1] = X86_64_X87UP_CLASS;
1842 classes[2] = X86_64_X87_CLASS;
1843 classes[3] = X86_64_X87UP_CLASS;
1846 classes[0] = X86_64_SSEDF_CLASS;
1847 classes[1] = X86_64_SSEDF_CLASS;
1850 classes[0] = X86_64_SSE_CLASS;
/* 128-bit vectors take an SSE/SSEUP eightbyte pair.  */
1858 classes[0] = X86_64_SSE_CLASS;
1859 classes[1] = X86_64_SSEUP_CLASS;
1865 classes[0] = X86_64_SSE_CLASS;
1875 /* Examine the argument and return set number of register required in each
1876 class. Return 0 iff parameter should be passed in memory. */
/* Classifies via classify_argument and tallies how many integer
   (*INT_NREGS) and SSE (*SSE_NREGS) registers the argument needs.  */
1878 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1879 enum machine_mode mode;
1881 int *int_nregs, *sse_nregs;
1884 enum x86_64_reg_class class[MAX_CLASSES];
1885 int n = classify_argument (mode, type, class, 0);
/* Walk the classes from last to first, counting register demand.  */
1891 for (n--; n >= 0; n--)
1894 case X86_64_INTEGER_CLASS:
1895 case X86_64_INTEGERSI_CLASS:
1898 case X86_64_SSE_CLASS:
1899 case X86_64_SSESF_CLASS:
1900 case X86_64_SSEDF_CLASS:
/* NO_CLASS and SSEUP consume no additional register.  */
1903 case X86_64_NO_CLASS:
1904 case X86_64_SSEUP_CLASS:
1906 case X86_64_X87_CLASS:
1907 case X86_64_X87UP_CLASS:
1911 case X86_64_MEMORY_CLASS:
1916 /* Construct container for the argument used by GCC interface. See
1917 FUNCTION_ARG for the detailed description. */
/* Builds the rtx (single REG or PARALLEL of EXPR_LISTs) describing
   where an argument classified by classify_argument lives; INTREG
   points at the integer register numbers to use, SSE_REGNO is the
   first SSE register index.  */
1919 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1920 enum machine_mode mode;
1923 int nintregs, nsseregs;
1927 enum machine_mode tmpmode;
1929 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1930 enum x86_64_reg_class class[MAX_CLASSES];
1934 int needed_sseregs, needed_intregs;
1935 rtx exp[MAX_CLASSES];
1938 n = classify_argument (mode, type, class, 0);
1939 if (TARGET_DEBUG_ARG)
1942 fprintf (stderr, "Memory class\n");
1945 fprintf (stderr, "Classes:");
1946 for (i = 0; i < n; i++)
1948 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1950 fprintf (stderr, "\n");
/* Fall back to memory when classification fails or when not enough
   registers remain.  */
1955 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1957 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1960 /* First construct simple cases. Avoid SCmode, since we want to use
1961 single register to pass this type. */
1962 if (n == 1 && mode != SCmode)
1965 case X86_64_INTEGER_CLASS:
1966 case X86_64_INTEGERSI_CLASS:
1967 return gen_rtx_REG (mode, intreg[0]);
1968 case X86_64_SSE_CLASS:
1969 case X86_64_SSESF_CLASS:
1970 case X86_64_SSEDF_CLASS:
1971 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1972 case X86_64_X87_CLASS:
1973 return gen_rtx_REG (mode, FIRST_STACK_REG);
1974 case X86_64_NO_CLASS:
1975 /* Zero sized array, struct or class. */
/* Two-word special cases that still fit a single hard register.  */
1980 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1981 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1983 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1984 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1985 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1986 && class[1] == X86_64_INTEGER_CLASS
1987 && (mode == CDImode || mode == TImode)
1988 && intreg[0] + 1 == intreg[1])
1989 return gen_rtx_REG (mode, intreg[0]);
1991 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1992 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1993 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1995 /* Otherwise figure out the entries of the PARALLEL. */
1996 for (i = 0; i < n; i++)
2000 case X86_64_NO_CLASS:
2002 case X86_64_INTEGER_CLASS:
2003 case X86_64_INTEGERSI_CLASS:
2004 /* Merge TImodes on aligned occasions here too. */
2005 if (i * 8 + 8 > bytes)
2006 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2007 else if (class[i] == X86_64_INTEGERSI_CLASS)
2011 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2012 if (tmpmode == BLKmode)
2014 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2015 gen_rtx_REG (tmpmode, *intreg),
2019 case X86_64_SSESF_CLASS:
2020 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2021 gen_rtx_REG (SFmode,
2022 SSE_REGNO (sse_regno)),
2026 case X86_64_SSEDF_CLASS:
2027 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2028 gen_rtx_REG (DFmode,
2029 SSE_REGNO (sse_regno)),
2033 case X86_64_SSE_CLASS:
/* NOTE(review): the guard `i < n' permits reading class[i + 1] when
   i == n - 1; the array has MAX_CLASSES entries but element n may be
   uninitialized for non-aggregate arguments — confirm whether the
   guard should be `i < n - 1'.  */
2034 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
2035 tmpmode = TImode, i++;
2038 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2039 gen_rtx_REG (tmpmode,
2040 SSE_REGNO (sse_regno)),
/* Wrap the collected EXPR_LISTs in a PARALLEL.  */
2048 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2049 for (i = 0; i < nexps; i++)
2050 XVECEXP (ret, 0, i) = exp [i];
2054 /* Update the data in CUM to advance over an argument
2055 of mode MODE and data type TYPE.
2056 (TYPE is null for libcalls where that information may not be available.) */
2059 function_arg_advance (cum, mode, type, named)
2060 CUMULATIVE_ARGS *cum; /* current arg information */
2061 enum machine_mode mode; /* current arg mode */
2062 tree type; /* type of the argument or 0 if lib support */
2063 int named; /* whether or not the argument was named */
2066 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2067 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2069 if (TARGET_DEBUG_ARG)
2071 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2072 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
/* 64-bit path: consume registers per the psABI classification, or
   fall back to stack words when the argument goes in memory.  */
2075 int int_nregs, sse_nregs;
2076 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2077 cum->words += words;
2078 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2080 cum->nregs -= int_nregs;
2081 cum->sse_nregs -= sse_nregs;
2082 cum->regno += int_nregs;
2083 cum->sse_regno += sse_nregs;
2086 cum->words += words;
/* 32-bit path: TImode vectors consume an SSE register ...  */
2090 if (TARGET_SSE && mode == TImode)
2092 cum->sse_words += words;
2093 cum->sse_nregs -= 1;
2094 cum->sse_regno += 1;
2095 if (cum->sse_nregs <= 0)
/* ... everything else consumes integer registers word by word.  */
2103 cum->words += words;
2104 cum->nregs -= words;
2105 cum->regno += words;
2107 if (cum->nregs <= 0)
2117 /* Define where to put the arguments to a function.
2118 Value is zero to push the argument on the stack,
2119 or a hard register in which to store the argument.
2121 MODE is the argument's machine mode.
2122 TYPE is the data type of the argument (as a tree).
2123 This is null for libcalls where that information may
2125 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2126 the preceding args and about the function being called.
2127 NAMED is nonzero if this argument is a named parameter
2128 (otherwise it is an extra parameter matching an ellipsis). */
2131 function_arg (cum, mode, type, named)
2132 CUMULATIVE_ARGS *cum; /* current arg information */
2133 enum machine_mode mode; /* current arg mode */
2134 tree type; /* type of the argument or 0 if lib support */
2135 int named; /* != 0 for normal args, == 0 for ... args */
2139 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2140 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2142 /* Handle an hidden AL argument containing number of registers for varargs
2143 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2145 if (mode == VOIDmode)
2148 return GEN_INT (cum->maybe_vaarg
2149 ? (cum->sse_nregs < 0
2157 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2158 &x86_64_int_parameter_registers [cum->regno],
2163 /* For now, pass fp/complex values on the stack. */
2172 if (words <= cum->nregs)
2173 ret = gen_rtx_REG (mode, cum->regno);
2177 ret = gen_rtx_REG (mode, cum->sse_regno);
2181 if (TARGET_DEBUG_ARG)
2184 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2185 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2188 print_simple_rtl (stderr, ret);
2190 fprintf (stderr, ", stack");
2192 fprintf (stderr, " )\n");
2198 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2202 ix86_function_arg_boundary (mode, type)
2203 enum machine_mode mode;
2208 return PARM_BOUNDARY;
2210 align = TYPE_ALIGN (type);
2212 align = GET_MODE_ALIGNMENT (mode);
2213 if (align < PARM_BOUNDARY)
2214 align = PARM_BOUNDARY;
2220 /* Return true if N is a possible register number of function value. */
2222 ix86_function_value_regno_p (regno)
2227 return ((regno) == 0
2228 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2229 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2231 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2232 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2233 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2236 /* Define how to find the value returned by a function.
2237 VALTYPE is the data type of the value (as a tree).
2238 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2239 otherwise, FUNC is 0. */
2241 ix86_function_value (valtype)
2246 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2247 REGPARM_MAX, SSE_REGPARM_MAX,
2248 x86_64_int_return_registers, 0);
2249 /* For zero sized structures, construct_continer return NULL, but we need
2250 to keep rest of compiler happy by returning meaningfull value. */
2252 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2256 return gen_rtx_REG (TYPE_MODE (valtype),
2257 ix86_value_regno (TYPE_MODE (valtype)));
2260 /* Return false iff type is returned in memory. */
2262 ix86_return_in_memory (type)
2265 int needed_intregs, needed_sseregs;
2268 return !examine_argument (TYPE_MODE (type), type, 1,
2269 &needed_intregs, &needed_sseregs);
2273 if (TYPE_MODE (type) == BLKmode
2274 || (VECTOR_MODE_P (TYPE_MODE (type))
2275 && int_size_in_bytes (type) == 8)
2276 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2277 && TYPE_MODE (type) != TFmode
2278 && !VECTOR_MODE_P (TYPE_MODE (type))))
2284 /* Define how to find the value returned by a library function
2285 assuming the value has mode MODE. */
2287 ix86_libcall_value (mode)
2288 enum machine_mode mode;
2298 return gen_rtx_REG (mode, FIRST_SSE_REG);
2301 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2303 return gen_rtx_REG (mode, 0);
2307 return gen_rtx_REG (mode, ix86_value_regno (mode));
2310 /* Given a mode, return the register to use for a return value. */
2313 ix86_value_regno (mode)
2314 enum machine_mode mode;
2316 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2317 return FIRST_FLOAT_REG;
2318 if (mode == TImode || VECTOR_MODE_P (mode))
2319 return FIRST_SSE_REG;
2323 /* Create the va_list data type. */
2326 ix86_build_va_list ()
2328 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2330 /* For i386 we use plain pointer to argument area. */
2332 return build_pointer_type (char_type_node);
2334 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2335 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2337 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2338 unsigned_type_node);
2339 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2340 unsigned_type_node);
2341 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2343 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2346 DECL_FIELD_CONTEXT (f_gpr) = record;
2347 DECL_FIELD_CONTEXT (f_fpr) = record;
2348 DECL_FIELD_CONTEXT (f_ovf) = record;
2349 DECL_FIELD_CONTEXT (f_sav) = record;
2351 TREE_CHAIN (record) = type_decl;
2352 TYPE_NAME (record) = type_decl;
2353 TYPE_FIELDS (record) = f_gpr;
2354 TREE_CHAIN (f_gpr) = f_fpr;
2355 TREE_CHAIN (f_fpr) = f_ovf;
2356 TREE_CHAIN (f_ovf) = f_sav;
2358 layout_type (record);
2360 /* The correct type is an array type of one element. */
2361 return build_array_type (record, build_index_type (size_zero_node));
2364 /* Perform any needed actions needed for a function that is receiving a
2365 variable number of arguments.
2369 MODE and TYPE are the mode and type of the current parameter.
2371 PRETEND_SIZE is a variable that should be set to the amount of stack
2372 that must be pushed by the prolog to pretend that our caller pushed
2375 Normally, this macro will push all remaining incoming registers on the
2376 stack and set PRETEND_SIZE to the length of the registers pushed. */
2379 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2380 CUMULATIVE_ARGS *cum;
2381 enum machine_mode mode;
2383 int *pretend_size ATTRIBUTE_UNUSED;
2387 CUMULATIVE_ARGS next_cum;
2388 rtx save_area = NULL_RTX, mem;
2401 /* Indicate to allocate space on the stack for varargs save area. */
2402 ix86_save_varrargs_registers = 1;
2404 fntype = TREE_TYPE (current_function_decl);
2405 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2406 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2407 != void_type_node));
2409 /* For varargs, we do not want to skip the dummy va_dcl argument.
2410 For stdargs, we do want to skip the last named argument. */
2413 function_arg_advance (&next_cum, mode, type, 1);
2416 save_area = frame_pointer_rtx;
2418 set = get_varargs_alias_set ();
2420 for (i = next_cum.regno; i < ix86_regparm; i++)
2422 mem = gen_rtx_MEM (Pmode,
2423 plus_constant (save_area, i * UNITS_PER_WORD));
2424 set_mem_alias_set (mem, set);
2425 emit_move_insn (mem, gen_rtx_REG (Pmode,
2426 x86_64_int_parameter_registers[i]));
2429 if (next_cum.sse_nregs)
2431 /* Now emit code to save SSE registers. The AX parameter contains number
2432 of SSE parameter regsiters used to call this function. We use
2433 sse_prologue_save insn template that produces computed jump across
2434 SSE saves. We need some preparation work to get this working. */
2436 label = gen_label_rtx ();
2437 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2439 /* Compute address to jump to :
2440 label - 5*eax + nnamed_sse_arguments*5 */
2441 tmp_reg = gen_reg_rtx (Pmode);
2442 nsse_reg = gen_reg_rtx (Pmode);
2443 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2444 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2445 gen_rtx_MULT (Pmode, nsse_reg,
2447 if (next_cum.sse_regno)
2450 gen_rtx_CONST (DImode,
2451 gen_rtx_PLUS (DImode,
2453 GEN_INT (next_cum.sse_regno * 4))));
2455 emit_move_insn (nsse_reg, label_ref);
2456 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2458 /* Compute address of memory block we save into. We always use pointer
2459 pointing 127 bytes after first byte to store - this is needed to keep
2460 instruction size limited by 4 bytes. */
2461 tmp_reg = gen_reg_rtx (Pmode);
2462 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2463 plus_constant (save_area,
2464 8 * REGPARM_MAX + 127)));
2465 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2466 set_mem_alias_set (mem, set);
2467 set_mem_align (mem, BITS_PER_WORD);
2469 /* And finally do the dirty job! */
2470 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2471 GEN_INT (next_cum.sse_regno), label));
2476 /* Implement va_start. */
2479 ix86_va_start (valist, nextarg)
2483 HOST_WIDE_INT words, n_gpr, n_fpr;
2484 tree f_gpr, f_fpr, f_ovf, f_sav;
2485 tree gpr, fpr, ovf, sav, t;
2487 /* Only 64bit target needs something special. */
2490 std_expand_builtin_va_start (valist, nextarg);
2494 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2495 f_fpr = TREE_CHAIN (f_gpr);
2496 f_ovf = TREE_CHAIN (f_fpr);
2497 f_sav = TREE_CHAIN (f_ovf);
2499 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2500 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2501 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2502 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2503 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2505 /* Count number of gp and fp argument registers used. */
2506 words = current_function_args_info.words;
2507 n_gpr = current_function_args_info.regno;
2508 n_fpr = current_function_args_info.sse_regno;
2510 if (TARGET_DEBUG_ARG)
2511 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2512 (int) words, (int) n_gpr, (int) n_fpr);
2514 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2515 build_int_2 (n_gpr * 8, 0));
2516 TREE_SIDE_EFFECTS (t) = 1;
2517 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2519 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2520 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2521 TREE_SIDE_EFFECTS (t) = 1;
2522 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2524 /* Find the overflow area. */
2525 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2527 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2528 build_int_2 (words * UNITS_PER_WORD, 0));
2529 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2530 TREE_SIDE_EFFECTS (t) = 1;
2531 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2533 /* Find the register save area.
2534 Prologue of the function save it right above stack frame. */
2535 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2536 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2537 TREE_SIDE_EFFECTS (t) = 1;
2538 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2541 /* Implement va_arg. */
2543 ix86_va_arg (valist, type)
2546 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2547 tree f_gpr, f_fpr, f_ovf, f_sav;
2548 tree gpr, fpr, ovf, sav, t;
2550 rtx lab_false, lab_over = NULL_RTX;
2554 /* Only 64bit target needs something special. */
2557 return std_expand_builtin_va_arg (valist, type);
2560 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2561 f_fpr = TREE_CHAIN (f_gpr);
2562 f_ovf = TREE_CHAIN (f_fpr);
2563 f_sav = TREE_CHAIN (f_ovf);
2565 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2566 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2567 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2568 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2569 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2571 size = int_size_in_bytes (type);
2572 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2574 container = construct_container (TYPE_MODE (type), type, 0,
2575 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2577 * Pull the value out of the saved registers ...
2580 addr_rtx = gen_reg_rtx (Pmode);
2584 rtx int_addr_rtx, sse_addr_rtx;
2585 int needed_intregs, needed_sseregs;
2588 lab_over = gen_label_rtx ();
2589 lab_false = gen_label_rtx ();
2591 examine_argument (TYPE_MODE (type), type, 0,
2592 &needed_intregs, &needed_sseregs);
2595 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2596 || TYPE_ALIGN (type) > 128);
2598 /* In case we are passing structure, verify that it is consetuctive block
2599 on the register save area. If not we need to do moves. */
2600 if (!need_temp && !REG_P (container))
2602 /* Verify that all registers are strictly consetuctive */
2603 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2607 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2609 rtx slot = XVECEXP (container, 0, i);
2610 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2611 || INTVAL (XEXP (slot, 1)) != i * 16)
2619 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2621 rtx slot = XVECEXP (container, 0, i);
2622 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2623 || INTVAL (XEXP (slot, 1)) != i * 8)
2630 int_addr_rtx = addr_rtx;
2631 sse_addr_rtx = addr_rtx;
2635 int_addr_rtx = gen_reg_rtx (Pmode);
2636 sse_addr_rtx = gen_reg_rtx (Pmode);
2638 /* First ensure that we fit completely in registers. */
2641 emit_cmp_and_jump_insns (expand_expr
2642 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2643 GEN_INT ((REGPARM_MAX - needed_intregs +
2644 1) * 8), GE, const1_rtx, SImode,
2649 emit_cmp_and_jump_insns (expand_expr
2650 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2651 GEN_INT ((SSE_REGPARM_MAX -
2652 needed_sseregs + 1) * 16 +
2653 REGPARM_MAX * 8), GE, const1_rtx,
2654 SImode, 1, lab_false);
2657 /* Compute index to start of area used for integer regs. */
2660 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2661 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2662 if (r != int_addr_rtx)
2663 emit_move_insn (int_addr_rtx, r);
2667 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2668 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2669 if (r != sse_addr_rtx)
2670 emit_move_insn (sse_addr_rtx, r);
2677 /* Never use the memory itself, as it has the alias set. */
2678 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2679 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2680 set_mem_alias_set (mem, get_varargs_alias_set ());
2681 set_mem_align (mem, BITS_PER_UNIT);
2683 for (i = 0; i < XVECLEN (container, 0); i++)
2685 rtx slot = XVECEXP (container, 0, i);
2686 rtx reg = XEXP (slot, 0);
2687 enum machine_mode mode = GET_MODE (reg);
2693 if (SSE_REGNO_P (REGNO (reg)))
2695 src_addr = sse_addr_rtx;
2696 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2700 src_addr = int_addr_rtx;
2701 src_offset = REGNO (reg) * 8;
2703 src_mem = gen_rtx_MEM (mode, src_addr);
2704 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2705 src_mem = adjust_address (src_mem, mode, src_offset);
2706 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2707 emit_move_insn (dest_mem, src_mem);
2714 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2715 build_int_2 (needed_intregs * 8, 0));
2716 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2717 TREE_SIDE_EFFECTS (t) = 1;
2718 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2723 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2724 build_int_2 (needed_sseregs * 16, 0));
2725 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2726 TREE_SIDE_EFFECTS (t) = 1;
2727 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2730 emit_jump_insn (gen_jump (lab_over));
2732 emit_label (lab_false);
2735 /* ... otherwise out of the overflow area. */
2737 /* Care for on-stack alignment if needed. */
2738 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2742 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2743 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2744 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2748 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2750 emit_move_insn (addr_rtx, r);
2753 build (PLUS_EXPR, TREE_TYPE (t), t,
2754 build_int_2 (rsize * UNITS_PER_WORD, 0));
2755 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2756 TREE_SIDE_EFFECTS (t) = 1;
2757 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2760 emit_label (lab_over);
2765 /* Return nonzero if OP is either a i387 or SSE fp register. */
2767 any_fp_register_operand (op, mode)
2769 enum machine_mode mode ATTRIBUTE_UNUSED;
2771 return ANY_FP_REG_P (op);
2774 /* Return nonzero if OP is an i387 fp register. */
2776 fp_register_operand (op, mode)
2778 enum machine_mode mode ATTRIBUTE_UNUSED;
2780 return FP_REG_P (op);
2783 /* Return nonzero if OP is a non-fp register_operand. */
2785 register_and_not_any_fp_reg_operand (op, mode)
2787 enum machine_mode mode;
2789 return register_operand (op, mode) && !ANY_FP_REG_P (op);
2792 /* Return nonzero of OP is a register operand other than an
2793 i387 fp register. */
2795 register_and_not_fp_reg_operand (op, mode)
2797 enum machine_mode mode;
2799 return register_operand (op, mode) && !FP_REG_P (op);
2802 /* Return nonzero if OP is general operand representable on x86_64. */
2805 x86_64_general_operand (op, mode)
2807 enum machine_mode mode;
2810 return general_operand (op, mode);
2811 if (nonimmediate_operand (op, mode))
2813 return x86_64_sign_extended_value (op);
2816 /* Return nonzero if OP is general operand representable on x86_64
2817 as either sign extended or zero extended constant. */
2820 x86_64_szext_general_operand (op, mode)
2822 enum machine_mode mode;
2825 return general_operand (op, mode);
2826 if (nonimmediate_operand (op, mode))
2828 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2831 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2834 x86_64_nonmemory_operand (op, mode)
2836 enum machine_mode mode;
2839 return nonmemory_operand (op, mode);
2840 if (register_operand (op, mode))
2842 return x86_64_sign_extended_value (op);
2845 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2848 x86_64_movabs_operand (op, mode)
2850 enum machine_mode mode;
2852 if (!TARGET_64BIT || !flag_pic)
2853 return nonmemory_operand (op, mode);
2854 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2856 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2861 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2864 x86_64_szext_nonmemory_operand (op, mode)
2866 enum machine_mode mode;
2869 return nonmemory_operand (op, mode);
2870 if (register_operand (op, mode))
2872 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2875 /* Return nonzero if OP is immediate operand representable on x86_64. */
2878 x86_64_immediate_operand (op, mode)
2880 enum machine_mode mode;
2883 return immediate_operand (op, mode);
2884 return x86_64_sign_extended_value (op);
2887 /* Return nonzero if OP is immediate operand representable on x86_64. */
2890 x86_64_zext_immediate_operand (op, mode)
2892 enum machine_mode mode ATTRIBUTE_UNUSED;
2894 return x86_64_zero_extended_value (op);
2897 /* Return nonzero if OP is (const_int 1), else return zero. */
2900 const_int_1_operand (op, mode)
2902 enum machine_mode mode ATTRIBUTE_UNUSED;
2904 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2907 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2908 for shift & compare patterns, as shifting by 0 does not change flags),
2909 else return zero. */
2912 const_int_1_31_operand (op, mode)
2914 enum machine_mode mode ATTRIBUTE_UNUSED;
2916 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
2919 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2920 reference and a constant. */
2923 symbolic_operand (op, mode)
2925 enum machine_mode mode ATTRIBUTE_UNUSED;
2927 switch (GET_CODE (op))
2935 if (GET_CODE (op) == SYMBOL_REF
2936 || GET_CODE (op) == LABEL_REF
2937 || (GET_CODE (op) == UNSPEC
2938 && (XINT (op, 1) == UNSPEC_GOT
2939 || XINT (op, 1) == UNSPEC_GOTOFF
2940 || XINT (op, 1) == UNSPEC_GOTPCREL)))
2942 if (GET_CODE (op) != PLUS
2943 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2947 if (GET_CODE (op) == SYMBOL_REF
2948 || GET_CODE (op) == LABEL_REF)
2950 /* Only @GOTOFF gets offsets. */
2951 if (GET_CODE (op) != UNSPEC
2952 || XINT (op, 1) != UNSPEC_GOTOFF)
2955 op = XVECEXP (op, 0, 0);
2956 if (GET_CODE (op) == SYMBOL_REF
2957 || GET_CODE (op) == LABEL_REF)
2966 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2969 pic_symbolic_operand (op, mode)
2971 enum machine_mode mode ATTRIBUTE_UNUSED;
2973 if (GET_CODE (op) != CONST)
2978 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2983 if (GET_CODE (op) == UNSPEC)
2985 if (GET_CODE (op) != PLUS
2986 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2989 if (GET_CODE (op) == UNSPEC)
2995 /* Return true if OP is a symbolic operand that resolves locally. */
2998 local_symbolic_operand (op, mode)
3000 enum machine_mode mode ATTRIBUTE_UNUSED;
3002 if (GET_CODE (op) == LABEL_REF)
3005 if (GET_CODE (op) == CONST
3006 && GET_CODE (XEXP (op, 0)) == PLUS
3007 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3008 op = XEXP (XEXP (op, 0), 0);
3010 if (GET_CODE (op) != SYMBOL_REF)
3013 /* These we've been told are local by varasm and encode_section_info
3015 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3018 /* There is, however, a not insubstantial body of code in the rest of
3019 the compiler that assumes it can just stick the results of
3020 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3021 /* ??? This is a hack. Should update the body of the compiler to
3022 always create a DECL an invoke targetm.encode_section_info. */
3023 if (strncmp (XSTR (op, 0), internal_label_prefix,
3024 internal_label_prefix_len) == 0)
3030 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3033 tls_symbolic_operand (op, mode)
3035 enum machine_mode mode ATTRIBUTE_UNUSED;
3037 const char *symbol_str;
3039 if (GET_CODE (op) != SYMBOL_REF)
3041 symbol_str = XSTR (op, 0);
3043 if (symbol_str[0] != '%')
3045 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3049 tls_symbolic_operand_1 (op, kind)
3051 enum tls_model kind;
3053 const char *symbol_str;
3055 if (GET_CODE (op) != SYMBOL_REF)
3057 symbol_str = XSTR (op, 0);
3059 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3063 global_dynamic_symbolic_operand (op, mode)
3065 enum machine_mode mode ATTRIBUTE_UNUSED;
3067 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3071 local_dynamic_symbolic_operand (op, mode)
3073 enum machine_mode mode ATTRIBUTE_UNUSED;
3075 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3079 initial_exec_symbolic_operand (op, mode)
3081 enum machine_mode mode ATTRIBUTE_UNUSED;
3083 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3087 local_exec_symbolic_operand (op, mode)
3089 enum machine_mode mode ATTRIBUTE_UNUSED;
3091 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3094 /* Test for a valid operand for a call instruction. Don't allow the
3095 arg pointer register or virtual regs since they may decay into
3096 reg + const, which the patterns can't handle. */
3099 call_insn_operand (op, mode)
3101 enum machine_mode mode ATTRIBUTE_UNUSED;
3103 /* Disallow indirect through a virtual register. This leads to
3104 compiler aborts when trying to eliminate them. */
3105 if (GET_CODE (op) == REG
3106 && (op == arg_pointer_rtx
3107 || op == frame_pointer_rtx
3108 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3109 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3112 /* Disallow `call 1234'. Due to varying assembler lameness this
3113 gets either rejected or translated to `call .+1234'. */
3114 if (GET_CODE (op) == CONST_INT)
3117 /* Explicitly allow SYMBOL_REF even if pic. */
3118 if (GET_CODE (op) == SYMBOL_REF)
3121 /* Otherwise we can allow any general_operand in the address. */
3122 return general_operand (op, Pmode);
3126 constant_call_address_operand (op, mode)
3128 enum machine_mode mode ATTRIBUTE_UNUSED;
3130 if (GET_CODE (op) == CONST
3131 && GET_CODE (XEXP (op, 0)) == PLUS
3132 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3133 op = XEXP (XEXP (op, 0), 0);
3134 return GET_CODE (op) == SYMBOL_REF;
3137 /* Match exactly zero and one. */
3140 const0_operand (op, mode)
3142 enum machine_mode mode;
3144 return op == CONST0_RTX (mode);
3148 const1_operand (op, mode)
3150 enum machine_mode mode ATTRIBUTE_UNUSED;
3152 return op == const1_rtx;
3155 /* Match 2, 4, or 8. Used for leal multiplicands. */
3158 const248_operand (op, mode)
3160 enum machine_mode mode ATTRIBUTE_UNUSED;
3162 return (GET_CODE (op) == CONST_INT
3163 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3166 /* True if this is a constant appropriate for an increment or decrement. */
3169 incdec_operand (op, mode)
3171 enum machine_mode mode ATTRIBUTE_UNUSED;
3173 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3174 registers, since carry flag is not set. */
3175 if (TARGET_PENTIUM4 && !optimize_size)
3177 return op == const1_rtx || op == constm1_rtx;
3180 /* Return nonzero if OP is acceptable as operand of DImode shift
3184 shiftdi_operand (op, mode)
3186 enum machine_mode mode ATTRIBUTE_UNUSED;
3189 return nonimmediate_operand (op, mode);
3191 return register_operand (op, mode);
3194 /* Return false if this is the stack pointer, or any other fake
3195 register eliminable to the stack pointer. Otherwise, this is
3198 This is used to prevent esp from being used as an index reg.
3199 Which would only happen in pathological cases. */
3202 reg_no_sp_operand (op, mode)
3204 enum machine_mode mode;
3207 if (GET_CODE (t) == SUBREG)
3209 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3212 return register_operand (op, mode);
3216 mmx_reg_operand (op, mode)
3218 enum machine_mode mode ATTRIBUTE_UNUSED;
3220 return MMX_REG_P (op);
3223 /* Return false if this is any eliminable register. Otherwise
3227 general_no_elim_operand (op, mode)
3229 enum machine_mode mode;
3232 if (GET_CODE (t) == SUBREG)
3234 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3235 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3236 || t == virtual_stack_dynamic_rtx)
3239 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3240 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3243 return general_operand (op, mode);
3246 /* Return false if this is any eliminable register. Otherwise
3247 register_operand or const_int. */
3250 nonmemory_no_elim_operand (op, mode)
3252 enum machine_mode mode;
3255 if (GET_CODE (t) == SUBREG)
3257 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3258 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3259 || t == virtual_stack_dynamic_rtx)
3262 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3265 /* Return false if this is any eliminable register or stack register,
3266 otherwise work like register_operand. */
3269 index_register_operand (op, mode)
3271 enum machine_mode mode;
3274 if (GET_CODE (t) == SUBREG)
3278 if (t == arg_pointer_rtx
3279 || t == frame_pointer_rtx
3280 || t == virtual_incoming_args_rtx
3281 || t == virtual_stack_vars_rtx
3282 || t == virtual_stack_dynamic_rtx
3283 || REGNO (t) == STACK_POINTER_REGNUM)
3286 return general_operand (op, mode);
3289 /* Return true if op is a Q_REGS class register. */
3292 q_regs_operand (op, mode)
3294 enum machine_mode mode;
3296 if (mode != VOIDmode && GET_MODE (op) != mode)
3298 if (GET_CODE (op) == SUBREG)
3299 op = SUBREG_REG (op);
3300 return ANY_QI_REG_P (op);
3303 /* Return true if op is a NON_Q_REGS class register. */
3306 non_q_regs_operand (op, mode)
3308 enum machine_mode mode;
3310 if (mode != VOIDmode && GET_MODE (op) != mode)
3312 if (GET_CODE (op) == SUBREG)
3313 op = SUBREG_REG (op);
3314 return NON_QI_REG_P (op);
3317 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3320 sse_comparison_operator (op, mode)
3322 enum machine_mode mode ATTRIBUTE_UNUSED;
3324 enum rtx_code code = GET_CODE (op);
3327 /* Operations supported directly. */
3337 /* These are equivalent to ones above in non-IEEE comparisons. */
3344 return !TARGET_IEEE_FP;
3349 /* Return 1 if OP is a valid comparison operator in valid mode. */
3351 ix86_comparison_operator (op, mode)
3353 enum machine_mode mode;
3355 enum machine_mode inmode;
3356 enum rtx_code code = GET_CODE (op);
3357 if (mode != VOIDmode && GET_MODE (op) != mode)
3359 if (GET_RTX_CLASS (code) != '<')
3361 inmode = GET_MODE (XEXP (op, 0));
3363 if (inmode == CCFPmode || inmode == CCFPUmode)
3365 enum rtx_code second_code, bypass_code;
3366 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3367 return (bypass_code == NIL && second_code == NIL);
3374 if (inmode == CCmode || inmode == CCGCmode
3375 || inmode == CCGOCmode || inmode == CCNOmode)
3378 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3379 if (inmode == CCmode)
3383 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3391 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3394 fcmov_comparison_operator (op, mode)
3396 enum machine_mode mode;
3398 enum machine_mode inmode;
3399 enum rtx_code code = GET_CODE (op);
3400 if (mode != VOIDmode && GET_MODE (op) != mode)
3402 if (GET_RTX_CLASS (code) != '<')
3404 inmode = GET_MODE (XEXP (op, 0));
3405 if (inmode == CCFPmode || inmode == CCFPUmode)
3407 enum rtx_code second_code, bypass_code;
3408 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3409 if (bypass_code != NIL || second_code != NIL)
3411 code = ix86_fp_compare_code_to_integer (code);
3413 /* i387 supports just limited amount of conditional codes. */
3416 case LTU: case GTU: case LEU: case GEU:
3417 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3420 case ORDERED: case UNORDERED:
3428 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3431 promotable_binary_operator (op, mode)
3433 enum machine_mode mode ATTRIBUTE_UNUSED;
3435 switch (GET_CODE (op))
3438 /* Modern CPUs have same latency for HImode and SImode multiply,
3439 but 386 and 486 do HImode multiply faster. */
3440 return ix86_cpu > PROCESSOR_I486;
3452 /* Nearly general operand, but accept any const_double, since we wish
3453 to be able to drop them into memory rather than have them get pulled
3457 cmp_fp_expander_operand (op, mode)
3459 enum machine_mode mode;
3461 if (mode != VOIDmode && mode != GET_MODE (op))
3463 if (GET_CODE (op) == CONST_DOUBLE)
3465 return general_operand (op, mode);
3468 /* Match an SI or HImode register for a zero_extract. */
3471 ext_register_operand (op, mode)
3473 enum machine_mode mode ATTRIBUTE_UNUSED;
3476 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3477 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3480 if (!register_operand (op, VOIDmode))
3483 /* Be curefull to accept only registers having upper parts. */
3484 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3485 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3488 /* Return 1 if this is a valid binary floating-point operation.
3489 OP is the expression matched, and MODE is its mode. */
3492 binary_fp_operator (op, mode)
3494 enum machine_mode mode;
3496 if (mode != VOIDmode && mode != GET_MODE (op))
3499 switch (GET_CODE (op))
3505 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3513 mult_operator (op, mode)
3515 enum machine_mode mode ATTRIBUTE_UNUSED;
3517 return GET_CODE (op) == MULT;
/* Return 1 if OP is a DIV rtx; MODE is ignored.  */
3521 div_operator (op, mode)
3523 enum machine_mode mode ATTRIBUTE_UNUSED;
3525 return GET_CODE (op) == DIV;
/* Return 1 if OP is a commutative ('c') or other two-operand ('2')
   arithmetic/logical operator whose mode matches MODE (any mode if
   MODE is VOIDmode).  */
3529 arith_or_logical_operator (op, mode)
3531 enum machine_mode mode;
3533 return ((mode == VOIDmode || GET_MODE (op) == mode)
3534 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3535 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3538 /* Returns 1 if OP is memory operand with a displacement. */
3541 memory_displacement_operand (op, mode)
3543 enum machine_mode mode;
3545 struct ix86_address parts;
3547 if (! memory_operand (op, mode))
/* Decompose the address and test whether a displacement part exists. */
3550 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3553 return parts.disp != NULL_RTX;
3556 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3557 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3559 ??? It seems likely that this will only work because cmpsi is an
3560 expander, and no actual insns use this. */
3563 cmpsi_operand (op, mode)
3565 enum machine_mode mode;
3567 if (nonimmediate_operand (op, mode))
/* Additionally accept (and:SI (zero_extract ... 8 8) const_int), i.e. a
   test of the %ah/%bh/%ch/%dh style high byte against a constant.  */
3570 if (GET_CODE (op) == AND
3571 && GET_MODE (op) == SImode
3572 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3573 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3574 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3575 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3576 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3577 && GET_CODE (XEXP (op, 1)) == CONST_INT
3583 /* Returns 1 if OP is memory operand that can not be represented by the
/* (comment appears truncated: presumably "... by the modRM array",
   i.e. the address needs extra encoding bytes — TODO confirm).  */
3587 long_memory_operand (op, mode)
3589 enum machine_mode mode;
3591 if (! memory_operand (op, mode))
3594 return memory_address_length (op) != 0;
3597 /* Return nonzero if the rtx is known aligned. */
3600 aligned_operand (op, mode)
3602 enum machine_mode mode;
3604 struct ix86_address parts;
3606 if (!general_operand (op, mode))
3609 /* Registers and immediate operands are always "aligned". */
3610 if (GET_CODE (op) != MEM)
3613 /* Don't even try to do any aligned optimizations with volatiles. */
3614 if (MEM_VOLATILE_P (op))
3619 /* Pushes and pops are only valid on the stack pointer. */
3620 if (GET_CODE (op) == PRE_DEC
3621 || GET_CODE (op) == POST_INC)
3624 /* Decode the address. */
3625 if (! ix86_decompose_address (op, &parts))
3628 if (parts.base && GET_CODE (parts.base) == SUBREG)
3629 parts.base = SUBREG_REG (parts.base);
3630 if (parts.index && GET_CODE (parts.index) == SUBREG)
3631 parts.index = SUBREG_REG (parts.index);
3633 /* Look for some component that isn't known to be aligned.
   The 32 below is an alignment in bits, i.e. 4-byte alignment.  */
3637 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3642 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3647 if (GET_CODE (parts.disp) != CONST_INT
3648 || (INTVAL (parts.disp) & 3) != 0)
3652 /* Didn't find one -- this must be an aligned address. */
3656 /* Return true if the constant is something that can be loaded with
3657 a special instruction. Only handle 0.0 and 1.0; others are less
3661 standard_80387_constant_p (x)
3664 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3666 /* Note that the 80387 can load other constants too, such as pi,
3667 which we should perhaps support; on some machines these are much
3668 slower to load as standard constants than to load from doubles
   in memory. */
3669 if (x == CONST0_RTX (GET_MODE (x)))
3671 if (x == CONST1_RTX (GET_MODE (x)))
3676 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
   Only all-zero constants qualify (presumably loaded via xorps/pxor —
   TODO confirm against the SSE move patterns).  */
3679 standard_sse_constant_p (x)
3682 if (GET_CODE (x) != CONST_DOUBLE)
3684 return (x == CONST0_RTX (GET_MODE (x)));
3687 /* Returns 1 if OP contains a symbol reference.  Recurses through the
   rtx format string, scanning both 'e' (expression) and 'E' (vector)
   operands for SYMBOL_REF or LABEL_REF. */
3690 symbolic_reference_mentioned_p (op)
3693 register const char *fmt;
3696 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3699 fmt = GET_RTX_FORMAT (GET_CODE (op));
3700 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3706 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3707 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3711 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3718 /* Return 1 if it is appropriate to emit `ret' instructions in the
3719 body of a function. Do this only if the epilogue is simple, needing a
3720 couple of insns. Prior to reloading, we can't tell how many registers
3721 must be saved, so return 0 then. Return 0 if there is no frame
3722 marker to de-allocate.
3724 If NON_SAVING_SETJMP is defined and true, then it is not possible
3725 for the epilogue to be simple, so return 0. This is a special case
3726 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3727 until final, but jump_optimize may need to know sooner if a
3731 ix86_can_use_return_insn_p ()
3733 struct ix86_frame frame;
3735 #ifdef NON_SAVING_SETJMP
3736 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3740 if (! reload_completed || frame_pointer_needed)
3743 /* Don't allow more than 32 pop, since that's all we can do
3744 with one instruction.
   NOTE(review): the actual test below is args_size >= 32768; `ret $imm16'
   can pop up to 64K, so this limit looks conservative — confirm intent.  */
3745 if (current_function_pops_args
3746 && current_function_args_size >= 32768)
3749 ix86_compute_frame_layout (&frame);
3750 return frame.to_allocate == 0 && frame.nregs == 0;
3753 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3755 x86_64_sign_extended_value (value)
3758 switch (GET_CODE (value))
3760 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3761 to be at least 32 and thus all acceptable constants are
3762 represented as CONST_INT. */
3764 if (HOST_BITS_PER_WIDE_INT == 32)
3768 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3769 return trunc_int_for_mode (val, SImode) == val;
3773 /* For certain code models, the symbolic references are known to fit. */
3775 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3777 /* For certain code models, the code is near as well. */
3779 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3781 /* We also may accept the offsetted memory references in certain special
3784 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3785 && XINT (XEXP (value, 0), 1) == UNSPEC_GOTPCREL)
3787 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3789 rtx op1 = XEXP (XEXP (value, 0), 0);
3790 rtx op2 = XEXP (XEXP (value, 0), 1);
3791 HOST_WIDE_INT offset;
3793 if (ix86_cmodel == CM_LARGE)
3795 if (GET_CODE (op2) != CONST_INT)
3797 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3798 switch (GET_CODE (op1))
3801 /* For CM_SMALL assume that latest object is 1MB before
3802 end of 31bits boundary. We may also accept pretty
3803 large negative constants knowing that all objects are
3804 in the positive half of address space. */
3805 if (ix86_cmodel == CM_SMALL
3806 && offset < 1024*1024*1024
3807 && trunc_int_for_mode (offset, SImode) == offset)
3809 /* For CM_KERNEL we know that all objects reside in the
3810 negative half of 32bits address space. We may not
3811 accept negative offsets, since they may be just off
3812 and we may accept pretty large positive ones. */
3813 if (ix86_cmodel == CM_KERNEL
3815 && trunc_int_for_mode (offset, SImode) == offset)
3819 /* These conditions are similar to SYMBOL_REF ones, just the
3820 constraints for code models differ. */
3821 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3822 && offset < 1024*1024*1024
3823 && trunc_int_for_mode (offset, SImode) == offset)
3825 if (ix86_cmodel == CM_KERNEL
3827 && trunc_int_for_mode (offset, SImode) == offset)
3840 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3842 x86_64_zero_extended_value (value)
3845 switch (GET_CODE (value))
3848 if (HOST_BITS_PER_WIDE_INT == 32)
3849 return (GET_MODE (value) == VOIDmode
3850 && !CONST_DOUBLE_HIGH (value));
3854 if (HOST_BITS_PER_WIDE_INT == 32)
3855 return INTVAL (value) >= 0;
/* On 64-bit hosts, accept any value that fits in the low 32 bits.  */
3857 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3860 /* For certain code models, the symbolic references are known to fit. */
3862 return ix86_cmodel == CM_SMALL;
3864 /* For certain code models, the code is near as well. */
3866 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3868 /* We also may accept the offsetted memory references in certain special
3871 if (GET_CODE (XEXP (value, 0)) == PLUS)
3873 rtx op1 = XEXP (XEXP (value, 0), 0);
3874 rtx op2 = XEXP (XEXP (value, 0), 1);
3876 if (ix86_cmodel == CM_LARGE)
3878 switch (GET_CODE (op1))
3882 /* For small code model we may accept pretty large positive
3883 offsets, since one bit is available for free. Negative
3884 offsets are limited by the size of NULL pointer area
3885 specified by the ABI. */
3886 if (ix86_cmodel == CM_SMALL
3887 && GET_CODE (op2) == CONST_INT
3888 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3889 && (trunc_int_for_mode (INTVAL (op2), SImode)
3892 /* ??? For the kernel, we may accept adjustment of
3893 -0x10000000, since we know that it will just convert
3894 negative address space to positive, but perhaps this
3895 is not worthwhile. */
3898 /* These conditions are similar to SYMBOL_REF ones, just the
3899 constraints for code models differ. */
3900 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3901 && GET_CODE (op2) == CONST_INT
3902 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3903 && (trunc_int_for_mode (INTVAL (op2), SImode)
3917 /* Value should be nonzero if functions must have frame pointers.
3918 Zero means the frame pointer need not be set up (and parms may
3919 be accessed via the stack pointer) in functions that seem suitable. */
3922 ix86_frame_pointer_required ()
3924 /* If we accessed previous frames, then the generated code expects
3925 to be able to access the saved ebp value in our frame. */
3926 if (cfun->machine->accesses_prev_frame)
3929 /* Several x86 os'es need a frame pointer for other reasons,
3930 usually pertaining to setjmp. */
3931 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3934 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3935 the frame pointer by default. Turn it back on now if we've not
3936 got a leaf function. */
3937 if (TARGET_OMIT_LEAF_FRAME_POINTER
3938 && (!current_function_is_leaf || current_function_profile))
3944 /* Record that the current function accesses previous call frames.
   Checked later by ix86_frame_pointer_required.  */
3947 ix86_setup_frame_addresses ()
3949 cfun->machine->accesses_prev_frame = 1;
3952 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3953 # define USE_HIDDEN_LINKONCE 1
3955 # define USE_HIDDEN_LINKONCE 0
3958 static int pic_labels_used;
3960 /* Fills in the label name that should be used for a pc thunk for
3961 the given register.  With hidden/linkonce support the name is the
   well-known "__i686.get_pc_thunk.<reg>"; otherwise a local label. */
3964 get_pc_thunk_name (name, regno)
3968 if (USE_HIDDEN_LINKONCE)
3969 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3971 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3975 /* This function generates code for -fpic that loads %ebx with
3976 the return address of the caller and then returns.
   Emits one thunk per general register recorded in pic_labels_used. */
3979 ix86_asm_file_end (file)
3985 for (regno = 0; regno < 8; ++regno)
3989 if (! ((pic_labels_used >> regno) & 1))
3992 get_pc_thunk_name (name, regno);
3994 if (USE_HIDDEN_LINKONCE)
3998 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4000 TREE_PUBLIC (decl) = 1;
4001 TREE_STATIC (decl) = 1;
4002 DECL_ONE_ONLY (decl) = 1;
4004 (*targetm.asm_out.unique_section) (decl, 0);
4005 named_section (decl, NULL, 0);
4007 (*targetm.asm_out.globalize_label) (file, name);
4008 fputs ("\t.hidden\t", file);
4009 assemble_name (file, name);
4011 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4016 ASM_OUTPUT_LABEL (file, name);
/* Thunk body: load the saved return address from the stack and return.  */
4019 xops[0] = gen_rtx_REG (SImode, regno);
4020 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4021 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4022 output_asm_insn ("ret", xops);
4026 /* Emit code for the SET_GOT patterns: load DEST with the PC, then add
   the GOT offset.  Uses an inline call/pop sequence, or a pc-thunk call
   when deep branch prediction is enabled.  */
4029 output_set_got (dest)
4035 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4037 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4039 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ())
4042 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4044 output_asm_insn ("call\t%a2", xops);
4047 /* Output the "canonical" label name ("Lxx$pb") here too. This
4048 is what will be referred to by the Mach-O PIC subsystem. */
4049 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4051 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4052 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4055 output_asm_insn ("pop{l}\t%0", xops);
4060 get_pc_thunk_name (name, REGNO (dest));
4061 pic_labels_used |= 1 << REGNO (dest);
4063 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4064 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4065 output_asm_insn ("call\t%X2", xops);
4068 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4069 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4070 else if (!TARGET_MACHO)
4071 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4076 /* Generate a "push" pattern for input ARG: store it at the
   pre-decremented stack pointer.  */
4082 return gen_rtx_SET (VOIDmode,
4084 gen_rtx_PRE_DEC (Pmode,
4085 stack_pointer_rtx)),
4089 /* Return >= 0 if there is an unused call-clobbered register available
4090 for the entire function.  Only considered for leaf, non-profiled
   functions; scans hard regs 2..0 for one never live.  */
4093 ix86_select_alt_pic_regnum ()
4095 if (current_function_is_leaf && !current_function_profile)
4098 for (i = 2; i >= 0; --i)
4099 if (!regs_ever_live[i])
4103 return INVALID_REGNUM;
4106 /* Return 1 if we need to save REGNO.  MAYBE_EH_RETURN additionally
   counts the EH return data registers as needing a save.  */
4108 ix86_save_reg (regno, maybe_eh_return)
4110 int maybe_eh_return;
4112 if (pic_offset_table_rtx
4113 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4114 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4115 || current_function_profile
4116 || current_function_calls_eh_return))
/* If an alternate (call-clobbered) PIC register is available, the real
   PIC register need not be saved here.  */
4118 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4123 if (current_function_calls_eh_return && maybe_eh_return)
4128 unsigned test = EH_RETURN_DATA_REGNO (i);
4129 if (test == INVALID_REGNUM)
4136 return (regs_ever_live[regno]
4137 && !call_used_regs[regno]
4138 && !fixed_regs[regno]
4139 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4142 /* Return number of registers to be saved on the stack,
   counting every hard register for which ix86_save_reg is true.  */
4150 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4151 if (ix86_save_reg (regno, true))
4156 /* Return the offset between two registers, one to be eliminated, and the other
4157 its replacement, at the start of a routine.  Offsets are read off the
   frame layout computed by ix86_compute_frame_layout.  */
4160 ix86_initial_elimination_offset (from, to)
4164 struct ix86_frame frame;
4165 ix86_compute_frame_layout (&frame);
4167 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4168 return frame.hard_frame_pointer_offset;
4169 else if (from == FRAME_POINTER_REGNUM
4170 && to == HARD_FRAME_POINTER_REGNUM)
4171 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4174 if (to != STACK_POINTER_REGNUM)
4176 else if (from == ARG_POINTER_REGNUM)
4177 return frame.stack_pointer_offset;
4178 else if (from != FRAME_POINTER_REGNUM)
4181 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4185 /* Fill structure ix86_frame about frame of currently computed function. */
4188 ix86_compute_frame_layout (frame)
4189 struct ix86_frame *frame;
4191 HOST_WIDE_INT total_size;
4192 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4194 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4195 HOST_WIDE_INT size = get_frame_size ();
4197 frame->nregs = ix86_nsaved_regs ();
4200 /* Skip return address and saved base pointer. */
4201 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4203 frame->hard_frame_pointer_offset = offset;
4205 /* Do some sanity checking of stack_alignment_needed and
4206 preferred_alignment, since the i386 port is the only one using these
4207 features and they may break easily. */
4209 if (size && !stack_alignment_needed)
4211 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4213 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4215 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4218 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4219 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4221 /* Register save area */
4222 offset += frame->nregs * UNITS_PER_WORD;
4225 if (ix86_save_varrargs_registers)
4227 offset += X86_64_VARARGS_SIZE;
4228 frame->va_arg_size = X86_64_VARARGS_SIZE;
4231 frame->va_arg_size = 0;
4233 /* Align start of frame for local function. */
4234 frame->padding1 = ((offset + stack_alignment_needed - 1)
4235 & -stack_alignment_needed) - offset;
4237 offset += frame->padding1;
4239 /* Frame pointer points here. */
4240 frame->frame_pointer_offset = offset;
4244 /* Add outgoing arguments area. Can be skipped if we eliminated
4245 all the function calls as dead code. */
4246 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4248 offset += current_function_outgoing_args_size;
4249 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4252 frame->outgoing_arguments_size = 0;
4254 /* Align stack boundary. Only needed if we're calling another function
4256 if (!current_function_is_leaf || current_function_calls_alloca)
4257 frame->padding2 = ((offset + preferred_alignment - 1)
4258 & -preferred_alignment) - offset;
4260 frame->padding2 = 0;
4262 offset += frame->padding2;
4264 /* We've reached end of stack frame. */
4265 frame->stack_pointer_offset = offset;
4267 /* Size prologue needs to allocate. */
4268 frame->to_allocate =
4269 (size + frame->padding1 + frame->padding2
4270 + frame->outgoing_arguments_size + frame->va_arg_size);
/* Leaf functions on x86-64 may keep locals in the red zone below %rsp
   instead of allocating them explicitly.  */
4272 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4273 && current_function_is_leaf)
4275 frame->red_zone_size = frame->to_allocate;
4276 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4277 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4280 frame->red_zone_size = 0;
4281 frame->to_allocate -= frame->red_zone_size;
4282 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout (presumably under #if 0 or a debug
   flag in the surrounding, unseen lines — TODO confirm).  */
4284 fprintf (stderr, "nregs: %i\n", frame->nregs);
4285 fprintf (stderr, "size: %i\n", size);
4286 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4287 fprintf (stderr, "padding1: %i\n", frame->padding1);
4288 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4289 fprintf (stderr, "padding2: %i\n", frame->padding2);
4290 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4291 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4292 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4293 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4294 frame->hard_frame_pointer_offset);
4295 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4299 /* Emit code to save registers in the prologue, one push per register
   that ix86_save_reg reports as needing a save.  */
4302 ix86_emit_save_regs ()
4307 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4308 if (ix86_save_reg (regno, true))
4310 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4311 RTX_FRAME_RELATED_P (insn) = 1;
4315 /* Emit code to save registers using MOV insns. First register
4316 is stored at POINTER + OFFSET. */
4318 ix86_emit_save_regs_using_mov (pointer, offset)
4320 HOST_WIDE_INT offset;
4325 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4326 if (ix86_save_reg (regno, true))
4328 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4330 gen_rtx_REG (Pmode, regno));
4331 RTX_FRAME_RELATED_P (insn) = 1;
4332 offset += UNITS_PER_WORD;
4336 /* Expand the prologue into a bunch of separate insns. */
4339 ix86_expand_prologue ()
4343 struct ix86_frame frame;
4345 HOST_WIDE_INT allocate;
4349 use_fast_prologue_epilogue
4350 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4351 if (TARGET_PROLOGUE_USING_MOVE)
4352 use_mov = use_fast_prologue_epilogue;
4354 ix86_compute_frame_layout (&frame);
4356 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4357 slower on all targets. Also sdb doesn't like it. */
4359 if (frame_pointer_needed)
4361 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4362 RTX_FRAME_RELATED_P (insn) = 1;
4364 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4365 RTX_FRAME_RELATED_P (insn) = 1;
4368 allocate = frame.to_allocate;
4369 /* In case we are dealing only with single register and empty frame,
4370 push is equivalent of the mov+add sequence. */
4371 if (allocate == 0 && frame.nregs <= 1)
4375 ix86_emit_save_regs ();
4377 allocate += frame.nregs * UNITS_PER_WORD;
4381 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4383 insn = emit_insn (gen_pro_epilogue_adjust_stack
4384 (stack_pointer_rtx, stack_pointer_rtx,
4385 GEN_INT (-allocate)));
4386 RTX_FRAME_RELATED_P (insn) = 1;
4390 /* ??? Is this only valid for Win32?  Large allocations go through
   the _alloca helper so the stack gets probed page by page.  */
4397 arg0 = gen_rtx_REG (SImode, 0);
4398 emit_move_insn (arg0, GEN_INT (allocate));
4400 sym = gen_rtx_MEM (FUNCTION_MODE,
4401 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4402 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4404 CALL_INSN_FUNCTION_USAGE (insn)
4405 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4406 CALL_INSN_FUNCTION_USAGE (insn));
4410 if (!frame_pointer_needed || !frame.to_allocate)
4411 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4413 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4414 -frame.nregs * UNITS_PER_WORD);
4417 #ifdef SUBTARGET_PROLOGUE
4421 pic_reg_used = false;
4422 if (pic_offset_table_rtx
4423 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4424 || current_function_profile))
4426 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4428 if (alt_pic_reg_used != INVALID_REGNUM)
4429 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4431 pic_reg_used = true;
4436 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4438 /* Even with accurate pre-reload life analysis, we can wind up
4439 deleting all references to the pic register after reload.
4440 Consider if cross-jumping unifies two sides of a branch
4441 controlled by a comparison vs the only read from a global.
4442 In which case, allow the set_got to be deleted, though we're
4443 too late to do anything about the ebx save in the prologue. */
4444 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4447 /* Prevent function calls from being scheduled before the call to mcount.
4448 In the pic_reg_used case, make sure that the got load isn't deleted. */
4449 if (current_function_profile)
4450 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4453 /* Emit code to restore saved registers using MOV insns. First register
4454 is restored from POINTER + OFFSET. */
4456 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4459 int maybe_eh_return;
4463 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4464 if (ix86_save_reg (regno, maybe_eh_return))
4466 emit_move_insn (gen_rtx_REG (Pmode, regno),
4467 adjust_address (gen_rtx_MEM (Pmode, pointer),
4469 offset += UNITS_PER_WORD;
4473 /* Restore function stack, frame, and registers.
   STYLE 2 denotes the eh_return path (EH data regs are not restored).  */
4476 ix86_expand_epilogue (style)
4480 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4481 struct ix86_frame frame;
4482 HOST_WIDE_INT offset;
4484 ix86_compute_frame_layout (&frame);
4486 /* Calculate start of saved registers relative to ebp. Special care
4487 must be taken for the normal return case of a function using
4488 eh_return: the eax and edx registers are marked as saved, but not
4489 restored along this path. */
4490 offset = frame.nregs;
4491 if (current_function_calls_eh_return && style != 2)
4493 offset *= -UNITS_PER_WORD;
4495 /* If we're only restoring one register and sp is not valid then
4496 use a move instruction to restore the register since it's
4497 less work than reloading sp and popping the register.
4499 The default code results in stack adjustment using add/lea instruction,
4500 while this code results in LEAVE instruction (or discrete equivalent),
4501 so it is profitable in some other cases as well. Especially when there
4502 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4503 and there is exactly one register to pop. This heuristic may need some
4504 tuning in future. */
4505 if ((!sp_valid && frame.nregs <= 1)
4506 || (TARGET_EPILOGUE_USING_MOVE
4507 && use_fast_prologue_epilogue
4508 && (frame.nregs > 1 || frame.to_allocate))
4509 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4510 || (frame_pointer_needed && TARGET_USE_LEAVE
4511 && use_fast_prologue_epilogue && frame.nregs == 1)
4512 || current_function_calls_eh_return)
4514 /* Restore registers. We can use ebp or esp to address the memory
4515 locations. If both are available, default to ebp, since offsets
4516 are known to be small. Only exception is esp pointing directly to the
4517 end of block of saved registers, where we may simplify addressing
4520 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4521 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4522 frame.to_allocate, style == 2);
4524 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4525 offset, style == 2);
4527 /* eh_return epilogues need %ecx added to the stack pointer. */
4530 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4532 if (frame_pointer_needed)
4534 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4535 tmp = plus_constant (tmp, UNITS_PER_WORD);
4536 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4538 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4539 emit_move_insn (hard_frame_pointer_rtx, tmp);
4541 emit_insn (gen_pro_epilogue_adjust_stack
4542 (stack_pointer_rtx, sa, const0_rtx));
4546 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4547 tmp = plus_constant (tmp, (frame.to_allocate
4548 + frame.nregs * UNITS_PER_WORD));
4549 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4552 else if (!frame_pointer_needed)
4553 emit_insn (gen_pro_epilogue_adjust_stack
4554 (stack_pointer_rtx, stack_pointer_rtx,
4555 GEN_INT (frame.to_allocate
4556 + frame.nregs * UNITS_PER_WORD)));
4557 /* If not an i386, mov & pop is faster than "leave". */
4558 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4559 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4562 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4563 hard_frame_pointer_rtx,
4566 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4568 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4573 /* First step is to deallocate the stack frame so that we can
4574 pop the registers. */
4577 if (!frame_pointer_needed)
4579 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4580 hard_frame_pointer_rtx,
4583 else if (frame.to_allocate)
4584 emit_insn (gen_pro_epilogue_adjust_stack
4585 (stack_pointer_rtx, stack_pointer_rtx,
4586 GEN_INT (frame.to_allocate)));
4588 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4589 if (ix86_save_reg (regno, false))
4592 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4594 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4596 if (frame_pointer_needed)
4598 /* Leave results in shorter dependency chains on CPUs that are
4599 able to grok it fast. */
4600 if (TARGET_USE_LEAVE)
4601 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4602 else if (TARGET_64BIT)
4603 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4605 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4609 /* Sibcall epilogues don't want a return instruction. */
4613 if (current_function_pops_args && current_function_args_size)
4615 rtx popc = GEN_INT (current_function_pops_args);
4617 /* i386 can only pop 64K bytes. If asked to pop more, pop
4618 return address, do explicit add, and jump indirectly to the
4621 if (current_function_pops_args >= 65536)
4623 rtx ecx = gen_rtx_REG (SImode, 2);
4625 /* There is no "pascal" calling convention in the 64bit ABI. */
4629 emit_insn (gen_popsi1 (ecx));
4630 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4631 emit_jump_insn (gen_return_indirect_internal (ecx));
4634 emit_jump_insn (gen_return_pop_internal (popc));
4637 emit_jump_insn (gen_return_internal ());
4640 /* Reset from the function's potential modifications: put the PIC
   register back to its original (hard) register number in case the
   prologue switched it to an alternate register.  */
4643 ix86_output_function_epilogue (file, size)
4644 FILE *file ATTRIBUTE_UNUSED;
4645 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
4647 if (pic_offset_table_rtx)
4648 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4651 /* Extract the parts of an RTL expression that is a valid memory address
4652 for an instruction. Return 0 if the structure of the address is
4653 grossly off. Return -1 if the address contains ASHIFT, so it is not
4654 strictly valid, but still used for computing length of lea instruction.
4658 ix86_decompose_address (addr, out)
4660 struct ix86_address *out;
4662 rtx base = NULL_RTX;
4663 rtx index = NULL_RTX;
4664 rtx disp = NULL_RTX;
4665 HOST_WIDE_INT scale = 1;
4666 rtx scale_rtx = NULL_RTX;
4669 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4671 else if (GET_CODE (addr) == PLUS)
4673 rtx op0 = XEXP (addr, 0);
4674 rtx op1 = XEXP (addr, 1);
4675 enum rtx_code code0 = GET_CODE (op0);
4676 enum rtx_code code1 = GET_CODE (op1);
4678 if (code0 == REG || code0 == SUBREG)
4680 if (code1 == REG || code1 == SUBREG)
4681 index = op0, base = op1; /* index + base */
4683 base = op0, disp = op1; /* base + displacement */
4685 else if (code0 == MULT)
4687 index = XEXP (op0, 0);
4688 scale_rtx = XEXP (op0, 1);
4689 if (code1 == REG || code1 == SUBREG)
4690 base = op1; /* index*scale + base */
4692 disp = op1; /* index*scale + disp */
4694 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4696 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4697 scale_rtx = XEXP (XEXP (op0, 0), 1);
4698 base = XEXP (op0, 1);
4701 else if (code0 == PLUS)
4703 index = XEXP (op0, 0); /* index + base + disp */
4704 base = XEXP (op0, 1);
4710 else if (GET_CODE (addr) == MULT)
4712 index = XEXP (addr, 0); /* index*scale */
4713 scale_rtx = XEXP (addr, 1);
4715 else if (GET_CODE (addr) == ASHIFT)
4719 /* We're called for lea too, which implements ashift on occasion. */
4720 index = XEXP (addr, 0);
4721 tmp = XEXP (addr, 1);
4722 if (GET_CODE (tmp) != CONST_INT)
4724 scale = INTVAL (tmp);
4725 if ((unsigned HOST_WIDE_INT) scale > 3)
4731 disp = addr; /* displacement */
4733 /* Extract the integral value of scale. */
4736 if (GET_CODE (scale_rtx) != CONST_INT)
4738 scale = INTVAL (scale_rtx);
4741 /* Allow arg pointer and stack pointer as index if there is no scaling */
4742 if (base && index && scale == 1
4743 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4744 || index == stack_pointer_rtx))
4751 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4752 if ((base == hard_frame_pointer_rtx
4753 || base == frame_pointer_rtx
4754 || base == arg_pointer_rtx) && !disp)
4757 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4758 Avoid this by transforming to [%esi+0]. */
4759 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4760 && base && !index && !disp
4762 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4765 /* Special case: encode reg+reg instead of reg*2. */
4766 if (!base && index && scale && scale == 2)
4767 base = index, scale = 1;
4769 /* Special case: scaling cannot be encoded without base or displacement. */
4770 if (!base && !disp && index && scale != 1)
4781 /* Return cost of the memory address x.
4782 For i386, it is better to use a complex address than let gcc copy
4783 the address into a reg and make a new pseudo. But not if the address
4784 requires two regs - that would mean more pseudos with longer
4787 ix86_address_cost (x)
4790 struct ix86_address parts;
4793 if (!ix86_decompose_address (x, &parts))
4796 if (parts.base && GET_CODE (parts.base) == SUBREG)
4797 parts.base = SUBREG_REG (parts.base);
4798 if (parts.index && GET_CODE (parts.index) == SUBREG)
4799 parts.index = SUBREG_REG (parts.index);
4801 /* More complex memory references are better. */
4802 if (parts.disp && parts.disp != const0_rtx)
4805 /* Attempt to minimize number of registers in the address. */
4807 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4809 && (!REG_P (parts.index)
4810 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4814 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4816 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4817 && parts.base != parts.index)
4820 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4821 since its predecode logic can't detect the length of instructions
4822 and it degenerates to vector decoded. Increase cost of such
4823 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4824 to split such addresses or even refuse such addresses at all.
4826 Following addressing modes are affected:
4831 The first and last case may be avoidable by explicitly coding the zero in
4832 memory address, but I don't have AMD-K6 machine handy to check this
4836 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4837 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4838 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4844 /* If X is a machine specific address (i.e. a symbol or label being
4845 referenced as a displacement from the GOT implemented using an
4846 UNSPEC), then return the base term. Otherwise return X. */
4849 ix86_find_base_term (x)
4856 if (GET_CODE (x) != CONST)
4859 if (GET_CODE (term) == PLUS
4860 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4861 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4862 term = XEXP (term, 0);
4863 if (GET_CODE (term) != UNSPEC
4864 || XINT (term, 1) != UNSPEC_GOTPCREL)
4867 term = XVECEXP (term, 0, 0);
4869 if (GET_CODE (term) != SYMBOL_REF
4870 && GET_CODE (term) != LABEL_REF)
4876 if (GET_CODE (x) != PLUS
4877 || XEXP (x, 0) != pic_offset_table_rtx
4878 || GET_CODE (XEXP (x, 1)) != CONST)
4881 term = XEXP (XEXP (x, 1), 0);
4883 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4884 term = XEXP (term, 0);
4886 if (GET_CODE (term) != UNSPEC
4887 || XINT (term, 1) != UNSPEC_GOTOFF)
4890 term = XVECEXP (term, 0, 0);
4892 if (GET_CODE (term) != SYMBOL_REF
4893 && GET_CODE (term) != LABEL_REF)
4899 /* Determine if a given RTX is a valid constant. We already know this
4900 satisfies CONSTANT_P. */
4903 legitimate_constant_p (x)
4908 switch (GET_CODE (x))
4911 /* TLS symbols are not constant. */
4912 if (tls_symbolic_operand (x, Pmode))
4917 inner = XEXP (x, 0);
4919 /* Offsets of TLS symbols are never valid.
4920 Discourage CSE from creating them. */
4921 if (GET_CODE (inner) == PLUS
4922 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
4925 /* Only some unspecs are valid as "constants". */
4926 if (GET_CODE (inner) == UNSPEC)
4927 switch (XINT (inner, 1))
4930 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4940 /* Otherwise we handle everything else in the move patterns. */
4944 /* Determine if a given RTX is a valid constant address. */
4947 constant_address_p (x)
4950 switch (GET_CODE (x))
4957 return TARGET_64BIT;
4960 /* For Mach-O, really believe the CONST. */
4963 /* Otherwise fall through. */
4965 return !flag_pic && legitimate_constant_p (x);
4972 /* Nonzero if the constant value X is a legitimate general operand
4973 when generating PIC code. It is given that flag_pic is on and
4974 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4977 legitimate_pic_operand_p (x)
4982 switch (GET_CODE (x))
4985 inner = XEXP (x, 0);
4987 /* Only some unspecs are valid as "constants". */
4988 if (GET_CODE (inner) == UNSPEC)
4989 switch (XINT (inner, 1))
4992 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5000 return legitimate_pic_address_disp_p (x);
5007 /* Determine if a given CONST RTX is a valid memory displacement
5011 legitimate_pic_address_disp_p (disp)
5016 /* In 64bit mode we can allow direct addresses of symbols and labels
5017 when they are not dynamic symbols. */
5021 if (GET_CODE (disp) == CONST)
5023 /* ??? Handle PIC code models */
5024 if (GET_CODE (x) == PLUS
5025 && (GET_CODE (XEXP (x, 1)) == CONST_INT
5026 && ix86_cmodel == CM_SMALL_PIC
5027 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
5028 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
5030 if (local_symbolic_operand (x, Pmode))
5033 if (GET_CODE (disp) != CONST)
5035 disp = XEXP (disp, 0);
5039 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5040 of GOT tables. We should not need these anyway. */
5041 if (GET_CODE (disp) != UNSPEC
5042 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5045 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5046 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5052 if (GET_CODE (disp) == PLUS)
5054 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5056 disp = XEXP (disp, 0);
5060 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5061 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5063 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5064 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5065 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5067 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5068 if (strstr (sym_name, "$pb") != 0)
5073 if (GET_CODE (disp) != UNSPEC)
5076 switch (XINT (disp, 1))
5081 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5083 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5084 case UNSPEC_GOTTPOFF:
5085 case UNSPEC_GOTNTPOFF:
5086 case UNSPEC_INDNTPOFF:
5089 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5091 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5093 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5099 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5100 memory address for an instruction. The MODE argument is the machine mode
5101 for the MEM expression that wants to use this address.
5103 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5104 convert common non-canonical forms to canonical form so that they will
5108 legitimate_address_p (mode, addr, strict)
5109 enum machine_mode mode;
5113 struct ix86_address parts;
5114 rtx base, index, disp;
5115 HOST_WIDE_INT scale;
5116 const char *reason = NULL;
5117 rtx reason_rtx = NULL_RTX;
5119 if (TARGET_DEBUG_ADDR)
5122 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5123 GET_MODE_NAME (mode), strict);
5127 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5129 if (TARGET_DEBUG_ADDR)
5130 fprintf (stderr, "Success.\n");
5134 if (ix86_decompose_address (addr, &parts) <= 0)
5136 reason = "decomposition failed";
5141 index = parts.index;
5143 scale = parts.scale;
5145 /* Validate base register.
5147 Don't allow SUBREG's here, it can lead to spill failures when the base
5148 is one word out of a two word structure, which is represented internally
5156 if (GET_CODE (base) == SUBREG)
5157 reg = SUBREG_REG (base);
5161 if (GET_CODE (reg) != REG)
5163 reason = "base is not a register";
5167 if (GET_MODE (base) != Pmode)
5169 reason = "base is not in Pmode";
5173 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5174 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5176 reason = "base is not valid";
5181 /* Validate index register.
5183 Don't allow SUBREG's here, it can lead to spill failures when the index
5184 is one word out of a two word structure, which is represented internally
5192 if (GET_CODE (index) == SUBREG)
5193 reg = SUBREG_REG (index);
5197 if (GET_CODE (reg) != REG)
5199 reason = "index is not a register";
5203 if (GET_MODE (index) != Pmode)
5205 reason = "index is not in Pmode";
5209 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5210 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5212 reason = "index is not valid";
5217 /* Validate scale factor. */
5220 reason_rtx = GEN_INT (scale);
5223 reason = "scale without index";
5227 if (scale != 2 && scale != 4 && scale != 8)
5229 reason = "scale is not a valid multiplier";
5234 /* Validate displacement. */
5241 if (!x86_64_sign_extended_value (disp))
5243 reason = "displacement is out of range";
5249 if (GET_CODE (disp) == CONST_DOUBLE)
5251 reason = "displacement is a const_double";
5256 if (GET_CODE (disp) == CONST
5257 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5258 switch (XINT (XEXP (disp, 0), 1))
5262 case UNSPEC_GOTPCREL:
5265 goto is_legitimate_pic;
5267 case UNSPEC_GOTTPOFF:
5268 case UNSPEC_GOTNTPOFF:
5269 case UNSPEC_INDNTPOFF:
5275 reason = "invalid address unspec";
5279 else if (flag_pic && (SYMBOLIC_CONST (disp)
5281 && !machopic_operand_p (disp)
5286 if (TARGET_64BIT && (index || base))
5288 reason = "non-constant pic memory reference";
5291 if (! legitimate_pic_address_disp_p (disp))
5293 reason = "displacement is an invalid pic construct";
5297 /* This code used to verify that a symbolic pic displacement
5298 includes the pic_offset_table_rtx register.
5300 While this is good idea, unfortunately these constructs may
5301 be created by "adds using lea" optimization for incorrect
5310 This code is nonsensical, but results in addressing
5311 GOT table with pic_offset_table_rtx base. We can't
5312 just refuse it easily, since it gets matched by
5313 "addsi3" pattern, that later gets split to lea in the
5314 case output register differs from input. While this
5315 can be handled by separate addsi pattern for this case
5316 that never results in lea, this seems to be easier and
5317 correct fix for crash to disable this test. */
5319 else if (!CONSTANT_ADDRESS_P (disp))
5321 reason = "displacement is not constant";
5326 /* Everything looks valid. */
5327 if (TARGET_DEBUG_ADDR)
5328 fprintf (stderr, "Success.\n");
5332 if (TARGET_DEBUG_ADDR)
5334 fprintf (stderr, "Error: %s\n", reason);
5335 debug_rtx (reason_rtx);
5340 /* Return an unique alias set for the GOT. */
5342 static HOST_WIDE_INT
5343 ix86_GOT_alias_set ()
5345 static HOST_WIDE_INT set = -1;
5347 set = new_alias_set ();
5351 /* Return a legitimate reference for ORIG (an address) using the
5352 register REG. If REG is 0, a new pseudo is generated.
5354 There are two types of references that must be handled:
5356 1. Global data references must load the address from the GOT, via
5357 the PIC reg. An insn is emitted to do this load, and the reg is
5360 2. Static data references, constant pool addresses, and code labels
5361 compute the address as an offset from the GOT, whose base is in
5362 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5363 differentiate them from global data objects. The returned
5364 address is the PIC reg + an unspec constant.
5366 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5367 reg also appears in the address. */
5370 legitimize_pic_address (orig, reg)
5380 reg = gen_reg_rtx (Pmode);
5381 /* Use the generic Mach-O PIC machinery. */
5382 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5385 if (local_symbolic_operand (addr, Pmode))
5387 /* In 64bit mode we can address such objects directly. */
5392 /* This symbol may be referenced via a displacement from the PIC
5393 base address (@GOTOFF). */
5395 if (reload_in_progress)
5396 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5397 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5398 new = gen_rtx_CONST (Pmode, new);
5399 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5403 emit_move_insn (reg, new);
5408 else if (GET_CODE (addr) == SYMBOL_REF)
5412 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5413 new = gen_rtx_CONST (Pmode, new);
5414 new = gen_rtx_MEM (Pmode, new);
5415 RTX_UNCHANGING_P (new) = 1;
5416 set_mem_alias_set (new, ix86_GOT_alias_set ());
5419 reg = gen_reg_rtx (Pmode);
5420 /* Use directly gen_movsi, otherwise the address is loaded
5421 into register for CSE. We don't want to CSE this addresses,
5422 instead we CSE addresses from the GOT table, so skip this. */
5423 emit_insn (gen_movsi (reg, new));
5428 /* This symbol must be referenced via a load from the
5429 Global Offset Table (@GOT). */
5431 if (reload_in_progress)
5432 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5433 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5434 new = gen_rtx_CONST (Pmode, new);
5435 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5436 new = gen_rtx_MEM (Pmode, new);
5437 RTX_UNCHANGING_P (new) = 1;
5438 set_mem_alias_set (new, ix86_GOT_alias_set ());
5441 reg = gen_reg_rtx (Pmode);
5442 emit_move_insn (reg, new);
5448 if (GET_CODE (addr) == CONST)
5450 addr = XEXP (addr, 0);
5452 /* We must match stuff we generate before. Assume the only
5453 unspecs that can get here are ours. Not that we could do
5454 anything with them anyway... */
5455 if (GET_CODE (addr) == UNSPEC
5456 || (GET_CODE (addr) == PLUS
5457 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5459 if (GET_CODE (addr) != PLUS)
5462 if (GET_CODE (addr) == PLUS)
5464 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5466 /* Check first to see if this is a constant offset from a @GOTOFF
5467 symbol reference. */
5468 if (local_symbolic_operand (op0, Pmode)
5469 && GET_CODE (op1) == CONST_INT)
5473 if (reload_in_progress)
5474 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5475 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5477 new = gen_rtx_PLUS (Pmode, new, op1);
5478 new = gen_rtx_CONST (Pmode, new);
5479 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5483 emit_move_insn (reg, new);
5489 /* ??? We need to limit offsets here. */
5494 base = legitimize_pic_address (XEXP (addr, 0), reg);
5495 new = legitimize_pic_address (XEXP (addr, 1),
5496 base == reg ? NULL_RTX : reg);
5498 if (GET_CODE (new) == CONST_INT)
5499 new = plus_constant (base, INTVAL (new));
5502 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5504 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5505 new = XEXP (new, 1);
5507 new = gen_rtx_PLUS (Pmode, base, new);
5516 ix86_encode_section_info (decl, first)
5518 int first ATTRIBUTE_UNUSED;
5520 bool local_p = (*targetm.binds_local_p) (decl);
5523 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5524 if (GET_CODE (rtl) != MEM)
5526 symbol = XEXP (rtl, 0);
5527 if (GET_CODE (symbol) != SYMBOL_REF)
5530 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5531 symbol so that we may access it directly in the GOT. */
5534 SYMBOL_REF_FLAG (symbol) = local_p;
5536 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5537 "local dynamic", "initial exec" or "local exec" TLS models
5540 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5542 const char *symbol_str;
5545 enum tls_model kind = decl_tls_model (decl);
5547 symbol_str = XSTR (symbol, 0);
5549 if (symbol_str[0] == '%')
5551 if (symbol_str[1] == tls_model_chars[kind])
5555 len = strlen (symbol_str) + 1;
5556 newstr = alloca (len + 2);
5559 newstr[1] = tls_model_chars[kind];
5560 memcpy (newstr + 2, symbol_str, len);
5562 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
5566 /* Undo the above when printing symbol names. */
5569 ix86_strip_name_encoding (str)
5579 /* Load the thread pointer into a register. */
5582 get_thread_pointer ()
5586 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5587 tp = gen_rtx_MEM (Pmode, tp);
5588 RTX_UNCHANGING_P (tp) = 1;
5589 set_mem_alias_set (tp, ix86_GOT_alias_set ());
5590 tp = force_reg (Pmode, tp);
5595 /* Try machine-dependent ways of modifying an illegitimate address
5596 to be legitimate. If we find one, return the new, valid address.
5597 This macro is used in only one place: `memory_address' in explow.c.
5599 OLDX is the address as it was before break_out_memory_refs was called.
5600 In some cases it is useful to look at this to decide what needs to be done.
5602 MODE and WIN are passed so that this macro can use
5603 GO_IF_LEGITIMATE_ADDRESS.
5605 It is always safe for this macro to do nothing. It exists to recognize
5606 opportunities to optimize the output.
5608 For the 80386, we handle X+REG by loading X into a register R and
5609 using R+REG. R will go in a general reg and indexing will be used.
5610 However, if REG is a broken-out memory address or multiplication,
5611 nothing needs to be done because REG can certainly go in a general reg.
5613 When -fpic is used, special handling is needed for symbolic references.
5614 See comments by legitimize_pic_address in i386.c for details. */
5617 legitimize_address (x, oldx, mode)
5619 register rtx oldx ATTRIBUTE_UNUSED;
5620 enum machine_mode mode;
5625 if (TARGET_DEBUG_ADDR)
5627 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5628 GET_MODE_NAME (mode));
5632 log = tls_symbolic_operand (x, mode);
5635 rtx dest, base, off, pic;
5639 case TLS_MODEL_GLOBAL_DYNAMIC:
5640 dest = gen_reg_rtx (Pmode);
5641 emit_insn (gen_tls_global_dynamic (dest, x));
5644 case TLS_MODEL_LOCAL_DYNAMIC:
5645 base = gen_reg_rtx (Pmode);
5646 emit_insn (gen_tls_local_dynamic_base (base));
5648 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5649 off = gen_rtx_CONST (Pmode, off);
5651 return gen_rtx_PLUS (Pmode, base, off);
5653 case TLS_MODEL_INITIAL_EXEC:
5656 if (reload_in_progress)
5657 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5658 pic = pic_offset_table_rtx;
5660 else if (!TARGET_GNU_TLS)
5662 pic = gen_reg_rtx (Pmode);
5663 emit_insn (gen_set_got (pic));
5668 base = get_thread_pointer ();
5670 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5673 : flag_pic ? UNSPEC_GOTNTPOFF
5674 : UNSPEC_INDNTPOFF);
5675 off = gen_rtx_CONST (Pmode, off);
5676 if (flag_pic || !TARGET_GNU_TLS)
5677 off = gen_rtx_PLUS (Pmode, pic, off);
5678 off = gen_rtx_MEM (Pmode, off);
5679 RTX_UNCHANGING_P (off) = 1;
5680 set_mem_alias_set (off, ix86_GOT_alias_set ());
5681 dest = gen_reg_rtx (Pmode);
5685 emit_move_insn (dest, off);
5686 return gen_rtx_PLUS (Pmode, base, dest);
5689 emit_insn (gen_subsi3 (dest, base, off));
5692 case TLS_MODEL_LOCAL_EXEC:
5693 base = get_thread_pointer ();
5695 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5696 TARGET_GNU_TLS ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5697 off = gen_rtx_CONST (Pmode, off);
5700 return gen_rtx_PLUS (Pmode, base, off);
5703 dest = gen_reg_rtx (Pmode);
5704 emit_insn (gen_subsi3 (dest, base, off));
5715 if (flag_pic && SYMBOLIC_CONST (x))
5716 return legitimize_pic_address (x, 0);
5718 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5719 if (GET_CODE (x) == ASHIFT
5720 && GET_CODE (XEXP (x, 1)) == CONST_INT
5721 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5724 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5725 GEN_INT (1 << log));
5728 if (GET_CODE (x) == PLUS)
5730 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5732 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5733 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5734 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5737 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5738 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5739 GEN_INT (1 << log));
5742 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5743 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5744 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5747 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5748 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5749 GEN_INT (1 << log));
5752 /* Put multiply first if it isn't already. */
5753 if (GET_CODE (XEXP (x, 1)) == MULT)
5755 rtx tmp = XEXP (x, 0);
5756 XEXP (x, 0) = XEXP (x, 1);
5761 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5762 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5763 created by virtual register instantiation, register elimination, and
5764 similar optimizations. */
5765 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5768 x = gen_rtx_PLUS (Pmode,
5769 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5770 XEXP (XEXP (x, 1), 0)),
5771 XEXP (XEXP (x, 1), 1));
5775 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5776 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5777 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5778 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5779 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5780 && CONSTANT_P (XEXP (x, 1)))
5783 rtx other = NULL_RTX;
5785 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5787 constant = XEXP (x, 1);
5788 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5790 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5792 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5793 other = XEXP (x, 1);
5801 x = gen_rtx_PLUS (Pmode,
5802 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5803 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5804 plus_constant (other, INTVAL (constant)));
5808 if (changed && legitimate_address_p (mode, x, FALSE))
5811 if (GET_CODE (XEXP (x, 0)) == MULT)
5814 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5817 if (GET_CODE (XEXP (x, 1)) == MULT)
5820 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5824 && GET_CODE (XEXP (x, 1)) == REG
5825 && GET_CODE (XEXP (x, 0)) == REG)
5828 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5831 x = legitimize_pic_address (x, 0);
5834 if (changed && legitimate_address_p (mode, x, FALSE))
5837 if (GET_CODE (XEXP (x, 0)) == REG)
5839 register rtx temp = gen_reg_rtx (Pmode);
5840 register rtx val = force_operand (XEXP (x, 1), temp);
5842 emit_move_insn (temp, val);
5848 else if (GET_CODE (XEXP (x, 1)) == REG)
5850 register rtx temp = gen_reg_rtx (Pmode);
5851 register rtx val = force_operand (XEXP (x, 0), temp);
5853 emit_move_insn (temp, val);
5863 /* Print an integer constant expression in assembler syntax. Addition
5864 and subtraction are the only arithmetic that may appear in these
5865 expressions. FILE is the stdio stream to write to, X is the rtx, and
5866 CODE is the operand print code from the output string. */
5869 output_pic_addr_const (file, x, code)
5876 switch (GET_CODE (x))
5886 assemble_name (file, XSTR (x, 0));
5887 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
5888 fputs ("@PLT", file);
5895 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5896 assemble_name (asm_out_file, buf);
5900 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5904 /* This used to output parentheses around the expression,
5905 but that does not work on the 386 (either ATT or BSD assembler). */
5906 output_pic_addr_const (file, XEXP (x, 0), code);
5910 if (GET_MODE (x) == VOIDmode)
5912 /* We can use %d if the number is <32 bits and positive. */
5913 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5914 fprintf (file, "0x%lx%08lx",
5915 (unsigned long) CONST_DOUBLE_HIGH (x),
5916 (unsigned long) CONST_DOUBLE_LOW (x));
5918 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5921 /* We can't handle floating point constants;
5922 PRINT_OPERAND must handle them. */
5923 output_operand_lossage ("floating constant misused");
5927 /* Some assemblers need integer constants to appear first. */
5928 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5930 output_pic_addr_const (file, XEXP (x, 0), code);
5932 output_pic_addr_const (file, XEXP (x, 1), code);
5934 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5936 output_pic_addr_const (file, XEXP (x, 1), code);
5938 output_pic_addr_const (file, XEXP (x, 0), code);
5946 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5947 output_pic_addr_const (file, XEXP (x, 0), code);
5949 output_pic_addr_const (file, XEXP (x, 1), code);
5951 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5955 if (XVECLEN (x, 0) != 1)
5957 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5958 switch (XINT (x, 1))
5961 fputs ("@GOT", file);
5964 fputs ("@GOTOFF", file);
5966 case UNSPEC_GOTPCREL:
5967 fputs ("@GOTPCREL(%rip)", file);
5969 case UNSPEC_GOTTPOFF:
5970 /* FIXME: This might be @TPOFF in Sun ld too. */
5971 fputs ("@GOTTPOFF", file);
5974 fputs ("@TPOFF", file);
5977 fputs ("@NTPOFF", file);
5980 fputs ("@DTPOFF", file);
5982 case UNSPEC_GOTNTPOFF:
5983 fputs ("@GOTNTPOFF", file);
5985 case UNSPEC_INDNTPOFF:
5986 fputs ("@INDNTPOFF", file);
5989 output_operand_lossage ("invalid UNSPEC as operand");
5995 output_operand_lossage ("invalid expression as operand");
5999 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6000 We need to handle our special PIC relocations. */
6003 i386_dwarf_output_addr_const (file, x)
6008 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6012 fprintf (file, "%s", ASM_LONG);
6015 output_pic_addr_const (file, x, '\0');
6017 output_addr_const (file, x);
6021 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6022 We need to emit DTP-relative relocations. */
6025 i386_output_dwarf_dtprel (file, size, x)
6033 fputs (ASM_LONG, file);
6037 fputs (ASM_QUAD, file);
6044 output_addr_const (file, x);
6045 fputs ("@DTPOFF", file);
6048 /* In the name of slightly smaller debug output, and to cater to
6049 general assembler losage, recognize PIC+GOTOFF and turn it back
6050 into a direct symbol reference. */
6053 i386_simplify_dwarf_addr (orig_x)
6058 if (GET_CODE (x) == MEM)
6063 if (GET_CODE (x) != CONST
6064 || GET_CODE (XEXP (x, 0)) != UNSPEC
6065 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6066 || GET_CODE (orig_x) != MEM)
6068 return XVECEXP (XEXP (x, 0), 0, 0);
6071 if (GET_CODE (x) != PLUS
6072 || GET_CODE (XEXP (x, 1)) != CONST)
6075 if (GET_CODE (XEXP (x, 0)) == REG
6076 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6077 /* %ebx + GOT/GOTOFF */
6079 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6081 /* %ebx + %reg * scale + GOT/GOTOFF */
6083 if (GET_CODE (XEXP (y, 0)) == REG
6084 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6086 else if (GET_CODE (XEXP (y, 1)) == REG
6087 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6091 if (GET_CODE (y) != REG
6092 && GET_CODE (y) != MULT
6093 && GET_CODE (y) != ASHIFT)
6099 x = XEXP (XEXP (x, 1), 0);
6100 if (GET_CODE (x) == UNSPEC
6101 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6102 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6105 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6106 return XVECEXP (x, 0, 0);
6109 if (GET_CODE (x) == PLUS
6110 && GET_CODE (XEXP (x, 0)) == UNSPEC
6111 && GET_CODE (XEXP (x, 1)) == CONST_INT
6112 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6113 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6114 && GET_CODE (orig_x) != MEM)))
6116 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6118 return gen_rtx_PLUS (Pmode, y, x);
6126 put_condition_code (code, mode, reverse, fp, file)
6128 enum machine_mode mode;
6134 if (mode == CCFPmode || mode == CCFPUmode)
6136 enum rtx_code second_code, bypass_code;
6137 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6138 if (bypass_code != NIL || second_code != NIL)
6140 code = ix86_fp_compare_code_to_integer (code);
6144 code = reverse_condition (code);
6155 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6160 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6161 Those same assemblers have the same but opposite losage on cmov. */
6164 suffix = fp ? "nbe" : "a";
6167 if (mode == CCNOmode || mode == CCGOCmode)
6169 else if (mode == CCmode || mode == CCGCmode)
6180 if (mode == CCNOmode || mode == CCGOCmode)
6182 else if (mode == CCmode || mode == CCGCmode)
6191 suffix = fp ? "nb" : "ae";
6194 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6204 suffix = fp ? "u" : "p";
6207 suffix = fp ? "nu" : "np";
6212 fputs (suffix, file);
6216 print_reg (x, code, file)
6221 if (REGNO (x) == ARG_POINTER_REGNUM
6222 || REGNO (x) == FRAME_POINTER_REGNUM
6223 || REGNO (x) == FLAGS_REG
6224 || REGNO (x) == FPSR_REG)
6227 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6230 if (code == 'w' || MMX_REG_P (x))
6232 else if (code == 'b')
6234 else if (code == 'k')
6236 else if (code == 'q')
6238 else if (code == 'y')
6240 else if (code == 'h')
6243 code = GET_MODE_SIZE (GET_MODE (x));
6245 /* Irritatingly, AMD extended registers use different naming convention
6246 from the normal registers. */
6247 if (REX_INT_REG_P (x))
6254 error ("extended registers have no high halves");
6257 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6260 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6263 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6266 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6269 error ("unsupported operand size for extended register");
6277 if (STACK_TOP_P (x))
6279 fputs ("st(0)", file);
6286 if (! ANY_FP_REG_P (x))
6287 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6291 fputs (hi_reg_name[REGNO (x)], file);
6294 fputs (qi_reg_name[REGNO (x)], file);
6297 fputs (qi_high_reg_name[REGNO (x)], file);
6304 /* Locate some local-dynamic symbol still in use by this function
6305 so that we can print its name in some tls_local_dynamic_base
6309 get_some_local_dynamic_name ()
6313 if (cfun->machine->some_ld_name)
6314 return cfun->machine->some_ld_name;
6316 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6318 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6319 return cfun->machine->some_ld_name;
6325 get_some_local_dynamic_name_1 (px, data)
6327 void *data ATTRIBUTE_UNUSED;
6331 if (GET_CODE (x) == SYMBOL_REF
6332 && local_dynamic_symbolic_operand (x, Pmode))
6334 cfun->machine->some_ld_name = XSTR (x, 0);
6342 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6343 C -- print opcode suffix for set/cmov insn.
6344 c -- like C, but print reversed condition
6345 F,f -- likewise, but for floating-point.
6346 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6348 R -- print the prefix for register names.
6349 z -- print the opcode suffix for the size of the current operand.
6350 * -- print a star (in certain assembler syntax)
6351 A -- print an absolute memory reference.
6352 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6353 s -- print a shift double count, followed by the assemblers argument
6355 b -- print the QImode name of the register for the indicated operand.
6356 %b0 would print %al if operands[0] is reg 0.
6357 w -- likewise, print the HImode name of the register.
6358 k -- likewise, print the SImode name of the register.
6359 q -- likewise, print the DImode name of the register.
6360 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6361 y -- print "st(0)" instead of "st" as a register.
6362 D -- print condition for SSE cmp instruction.
6363 P -- if PIC, print an @PLT suffix.
6364 X -- don't print any sort of PIC '@' suffix for a symbol.
6365 & -- print some in-use local-dynamic symbol name.
6369 print_operand (file, x, code)
6379 if (ASSEMBLER_DIALECT == ASM_ATT)
6384 assemble_name (file, get_some_local_dynamic_name ());
6388 if (ASSEMBLER_DIALECT == ASM_ATT)
6390 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6392 /* Intel syntax. For absolute addresses, registers should not
6393 be surrounded by braces. */
6394 if (GET_CODE (x) != REG)
6397 PRINT_OPERAND (file, x, 0);
6405 PRINT_OPERAND (file, x, 0);
6410 if (ASSEMBLER_DIALECT == ASM_ATT)
6415 if (ASSEMBLER_DIALECT == ASM_ATT)
6420 if (ASSEMBLER_DIALECT == ASM_ATT)
6425 if (ASSEMBLER_DIALECT == ASM_ATT)
6430 if (ASSEMBLER_DIALECT == ASM_ATT)
6435 if (ASSEMBLER_DIALECT == ASM_ATT)
6440 /* 387 opcodes don't get size suffixes if the operands are
6442 if (STACK_REG_P (x))
6445 /* Likewise if using Intel opcodes. */
6446 if (ASSEMBLER_DIALECT == ASM_INTEL)
6449 /* This is the size of op from size of operand. */
6450 switch (GET_MODE_SIZE (GET_MODE (x)))
6453 #ifdef HAVE_GAS_FILDS_FISTS
6459 if (GET_MODE (x) == SFmode)
6474 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6476 #ifdef GAS_MNEMONICS
6502 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6504 PRINT_OPERAND (file, x, 0);
6510 /* Little bit of braindamage here. The SSE compare instructions
6511 does use completely different names for the comparisons that the
6512 fp conditional moves. */
6513 switch (GET_CODE (x))
6528 fputs ("unord", file);
6532 fputs ("neq", file);
6536 fputs ("nlt", file);
6540 fputs ("nle", file);
6543 fputs ("ord", file);
6551 #ifdef CMOV_SUN_AS_SYNTAX
6552 if (ASSEMBLER_DIALECT == ASM_ATT)
6554 switch (GET_MODE (x))
6556 case HImode: putc ('w', file); break;
6558 case SFmode: putc ('l', file); break;
6560 case DFmode: putc ('q', file); break;
6568 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6571 #ifdef CMOV_SUN_AS_SYNTAX
6572 if (ASSEMBLER_DIALECT == ASM_ATT)
6575 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6578 /* Like above, but reverse condition */
6580 /* Check to see if argument to %c is really a constant
6581 and not a condition code which needs to be reversed. */
6582 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
6584 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6587 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6590 #ifdef CMOV_SUN_AS_SYNTAX
6591 if (ASSEMBLER_DIALECT == ASM_ATT)
6594 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6600 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6603 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6606 int pred_val = INTVAL (XEXP (x, 0));
6608 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6609 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6611 int taken = pred_val > REG_BR_PROB_BASE / 2;
6612 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6614 /* Emit hints only in the case default branch prediction
6615 heuristics would fail. */
6616 if (taken != cputaken)
6618 /* We use 3e (DS) prefix for taken branches and
6619 2e (CS) prefix for not taken branches. */
6621 fputs ("ds ; ", file);
6623 fputs ("cs ; ", file);
6630 output_operand_lossage ("invalid operand code `%c'", code);
6634 if (GET_CODE (x) == REG)
6636 PRINT_REG (x, code, file);
6639 else if (GET_CODE (x) == MEM)
6641 /* No `byte ptr' prefix for call instructions. */
6642 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6645 switch (GET_MODE_SIZE (GET_MODE (x)))
6647 case 1: size = "BYTE"; break;
6648 case 2: size = "WORD"; break;
6649 case 4: size = "DWORD"; break;
6650 case 8: size = "QWORD"; break;
6651 case 12: size = "XWORD"; break;
6652 case 16: size = "XMMWORD"; break;
6657 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6660 else if (code == 'w')
6662 else if (code == 'k')
6666 fputs (" PTR ", file);
6670 if (flag_pic && CONSTANT_ADDRESS_P (x))
6671 output_pic_addr_const (file, x, code);
6672 /* Avoid (%rip) for call operands. */
6673 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
6674 && GET_CODE (x) != CONST_INT)
6675 output_addr_const (file, x);
6676 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6677 output_operand_lossage ("invalid constraints for operand");
6682 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6687 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6688 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6690 if (ASSEMBLER_DIALECT == ASM_ATT)
6692 fprintf (file, "0x%lx", l);
6695 /* These float cases don't actually occur as immediate operands. */
6696 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6701 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6702 REAL_VALUE_TO_DECIMAL (r, dstr, -1);
6703 fprintf (file, "%s", dstr);
6706 else if (GET_CODE (x) == CONST_DOUBLE
6707 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
6712 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6713 REAL_VALUE_TO_DECIMAL (r, dstr, -1);
6714 fprintf (file, "%s", dstr);
6721 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6723 if (ASSEMBLER_DIALECT == ASM_ATT)
6726 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6727 || GET_CODE (x) == LABEL_REF)
6729 if (ASSEMBLER_DIALECT == ASM_ATT)
6732 fputs ("OFFSET FLAT:", file);
6735 if (GET_CODE (x) == CONST_INT)
6736 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6738 output_pic_addr_const (file, x, code);
6740 output_addr_const (file, x);
6744 /* Print a memory operand whose address is ADDR. */
6747 print_operand_address (file, addr)
6751 struct ix86_address parts;
6752 rtx base, index, disp;
6755 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
6757 if (ASSEMBLER_DIALECT == ASM_INTEL)
6758 fputs ("DWORD PTR ", file);
6759 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6761 fputs ("gs:0", file);
6765 if (! ix86_decompose_address (addr, &parts))
6769 index = parts.index;
6771 scale = parts.scale;
6773 if (!base && !index)
6775 /* Displacement only requires special attention. */
6777 if (GET_CODE (disp) == CONST_INT)
6779 if (ASSEMBLER_DIALECT == ASM_INTEL)
6781 if (USER_LABEL_PREFIX[0] == 0)
6783 fputs ("ds:", file);
6785 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6788 output_pic_addr_const (file, addr, 0);
6790 output_addr_const (file, addr);
6792 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6794 && (GET_CODE (addr) == SYMBOL_REF
6795 || GET_CODE (addr) == LABEL_REF
6796 || (GET_CODE (addr) == CONST
6797 && GET_CODE (XEXP (addr, 0)) == PLUS
6798 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
6799 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
6800 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
6801 fputs ("(%rip)", file);
6805 if (ASSEMBLER_DIALECT == ASM_ATT)
6810 output_pic_addr_const (file, disp, 0);
6811 else if (GET_CODE (disp) == LABEL_REF)
6812 output_asm_label (disp);
6814 output_addr_const (file, disp);
6819 PRINT_REG (base, 0, file);
6823 PRINT_REG (index, 0, file);
6825 fprintf (file, ",%d", scale);
6831 rtx offset = NULL_RTX;
6835 /* Pull out the offset of a symbol; print any symbol itself. */
6836 if (GET_CODE (disp) == CONST
6837 && GET_CODE (XEXP (disp, 0)) == PLUS
6838 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6840 offset = XEXP (XEXP (disp, 0), 1);
6841 disp = gen_rtx_CONST (VOIDmode,
6842 XEXP (XEXP (disp, 0), 0));
6846 output_pic_addr_const (file, disp, 0);
6847 else if (GET_CODE (disp) == LABEL_REF)
6848 output_asm_label (disp);
6849 else if (GET_CODE (disp) == CONST_INT)
6852 output_addr_const (file, disp);
6858 PRINT_REG (base, 0, file);
6861 if (INTVAL (offset) >= 0)
6863 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6867 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6874 PRINT_REG (index, 0, file);
6876 fprintf (file, "*%d", scale);
6884 output_addr_const_extra (file, x)
6890 if (GET_CODE (x) != UNSPEC)
6893 op = XVECEXP (x, 0, 0);
6894 switch (XINT (x, 1))
6896 case UNSPEC_GOTTPOFF:
6897 output_addr_const (file, op);
6898 /* FIXME: This might be @TPOFF in Sun ld. */
6899 fputs ("@GOTTPOFF", file);
6902 output_addr_const (file, op);
6903 fputs ("@TPOFF", file);
6906 output_addr_const (file, op);
6907 fputs ("@NTPOFF", file);
6910 output_addr_const (file, op);
6911 fputs ("@DTPOFF", file);
6913 case UNSPEC_GOTNTPOFF:
6914 output_addr_const (file, op);
6915 fputs ("@GOTNTPOFF", file);
6917 case UNSPEC_INDNTPOFF:
6918 output_addr_const (file, op);
6919 fputs ("@INDNTPOFF", file);
6929 /* Split one or more DImode RTL references into pairs of SImode
6930 references. The RTL can be REG, offsettable MEM, integer constant, or
6931 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6932 split and "num" is its length. lo_half and hi_half are output arrays
6933 that parallel "operands". */
6936 split_di (operands, num, lo_half, hi_half)
6939 rtx lo_half[], hi_half[];
6943 rtx op = operands[num];
6945 /* simplify_subreg refuses to split volatile memory addresses,
6946 but we still have to handle them. */
6947 if (GET_CODE (op) == MEM)
6949 lo_half[num] = adjust_address (op, SImode, 0);
6950 hi_half[num] = adjust_address (op, SImode, 4);
6954 lo_half[num] = simplify_gen_subreg (SImode, op,
6955 GET_MODE (op) == VOIDmode
6956 ? DImode : GET_MODE (op), 0);
6957 hi_half[num] = simplify_gen_subreg (SImode, op,
6958 GET_MODE (op) == VOIDmode
6959 ? DImode : GET_MODE (op), 4);
6963 /* Split one or more TImode RTL references into pairs of SImode
6964 references. The RTL can be REG, offsettable MEM, integer constant, or
6965 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
6966 split and "num" is its length. lo_half and hi_half are output arrays
6967 that parallel "operands". */
6970 split_ti (operands, num, lo_half, hi_half)
6973 rtx lo_half[], hi_half[];
6977 rtx op = operands[num];
6979 /* simplify_subreg refuses to split volatile memory addresses, but we
6980 still have to handle them. */
6981 if (GET_CODE (op) == MEM)
6983 lo_half[num] = adjust_address (op, DImode, 0);
6984 hi_half[num] = adjust_address (op, DImode, 8);
6988 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6989 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6994 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6995 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6996 is the expression of the binary operation. The output may either be
6997 emitted here, or returned to the caller, like all output_* functions.
6999 There is no guarantee that the operands are the same mode, as they
7000 might be within FLOAT or FLOAT_EXTEND expressions. */
7002 #ifndef SYSV386_COMPAT
7003 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7004 wants to fix the assemblers because that causes incompatibility
7005 with gcc. No-one wants to fix gcc because that causes
7006 incompatibility with assemblers... You can use the option of
7007 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7008 #define SYSV386_COMPAT 1
7012 output_387_binary_op (insn, operands)
7016 static char buf[30];
7019 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7021 #ifdef ENABLE_CHECKING
7022 /* Even if we do not want to check the inputs, this documents input
7023 constraints. Which helps in understanding the following code. */
7024 if (STACK_REG_P (operands[0])
7025 && ((REG_P (operands[1])
7026 && REGNO (operands[0]) == REGNO (operands[1])
7027 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7028 || (REG_P (operands[2])
7029 && REGNO (operands[0]) == REGNO (operands[2])
7030 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7031 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7037 switch (GET_CODE (operands[3]))
7040 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7041 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7049 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7050 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7058 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7059 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7067 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7068 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7082 if (GET_MODE (operands[0]) == SFmode)
7083 strcat (buf, "ss\t{%2, %0|%0, %2}");
7085 strcat (buf, "sd\t{%2, %0|%0, %2}");
7090 switch (GET_CODE (operands[3]))
7094 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7096 rtx temp = operands[2];
7097 operands[2] = operands[1];
7101 /* know operands[0] == operands[1]. */
7103 if (GET_CODE (operands[2]) == MEM)
7109 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7111 if (STACK_TOP_P (operands[0]))
7112 /* How is it that we are storing to a dead operand[2]?
7113 Well, presumably operands[1] is dead too. We can't
7114 store the result to st(0) as st(0) gets popped on this
7115 instruction. Instead store to operands[2] (which I
7116 think has to be st(1)). st(1) will be popped later.
7117 gcc <= 2.8.1 didn't have this check and generated
7118 assembly code that the Unixware assembler rejected. */
7119 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7121 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7125 if (STACK_TOP_P (operands[0]))
7126 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7128 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7133 if (GET_CODE (operands[1]) == MEM)
7139 if (GET_CODE (operands[2]) == MEM)
7145 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7148 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7149 derived assemblers, confusingly reverse the direction of
7150 the operation for fsub{r} and fdiv{r} when the
7151 destination register is not st(0). The Intel assembler
7152 doesn't have this brain damage. Read !SYSV386_COMPAT to
7153 figure out what the hardware really does. */
7154 if (STACK_TOP_P (operands[0]))
7155 p = "{p\t%0, %2|rp\t%2, %0}";
7157 p = "{rp\t%2, %0|p\t%0, %2}";
7159 if (STACK_TOP_P (operands[0]))
7160 /* As above for fmul/fadd, we can't store to st(0). */
7161 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7163 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7168 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7171 if (STACK_TOP_P (operands[0]))
7172 p = "{rp\t%0, %1|p\t%1, %0}";
7174 p = "{p\t%1, %0|rp\t%0, %1}";
7176 if (STACK_TOP_P (operands[0]))
7177 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7179 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7184 if (STACK_TOP_P (operands[0]))
7186 if (STACK_TOP_P (operands[1]))
7187 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7189 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7192 else if (STACK_TOP_P (operands[1]))
7195 p = "{\t%1, %0|r\t%0, %1}";
7197 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7203 p = "{r\t%2, %0|\t%0, %2}";
7205 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7218 /* Output code to initialize control word copies used by
7219 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7220 is set to control word rounding downwards. */
7222 emit_i387_cw_initialization (normal, round_down)
7223 rtx normal, round_down;
7225 rtx reg = gen_reg_rtx (HImode);
7227 emit_insn (gen_x86_fnstcw_1 (normal));
7228 emit_move_insn (reg, normal);
7229 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7231 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7233 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7234 emit_move_insn (round_down, reg);
7237 /* Output code for INSN to convert a float to a signed int. OPERANDS
7238 are the insn operands. The output may be [HSD]Imode and the input
7239 operand may be [SDX]Fmode. */
7242 output_fix_trunc (insn, operands)
7246 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7247 int dimode_p = GET_MODE (operands[0]) == DImode;
7249 /* Jump through a hoop or two for DImode, since the hardware has no
7250 non-popping instruction. We used to do this a different way, but
7251 that was somewhat fragile and broke with post-reload splitters. */
7252 if (dimode_p && !stack_top_dies)
7253 output_asm_insn ("fld\t%y1", operands);
7255 if (!STACK_TOP_P (operands[1]))
7258 if (GET_CODE (operands[0]) != MEM)
7261 output_asm_insn ("fldcw\t%3", operands);
7262 if (stack_top_dies || dimode_p)
7263 output_asm_insn ("fistp%z0\t%0", operands);
7265 output_asm_insn ("fist%z0\t%0", operands);
7266 output_asm_insn ("fldcw\t%2", operands);
7271 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7272 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7273 when fucom should be used. */
7276 output_fp_compare (insn, operands, eflags_p, unordered_p)
7279 int eflags_p, unordered_p;
7282 rtx cmp_op0 = operands[0];
7283 rtx cmp_op1 = operands[1];
7284 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7289 cmp_op1 = operands[2];
7293 if (GET_MODE (operands[0]) == SFmode)
7295 return "ucomiss\t{%1, %0|%0, %1}";
7297 return "comiss\t{%1, %0|%0, %y}";
7300 return "ucomisd\t{%1, %0|%0, %1}";
7302 return "comisd\t{%1, %0|%0, %y}";
7305 if (! STACK_TOP_P (cmp_op0))
7308 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7310 if (STACK_REG_P (cmp_op1)
7312 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7313 && REGNO (cmp_op1) != FIRST_STACK_REG)
7315 /* If both the top of the 387 stack dies, and the other operand
7316 is also a stack register that dies, then this must be a
7317 `fcompp' float compare */
7321 /* There is no double popping fcomi variant. Fortunately,
7322 eflags is immune from the fstp's cc clobbering. */
7324 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7326 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7334 return "fucompp\n\tfnstsw\t%0";
7336 return "fcompp\n\tfnstsw\t%0";
7349 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7351 static const char * const alt[24] =
7363 "fcomi\t{%y1, %0|%0, %y1}",
7364 "fcomip\t{%y1, %0|%0, %y1}",
7365 "fucomi\t{%y1, %0|%0, %y1}",
7366 "fucomip\t{%y1, %0|%0, %y1}",
7373 "fcom%z2\t%y2\n\tfnstsw\t%0",
7374 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7375 "fucom%z2\t%y2\n\tfnstsw\t%0",
7376 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7378 "ficom%z2\t%y2\n\tfnstsw\t%0",
7379 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7387 mask = eflags_p << 3;
7388 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7389 mask |= unordered_p << 1;
7390 mask |= stack_top_dies;
7403 ix86_output_addr_vec_elt (file, value)
7407 const char *directive = ASM_LONG;
7412 directive = ASM_QUAD;
7418 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7422 ix86_output_addr_diff_elt (file, value, rel)
7427 fprintf (file, "%s%s%d-%s%d\n",
7428 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7429 else if (HAVE_AS_GOTOFF_IN_DATA)
7430 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7432 else if (TARGET_MACHO)
7433 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7434 machopic_function_base_name () + 1);
7437 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7438 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7441 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7445 ix86_expand_clear (dest)
7450 /* We play register width games, which are only valid after reload. */
7451 if (!reload_completed)
7454 /* Avoid HImode and its attendant prefix byte. */
7455 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7456 dest = gen_rtx_REG (SImode, REGNO (dest));
7458 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7460 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7461 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7463 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7464 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7470 /* X is an unchanging MEM. If it is a constant pool reference, return
7471 the constant pool rtx, else NULL. */
7474 maybe_get_pool_constant (x)
7481 if (GET_CODE (x) != PLUS)
7483 if (XEXP (x, 0) != pic_offset_table_rtx)
7486 if (GET_CODE (x) != CONST)
7489 if (GET_CODE (x) != UNSPEC)
7491 if (XINT (x, 1) != UNSPEC_GOTOFF)
7493 x = XVECEXP (x, 0, 0);
7496 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7497 return get_pool_constant (x);
7503 ix86_expand_move (mode, operands)
7504 enum machine_mode mode;
7507 int strict = (reload_in_progress || reload_completed);
7508 rtx insn, op0, op1, tmp;
7513 /* ??? We have a slight problem. We need to say that tls symbols are
7514 not legitimate constants so that reload does not helpfully reload
7515 these constants from a REG_EQUIV, which we cannot handle. (Recall
7516 that general- and local-dynamic address resolution requires a
7519 However, if we say that tls symbols are not legitimate constants,
7520 then emit_move_insn helpfully drop them into the constant pool.
7522 It is far easier to work around emit_move_insn than reload. Recognize
7523 the MEM that we would have created and extract the symbol_ref. */
7526 && GET_CODE (op1) == MEM
7527 && RTX_UNCHANGING_P (op1))
7529 tmp = maybe_get_pool_constant (op1);
7530 /* Note that we only care about symbolic constants here, which
7531 unlike CONST_INT will always have a proper mode. */
7532 if (tmp && GET_MODE (tmp) == Pmode)
7536 if (tls_symbolic_operand (op1, Pmode))
7538 op1 = legitimize_address (op1, op1, VOIDmode);
7539 if (GET_CODE (op0) == MEM)
7541 tmp = gen_reg_rtx (mode);
7542 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7546 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7551 rtx temp = ((reload_in_progress
7552 || ((op0 && GET_CODE (op0) == REG)
7554 ? op0 : gen_reg_rtx (Pmode));
7555 op1 = machopic_indirect_data_reference (op1, temp);
7556 op1 = machopic_legitimize_pic_address (op1, mode,
7557 temp == op1 ? 0 : temp);
7561 if (MACHOPIC_INDIRECT)
7562 op1 = machopic_indirect_data_reference (op1, 0);
7566 insn = gen_rtx_SET (VOIDmode, op0, op1);
7570 #endif /* TARGET_MACHO */
7571 if (GET_CODE (op0) == MEM)
7572 op1 = force_reg (Pmode, op1);
7576 if (GET_CODE (temp) != REG)
7577 temp = gen_reg_rtx (Pmode);
7578 temp = legitimize_pic_address (op1, temp);
7586 if (GET_CODE (op0) == MEM
7587 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7588 || !push_operand (op0, mode))
7589 && GET_CODE (op1) == MEM)
7590 op1 = force_reg (mode, op1);
7592 if (push_operand (op0, mode)
7593 && ! general_no_elim_operand (op1, mode))
7594 op1 = copy_to_mode_reg (mode, op1);
7596 /* Force large constants in 64bit compilation into register
7597 to get them CSEed. */
7598 if (TARGET_64BIT && mode == DImode
7599 && immediate_operand (op1, mode)
7600 && !x86_64_zero_extended_value (op1)
7601 && !register_operand (op0, mode)
7602 && optimize && !reload_completed && !reload_in_progress)
7603 op1 = copy_to_mode_reg (mode, op1);
7605 if (FLOAT_MODE_P (mode))
7607 /* If we are loading a floating point constant to a register,
7608 force the value to memory now, since we'll get better code
7609 out the back end. */
7613 else if (GET_CODE (op1) == CONST_DOUBLE
7614 && register_operand (op0, mode))
7615 op1 = validize_mem (force_const_mem (mode, op1));
7619 insn = gen_rtx_SET (VOIDmode, op0, op1);
7625 ix86_expand_vector_move (mode, operands)
7626 enum machine_mode mode;
7629 /* Force constants other than zero into memory. We do not know how
7630 the instructions used to build constants modify the upper 64 bits
7631 of the register, once we have that information we may be able
7632 to handle some of them more efficiently. */
7633 if ((reload_in_progress | reload_completed) == 0
7634 && register_operand (operands[0], mode)
7635 && CONSTANT_P (operands[1]))
7637 rtx addr = gen_reg_rtx (Pmode);
7638 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
7639 operands[1] = gen_rtx_MEM (mode, addr);
7642 /* Make operand1 a register if it isn't already. */
7643 if ((reload_in_progress | reload_completed) == 0
7644 && !register_operand (operands[0], mode)
7645 && !register_operand (operands[1], mode)
7646 && operands[1] != CONST0_RTX (mode))
7648 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
7649 emit_move_insn (operands[0], temp);
7653 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
7656 /* Attempt to expand a binary operator. Make the expansion closer to the
7657 actual machine, then just general_operand, which will allow 3 separate
7658 memory references (one output, two input) in a single insn. */
7661 ix86_expand_binary_operator (code, mode, operands)
7663 enum machine_mode mode;
7666 int matching_memory;
7667 rtx src1, src2, dst, op, clob;
7673 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7674 if (GET_RTX_CLASS (code) == 'c'
7675 && (rtx_equal_p (dst, src2)
7676 || immediate_operand (src1, mode)))
7683 /* If the destination is memory, and we do not have matching source
7684 operands, do things in registers. */
7685 matching_memory = 0;
7686 if (GET_CODE (dst) == MEM)
7688 if (rtx_equal_p (dst, src1))
7689 matching_memory = 1;
7690 else if (GET_RTX_CLASS (code) == 'c'
7691 && rtx_equal_p (dst, src2))
7692 matching_memory = 2;
7694 dst = gen_reg_rtx (mode);
7697 /* Both source operands cannot be in memory. */
7698 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7700 if (matching_memory != 2)
7701 src2 = force_reg (mode, src2);
7703 src1 = force_reg (mode, src1);
7706 /* If the operation is not commutable, source 1 cannot be a constant
7707 or non-matching memory. */
7708 if ((CONSTANT_P (src1)
7709 || (!matching_memory && GET_CODE (src1) == MEM))
7710 && GET_RTX_CLASS (code) != 'c')
7711 src1 = force_reg (mode, src1);
7713 /* If optimizing, copy to regs to improve CSE */
7714 if (optimize && ! no_new_pseudos)
7716 if (GET_CODE (dst) == MEM)
7717 dst = gen_reg_rtx (mode);
7718 if (GET_CODE (src1) == MEM)
7719 src1 = force_reg (mode, src1);
7720 if (GET_CODE (src2) == MEM)
7721 src2 = force_reg (mode, src2);
7724 /* Emit the instruction. */
7726 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7727 if (reload_in_progress)
7729 /* Reload doesn't know about the flags register, and doesn't know that
7730 it doesn't want to clobber it. We can only do this with PLUS. */
7737 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7738 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7741 /* Fix up the destination if needed. */
7742 if (dst != operands[0])
7743 emit_move_insn (operands[0], dst);
7746 /* Return TRUE or FALSE depending on whether the binary operator meets the
7747 appropriate constraints. */
7750 ix86_binary_operator_ok (code, mode, operands)
7752 enum machine_mode mode ATTRIBUTE_UNUSED;
7755 /* Both source operands cannot be in memory. */
7756 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7758 /* If the operation is not commutable, source 1 cannot be a constant. */
7759 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
7761 /* If the destination is memory, we must have a matching source operand. */
7762 if (GET_CODE (operands[0]) == MEM
7763 && ! (rtx_equal_p (operands[0], operands[1])
7764 || (GET_RTX_CLASS (code) == 'c'
7765 && rtx_equal_p (operands[0], operands[2]))))
7767 /* If the operation is not commutable and the source 1 is memory, we must
7768 have a matching destination. */
7769 if (GET_CODE (operands[1]) == MEM
7770 && GET_RTX_CLASS (code) != 'c'
7771 && ! rtx_equal_p (operands[0], operands[1]))
7776 /* Attempt to expand a unary operator. Make the expansion closer to the
7777 actual machine, then just general_operand, which will allow 2 separate
7778 memory references (one output, one input) in a single insn. */
7781 ix86_expand_unary_operator (code, mode, operands)
7783 enum machine_mode mode;
7786 int matching_memory;
7787 rtx src, dst, op, clob;
7792 /* If the destination is memory, and we do not have matching source
7793 operands, do things in registers. */
7794 matching_memory = 0;
7795 if (GET_CODE (dst) == MEM)
7797 if (rtx_equal_p (dst, src))
7798 matching_memory = 1;
7800 dst = gen_reg_rtx (mode);
7803 /* When source operand is memory, destination must match. */
7804 if (!matching_memory && GET_CODE (src) == MEM)
7805 src = force_reg (mode, src);
7807 /* If optimizing, copy to regs to improve CSE */
7808 if (optimize && ! no_new_pseudos)
7810 if (GET_CODE (dst) == MEM)
7811 dst = gen_reg_rtx (mode);
7812 if (GET_CODE (src) == MEM)
7813 src = force_reg (mode, src);
7816 /* Emit the instruction. */
7818 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7819 if (reload_in_progress || code == NOT)
7821 /* Reload doesn't know about the flags register, and doesn't know that
7822 it doesn't want to clobber it. */
7829 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7830 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7833 /* Fix up the destination if needed. */
7834 if (dst != operands[0])
7835 emit_move_insn (operands[0], dst);
7838 /* Return TRUE or FALSE depending on whether the unary operator meets the
7839 appropriate constraints. */
7842 ix86_unary_operator_ok (code, mode, operands)
7843 enum rtx_code code ATTRIBUTE_UNUSED;
7844 enum machine_mode mode ATTRIBUTE_UNUSED;
7845 rtx operands[2] ATTRIBUTE_UNUSED;
7847 /* If one of operands is memory, source and destination must match. */
7848 if ((GET_CODE (operands[0]) == MEM
7849 || GET_CODE (operands[1]) == MEM)
7850 && ! rtx_equal_p (operands[0], operands[1]))
7855 /* Return TRUE or FALSE depending on whether the first SET in INSN
7856 has source and destination with matching CC modes, and that the
7857 CC mode is at least as constrained as REQ_MODE. */
7860 ix86_match_ccmode (insn, req_mode)
7862 enum machine_mode req_mode;
7865 enum machine_mode set_mode;
7867 set = PATTERN (insn);
7868 if (GET_CODE (set) == PARALLEL)
7869 set = XVECEXP (set, 0, 0);
7870 if (GET_CODE (set) != SET)
7872 if (GET_CODE (SET_SRC (set)) != COMPARE)
7875 set_mode = GET_MODE (SET_DEST (set));
7879 if (req_mode != CCNOmode
7880 && (req_mode != CCmode
7881 || XEXP (SET_SRC (set), 1) != const0_rtx))
7885 if (req_mode == CCGCmode)
7889 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7893 if (req_mode == CCZmode)
7903 return (GET_MODE (SET_SRC (set)) == set_mode);
7906 /* Generate insn patterns to do an integer compare of OPERANDS. */
7909 ix86_expand_int_compare (code, op0, op1)
7913 enum machine_mode cmpmode;
7916 cmpmode = SELECT_CC_MODE (code, op0, op1);
7917 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7919 /* This is very simple, but making the interface the same as in the
7920 FP case makes the rest of the code easier. */
7921 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7922 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7924 /* Return the test that should be put into the flags user, i.e.
7925 the bcc, scc, or cmov instruction. */
7926 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7929 /* Figure out whether to use ordered or unordered fp comparisons.
7930 Return the appropriate mode to use. */
7933 ix86_fp_compare_mode (code)
7934 enum rtx_code code ATTRIBUTE_UNUSED;
7936 /* ??? In order to make all comparisons reversible, we do all comparisons
7937 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7938 all forms trapping and nontrapping comparisons, we can make inequality
7939 comparisons trapping again, since it results in better code when using
7940 FCOM based compares. */
7941 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7945 ix86_cc_mode (code, op0, op1)
7949 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7950 return ix86_fp_compare_mode (code);
7953 /* Only zero flag is needed. */
7955 case NE: /* ZF!=0 */
7957 /* Codes needing carry flag. */
7958 case GEU: /* CF=0 */
7959 case GTU: /* CF=0 & ZF=0 */
7960 case LTU: /* CF=1 */
7961 case LEU: /* CF=1 | ZF=1 */
7963 /* Codes possibly doable only with sign flag when
7964 comparing against zero. */
7965 case GE: /* SF=OF or SF=0 */
7966 case LT: /* SF<>OF or SF=1 */
7967 if (op1 == const0_rtx)
7970 /* For other cases Carry flag is not required. */
7972 /* Codes doable only with sign flag when comparing
7973 against zero, but we lack a jump instruction for it
7974 so we need to use relational tests against overflow,
7975 which thus needs to be zero. */
7976 case GT: /* ZF=0 & SF=OF */
7977 case LE: /* ZF=1 | SF<>OF */
7978 if (op1 == const0_rtx)
7982 /* strcmp pattern do (use flags) and combine may ask us for proper
7991 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7994 ix86_use_fcomi_compare (code)
7995 enum rtx_code code ATTRIBUTE_UNUSED;
7997 enum rtx_code swapped_code = swap_condition (code);
7998 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7999 || (ix86_fp_comparison_cost (swapped_code)
8000 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8003 /* Swap, force into registers, or otherwise massage the two operands
8004 to a fp comparison. The operands are updated in place; the new
8005 comparison code is returned.
8007 static enum rtx_code
8008 ix86_prepare_fp_compare_args (code, pop0, pop1)
8012 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8013 rtx op0 = *pop0, op1 = *pop1;
8014 enum machine_mode op_mode = GET_MODE (op0);
8015 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8017 /* All of the unordered compare instructions only work on registers.
8018 The same is true of the XFmode compare instructions. The same is
8019 true of the fcomi compare instructions. */
8022 && (fpcmp_mode == CCFPUmode
8023 || op_mode == XFmode
8024 || op_mode == TFmode
8025 || ix86_use_fcomi_compare (code)))
8027 op0 = force_reg (op_mode, op0);
8028 op1 = force_reg (op_mode, op1);
8032 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8033 things around if they appear profitable, otherwise force op0
8036 if (standard_80387_constant_p (op0) == 0
8037 || (GET_CODE (op0) == MEM
8038 && ! (standard_80387_constant_p (op1) == 0
8039 || GET_CODE (op1) == MEM)))
8042 tmp = op0, op0 = op1, op1 = tmp;
8043 code = swap_condition (code);
8046 if (GET_CODE (op0) != REG)
8047 op0 = force_reg (op_mode, op0);
8049 if (CONSTANT_P (op1))
8051 if (standard_80387_constant_p (op1))
8052 op1 = force_reg (op_mode, op1);
8054 op1 = validize_mem (force_const_mem (op_mode, op1));
8058 /* Try to rearrange the comparison to make it cheaper. */
8059 if (ix86_fp_comparison_cost (code)
8060 > ix86_fp_comparison_cost (swap_condition (code))
8061 && (GET_CODE (op1) == REG || !no_new_pseudos))
8064 tmp = op0, op0 = op1, op1 = tmp;
8065 code = swap_condition (code);
8066 if (GET_CODE (op0) != REG)
8067 op0 = force_reg (op_mode, op0);
8075 /* Convert comparison codes we use to represent FP comparison to integer
8076 code that will result in proper branch. Return UNKNOWN if no such code
8078 static enum rtx_code
8079 ix86_fp_compare_code_to_integer (code)
8109 /* Split comparison code CODE into comparisons we can do using branch
8110 instructions. BYPASS_CODE is comparison code for branch that will
8111 branch around FIRST_CODE and SECOND_CODE. If some of branches
8112 is not required, set value to NIL.
8113 We never require more than two branches. */
8115 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8116 enum rtx_code code, *bypass_code, *first_code, *second_code;
8122 /* The fcomi comparison sets flags as follows:
8132 case GT: /* GTU - CF=0 & ZF=0 */
8133 case GE: /* GEU - CF=0 */
8134 case ORDERED: /* PF=0 */
8135 case UNORDERED: /* PF=1 */
8136 case UNEQ: /* EQ - ZF=1 */
8137 case UNLT: /* LTU - CF=1 */
8138 case UNLE: /* LEU - CF=1 | ZF=1 */
8139 case LTGT: /* EQ - ZF=0 */
8141 case LT: /* LTU - CF=1 - fails on unordered */
8143 *bypass_code = UNORDERED;
8145 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8147 *bypass_code = UNORDERED;
8149 case EQ: /* EQ - ZF=1 - fails on unordered */
8151 *bypass_code = UNORDERED;
8153 case NE: /* NE - ZF=0 - fails on unordered */
8155 *second_code = UNORDERED;
8157 case UNGE: /* GEU - CF=0 - fails on unordered */
8159 *second_code = UNORDERED;
8161 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8163 *second_code = UNORDERED;
8168 if (!TARGET_IEEE_FP)
8175 /* Return cost of comparison done fcom + arithmetics operations on AX.
8176 All following functions do use number of instructions as an cost metrics.
8177 In future this should be tweaked to compute bytes for optimize_size and
8178 take into account performance of various instructions on various CPUs. */
8180 ix86_fp_comparison_arithmetics_cost (code)
8183 if (!TARGET_IEEE_FP)
8185 /* The cost of code output by ix86_expand_fp_compare. */
8213 /* Return cost of comparison done using fcomi operation.
8214 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8216 ix86_fp_comparison_fcomi_cost (code)
8219 enum rtx_code bypass_code, first_code, second_code;
8220 /* Return arbitarily high cost when instruction is not supported - this
8221 prevents gcc from using it. */
8224 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8225 return (bypass_code != NIL || second_code != NIL) + 2;
8228 /* Return cost of comparison done using sahf operation.
8229 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8231 ix86_fp_comparison_sahf_cost (code)
8234 enum rtx_code bypass_code, first_code, second_code;
8235 /* Return arbitarily high cost when instruction is not preferred - this
8236 avoids gcc from using it. */
8237 if (!TARGET_USE_SAHF && !optimize_size)
8239 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8240 return (bypass_code != NIL || second_code != NIL) + 3;
8243 /* Compute cost of the comparison done using any method.
8244 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8246 ix86_fp_comparison_cost (code)
8249 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8252 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8253 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8255 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8256 if (min > sahf_cost)
8258 if (min > fcomi_cost)
8263 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8266 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8268 rtx op0, op1, scratch;
8272 enum machine_mode fpcmp_mode, intcmp_mode;
8274 int cost = ix86_fp_comparison_cost (code);
8275 enum rtx_code bypass_code, first_code, second_code;
8277 fpcmp_mode = ix86_fp_compare_mode (code);
8278 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8281 *second_test = NULL_RTX;
8283 *bypass_test = NULL_RTX;
8285 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8287 /* Do fcomi/sahf based test when profitable. */
8288 if ((bypass_code == NIL || bypass_test)
8289 && (second_code == NIL || second_test)
8290 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8294 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8295 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8301 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8302 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8304 scratch = gen_reg_rtx (HImode);
8305 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8306 emit_insn (gen_x86_sahf_1 (scratch));
8309 /* The FP codes work out to act like unsigned. */
8310 intcmp_mode = fpcmp_mode;
8312 if (bypass_code != NIL)
8313 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8314 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8316 if (second_code != NIL)
8317 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8318 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8323 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8324 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8325 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8327 scratch = gen_reg_rtx (HImode);
8328 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8330 /* In the unordered case, we have to check C2 for NaN's, which
8331 doesn't happen to work out to anything nice combination-wise.
8332 So do some bit twiddling on the value we've got in AH to come
8333 up with an appropriate set of condition codes. */
8335 intcmp_mode = CCNOmode;
8340 if (code == GT || !TARGET_IEEE_FP)
8342 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8347 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8348 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8349 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8350 intcmp_mode = CCmode;
8356 if (code == LT && TARGET_IEEE_FP)
8358 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8359 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8360 intcmp_mode = CCmode;
8365 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8371 if (code == GE || !TARGET_IEEE_FP)
8373 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8378 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8379 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8386 if (code == LE && TARGET_IEEE_FP)
8388 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8389 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8390 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8391 intcmp_mode = CCmode;
8396 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8402 if (code == EQ && TARGET_IEEE_FP)
8404 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8405 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8406 intcmp_mode = CCmode;
8411 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8418 if (code == NE && TARGET_IEEE_FP)
8420 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8421 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8427 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8433 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8437 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8446 /* Return the test that should be put into the flags user, i.e.
8447 the bcc, scc, or cmov instruction. */
8448 return gen_rtx_fmt_ee (code, VOIDmode,
8449 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8454 ix86_expand_compare (code, second_test, bypass_test)
8456 rtx *second_test, *bypass_test;
8459 op0 = ix86_compare_op0;
8460 op1 = ix86_compare_op1;
8463 *second_test = NULL_RTX;
8465 *bypass_test = NULL_RTX;
8467 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8468 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8469 second_test, bypass_test);
8471 ret = ix86_expand_int_compare (code, op0, op1);
8476 /* Return true if the CODE will result in nontrivial jump sequence. */
8478 ix86_fp_jump_nontrivial_p (code)
8481 enum rtx_code bypass_code, first_code, second_code;
8484 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8485 return bypass_code != NIL || second_code != NIL;
8489 ix86_expand_branch (code, label)
8495 switch (GET_MODE (ix86_compare_op0))
8501 tmp = ix86_expand_compare (code, NULL, NULL);
8502 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8503 gen_rtx_LABEL_REF (VOIDmode, label),
8505 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8515 enum rtx_code bypass_code, first_code, second_code;
8517 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8520 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8522 /* Check whether we will use the natural sequence with one jump. If
8523 so, we can expand jump early. Otherwise delay expansion by
8524 creating compound insn to not confuse optimizers. */
8525 if (bypass_code == NIL && second_code == NIL
8528 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8529 gen_rtx_LABEL_REF (VOIDmode, label),
8534 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8535 ix86_compare_op0, ix86_compare_op1);
8536 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8537 gen_rtx_LABEL_REF (VOIDmode, label),
8539 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8541 use_fcomi = ix86_use_fcomi_compare (code);
8542 vec = rtvec_alloc (3 + !use_fcomi);
8543 RTVEC_ELT (vec, 0) = tmp;
8545 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8547 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8550 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8552 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8560 /* Expand DImode branch into multiple compare+branch. */
8562 rtx lo[2], hi[2], label2;
8563 enum rtx_code code1, code2, code3;
8565 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8567 tmp = ix86_compare_op0;
8568 ix86_compare_op0 = ix86_compare_op1;
8569 ix86_compare_op1 = tmp;
8570 code = swap_condition (code);
8572 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8573 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8575 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8576 avoid two branches. This costs one extra insn, so disable when
8577 optimizing for size. */
8579 if ((code == EQ || code == NE)
8581 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8586 if (hi[1] != const0_rtx)
8587 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8588 NULL_RTX, 0, OPTAB_WIDEN);
8591 if (lo[1] != const0_rtx)
8592 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8593 NULL_RTX, 0, OPTAB_WIDEN);
8595 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8596 NULL_RTX, 0, OPTAB_WIDEN);
8598 ix86_compare_op0 = tmp;
8599 ix86_compare_op1 = const0_rtx;
8600 ix86_expand_branch (code, label);
8604 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8605 op1 is a constant and the low word is zero, then we can just
8606 examine the high word. */
8608 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8611 case LT: case LTU: case GE: case GEU:
8612 ix86_compare_op0 = hi[0];
8613 ix86_compare_op1 = hi[1];
8614 ix86_expand_branch (code, label);
8620 /* Otherwise, we need two or three jumps. */
8622 label2 = gen_label_rtx ();
8625 code2 = swap_condition (code);
8626 code3 = unsigned_condition (code);
8630 case LT: case GT: case LTU: case GTU:
8633 case LE: code1 = LT; code2 = GT; break;
8634 case GE: code1 = GT; code2 = LT; break;
8635 case LEU: code1 = LTU; code2 = GTU; break;
8636 case GEU: code1 = GTU; code2 = LTU; break;
8638 case EQ: code1 = NIL; code2 = NE; break;
8639 case NE: code2 = NIL; break;
8647 * if (hi(a) < hi(b)) goto true;
8648 * if (hi(a) > hi(b)) goto false;
8649 * if (lo(a) < lo(b)) goto true;
8653 ix86_compare_op0 = hi[0];
8654 ix86_compare_op1 = hi[1];
8657 ix86_expand_branch (code1, label);
8659 ix86_expand_branch (code2, label2);
8661 ix86_compare_op0 = lo[0];
8662 ix86_compare_op1 = lo[1];
8663 ix86_expand_branch (code3, label);
8666 emit_label (label2);
8675 /* Split branch based on floating point condition. */
8677 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
8679 rtx op1, op2, target1, target2, tmp;
8682 rtx label = NULL_RTX;
8684 int bypass_probability = -1, second_probability = -1, probability = -1;
8687 if (target2 != pc_rtx)
8690 code = reverse_condition_maybe_unordered (code);
8695 condition = ix86_expand_fp_compare (code, op1, op2,
8696 tmp, &second, &bypass);
8698 if (split_branch_probability >= 0)
8700 /* Distribute the probabilities across the jumps.
8701 Assume the BYPASS and SECOND to be always test
8703 probability = split_branch_probability;
8705 /* Value of 1 is low enough to make no need for probability
8706 to be updated. Later we may run some experiments and see
8707 if unordered values are more frequent in practice. */
8709 bypass_probability = 1;
8711 second_probability = 1;
8713 if (bypass != NULL_RTX)
8715 label = gen_label_rtx ();
8716 i = emit_jump_insn (gen_rtx_SET
8718 gen_rtx_IF_THEN_ELSE (VOIDmode,
8720 gen_rtx_LABEL_REF (VOIDmode,
8723 if (bypass_probability >= 0)
8725 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8726 GEN_INT (bypass_probability),
8729 i = emit_jump_insn (gen_rtx_SET
8731 gen_rtx_IF_THEN_ELSE (VOIDmode,
8732 condition, target1, target2)));
8733 if (probability >= 0)
8735 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8736 GEN_INT (probability),
8738 if (second != NULL_RTX)
8740 i = emit_jump_insn (gen_rtx_SET
8742 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8744 if (second_probability >= 0)
8746 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8747 GEN_INT (second_probability),
8750 if (label != NULL_RTX)
8755 ix86_expand_setcc (code, dest)
8759 rtx ret, tmp, tmpreg;
8760 rtx second_test, bypass_test;
8762 if (GET_MODE (ix86_compare_op0) == DImode
8764 return 0; /* FAIL */
8766 if (GET_MODE (dest) != QImode)
8769 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8770 PUT_MODE (ret, QImode);
8775 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
8776 if (bypass_test || second_test)
8778 rtx test = second_test;
8780 rtx tmp2 = gen_reg_rtx (QImode);
8787 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8789 PUT_MODE (test, QImode);
8790 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8793 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8795 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8798 return 1; /* DONE */
8802 ix86_expand_int_movcc (operands)
8805 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8806 rtx compare_seq, compare_op;
8807 rtx second_test, bypass_test;
8808 enum machine_mode mode = GET_MODE (operands[0]);
8810 /* When the compare code is not LTU or GEU, we can not use sbbl case.
8811 In case comparsion is done with immediate, we can convert it to LTU or
8812 GEU by altering the integer. */
8814 if ((code == LEU || code == GTU)
8815 && GET_CODE (ix86_compare_op1) == CONST_INT
8817 && INTVAL (ix86_compare_op1) != -1
8818 /* For x86-64, the immediate field in the instruction is 32-bit
8819 signed, so we can't increment a DImode value above 0x7fffffff. */
8821 || GET_MODE (ix86_compare_op0) != DImode
8822 || INTVAL (ix86_compare_op1) != 0x7fffffff)
8823 && GET_CODE (operands[2]) == CONST_INT
8824 && GET_CODE (operands[3]) == CONST_INT)
8830 ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8831 GET_MODE (ix86_compare_op0));
8835 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8836 compare_seq = get_insns ();
8839 compare_code = GET_CODE (compare_op);
8841 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8842 HImode insns, we'd be swallowed in word prefix ops. */
8845 && (mode != DImode || TARGET_64BIT)
8846 && GET_CODE (operands[2]) == CONST_INT
8847 && GET_CODE (operands[3]) == CONST_INT)
8849 rtx out = operands[0];
8850 HOST_WIDE_INT ct = INTVAL (operands[2]);
8851 HOST_WIDE_INT cf = INTVAL (operands[3]);
8854 if ((compare_code == LTU || compare_code == GEU)
8855 && !second_test && !bypass_test)
8857 /* Detect overlap between destination and compare sources. */
8860 /* To simplify rest of code, restrict to the GEU case. */
8861 if (compare_code == LTU)
8866 compare_code = reverse_condition (compare_code);
8867 code = reverse_condition (code);
8871 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8872 || reg_overlap_mentioned_p (out, ix86_compare_op1))
8873 tmp = gen_reg_rtx (mode);
8875 emit_insn (compare_seq);
8877 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8879 emit_insn (gen_x86_movsicc_0_m1 (tmp));
8891 tmp = expand_simple_binop (mode, PLUS,
8893 tmp, 1, OPTAB_DIRECT);
8904 tmp = expand_simple_binop (mode, IOR,
8906 tmp, 1, OPTAB_DIRECT);
8908 else if (diff == -1 && ct)
8918 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8920 tmp = expand_simple_binop (mode, PLUS,
8922 tmp, 1, OPTAB_DIRECT);
8930 * andl cf - ct, dest
8940 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8943 tmp = expand_simple_binop (mode, AND,
8945 gen_int_mode (cf - ct, mode),
8946 tmp, 1, OPTAB_DIRECT);
8948 tmp = expand_simple_binop (mode, PLUS,
8950 tmp, 1, OPTAB_DIRECT);
8954 emit_move_insn (out, tmp);
8956 return 1; /* DONE */
8963 tmp = ct, ct = cf, cf = tmp;
8965 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8967 /* We may be reversing unordered compare to normal compare, that
8968 is not valid in general (we may convert non-trapping condition
8969 to trapping one), however on i386 we currently emit all
8970 comparisons unordered. */
8971 compare_code = reverse_condition_maybe_unordered (compare_code);
8972 code = reverse_condition_maybe_unordered (code);
8976 compare_code = reverse_condition (compare_code);
8977 code = reverse_condition (code);
8982 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
8983 && GET_CODE (ix86_compare_op1) == CONST_INT)
8985 if (ix86_compare_op1 == const0_rtx
8986 && (code == LT || code == GE))
8987 compare_code = code;
8988 else if (ix86_compare_op1 == constm1_rtx)
8992 else if (code == GT)
8997 /* Optimize dest = (op0 < 0) ? -1 : cf. */
8998 if (compare_code != NIL
8999 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9000 && (cf == -1 || ct == -1))
9002 /* If lea code below could be used, only optimize
9003 if it results in a 2 insn sequence. */
9005 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9006 || diff == 3 || diff == 5 || diff == 9)
9007 || (compare_code == LT && ct == -1)
9008 || (compare_code == GE && cf == -1))
9011 * notl op1 (if necessary)
9019 code = reverse_condition (code);
9022 out = emit_store_flag (out, code, ix86_compare_op0,
9023 ix86_compare_op1, VOIDmode, 0, -1);
9025 out = expand_simple_binop (mode, IOR,
9027 out, 1, OPTAB_DIRECT);
9028 if (out != operands[0])
9029 emit_move_insn (operands[0], out);
9031 return 1; /* DONE */
9035 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9036 || diff == 3 || diff == 5 || diff == 9)
9037 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9043 * lea cf(dest*(ct-cf)),dest
9047 * This also catches the degenerate setcc-only case.
9053 out = emit_store_flag (out, code, ix86_compare_op0,
9054 ix86_compare_op1, VOIDmode, 0, 1);
9057 /* On x86_64 the lea instruction operates on Pmode, so we need
9058 to get arithmetics done in proper mode to match. */
9065 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9069 tmp = gen_rtx_PLUS (mode, tmp, out1);
9075 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9079 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
9085 clob = gen_rtx_REG (CCmode, FLAGS_REG);
9086 clob = gen_rtx_CLOBBER (VOIDmode, clob);
9088 tmp = gen_rtx_SET (VOIDmode, out, tmp);
9089 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9093 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
9095 if (out != operands[0])
9096 emit_move_insn (operands[0], copy_rtx (out));
9098 return 1; /* DONE */
9102 * General case: Jumpful:
9103 * xorl dest,dest cmpl op1, op2
9104 * cmpl op1, op2 movl ct, dest
9106 * decl dest movl cf, dest
9107 * andl (cf-ct),dest 1:
9112 * This is reasonably steep, but branch mispredict costs are
9113 * high on modern cpus, so consider failing only if optimizing
9116 * %%% Parameterize branch_cost on the tuning architecture, then
9117 * use that. The 80386 couldn't care less about mispredicts.
9120 if (!optimize_size && !TARGET_CMOVE)
9126 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9127 /* We may be reversing unordered compare to normal compare,
9128 that is not valid in general (we may convert non-trapping
9129 condition to trapping one), however on i386 we currently
9130 emit all comparisons unordered. */
9131 code = reverse_condition_maybe_unordered (code);
9134 code = reverse_condition (code);
9135 if (compare_code != NIL)
9136 compare_code = reverse_condition (compare_code);
9140 if (compare_code != NIL)
9142 /* notl op1 (if needed)
9147 For x < 0 (resp. x <= -1) there will be no notl,
9148 so if possible swap the constants to get rid of the
9150 True/false will be -1/0 while code below (store flag
9151 followed by decrement) is 0/-1, so the constants need
9152 to be exchanged once more. */
9154 if (compare_code == GE || !cf)
9156 code = reverse_condition (code);
9161 HOST_WIDE_INT tmp = cf;
9166 out = emit_store_flag (out, code, ix86_compare_op0,
9167 ix86_compare_op1, VOIDmode, 0, -1);
9171 out = emit_store_flag (out, code, ix86_compare_op0,
9172 ix86_compare_op1, VOIDmode, 0, 1);
9174 out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
9175 out, 1, OPTAB_DIRECT);
9178 out = expand_simple_binop (mode, AND, out,
9179 gen_int_mode (cf - ct, mode),
9180 out, 1, OPTAB_DIRECT);
9182 out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
9183 out, 1, OPTAB_DIRECT);
9184 if (out != operands[0])
9185 emit_move_insn (operands[0], out);
9187 return 1; /* DONE */
9193 /* Try a few things more with specific constants and a variable. */
9196 rtx var, orig_out, out, tmp;
9199 return 0; /* FAIL */
9201 /* If one of the two operands is an interesting constant, load a
9202 constant with the above and mask it in with a logical operation. */
9204 if (GET_CODE (operands[2]) == CONST_INT)
9207 if (INTVAL (operands[2]) == 0)
9208 operands[3] = constm1_rtx, op = and_optab;
9209 else if (INTVAL (operands[2]) == -1)
9210 operands[3] = const0_rtx, op = ior_optab;
9212 return 0; /* FAIL */
9214 else if (GET_CODE (operands[3]) == CONST_INT)
9217 if (INTVAL (operands[3]) == 0)
9218 operands[2] = constm1_rtx, op = and_optab;
9219 else if (INTVAL (operands[3]) == -1)
9220 operands[2] = const0_rtx, op = ior_optab;
9222 return 0; /* FAIL */
9225 return 0; /* FAIL */
9227 orig_out = operands[0];
9228 tmp = gen_reg_rtx (mode);
9231 /* Recurse to get the constant loaded. */
9232 if (ix86_expand_int_movcc (operands) == 0)
9233 return 0; /* FAIL */
9235 /* Mask in the interesting variable. */
9236 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9238 if (out != orig_out)
9239 emit_move_insn (orig_out, out);
9241 return 1; /* DONE */
9245 * For comparison with above,
9255 if (! nonimmediate_operand (operands[2], mode))
9256 operands[2] = force_reg (mode, operands[2]);
9257 if (! nonimmediate_operand (operands[3], mode))
9258 operands[3] = force_reg (mode, operands[3]);
9260 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9262 rtx tmp = gen_reg_rtx (mode);
9263 emit_move_insn (tmp, operands[3]);
9266 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9268 rtx tmp = gen_reg_rtx (mode);
9269 emit_move_insn (tmp, operands[2]);
9272 if (! register_operand (operands[2], VOIDmode)
9273 && ! register_operand (operands[3], VOIDmode))
9274 operands[2] = force_reg (mode, operands[2]);
9276 emit_insn (compare_seq);
9277 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9278 gen_rtx_IF_THEN_ELSE (mode,
9279 compare_op, operands[2],
9282 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9283 gen_rtx_IF_THEN_ELSE (mode,
9288 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9289 gen_rtx_IF_THEN_ELSE (mode,
9294 return 1; /* DONE */
9298 ix86_expand_fp_movcc (operands)
9303 rtx compare_op, second_test, bypass_test;
9305 /* For SF/DFmode conditional moves based on comparisons
9306 in same mode, we may want to use SSE min/max instructions. */
9307 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9308 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9309 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9310 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
9312 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9313 /* We may be called from the post-reload splitter. */
9314 && (!REG_P (operands[0])
9315 || SSE_REG_P (operands[0])
9316 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9318 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9319 code = GET_CODE (operands[1]);
9321 /* See if we have (cross) match between comparison operands and
9322 conditional move operands. */
9323 if (rtx_equal_p (operands[2], op1))
9328 code = reverse_condition_maybe_unordered (code);
9330 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9332 /* Check for min operation. */
9335 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9336 if (memory_operand (op0, VOIDmode))
9337 op0 = force_reg (GET_MODE (operands[0]), op0);
9338 if (GET_MODE (operands[0]) == SFmode)
9339 emit_insn (gen_minsf3 (operands[0], op0, op1));
9341 emit_insn (gen_mindf3 (operands[0], op0, op1));
9344 /* Check for max operation. */
9347 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9348 if (memory_operand (op0, VOIDmode))
9349 op0 = force_reg (GET_MODE (operands[0]), op0);
9350 if (GET_MODE (operands[0]) == SFmode)
9351 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9353 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9357 /* Manage condition to be sse_comparison_operator. In case we are
9358 in non-ieee mode, try to canonicalize the destination operand
9359 to be first in the comparison - this helps reload to avoid extra
9361 if (!sse_comparison_operator (operands[1], VOIDmode)
9362 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9364 rtx tmp = ix86_compare_op0;
9365 ix86_compare_op0 = ix86_compare_op1;
9366 ix86_compare_op1 = tmp;
9367 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9368 VOIDmode, ix86_compare_op0,
9371 /* Similary try to manage result to be first operand of conditional
9372 move. We also don't support the NE comparison on SSE, so try to
9374 if ((rtx_equal_p (operands[0], operands[3])
9375 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9376 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9378 rtx tmp = operands[2];
9379 operands[2] = operands[3];
9381 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9382 (GET_CODE (operands[1])),
9383 VOIDmode, ix86_compare_op0,
9386 if (GET_MODE (operands[0]) == SFmode)
9387 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9388 operands[2], operands[3],
9389 ix86_compare_op0, ix86_compare_op1));
9391 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9392 operands[2], operands[3],
9393 ix86_compare_op0, ix86_compare_op1));
9397 /* The floating point conditional move instructions don't directly
9398 support conditions resulting from a signed integer comparison. */
9400 code = GET_CODE (operands[1]);
9401 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9403 /* The floating point conditional move instructions don't directly
9404 support signed integer comparisons. */
9406 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9408 if (second_test != NULL || bypass_test != NULL)
9410 tmp = gen_reg_rtx (QImode);
9411 ix86_expand_setcc (code, tmp);
9413 ix86_compare_op0 = tmp;
9414 ix86_compare_op1 = const0_rtx;
9415 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9417 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9419 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9420 emit_move_insn (tmp, operands[3]);
9423 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9425 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9426 emit_move_insn (tmp, operands[2]);
9430 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9431 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9436 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9437 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9442 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9443 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9451 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9452 works for floating pointer parameters and nonoffsetable memories.
9453 For pushes, it returns just stack offsets; the values will be saved
9454 in the right order. Maximally three parts are generated. */
9457 ix86_split_to_parts (operand, parts, mode)
9460 enum machine_mode mode;
9465 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9467 size = (GET_MODE_SIZE (mode) + 4) / 8;
9469 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9471 if (size < 2 || size > 3)
9474 /* Optimize constant pool reference to immediates. This is used by fp
9475 moves, that force all constants to memory to allow combining. */
9476 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9478 rtx tmp = maybe_get_pool_constant (operand);
9483 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9485 /* The only non-offsetable memories we handle are pushes. */
9486 if (! push_operand (operand, VOIDmode))
9489 operand = copy_rtx (operand);
9490 PUT_MODE (operand, Pmode);
9491 parts[0] = parts[1] = parts[2] = operand;
9493 else if (!TARGET_64BIT)
9496 split_di (&operand, 1, &parts[0], &parts[1]);
9499 if (REG_P (operand))
9501 if (!reload_completed)
9503 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9504 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9506 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9508 else if (offsettable_memref_p (operand))
9510 operand = adjust_address (operand, SImode, 0);
9512 parts[1] = adjust_address (operand, SImode, 4);
9514 parts[2] = adjust_address (operand, SImode, 8);
9516 else if (GET_CODE (operand) == CONST_DOUBLE)
9521 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9526 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9527 parts[2] = gen_int_mode (l[2], SImode);
9530 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9535 parts[1] = gen_int_mode (l[1], SImode);
9536 parts[0] = gen_int_mode (l[0], SImode);
9545 split_ti (&operand, 1, &parts[0], &parts[1]);
9546 if (mode == XFmode || mode == TFmode)
9548 if (REG_P (operand))
9550 if (!reload_completed)
9552 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9553 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9555 else if (offsettable_memref_p (operand))
9557 operand = adjust_address (operand, DImode, 0);
9559 parts[1] = adjust_address (operand, SImode, 8);
9561 else if (GET_CODE (operand) == CONST_DOUBLE)
9566 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9567 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9568 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9569 if (HOST_BITS_PER_WIDE_INT >= 64)
9572 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9573 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9576 parts[0] = immed_double_const (l[0], l[1], DImode);
9577 parts[1] = gen_int_mode (l[2], SImode);
9587 /* Emit insns to perform a move or push of DI, DF, and XF values.
9588 Return false when normal moves are needed; true when all required
9589 insns have been emitted. Operands 2-4 contain the input values
9590 in the correct order; operands 5-7 contain the output values. */
/* NOTE(review): K&R-style definition; operands is an rtx array supplied by
   the i386.md splitters.  Several lines of this function are not visible in
   this excerpt (braces/else arms elided).  */
9593 ix86_split_long_move (operands)
9600 enum machine_mode mode = GET_MODE (operands[0]);
9602 /* The DFmode expanders may ask us to move double.
9603 For 64bit target this is single move. By hiding the fact
9604 here we simplify i386.md splitters. */
9605 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9607 /* Optimize constant pool reference to immediates. This is used by
9608 fp moves, that force all constants to memory to allow combining. */
9610 if (GET_CODE (operands[1]) == MEM
9611 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9612 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9613 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9614 if (push_operand (operands[0], VOIDmode))
9616 operands[0] = copy_rtx (operands[0]);
9617 PUT_MODE (operands[0], Pmode);
9620 operands[0] = gen_lowpart (DImode, operands[0]);
9621 operands[1] = gen_lowpart (DImode, operands[1]);
9622 emit_move_insn (operands[0], operands[1]);
9626 /* The only non-offsettable memory we handle is push. */
9627 if (push_operand (operands[0], VOIDmode))
9629 else if (GET_CODE (operands[0]) == MEM
9630 && ! offsettable_memref_p (operands[0]))
/* Split both source and destination into word-sized pieces; nparts is the
   number of pieces (2 or 3, judging by the uses of part[.][2] below).  */
9633 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9634 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9636 /* When emitting push, take care for source operands on the stack. */
9637 if (push && GET_CODE (operands[1]) == MEM
9638 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9641 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9642 XEXP (part[1][2], 0));
9643 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9644 XEXP (part[1][1], 0));
9647 /* We need to do copy in the right order in case an address register
9648 of the source overlaps the destination. */
9649 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9651 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9653 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9656 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9659 /* Collision in the middle part can be handled by reordering. */
9660 if (collisions == 1 && nparts == 3
9661 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9664 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9665 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9668 /* If there are more collisions, we can't handle it by reordering.
9669 Do an lea to the last part and use only one colliding move. */
9671 else if (collisions > 1)
9673 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
9674 XEXP (part[1][0], 0)));
9675 part[1][0] = change_address (part[1][0],
9676 TARGET_64BIT ? DImode : SImode,
9677 part[0][nparts - 1]);
9678 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
9680 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
9690 /* We use only first 12 bytes of TFmode value, but for pushing we
9691 are required to adjust stack as if we were pushing real 16byte
9693 if (mode == TFmode && !TARGET_64BIT)
9694 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
9696 emit_move_insn (part[0][2], part[1][2]);
9701 /* In 64bit mode we don't have 32bit push available. In case this is
9702 register, it is OK - we will just use larger counterpart. We also
9703 retype memory - these comes from attempt to avoid REX prefix on
9704 moving of second half of TFmode value. */
9705 if (GET_MODE (part[1][1]) == SImode)
9707 if (GET_CODE (part[1][1]) == MEM)
9708 part[1][1] = adjust_address (part[1][1], DImode, 0);
9709 else if (REG_P (part[1][1]))
9710 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9713 if (GET_MODE (part[1][0]) == SImode)
9714 part[1][0] = part[1][1];
9717 emit_move_insn (part[0][1], part[1][1]);
9718 emit_move_insn (part[0][0], part[1][0]);
9722 /* Choose correct order to not overwrite the source before it is copied. */
9723 if ((REG_P (part[0][0])
9724 && REG_P (part[1][1])
9725 && (REGNO (part[0][0]) == REGNO (part[1][1])
9727 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9729 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Destination's low word overlaps the source: copy high-to-low
   (operands 2..4 = dest pieces, 5..7 = source pieces, reversed).  */
9733 operands[2] = part[0][2];
9734 operands[3] = part[0][1];
9735 operands[4] = part[0][0];
9736 operands[5] = part[1][2];
9737 operands[6] = part[1][1];
9738 operands[7] = part[1][0];
9742 operands[2] = part[0][1];
9743 operands[3] = part[0][0];
9744 operands[5] = part[1][1];
9745 operands[6] = part[1][0];
/* No overlap: copy low-to-high in natural order.  */
9752 operands[2] = part[0][0];
9753 operands[3] = part[0][1];
9754 operands[4] = part[0][2];
9755 operands[5] = part[1][0];
9756 operands[6] = part[1][1];
9757 operands[7] = part[1][2];
9761 operands[2] = part[0][0];
9762 operands[3] = part[0][1];
9763 operands[5] = part[1][0];
9764 operands[6] = part[1][1];
9767 emit_move_insn (operands[2], operands[5]);
9768 emit_move_insn (operands[3], operands[6]);
9770 emit_move_insn (operands[4], operands[7]);
/* Split a DImode left shift into SImode operations on a 32-bit target.
   operands[0] = destination, operands[1] = source, operands[2] = shift
   count; SCRATCH is an optional SImode scratch register used to zero the
   low word without a branch when the count is non-constant.  */
9776 ix86_split_ashldi (operands, scratch)
9777 rtx *operands, scratch;
9779 rtx low[2], high[2];
9782 if (GET_CODE (operands[2]) == CONST_INT)
9784 split_di (operands, 2, low, high);
/* Only the low 6 bits of the count are significant for a 64-bit shift.  */
9785 count = INTVAL (operands[2]) & 63;
/* count >= 32: the low word shifts entirely into the high word.  */
9789 emit_move_insn (high[0], low[1]);
9790 emit_move_insn (low[0], const0_rtx);
9793 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
/* count < 32: shld moves bits from the low word into the high word.  */
9797 if (!rtx_equal_p (operands[0], operands[1]))
9798 emit_move_insn (operands[0], operands[1]);
9799 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9800 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
/* Non-constant count: emit the shld/shl pair, then fix up the case
   where bit 5 of the count is set.  */
9805 if (!rtx_equal_p (operands[0], operands[1]))
9806 emit_move_insn (operands[0], operands[1]);
9808 split_di (operands, 1, low, high);
9810 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9811 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
/* With cmov we can do the >=32 adjustment branch-free, given a scratch
   register (or the freedom to create one).  */
9813 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9815 if (! no_new_pseudos)
9816 scratch = force_reg (SImode, const0_rtx);
9818 emit_move_insn (scratch, const0_rtx);
9820 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9824 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a DImode arithmetic right shift into SImode operations on a
   32-bit target.  operands[0] = destination, operands[1] = source,
   operands[2] = shift count; SCRATCH is an optional SImode scratch
   register used to compute the sign-fill word without a branch.  */
9829 ix86_split_ashrdi (operands, scratch)
9830 rtx *operands, scratch;
9832 rtx low[2], high[2];
9835 if (GET_CODE (operands[2]) == CONST_INT)
9837 split_di (operands, 2, low, high);
9838 count = INTVAL (operands[2]) & 63;
/* count >= 32: low word becomes the shifted high word; high word is
   filled with the sign (arithmetic shift by 31).  */
9842 emit_move_insn (low[0], high[1]);
9844 if (! reload_completed)
9845 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
9848 emit_move_insn (high[0], low[0]);
9849 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9853 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
/* count < 32: shrd moves bits from the high word into the low word.  */
9857 if (!rtx_equal_p (operands[0], operands[1]))
9858 emit_move_insn (operands[0], operands[1]);
9859 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9860 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
/* Non-constant count: shrd/sar pair plus a fix-up for counts >= 32.  */
9865 if (!rtx_equal_p (operands[0], operands[1]))
9866 emit_move_insn (operands[0], operands[1]);
9868 split_di (operands, 1, low, high);
9870 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9871 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9873 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9875 if (! no_new_pseudos)
9876 scratch = gen_reg_rtx (SImode);
/* scratch = sign word of the (partially shifted) value.  */
9877 emit_move_insn (scratch, high[0]);
9878 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9879 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9883 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a DImode logical right shift into SImode operations on a 32-bit
   target.  operands[0] = destination, operands[1] = source, operands[2] =
   shift count; SCRATCH is an optional SImode scratch register used to
   zero the high word without a branch when the count is non-constant.  */
9888 ix86_split_lshrdi (operands, scratch)
9889 rtx *operands, scratch;
9891 rtx low[2], high[2];
9894 if (GET_CODE (operands[2]) == CONST_INT)
9896 split_di (operands, 2, low, high);
9897 count = INTVAL (operands[2]) & 63;
/* count >= 32: high word shifts entirely into the low word; high word
   becomes zero.  */
9901 emit_move_insn (low[0], high[1]);
9902 emit_move_insn (high[0], const0_rtx);
9905 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
/* count < 32: shrd moves bits from the high word into the low word.  */
9909 if (!rtx_equal_p (operands[0], operands[1]))
9910 emit_move_insn (operands[0], operands[1]);
9911 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9912 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
/* Non-constant count: shrd/shr pair plus fix-up for counts >= 32.  */
9917 if (!rtx_equal_p (operands[0], operands[1]))
9918 emit_move_insn (operands[0], operands[1]);
9920 split_di (operands, 1, low, high);
9922 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9923 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
9925 /* Heh. By reversing the arguments, we can reuse this pattern. */
9926 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9928 if (! no_new_pseudos)
9929 scratch = force_reg (SImode, const0_rtx);
9931 emit_move_insn (scratch, const0_rtx);
9933 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9937 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
9941 /* Helper function for the string operations below. Test VARIABLE whether
9942 it is aligned to VALUE bytes. If true, jump to the label. */
/* Returns the freshly created label; the caller emits it after the code
   that handles the unaligned case.  The test is (VARIABLE & VALUE) == 0,
   done in DImode or SImode depending on VARIABLE's mode.  */
9944 ix86_expand_aligntest (variable, value)
9948 rtx label = gen_label_rtx ();
9949 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
9950 if (GET_MODE (variable) == DImode)
9951 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
9953 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
9954 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
9959 /* Adjust COUNTER by the VALUE. */
/* Subtracts VALUE from COUNTREG in place, in DImode or SImode to match
   the register's mode.  Used by the string expanders after copying or
   clearing a leading unaligned chunk.  */
9961 ix86_adjust_counter (countreg, value)
9963 HOST_WIDE_INT value;
9965 if (GET_MODE (countreg) == DImode)
9966 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
9968 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
9971 /* Zero extend possibly SImode EXP to Pmode register. */
/* Returns a fresh Pmode register holding EXP.  A VOIDmode EXP (a constant)
   is simply forced into a register; an EXP already in Pmode is copied;
   otherwise an SImode value is zero-extended with zero_extendsidi2.  */
9973 ix86_zero_extend_to_Pmode (exp)
9977 if (GET_MODE (exp) == VOIDmode)
9978 return force_reg (Pmode, exp);
9979 if (GET_MODE (exp) == Pmode)
9980 return copy_to_mode_reg (Pmode, exp);
9981 r = gen_reg_rtx (Pmode);
9982 emit_insn (gen_zero_extendsidi2 (r, exp));
9986 /* Expand string move (memcpy) operation. Use i386 string operations when
9987 profitable. expand_clrstr contains similar code. */
/* DST/SRC are MEMs, COUNT_EXP is the byte count, ALIGN_EXP the known
   alignment.  Three strategies are used, chosen below: plain rep movsb
   when optimizing for size, rep movsl/movsq plus tail copies for known
   counts, and a generic align-then-rep-then-tail sequence otherwise.  */
9989 ix86_expand_movstr (dst, src, count_exp, align_exp)
9990 rtx dst, src, count_exp, align_exp;
9992 rtx srcreg, destreg, countreg;
9993 enum machine_mode counter_mode;
9994 HOST_WIDE_INT align = 0;
9995 unsigned HOST_WIDE_INT count = 0;
10000 if (GET_CODE (align_exp) == CONST_INT)
10001 align = INTVAL (align_exp);
10003 /* This simple hack avoids all inlining code and simplifies code below. */
10004 if (!TARGET_ALIGN_STRINGOPS)
10007 if (GET_CODE (count_exp) == CONST_INT)
10008 count = INTVAL (count_exp);
10010 /* Figure out proper mode for counter. For 32bits it is always SImode,
10011 for 64bits use SImode when possible, otherwise DImode.
10012 Set count to number of bytes copied when known at compile time. */
10013 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10014 || x86_64_zero_extended_value (count_exp))
10015 counter_mode = SImode;
10017 counter_mode = DImode;
10019 if (counter_mode != SImode && counter_mode != DImode)
10022 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10023 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* String insns require the direction flag cleared.  */
10025 emit_insn (gen_cld ());
10027 /* When optimizing for size emit simple rep ; movsb instruction for
10028 counts not divisible by 4. */
10030 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10032 countreg = ix86_zero_extend_to_Pmode (count_exp);
10034 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10035 destreg, srcreg, countreg));
10037 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10038 destreg, srcreg, countreg));
10041 /* For constant aligned (or small unaligned) copies use rep movsl
10042 followed by code copying the rest. For PentiumPro ensure 8 byte
10043 alignment to allow rep movsl acceleration. */
10045 else if (count != 0
10047 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10048 || optimize_size || count < (unsigned int) 64))
/* Word size for the bulk rep move: 8 on 64-bit (unless -Os), else 4.  */
10050 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10051 if (count & ~(size - 1))
10053 countreg = copy_to_mode_reg (counter_mode,
10054 GEN_INT ((count >> (size == 4 ? 2 : 3))
10055 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10056 countreg = ix86_zero_extend_to_Pmode (countreg);
10060 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10061 destreg, srcreg, countreg));
10063 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10064 destreg, srcreg, countreg));
10067 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10068 destreg, srcreg, countreg));
/* Copy the remaining 1-7 tail bytes with individual string moves.  */
10070 if (size == 8 && (count & 0x04))
10071 emit_insn (gen_strmovsi (destreg, srcreg));
10073 emit_insn (gen_strmovhi (destreg, srcreg));
10075 emit_insn (gen_strmovqi (destreg, srcreg));
10077 /* The generic code based on the glibc implementation:
10078 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10079 allowing accelerated copying there)
10080 - copy the data using rep movsl
10081 - copy the rest. */
10086 int desired_alignment = (TARGET_PENTIUMPRO
10087 && (count == 0 || count >= (unsigned int) 260)
10088 ? 8 : UNITS_PER_WORD);
10090 /* In case we don't know anything about the alignment, default to
10091 library version, since it is usually equally fast and result in
10093 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10099 if (TARGET_SINGLE_STRINGOP)
10100 emit_insn (gen_cld ());
10102 countreg2 = gen_reg_rtx (Pmode);
10103 countreg = copy_to_mode_reg (counter_mode, count_exp);
10105 /* We don't use loops to align destination and to copy parts smaller
10106 than 4 bytes, because gcc is able to optimize such code better (in
10107 the case the destination or the count really is aligned, gcc is often
10108 able to predict the branches) and also it is friendlier to the
10109 hardware branch prediction.
10111 Using loops is beneficial for generic case, because we can
10112 handle small counts using the loops. Many CPUs (such as Athlon)
10113 have large REP prefix setup costs.
10115 This is quite costly. Maybe we can revisit this decision later or
10116 add some customizability to this code. */
/* Skip the alignment prologue entirely for counts too small to need it.  */
10118 if (count == 0 && align < desired_alignment)
10120 label = gen_label_rtx ();
10121 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10122 LEU, 0, counter_mode, 1, label);
/* Align destination: copy 1, then 2, then 4 bytes as needed.  */
10126 rtx label = ix86_expand_aligntest (destreg, 1);
10127 emit_insn (gen_strmovqi (destreg, srcreg));
10128 ix86_adjust_counter (countreg, 1);
10129 emit_label (label);
10130 LABEL_NUSES (label) = 1;
10134 rtx label = ix86_expand_aligntest (destreg, 2);
10135 emit_insn (gen_strmovhi (destreg, srcreg));
10136 ix86_adjust_counter (countreg, 2);
10137 emit_label (label);
10138 LABEL_NUSES (label) = 1;
10140 if (align <= 4 && desired_alignment > 4)
10142 rtx label = ix86_expand_aligntest (destreg, 4);
10143 emit_insn (gen_strmovsi (destreg, srcreg));
10144 ix86_adjust_counter (countreg, 4);
10145 emit_label (label);
10146 LABEL_NUSES (label) = 1;
10149 if (label && desired_alignment > 4 && !TARGET_64BIT)
10151 emit_label (label);
10152 LABEL_NUSES (label) = 1;
10155 if (!TARGET_SINGLE_STRINGOP)
10156 emit_insn (gen_cld ());
/* Bulk copy: countreg2 = byte count / word size, then rep movs.  */
10159 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10161 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10162 destreg, srcreg, countreg2));
10166 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10167 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10168 destreg, srcreg, countreg2));
10173 emit_label (label);
10174 LABEL_NUSES (label) = 1;
/* Epilogue: copy the remaining 0-7 bytes, testing countreg bits when the
   count is not known at compile time.  */
10176 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10177 emit_insn (gen_strmovsi (destreg, srcreg));
10178 if ((align <= 4 || count == 0) && TARGET_64BIT)
10180 rtx label = ix86_expand_aligntest (countreg, 4);
10181 emit_insn (gen_strmovsi (destreg, srcreg));
10182 emit_label (label);
10183 LABEL_NUSES (label) = 1;
10185 if (align > 2 && count != 0 && (count & 2))
10186 emit_insn (gen_strmovhi (destreg, srcreg));
10187 if (align <= 2 || count == 0)
10189 rtx label = ix86_expand_aligntest (countreg, 2);
10190 emit_insn (gen_strmovhi (destreg, srcreg));
10191 emit_label (label);
10192 LABEL_NUSES (label) = 1;
10194 if (align > 1 && count != 0 && (count & 1))
10195 emit_insn (gen_strmovqi (destreg, srcreg));
10196 if (align <= 1 || count == 0)
10198 rtx label = ix86_expand_aligntest (countreg, 1);
10199 emit_insn (gen_strmovqi (destreg, srcreg));
10200 emit_label (label);
10201 LABEL_NUSES (label) = 1;
10205 insns = get_insns ();
/* Attach memory attributes of DST/SRC to the generated insns.  */
10208 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10213 /* Expand string clear operation (bzero). Use i386 string operations when
10214 profitable. expand_movstr contains similar code. */
/* SRC is the destination MEM (the operand to clear), COUNT_EXP the byte
   count, ALIGN_EXP the known alignment.  Mirrors ix86_expand_movstr:
   rep stosb for -Os, rep stosl/stosq + tail stores for known counts,
   and a generic align/rep/tail sequence otherwise.  */
10216 ix86_expand_clrstr (src, count_exp, align_exp)
10217 rtx src, count_exp, align_exp;
10219 rtx destreg, zeroreg, countreg;
10220 enum machine_mode counter_mode;
10221 HOST_WIDE_INT align = 0;
10222 unsigned HOST_WIDE_INT count = 0;
10224 if (GET_CODE (align_exp) == CONST_INT)
10225 align = INTVAL (align_exp);
10227 /* This simple hack avoids all inlining code and simplifies code below. */
10228 if (!TARGET_ALIGN_STRINGOPS)
10231 if (GET_CODE (count_exp) == CONST_INT)
10232 count = INTVAL (count_exp);
10233 /* Figure out proper mode for counter. For 32bits it is always SImode,
10234 for 64bits use SImode when possible, otherwise DImode.
10235 Set count to number of bytes copied when known at compile time. */
10236 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10237 || x86_64_zero_extended_value (count_exp))
10238 counter_mode = SImode;
10240 counter_mode = DImode;
10242 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* String insns require the direction flag cleared.  */
10244 emit_insn (gen_cld ());
10246 /* When optimizing for size emit simple rep ; movsb instruction for
10247 counts not divisible by 4. */
10249 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10251 countreg = ix86_zero_extend_to_Pmode (count_exp);
10252 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10254 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10255 destreg, countreg));
10257 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10258 destreg, countreg));
10260 else if (count != 0
10262 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10263 || optimize_size || count < (unsigned int) 64))
/* Word size for the bulk rep store: 8 on 64-bit (unless -Os), else 4.  */
10265 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10266 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10267 if (count & ~(size - 1))
10269 countreg = copy_to_mode_reg (counter_mode,
10270 GEN_INT ((count >> (size == 4 ? 2 : 3))
10271 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10272 countreg = ix86_zero_extend_to_Pmode (countreg);
10276 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10277 destreg, countreg));
10279 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10280 destreg, countreg));
10283 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10284 destreg, countreg));
/* Store the remaining tail bytes via narrower SUBREGs of zeroreg.  */
10286 if (size == 8 && (count & 0x04))
10287 emit_insn (gen_strsetsi (destreg,
10288 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10290 emit_insn (gen_strsethi (destreg,
10291 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10293 emit_insn (gen_strsetqi (destreg,
10294 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10300 /* Compute desired alignment of the string operation. */
10301 int desired_alignment = (TARGET_PENTIUMPRO
10302 && (count == 0 || count >= (unsigned int) 260)
10303 ? 8 : UNITS_PER_WORD);
10305 /* In case we don't know anything about the alignment, default to
10306 library version, since it is usually equally fast and result in
10308 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10311 if (TARGET_SINGLE_STRINGOP)
10312 emit_insn (gen_cld ());
10314 countreg2 = gen_reg_rtx (Pmode);
10315 countreg = copy_to_mode_reg (counter_mode, count_exp);
10316 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
/* Skip the alignment prologue for counts too small to need it.  */
10318 if (count == 0 && align < desired_alignment)
10320 label = gen_label_rtx ();
10321 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10322 LEU, 0, counter_mode, 1, label);
/* Align destination: store 1, then 2, then 4 zero bytes as needed.  */
10326 rtx label = ix86_expand_aligntest (destreg, 1);
10327 emit_insn (gen_strsetqi (destreg,
10328 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10329 ix86_adjust_counter (countreg, 1);
10330 emit_label (label);
10331 LABEL_NUSES (label) = 1;
10335 rtx label = ix86_expand_aligntest (destreg, 2);
10336 emit_insn (gen_strsethi (destreg,
10337 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10338 ix86_adjust_counter (countreg, 2);
10339 emit_label (label);
10340 LABEL_NUSES (label) = 1;
10342 if (align <= 4 && desired_alignment > 4)
10344 rtx label = ix86_expand_aligntest (destreg, 4);
10345 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10346 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10348 ix86_adjust_counter (countreg, 4);
10349 emit_label (label);
10350 LABEL_NUSES (label) = 1;
10353 if (label && desired_alignment > 4 && !TARGET_64BIT)
10355 emit_label (label);
10356 LABEL_NUSES (label) = 1;
10360 if (!TARGET_SINGLE_STRINGOP)
10361 emit_insn (gen_cld ());
/* Bulk clear: countreg2 = byte count / word size, then rep stos.  */
10364 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10366 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10367 destreg, countreg2));
10371 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10372 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10373 destreg, countreg2));
10377 emit_label (label);
10378 LABEL_NUSES (label) = 1;
/* Epilogue: clear the remaining 0-7 bytes, testing countreg bits when
   the count is not known at compile time.  */
10381 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10382 emit_insn (gen_strsetsi (destreg,
10383 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10384 if (TARGET_64BIT && (align <= 4 || count == 0))
10386 rtx label = ix86_expand_aligntest (countreg, 4);
10387 emit_insn (gen_strsetsi (destreg,
10388 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10389 emit_label (label);
10390 LABEL_NUSES (label) = 1;
10392 if (align > 2 && count != 0 && (count & 2))
10393 emit_insn (gen_strsethi (destreg,
10394 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10395 if (align <= 2 || count == 0)
10397 rtx label = ix86_expand_aligntest (countreg, 2);
10398 emit_insn (gen_strsethi (destreg,
10399 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10400 emit_label (label);
10401 LABEL_NUSES (label) = 1;
10403 if (align > 1 && count != 0 && (count & 1))
10404 emit_insn (gen_strsetqi (destreg,
10405 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10406 if (align <= 1 || count == 0)
10408 rtx label = ix86_expand_aligntest (countreg, 1);
10409 emit_insn (gen_strsetqi (destreg,
10410 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10411 emit_label (label);
10412 LABEL_NUSES (label) = 1;
10417 /* Expand strlen. */
/* OUT receives the length of the string at SRC; EOSCHAR is the terminator
   (const0_rtx for plain strlen), ALIGN the known alignment.  Uses either
   the unrolled word-at-a-time search (ix86_expand_strlensi_unroll_1) or a
   repnz scasb sequence.  */
10419 ix86_expand_strlen (out, src, eoschar, align)
10420 rtx out, src, eoschar, align;
10422 rtx addr, scratch1, scratch2, scratch3, scratch4;
10424 /* The generic case of strlen expander is long. Avoid its
10425 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
10427 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10428 && !TARGET_INLINE_ALL_STRINGOPS
10430 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10433 addr = force_reg (Pmode, XEXP (src, 0));
10434 scratch1 = gen_reg_rtx (Pmode);
10436 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10439 /* Well it seems that some optimizer does not combine a call like
10440 foo(strlen(bar), strlen(bar));
10441 when the move and the subtraction is done here. It does calculate
10442 the length just once when these instructions are done inside of
10443 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10444 often used and I use one fewer register for the lifetime of
10445 output_strlen_unroll() this is better. */
10447 emit_move_insn (out, addr);
10449 ix86_expand_strlensi_unroll_1 (out, align);
10451 /* strlensi_unroll_1 returns the address of the zero at the end of
10452 the string, like memchr(), so compute the length by subtracting
10453 the start address. */
10455 emit_insn (gen_subdi3 (out, out, addr));
10457 emit_insn (gen_subsi3 (out, out, addr));
/* repnz scasb path: scratch4 = -1 (max count), scratch3 walks the
   string, then length = ~scan_result - 1.  */
10461 scratch2 = gen_reg_rtx (Pmode);
10462 scratch3 = gen_reg_rtx (Pmode);
10463 scratch4 = force_reg (Pmode, constm1_rtx);
10465 emit_move_insn (scratch3, addr);
10466 eoschar = force_reg (QImode, eoschar);
10468 emit_insn (gen_cld ());
10471 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10472 align, scratch4, scratch3));
10473 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10474 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10478 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10479 align, scratch4, scratch3));
10480 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10481 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10487 /* Expand the appropriate insns for doing strlen if not just doing
10490 out = result, initialized with the start address
10491 align_rtx = alignment of the address.
10492 scratch = scratch register, initialized with the startaddress when
10493 not aligned, otherwise undefined
10495 This is just the body. It needs the initialisations mentioned above and
10496 some address computing at the end. These things are done in i386.md. */
/* On return OUT holds the address of the terminating zero byte (like
   memchr), not the length; the caller subtracts the start address.  */
10499 ix86_expand_strlensi_unroll_1 (out, align_rtx)
10500 rtx out, align_rtx;
10504 rtx align_2_label = NULL_RTX;
10505 rtx align_3_label = NULL_RTX;
10506 rtx align_4_label = gen_label_rtx ();
10507 rtx end_0_label = gen_label_rtx ();
10509 rtx tmpreg = gen_reg_rtx (SImode);
10510 rtx scratch = gen_reg_rtx (SImode);
10513 if (GET_CODE (align_rtx) == CONST_INT)
10514 align = INTVAL (align_rtx);
10516 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10518 /* Is there a known alignment and is it less than 4? */
10521 rtx scratch1 = gen_reg_rtx (Pmode);
10522 emit_move_insn (scratch1, out);
10523 /* Is there a known alignment and is it not 2? */
10526 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10527 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10529 /* Leave just the 3 lower bits. */
10530 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10531 NULL_RTX, 0, OPTAB_WIDEN);
10533 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10534 Pmode, 1, align_4_label);
10535 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
10536 Pmode, 1, align_2_label);
10537 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
10538 Pmode, 1, align_3_label);
10542 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10543 check if is aligned to 4 - byte. */
10545 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
10546 NULL_RTX, 0, OPTAB_WIDEN);
10548 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10549 Pmode, 1, align_4_label);
10552 mem = gen_rtx_MEM (QImode, out);
10554 /* Now compare the bytes. */
10556 /* Compare the first n unaligned byte on a byte per byte basis. */
10557 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
10558 QImode, 1, end_0_label);
10560 /* Increment the address. */
10562 emit_insn (gen_adddi3 (out, out, const1_rtx));
10564 emit_insn (gen_addsi3 (out, out, const1_rtx));
10566 /* Not needed with an alignment of 2 */
10569 emit_label (align_2_label);
10571 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10575 emit_insn (gen_adddi3 (out, out, const1_rtx));
10577 emit_insn (gen_addsi3 (out, out, const1_rtx));
10579 emit_label (align_3_label);
10582 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10586 emit_insn (gen_adddi3 (out, out, const1_rtx));
10588 emit_insn (gen_addsi3 (out, out, const1_rtx));
10591 /* Generate loop to check 4 bytes at a time. It is not a good idea to
10592 align this loop. It gives only huge programs, but does not help to
10594 emit_label (align_4_label);
10596 mem = gen_rtx_MEM (SImode, out);
10597 emit_move_insn (scratch, mem);
10599 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10601 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
10603 /* This formula yields a nonzero result iff one of the bytes is zero.
10604 This saves three branches inside loop and many cycles. */
10606 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10607 emit_insn (gen_one_cmplsi2 (scratch, scratch))
10608 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
10609 emit_insn (gen_andsi3 (tmpreg, tmpreg,
10610 gen_int_mode (0x80808080, SImode)));
10611 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* Fell out of the loop: one of the 4 bytes was zero.  Locate it.  The
   cmov path below avoids branches; the fallback uses a jump.  */
10616 rtx reg = gen_reg_rtx (SImode);
10617 rtx reg2 = gen_reg_rtx (Pmode);
10618 emit_move_insn (reg, tmpreg);
10619 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10621 /* If zero is not in the first two bytes, move two bytes forward. */
10622 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10623 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10624 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10625 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10626 gen_rtx_IF_THEN_ELSE (SImode, tmp,
10629 /* Emit lea manually to avoid clobbering of flags. */
10630 emit_insn (gen_rtx_SET (SImode, reg2,
10631 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
10633 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10634 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10635 emit_insn (gen_rtx_SET (VOIDmode, out,
10636 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
10643 rtx end_2_label = gen_label_rtx ();
10644 /* Is zero in the first two bytes? */
10646 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10647 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10648 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10649 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10650 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10652 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10653 JUMP_LABEL (tmp) = end_2_label;
10655 /* Not in the first two. Move two bytes forward. */
10656 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
10658 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
10660 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
10662 emit_label (end_2_label);
10666 /* Avoid branch in fixing the byte. */
10667 tmpreg = gen_lowpart (QImode, tmpreg);
10668 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
10670 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
10672 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
10674 emit_label (end_0_label);
/* Emit a call to FNADDR (a MEM), returning into RETVAL if non-null.
   CALLARG1 is the argument-bytes rtx for the CALL pattern; CALLARG2
   feeds %al with the number of SSE registers used for varargs on 64-bit;
   POP is the byte count the callee pops (stdcall-style) or const0_rtx.  */
10678 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
10679 rtx retval, fnaddr, callarg1, callarg2, pop;
10681 rtx use = NULL, call;
10683 if (pop == const0_rtx)
10685 if (TARGET_64BIT && pop)
10689 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
10690 fnaddr = machopic_indirect_call_target (fnaddr);
10692 /* Static functions and indirect calls don't need the pic register. */
10693 if (! TARGET_64BIT && flag_pic
10694 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
10695 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
10696 use_reg (&use, pic_offset_table_rtx)
10698 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
/* x86-64 varargs ABI: %al carries the number of vector registers used.  */
10700 rtx al = gen_rtx_REG (QImode, 0);
10701 emit_move_insn (al, callarg2);
10702 use_reg (&use, al);
10704 #endif /* TARGET_MACHO */
10706 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
10708 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10709 fnaddr = gen_rtx_MEM (QImode, fnaddr);
10712 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
10714 call = gen_rtx_SET (VOIDmode, retval, call);
/* A callee-pop call is a PARALLEL of the call and the sp adjustment.  */
10717 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
10718 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
10719 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
10722 call = emit_call_insn (call);
10724 CALL_INSN_FUNCTION_USAGE (call) = use;
10728 /* Clear stack slot assignments remembered from previous functions.
10729 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Returns a zeroed, GC-allocated per-function machine_function record.  */
10732 static struct machine_function *
10733 ix86_init_machine_status ()
10735 return ggc_alloc_cleared (sizeof (struct machine_function));
10738 /* Return a MEM corresponding to a stack slot with mode MODE.
10739 Allocate a new slot if necessary.
10741 The RTL for a function can have several slots available: N is
10742 which slot to use. */
10745 assign_386_stack_local (mode, n)
10746 enum machine_mode mode;
/* Reject out-of-range slot indices before touching the cache.  */
10749 if (n < 0 || n >= MAX_386_STACK_LOCALS)
/* Lazily create the slot; subsequent calls reuse the cached MEM.  */
10752 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
10753 ix86_stack_locals[(int) mode][n]
10754 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10756 return ix86_stack_locals[(int) mode][n];
10759 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10761 static GTY(()) rtx ix86_tls_symbol;
10763 ix86_tls_get_addr ()
/* Create the symbol once and cache it; GNU TLS uses the triple-underscore
   entry point, the generic dialect the double-underscore one.  */
10766 if (!ix86_tls_symbol)
10768 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, (TARGET_GNU_TLS
10769 ? "___tls_get_addr"
10770 : "__tls_get_addr"));
10773 return ix86_tls_symbol;
10776 /* Calculate the length of the memory address in the instruction
10777 encoding. Does not include the one-byte modrm, opcode, or prefix. */
10780 memory_address_length (addr)
10783 struct ix86_address parts;
10784 rtx base, index, disp;
/* Auto-modify addresses encode no extra bytes beyond modrm.  */
10787 if (GET_CODE (addr) == PRE_DEC
10788 || GET_CODE (addr) == POST_INC
10789 || GET_CODE (addr) == PRE_MODIFY
10790 || GET_CODE (addr) == POST_MODIFY)
10793 if (! ix86_decompose_address (addr, &parts))
10797 index = parts.index;
10801 /* Register Indirect. */
10802 if (base && !index && !disp)
10804 /* Special cases: ebp and esp need the two-byte modrm form. */
10805 if (addr == stack_pointer_rtx
10806 || addr == arg_pointer_rtx
10807 || addr == frame_pointer_rtx
10808 || addr == hard_frame_pointer_rtx)
10812 /* Direct Addressing. */
10813 else if (disp && !base && !index)
10818 /* Find the length of the displacement constant.  An 8-bit
   sign-extendable displacement ('K' constraint) takes one byte,
   anything else four.  */
10821 if (GET_CODE (disp) == CONST_INT
10822 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
10828 /* An index requires the two-byte modrm form. */
10836 /* Compute default value for "length_immediate" attribute. When SHORTFORM
10837 is set, expect that insn have an 8-bit immediate alternative. */
10839 ix86_attr_length_immediate_default (insn, shortform)
10845 extract_insn_cached (insn);
/* Scan operands from last to first for the immediate.  */
10846 for (i = recog_data.n_operands - 1; i >= 0; --i)
10847 if (CONSTANT_P (recog_data.operand[i]))
/* 'K' means the constant fits in a sign-extended 8-bit immediate.  */
10852 && GET_CODE (recog_data.operand[i]) == CONST_INT
10853 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
/* Otherwise the immediate width follows the insn's mode attribute.  */
10857 switch (get_attr_mode (insn))
10868 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
10873 fatal_insn ("unknown insn mode", insn);
10879 /* Compute default value for "length_address" attribute. */
10881 ix86_attr_length_address_default (insn)
10885 extract_insn_cached (insn);
/* The first MEM operand found (scanning backwards) determines the
   address-encoding length.  */
10886 for (i = recog_data.n_operands - 1; i >= 0; --i)
10887 if (GET_CODE (recog_data.operand[i]) == MEM)
10889 return memory_address_length (XEXP (recog_data.operand[i], 0));
10895 /* Return the maximum number of instructions a cpu can issue.
   NOTE(review): the switch header and per-case return values are elided
   in this view; only the case labels are visible.  */
10902 case PROCESSOR_PENTIUM:
10906 case PROCESSOR_PENTIUMPRO:
10907 case PROCESSOR_PENTIUM4:
10908 case PROCESSOR_ATHLON:
10916 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
10917 by DEP_INSN and nothing set by DEP_INSN. */
10920 ix86_flags_dependant (insn, dep_insn, insn_type)
10921 rtx insn, dep_insn;
10922 enum attr_type insn_type;
10926 /* Simplify the test for uninteresting insns. */
10927 if (insn_type != TYPE_SETCC
10928 && insn_type != TYPE_ICMOV
10929 && insn_type != TYPE_FCMOV
10930 && insn_type != TYPE_IBR)
10933 if ((set = single_set (dep_insn)) != 0)
10935 set = SET_DEST (set);
10938 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
10939 && XVECLEN (PATTERN (dep_insn), 0) == 2
10940 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
10941 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
10943 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10944 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10949 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
10952 /* This test is true if the dependent insn reads the flags but
10953 not any other potentially set register. */
10954 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
10957 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
10963 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
10964 address with operands set by DEP_INSN. */
10967 ix86_agi_dependant (insn, dep_insn, insn_type)
10968 rtx insn, dep_insn;
10969 enum attr_type insn_type;
/* LEA computes an address without a memory access, so its whole source
   expression is the "address" to test.  */
10973 if (insn_type == TYPE_LEA
10976 addr = PATTERN (insn);
10977 if (GET_CODE (addr) == SET)
10979 else if (GET_CODE (addr) == PARALLEL
10980 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
10981 addr = XVECEXP (addr, 0, 0);
10984 addr = SET_SRC (addr);
/* Otherwise find the first MEM operand and test its address.  */
10989 extract_insn_cached (insn);
10990 for (i = recog_data.n_operands - 1; i >= 0; --i)
10991 if (GET_CODE (recog_data.operand[i]) == MEM)
10993 addr = XEXP (recog_data.operand[i], 0);
/* True iff DEP_INSN writes any register used in ADDR.  */
11000 return modified_in_p (addr, dep_insn);
/* Scheduler hook: adjust the latency COST of the dependence LINK between
   DEP_INSN (producer) and INSN (consumer) for the target CPU.
   NOTE(review): braces, returns, and some case bodies are elided in this
   view of the file.  */
11004 ix86_adjust_cost (insn, link, dep_insn, cost)
11005 rtx insn, link, dep_insn;
11008 enum attr_type insn_type, dep_insn_type;
11009 enum attr_memory memory, dep_memory;
11011 int dep_insn_code_number;
11013 /* Anti and output dependencies have zero cost on all CPUs. */
11014 if (REG_NOTE_KIND (link) != 0)
11017 dep_insn_code_number = recog_memoized (dep_insn);
11019 /* If we can't recognize the insns, we can't really do anything. */
11020 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11023 insn_type = get_attr_type (insn);
11024 dep_insn_type = get_attr_type (dep_insn);
11028 case PROCESSOR_PENTIUM:
11029 /* Address Generation Interlock adds a cycle of latency. */
11030 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11033 /* ??? Compares pair with jump/setcc. */
11034 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11037 /* Floating point stores require value to be ready one cycle earlier. */
11038 if (insn_type == TYPE_FMOV
11039 && get_attr_memory (insn) == MEMORY_STORE
11040 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11044 case PROCESSOR_PENTIUMPRO:
11045 memory = get_attr_memory (insn);
11046 dep_memory = get_attr_memory (dep_insn);
11048 /* Since we can't represent delayed latencies of load+operation,
11049 increase the cost here for non-imov insns. */
11050 if (dep_insn_type != TYPE_IMOV
11051 && dep_insn_type != TYPE_FMOV
11052 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11055 /* INT->FP conversion is expensive. */
11056 if (get_attr_fp_int_src (dep_insn))
11059 /* There is one cycle extra latency between an FP op and a store. */
11060 if (insn_type == TYPE_FMOV
11061 && (set = single_set (dep_insn)) != NULL_RTX
11062 && (set2 = single_set (insn)) != NULL_RTX
11063 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11064 && GET_CODE (SET_DEST (set2)) == MEM)
11067 /* Show ability of reorder buffer to hide latency of load by executing
11068 in parallel with previous instruction in case
11069 previous instruction is not needed to compute the address. */
11070 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11071 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11073 /* Claim moves to take one cycle, as core can issue one load
11074 at time and the next load can start cycle later. */
11075 if (dep_insn_type == TYPE_IMOV
11076 || dep_insn_type == TYPE_FMOV)
/* K6-style case: same load-latency modelling with different constants.  */
11084 memory = get_attr_memory (insn);
11085 dep_memory = get_attr_memory (dep_insn);
11086 /* The esp dependency is resolved before the instruction is really
11088 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11089 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11092 /* Since we can't represent delayed latencies of load+operation,
11093 increase the cost here for non-imov insns. */
11094 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11095 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11097 /* INT->FP conversion is expensive. */
11098 if (get_attr_fp_int_src (dep_insn))
11101 /* Show ability of reorder buffer to hide latency of load by executing
11102 in parallel with previous instruction in case
11103 previous instruction is not needed to compute the address. */
11104 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11105 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11107 /* Claim moves to take one cycle, as core can issue one load
11108 at time and the next load can start cycle later. */
11109 if (dep_insn_type == TYPE_IMOV
11110 || dep_insn_type == TYPE_FMOV)
11119 case PROCESSOR_ATHLON:
11120 memory = get_attr_memory (insn);
11121 dep_memory = get_attr_memory (dep_insn);
11123 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11125 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
11130 /* Show ability of reorder buffer to hide latency of load by executing
11131 in parallel with previous instruction in case
11132 previous instruction is not needed to compute the address. */
11133 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11134 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11136 /* Claim moves to take one cycle, as core can issue one load
11137 at time and the next load can start cycle later. */
11138 if (dep_insn_type == TYPE_IMOV
11139 || dep_insn_type == TYPE_FMOV)
11141 else if (cost >= 3)
/* Per-cycle scheduling state for the PentiumPro decoder model:
   the three decode slots plus the issue count for the current cycle.
   NOTE(review): the decode[] member is elided in this view.  */
11156 struct ppro_sched_data
11159 int issued_this_cycle;
/* Return INSN's ppro_uops attribute, treating unrecognizable insns
   conservatively as "many uops".  */
11163 static enum attr_ppro_uops
11164 ix86_safe_ppro_uops (insn)
11167 if (recog_memoized (insn) >= 0)
11168 return get_attr_ppro_uops (insn);
11170 return PPRO_UOPS_MANY;
/* Dump the INSN_UIDs of the current PPro decode packet to DUMP,
   one line per packet; empty trailing slots are skipped.  */
11174 ix86_dump_ppro_packet (dump)
11177 if (ix86_sched_data.ppro.decode[0])
11179 fprintf (dump, "PPRO packet: %d",
11180 INSN_UID (ix86_sched_data.ppro.decode[0]));
11181 if (ix86_sched_data.ppro.decode[1])
11182 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11183 if (ix86_sched_data.ppro.decode[2])
11184 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11185 fputc ('\n', dump);
11189 /* We're beginning a new block. Initialize data structures as necessary. */
11192 ix86_sched_init (dump, sched_verbose, veclen)
11193 FILE *dump ATTRIBUTE_UNUSED;
11194 int sched_verbose ATTRIBUTE_UNUSED;
11195 int veclen ATTRIBUTE_UNUSED;
/* Reset all per-block scheduling state (e.g. the PPro decode slots).  */
11197 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11200 /* Shift INSN to SLOT, and shift everything else down. */
11203 ix86_reorder_insn (insnp, slot)
/* Slide each entry one place toward SLOT until INSN lands there.  */
11210 insnp[0] = insnp[1];
11211 while (++insnp != slot);
/* Reorder the ready queue [READY, E_READY] to match the PentiumPro
   4-2-1 decoder template: one complex insn, then 2-4 uop insns, then
   single-uop insns.  NOTE(review): braces/loop headers are elided in
   this view of the file.  */
11217 ix86_sched_reorder_ppro (ready, e_ready)
11222 enum attr_ppro_uops cur_uops;
11223 int issued_this_cycle;
11227 /* At this point .ppro.decode contains the state of the three
11228 decoders from last "cycle". That is, those insns that were
11229 actually independent. But here we're scheduling for the
11230 decoder, and we may find things that are decodable in the
11233 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11234 issued_this_cycle = 0;
11237 cur_uops = ix86_safe_ppro_uops (*insnp);
11239 /* If the decoders are empty, and we've a complex insn at the
11240 head of the priority queue, let it issue without complaint. */
11241 if (decode[0] == NULL)
11243 if (cur_uops == PPRO_UOPS_MANY)
11245 decode[0] = *insnp;
11249 /* Otherwise, search for a 2-4 uop insn to issue. */
11250 while (cur_uops != PPRO_UOPS_FEW)
11252 if (insnp == ready)
11254 cur_uops = ix86_safe_ppro_uops (*--insnp);
11257 /* If so, move it to the head of the line. */
11258 if (cur_uops == PPRO_UOPS_FEW)
11259 ix86_reorder_insn (insnp, e_ready);
11261 /* Issue the head of the queue. */
11262 issued_this_cycle = 1;
11263 decode[0] = *e_ready--;
11266 /* Look for simple insns to fill in the other two slots. */
11267 for (i = 1; i < 3; ++i)
11268 if (decode[i] == NULL)
11270 if (ready > e_ready)
11274 cur_uops = ix86_safe_ppro_uops (*insnp);
11275 while (cur_uops != PPRO_UOPS_ONE)
11277 if (insnp == ready)
11279 cur_uops = ix86_safe_ppro_uops (*--insnp);
11282 /* Found one. Move it to the head of the queue and issue it. */
11283 if (cur_uops == PPRO_UOPS_ONE)
11285 ix86_reorder_insn (insnp, e_ready);
11286 decode[i] = *e_ready--;
11287 issued_this_cycle++;
11291 /* ??? Didn't find one. Ideally, here we would do a lazy split
11292 of 2-uop insns, issue one and queue the other. */
/* Always report at least one issue so the scheduler makes progress.  */
11296 if (issued_this_cycle == 0)
11297 issued_this_cycle = 1;
11298 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11301 /* We are about to begin issuing insns for this clock cycle.
11302 Override the default sort algorithm to better slot instructions. */
11304 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11305 FILE *dump ATTRIBUTE_UNUSED;
11306 int sched_verbose ATTRIBUTE_UNUSED;
11309 int clock_var ATTRIBUTE_UNUSED;
11311 int n_ready = *n_readyp;
11312 rtx *e_ready = ready + n_ready - 1;
11314 /* Make sure to go ahead and initialize key items in
11315 ix86_sched_data if we are not going to bother trying to
11316 reorder the ready queue. */
11319 ix86_sched_data.ppro.issued_this_cycle = 1;
/* Only PentiumPro gets a custom reorder; other CPUs use default order.  */
11328 case PROCESSOR_PENTIUMPRO:
11329 ix86_sched_reorder_ppro (ready, e_ready);
11334 return ix86_issue_rate ();
11337 /* We are about to issue INSN. Return the number of insns left on the
11338 ready queue that can be issued this cycle. */
11341 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11345 int can_issue_more;
/* Default CPUs: simply count down the issue budget.  */
11351 return can_issue_more - 1;
11353 case PROCESSOR_PENTIUMPRO:
11355 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
/* A many-uop insn monopolizes decoder 0 and closes the packet.  */
11357 if (uops == PPRO_UOPS_MANY)
11360 ix86_dump_ppro_packet (dump);
11361 ix86_sched_data.ppro.decode[0] = insn;
11362 ix86_sched_data.ppro.decode[1] = NULL;
11363 ix86_sched_data.ppro.decode[2] = NULL;
11365 ix86_dump_ppro_packet (dump);
11366 ix86_sched_data.ppro.decode[0] = NULL;
/* A 2-4 uop insn must start a fresh packet in decoder 0.  */
11368 else if (uops == PPRO_UOPS_FEW)
11371 ix86_dump_ppro_packet (dump);
11372 ix86_sched_data.ppro.decode[0] = insn;
11373 ix86_sched_data.ppro.decode[1] = NULL;
11374 ix86_sched_data.ppro.decode[2] = NULL;
/* Single-uop insn: drop it into the first free decode slot; if the
   packet fills up, flush it.  */
11378 for (i = 0; i < 3; ++i)
11379 if (ix86_sched_data.ppro.decode[i] == NULL)
11381 ix86_sched_data.ppro.decode[i] = insn;
11389 ix86_dump_ppro_packet (dump);
11390 ix86_sched_data.ppro.decode[0] = NULL;
11391 ix86_sched_data.ppro.decode[1] = NULL;
11392 ix86_sched_data.ppro.decode[2] = NULL;
11396 return --ix86_sched_data.ppro.issued_this_cycle;
/* Target hook: nonzero when the DFA pipeline description should be used
   for the selected CPU.  NOTE(review): the return statements are elided
   in this view.  */
11401 ia32_use_dfa_pipeline_interface ()
11403 if (ix86_cpu == PROCESSOR_PENTIUM)
11408 /* How many alternative schedules to try. This should be as wide as the
11409 scheduling freedom in the DFA, but no wider. Making this value too
11410 large results in extra work for the scheduler. */
11413 ia32_multipass_dfa_lookahead ()
11415 if (ix86_cpu == PROCESSOR_PENTIUM)
11422 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11423 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
11427 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11429 rtx dstref, srcref, dstreg, srcreg;
/* Process every insn in the emitted sequence.  */
11433 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11435 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11439 /* Subroutine of above to actually do the updating by recursively walking
   the rtx X, copying MEM attributes from DSTREF/SRCREF onto MEMs whose
   address is exactly DSTREG/SRCREG.  */
11443 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11445 rtx dstref, srcref, dstreg, srcreg;
11447 enum rtx_code code = GET_CODE (x);
11448 const char *format_ptr = GET_RTX_FORMAT (code);
11451 if (code == MEM && XEXP (x, 0) == dstreg)
11452 MEM_COPY_ATTRIBUTES (x, dstref);
11453 else if (code == MEM && XEXP (x, 0) == srcreg)
11454 MEM_COPY_ATTRIBUTES (x, srcref);
/* Recurse into sub-expressions ('e') and rtx vectors ('E').  */
11456 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11458 if (*format_ptr == 'e')
11459 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11461 else if (*format_ptr == 'E')
11462 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11463 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
11468 /* Compute the alignment given to a constant that is being placed in memory.
11469 EXP is the constant and ALIGN is the alignment that the object would
11471 The value of this function is used instead of that alignment to align
11475 ix86_constant_alignment (exp, align)
/* FP constants: give doubles 64-bit and 128-bit-mode values 128-bit
   alignment when the default is smaller.  */
11479 if (TREE_CODE (exp) == REAL_CST)
11481 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11483 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
/* Long string constants also get boosted alignment.  */
11486 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
11493 /* Compute the alignment for a static variable.
11494 TYPE is the data type, and ALIGN is the alignment that
11495 the object would ordinarily have. The value of this function is used
11496 instead of that alignment to align the object. */
11499 ix86_data_alignment (type, align)
/* Large aggregates (>= 256 bits) get at least 256-bit alignment.  */
11503 if (AGGREGATE_TYPE_P (type)
11504 && TYPE_SIZE (type)
11505 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11506 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11507 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11510 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11511 to 16byte boundary. */
11514 if (AGGREGATE_TYPE_P (type)
11515 && TYPE_SIZE (type)
11516 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11517 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11518 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Element/field-mode based boosts for arrays, complex types, records,
   and scalar types follow the same 64/128-bit pattern.  */
11522 if (TREE_CODE (type) == ARRAY_TYPE)
11524 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11526 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11529 else if (TREE_CODE (type) == COMPLEX_TYPE)
11532 if (TYPE_MODE (type) == DCmode && align < 64)
11534 if (TYPE_MODE (type) == XCmode && align < 128)
11537 else if ((TREE_CODE (type) == RECORD_TYPE
11538 || TREE_CODE (type) == UNION_TYPE
11539 || TREE_CODE (type) == QUAL_UNION_TYPE)
11540 && TYPE_FIELDS (type))
11542 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11544 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11547 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11548 || TREE_CODE (type) == INTEGER_TYPE)
11550 if (TYPE_MODE (type) == DFmode && align < 64)
11552 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11559 /* Compute the alignment for a local variable.
11560 TYPE is the data type, and ALIGN is the alignment that
11561 the object would ordinarily have. The value of this macro is used
11562 instead of that alignment to align the object. */
11565 ix86_local_alignment (type, align)
11569 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11570 to 16byte boundary. */
11573 if (AGGREGATE_TYPE_P (type)
11574 && TYPE_SIZE (type)
11575 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11576 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11577 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Same mode-based boosts as ix86_data_alignment, applied to locals.  */
11580 if (TREE_CODE (type) == ARRAY_TYPE)
11582 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11584 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11587 else if (TREE_CODE (type) == COMPLEX_TYPE)
11589 if (TYPE_MODE (type) == DCmode && align < 64)
11591 if (TYPE_MODE (type) == XCmode && align < 128)
11594 else if ((TREE_CODE (type) == RECORD_TYPE
11595 || TREE_CODE (type) == UNION_TYPE
11596 || TREE_CODE (type) == QUAL_UNION_TYPE)
11597 && TYPE_FIELDS (type))
11599 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11601 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11604 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11605 || TREE_CODE (type) == INTEGER_TYPE)
11608 if (TYPE_MODE (type) == DFmode && align < 64)
11610 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11616 /* Emit RTL insns to initialize the variable parts of a trampoline.
11617 FNADDR is an RTX for the address of the function's pure code.
11618 CXT is an RTX for the static chain value for the function. */
11620 x86_initialize_trampoline (tramp, fnaddr, cxt)
11621 rtx tramp, fnaddr, cxt;
/* 32-bit template: 0xb9 = "mov $cxt, %ecx"; 0xe9 = "jmp rel32".  */
11625 /* Compute offset from the end of the jmp to the target function. */
11626 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11627 plus_constant (tramp, 10),
11628 NULL_RTX, 1, OPTAB_DIRECT);
11629 emit_move_insn (gen_rtx_MEM (QImode, tramp),
11630 gen_int_mode (0xb9, QImode));
11631 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11632 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
11633 gen_int_mode (0xe9, QImode));
11634 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11639 /* Try to load address using shorter movl instead of movabs.
11640 We may want to support movq for kernel mode, but kernel does not use
11641 trampolines at the moment. */
11642 if (x86_64_zero_extended_value (fnaddr))
/* 0x41 0xbb = "movl $imm32, %r11d" (zero-extends into %r11).  */
11644 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11645 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11646 gen_int_mode (0xbb41, HImode));
11647 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11648 gen_lowpart (SImode, fnaddr));
/* 0x49 0xbb = "movabs $imm64, %r11".  */
11653 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11654 gen_int_mode (0xbb49, HImode));
11655 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11659 /* Load static chain using movabs to r10. */
11660 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11661 gen_int_mode (0xba49, HImode));
11662 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11665 /* Jump to the r11 */
11666 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11667 gen_int_mode (0xff49, HImode));
11668 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
11669 gen_int_mode (0xe3, QImode));
/* Sanity check: emitted code must fit in the reserved trampoline.  */
11671 if (offset > TRAMPOLINE_SIZE)
/* Register the md builtin NAME with signature TYPE and code CODE, but
   only when the target flags in MASK are enabled (-msse, -mmmx, ...).  */
11676 #define def_builtin(MASK, NAME, TYPE, CODE) \
11678 if ((MASK) & target_flags) \
11679 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11680 NULL, NULL_TREE); \
/* Table-entry type describing one ia32 builtin: the enabling target-flag
   mask, the insn pattern, the user-visible name, the builtin enum code,
   the comparison code (for compare builtins), and a flag that requests
   swapped operands.  */
11683 struct builtin_description
11685 const unsigned int mask;
11686 const enum insn_code icode;
11687 const char *const name;
11688 const enum ix86_builtins code;
11689 const enum rtx_code comparison;
11690 const unsigned int flag;
11693 /* Used for builtins that are enabled both by -msse and -msse2. */
11694 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
/* (u)comiss/(u)comisd comparison builtins.  The GT/GE entries deliberately
   use LT/LE with flag == 1: the expander swaps the operands, since the
   hardware only provides the "below" direction.  */
11696 static const struct builtin_description bdesc_comi[] =
11698 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
11699 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
11700 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
11701 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
11702 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
11703 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
11704 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
11705 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
11706 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
11707 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
11708 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
11709 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
11710 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
11711 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
11712 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
11713 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
11714 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
11715 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
11716 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
11717 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
11718 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
11719 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
11720 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
11721 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
11724 static const struct builtin_description bdesc_2arg[] =
11727 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11728 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11729 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11730 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11731 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11732 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11733 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11734 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11736 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11737 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11738 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11739 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11740 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11741 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11742 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11743 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11744 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11745 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11746 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11747 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11748 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11749 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11750 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11751 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
11752 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
11753 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11754 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11755 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11756 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11757 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
11758 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
11759 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11761 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11762 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11763 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11764 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11766 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11767 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11768 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11769 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11770 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
11773 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11774 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11775 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11776 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11777 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11778 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11780 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11781 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11782 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11783 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11784 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11785 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11786 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11787 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11789 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11790 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
11791 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
11793 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11794 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11795 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11796 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11798 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11799 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
11801 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11802 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11803 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11804 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11805 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11806 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11808 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11809 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11810 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11811 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
11813 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11814 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11815 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11816 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11817 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11818 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
11821 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11822 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11823 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11825 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11826 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
11828 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11829 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11830 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11831 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11832 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
11833 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
11835 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
11836 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
11837 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
11838 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
11839 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
11840 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
11842 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
11843 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
11844 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
11845 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
11847 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
11848 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
11851 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
11852 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
11853 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
11854 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
11855 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
11856 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
11857 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
11858 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
11860 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
11861 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
11862 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
11863 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
11864 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
11865 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
11866 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
11867 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
11868 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
11869 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
11870 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
11871 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
11872 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
11873 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
11874 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
11875 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 },
11876 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 },
11877 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
11878 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
11879 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
11880 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
11881 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 },
11882 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 },
11883 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
11885 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
11886 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
11887 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
11888 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
11890 { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
11891 { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
11892 { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
11893 { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
11895 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
11896 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
11897 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
11900 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
11901 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
11902 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
11903 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },	/* paddq adds quadword (V2DI) lanes, not V4SI */
11904 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
11905 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
11906 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
11907 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },	/* psubq subtracts quadword (V2DI) lanes, not V4SI */
/* 128-bit saturating add/subtract are SSE2 instructions; gating them on
   MASK_MMX would wrongly make them available under plain -mmmx.  */
11909 { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
11910 { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
11911 { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
11912 { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
11913 { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
11914 { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
11915 { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
11916 { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
11918 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
11919 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
11920 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
11921 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
11923 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
11924 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
11925 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
11926 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
11928 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
11929 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
11931 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
11932 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
11933 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
11934 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
11935 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
11936 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
11938 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
11939 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
11940 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
11941 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
11943 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
11944 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
11945 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
11946 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
11947 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
11948 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
11950 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
11951 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
11952 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
11954 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
11955 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
11957 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
11958 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
11959 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
11960 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
11961 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
11962 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
11964 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
11965 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
11966 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
11967 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
11968 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
11969 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
11971 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
11972 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
11973 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
11974 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
11976 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
11978 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
11979 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
11980 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
/* Builtins taking exactly one operand: move-mask extraction, square root,
   reciprocal estimates, and the scalar/packed conversion instructions.
   Each row: enabling mask, insn code, name (0 = registered elsewhere),
   builtin enum, comparison code (unused here), swap-operands flag.
   The initializer braces below were missing; restored so the array is
   a well-formed definition.  */
11983 static const struct builtin_description bdesc_1arg[] =
{
11985 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
11986 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
11988 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
11989 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
11990 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
11992 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
11993 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
11994 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
11995 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
11997 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
11998 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
11999 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12001 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12003 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12004 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12006 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12007 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12008 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12009 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12010 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12012 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12014 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12015 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12017 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12018 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12019 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
};
12023 ix86_init_builtins ()
12026 ix86_init_mmx_sse_builtins ();
12029 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12030 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12033 ix86_init_mmx_sse_builtins ()
12035 const struct builtin_description * d;
12038 tree pchar_type_node = build_pointer_type (char_type_node);
12039 tree pfloat_type_node = build_pointer_type (float_type_node);
12040 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12041 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12042 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12045 tree int_ftype_v4sf_v4sf
12046 = build_function_type_list (integer_type_node,
12047 V4SF_type_node, V4SF_type_node, NULL_TREE);
12048 tree v4si_ftype_v4sf_v4sf
12049 = build_function_type_list (V4SI_type_node,
12050 V4SF_type_node, V4SF_type_node, NULL_TREE);
12051 /* MMX/SSE/integer conversions. */
12052 tree int_ftype_v4sf
12053 = build_function_type_list (integer_type_node,
12054 V4SF_type_node, NULL_TREE);
12055 tree int_ftype_v8qi
12056 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12057 tree v4sf_ftype_v4sf_int
12058 = build_function_type_list (V4SF_type_node,
12059 V4SF_type_node, integer_type_node, NULL_TREE);
12060 tree v4sf_ftype_v4sf_v2si
12061 = build_function_type_list (V4SF_type_node,
12062 V4SF_type_node, V2SI_type_node, NULL_TREE);
12063 tree int_ftype_v4hi_int
12064 = build_function_type_list (integer_type_node,
12065 V4HI_type_node, integer_type_node, NULL_TREE);
12066 tree v4hi_ftype_v4hi_int_int
12067 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12068 integer_type_node, integer_type_node,
12070 /* Miscellaneous. */
12071 tree v8qi_ftype_v4hi_v4hi
12072 = build_function_type_list (V8QI_type_node,
12073 V4HI_type_node, V4HI_type_node, NULL_TREE);
12074 tree v4hi_ftype_v2si_v2si
12075 = build_function_type_list (V4HI_type_node,
12076 V2SI_type_node, V2SI_type_node, NULL_TREE);
12077 tree v4sf_ftype_v4sf_v4sf_int
12078 = build_function_type_list (V4SF_type_node,
12079 V4SF_type_node, V4SF_type_node,
12080 integer_type_node, NULL_TREE);
12081 tree v2si_ftype_v4hi_v4hi
12082 = build_function_type_list (V2SI_type_node,
12083 V4HI_type_node, V4HI_type_node, NULL_TREE);
12084 tree v4hi_ftype_v4hi_int
12085 = build_function_type_list (V4HI_type_node,
12086 V4HI_type_node, integer_type_node, NULL_TREE);
12087 tree v4hi_ftype_v4hi_di
12088 = build_function_type_list (V4HI_type_node,
12089 V4HI_type_node, long_long_unsigned_type_node,
12091 tree v2si_ftype_v2si_di
12092 = build_function_type_list (V2SI_type_node,
12093 V2SI_type_node, long_long_unsigned_type_node,
12095 tree void_ftype_void
12096 = build_function_type (void_type_node, void_list_node);
12097 tree void_ftype_unsigned
12098 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12099 tree unsigned_ftype_void
12100 = build_function_type (unsigned_type_node, void_list_node);
12102 = build_function_type (long_long_unsigned_type_node, void_list_node);
12103 tree v4sf_ftype_void
12104 = build_function_type (V4SF_type_node, void_list_node);
12105 tree v2si_ftype_v4sf
12106 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12107 /* Loads/stores. */
12108 tree void_ftype_v8qi_v8qi_pchar
12109 = build_function_type_list (void_type_node,
12110 V8QI_type_node, V8QI_type_node,
12111 pchar_type_node, NULL_TREE);
12112 tree v4sf_ftype_pfloat
12113 = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
12114 /* @@@ the type is bogus */
/* loadhps/loadlps and storehps/storelps take a pointer to a V2SI half,
   so these signatures must use pv2si_type_node; pv2di_type_node here was
   a typo (it contradicts both variable names and the L6030 declaration).  */
12115 tree v4sf_ftype_v4sf_pv2si
12116 = build_function_type_list (V4SF_type_node,
12117 V4SF_type_node, pv2si_type_node, NULL_TREE);
12118 tree void_ftype_pv2si_v4sf
12119 = build_function_type_list (void_type_node,
12120 pv2si_type_node, V4SF_type_node, NULL_TREE);
12121 tree void_ftype_pfloat_v4sf
12122 = build_function_type_list (void_type_node,
12123 pfloat_type_node, V4SF_type_node, NULL_TREE);
12124 tree void_ftype_pdi_di
12125 = build_function_type_list (void_type_node,
12126 pdi_type_node, long_long_unsigned_type_node,
12128 tree void_ftype_pv2di_v2di
12129 = build_function_type_list (void_type_node,
12130 pv2di_type_node, V2DI_type_node, NULL_TREE);
12131 /* Normal vector unops. */
12132 tree v4sf_ftype_v4sf
12133 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12135 /* Normal vector binops. */
12136 tree v4sf_ftype_v4sf_v4sf
12137 = build_function_type_list (V4SF_type_node,
12138 V4SF_type_node, V4SF_type_node, NULL_TREE);
12139 tree v8qi_ftype_v8qi_v8qi
12140 = build_function_type_list (V8QI_type_node,
12141 V8QI_type_node, V8QI_type_node, NULL_TREE);
12142 tree v4hi_ftype_v4hi_v4hi
12143 = build_function_type_list (V4HI_type_node,
12144 V4HI_type_node, V4HI_type_node, NULL_TREE);
12145 tree v2si_ftype_v2si_v2si
12146 = build_function_type_list (V2SI_type_node,
12147 V2SI_type_node, V2SI_type_node, NULL_TREE);
12148 tree di_ftype_di_di
12149 = build_function_type_list (long_long_unsigned_type_node,
12150 long_long_unsigned_type_node,
12151 long_long_unsigned_type_node, NULL_TREE);
12153 tree v2si_ftype_v2sf
12154 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12155 tree v2sf_ftype_v2si
12156 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12157 tree v2si_ftype_v2si
12158 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12159 tree v2sf_ftype_v2sf
12160 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12161 tree v2sf_ftype_v2sf_v2sf
12162 = build_function_type_list (V2SF_type_node,
12163 V2SF_type_node, V2SF_type_node, NULL_TREE);
12164 tree v2si_ftype_v2sf_v2sf
12165 = build_function_type_list (V2SI_type_node,
12166 V2SF_type_node, V2SF_type_node, NULL_TREE);
12167 tree pint_type_node = build_pointer_type (integer_type_node);
12168 tree pdouble_type_node = build_pointer_type (double_type_node);
12169 tree int_ftype_v2df_v2df
12170 = build_function_type_list (integer_type_node,
12171 V2DF_type_node, V2DF_type_node, NULL_TREE);
12174 = build_function_type (intTI_type_node, void_list_node);
12175 tree ti_ftype_ti_ti
12176 = build_function_type_list (intTI_type_node,
12177 intTI_type_node, intTI_type_node, NULL_TREE);
12178 tree void_ftype_pvoid
12179 = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
12181 = build_function_type_list (V2DI_type_node,
12182 long_long_unsigned_type_node, NULL_TREE);
12183 tree v4sf_ftype_v4si
12184 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12185 tree v4si_ftype_v4sf
12186 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12187 tree v2df_ftype_v4si
12188 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12189 tree v4si_ftype_v2df
12190 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12191 tree v2si_ftype_v2df
12192 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12193 tree v4sf_ftype_v2df
12194 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12195 tree v2df_ftype_v2si
12196 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12197 tree v2df_ftype_v4sf
12198 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12199 tree int_ftype_v2df
12200 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12201 tree v2df_ftype_v2df_int
12202 = build_function_type_list (V2DF_type_node,
12203 V2DF_type_node, integer_type_node, NULL_TREE);
12204 tree v4sf_ftype_v4sf_v2df
12205 = build_function_type_list (V4SF_type_node,
12206 V4SF_type_node, V2DF_type_node, NULL_TREE);
12207 tree v2df_ftype_v2df_v4sf
12208 = build_function_type_list (V2DF_type_node,
12209 V2DF_type_node, V4SF_type_node, NULL_TREE);
12210 tree v2df_ftype_v2df_v2df_int
12211 = build_function_type_list (V2DF_type_node,
12212 V2DF_type_node, V2DF_type_node,
12215 tree v2df_ftype_v2df_pv2si
12216 = build_function_type_list (V2DF_type_node,
12217 V2DF_type_node, pv2si_type_node, NULL_TREE);
12218 tree void_ftype_pv2si_v2df
12219 = build_function_type_list (void_type_node,
12220 pv2si_type_node, V2DF_type_node, NULL_TREE);
12221 tree void_ftype_pdouble_v2df
12222 = build_function_type_list (void_type_node,
12223 pdouble_type_node, V2DF_type_node, NULL_TREE);
12224 tree void_ftype_pint_int
12225 = build_function_type_list (void_type_node,
12226 pint_type_node, integer_type_node, NULL_TREE);
12227 tree void_ftype_v16qi_v16qi_pchar
12228 = build_function_type_list (void_type_node,
12229 V16QI_type_node, V16QI_type_node,
12230 pchar_type_node, NULL_TREE);
12231 tree v2df_ftype_pdouble
12232 = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
12233 tree v2df_ftype_v2df_v2df
12234 = build_function_type_list (V2DF_type_node,
12235 V2DF_type_node, V2DF_type_node, NULL_TREE);
12236 tree v16qi_ftype_v16qi_v16qi
12237 = build_function_type_list (V16QI_type_node,
12238 V16QI_type_node, V16QI_type_node, NULL_TREE);
12239 tree v8hi_ftype_v8hi_v8hi
12240 = build_function_type_list (V8HI_type_node,
12241 V8HI_type_node, V8HI_type_node, NULL_TREE);
12242 tree v4si_ftype_v4si_v4si
12243 = build_function_type_list (V4SI_type_node,
12244 V4SI_type_node, V4SI_type_node, NULL_TREE);
12245 tree v2di_ftype_v2di_v2di
12246 = build_function_type_list (V2DI_type_node,
12247 V2DI_type_node, V2DI_type_node, NULL_TREE);
12248 tree v2di_ftype_v2df_v2df
12249 = build_function_type_list (V2DI_type_node,
12250 V2DF_type_node, V2DF_type_node, NULL_TREE);
12251 tree v2df_ftype_v2df
12252 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12253 tree v2df_ftype_double
12254 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12255 tree v2df_ftype_double_double
12256 = build_function_type_list (V2DF_type_node,
12257 double_type_node, double_type_node, NULL_TREE);
12258 tree int_ftype_v8hi_int
12259 = build_function_type_list (integer_type_node,
12260 V8HI_type_node, integer_type_node, NULL_TREE);
12261 tree v8hi_ftype_v8hi_int_int
12262 = build_function_type_list (V8HI_type_node,
12263 V8HI_type_node, integer_type_node,
12264 integer_type_node, NULL_TREE);
12265 tree v2di_ftype_v2di_int
12266 = build_function_type_list (V2DI_type_node,
12267 V2DI_type_node, integer_type_node, NULL_TREE);
12268 tree v4si_ftype_v4si_int
12269 = build_function_type_list (V4SI_type_node,
12270 V4SI_type_node, integer_type_node, NULL_TREE);
12271 tree v8hi_ftype_v8hi_int
12272 = build_function_type_list (V8HI_type_node,
12273 V8HI_type_node, integer_type_node, NULL_TREE);
12274 tree v8hi_ftype_v8hi_v2di
12275 = build_function_type_list (V8HI_type_node,
12276 V8HI_type_node, V2DI_type_node, NULL_TREE);
12277 tree v4si_ftype_v4si_v2di
12278 = build_function_type_list (V4SI_type_node,
12279 V4SI_type_node, V2DI_type_node, NULL_TREE);
12280 tree v4si_ftype_v8hi_v8hi
12281 = build_function_type_list (V4SI_type_node,
12282 V8HI_type_node, V8HI_type_node, NULL_TREE);
12283 tree di_ftype_v8qi_v8qi
12284 = build_function_type_list (long_long_unsigned_type_node,
12285 V8QI_type_node, V8QI_type_node, NULL_TREE);
12286 tree v2di_ftype_v16qi_v16qi
12287 = build_function_type_list (V2DI_type_node,
12288 V16QI_type_node, V16QI_type_node, NULL_TREE);
12289 tree int_ftype_v16qi
12290 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12292 /* Add all builtins that are more or less simple operations on two
12294 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12296 /* Use one of the operands; the target can have a different mode for
12297 mask-generating compares. */
12298 enum machine_mode mode;
12303 mode = insn_data[d->icode].operand[1].mode;
12308 type = v16qi_ftype_v16qi_v16qi;
12311 type = v8hi_ftype_v8hi_v8hi;
12314 type = v4si_ftype_v4si_v4si;
12317 type = v2di_ftype_v2di_v2di;
12320 type = v2df_ftype_v2df_v2df;
12323 type = ti_ftype_ti_ti;
12326 type = v4sf_ftype_v4sf_v4sf;
12329 type = v8qi_ftype_v8qi_v8qi;
12332 type = v4hi_ftype_v4hi_v4hi;
12335 type = v2si_ftype_v2si_v2si;
12338 type = di_ftype_di_di;
12345 /* Override for comparisons. */
12346 if (d->icode == CODE_FOR_maskcmpv4sf3
12347 || d->icode == CODE_FOR_maskncmpv4sf3
12348 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12349 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12350 type = v4si_ftype_v4sf_v4sf;
12352 if (d->icode == CODE_FOR_maskcmpv2df3
12353 || d->icode == CODE_FOR_maskncmpv2df3
12354 || d->icode == CODE_FOR_vmmaskcmpv2df3
12355 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12356 type = v2di_ftype_v2df_v2df;
12358 def_builtin (d->mask, d->name, type, d->code);
12361 /* Add the remaining MMX insns with somewhat more complicated types. */
12362 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12363 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12364 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12365 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12366 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12367 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12368 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12370 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12371 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12372 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12374 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12375 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12377 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12378 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12380 /* comi/ucomi insns. */
12381 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12382 if (d->mask == MASK_SSE2)
12383 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12385 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12387 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12388 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12389 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12391 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12392 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12393 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12394 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12395 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12396 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12398 def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
12399 def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
12400 def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
12401 def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
12403 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12404 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12406 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12408 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12409 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12410 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12411 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12412 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12413 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12415 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12416 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12417 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12418 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12420 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12421 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12422 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12423 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12425 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12427 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12429 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12430 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12431 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12432 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12433 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12434 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12436 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12438 /* Original 3DNow! */
12439 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12440 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12441 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12442 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12443 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12444 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12445 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12446 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12447 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12448 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12449 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12450 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12451 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12452 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12453 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12454 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12455 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12456 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12457 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12458 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12460 /* 3DNow! extension as used in the Athlon CPU. */
12461 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12462 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12463 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12464 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12465 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12466 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12468 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12471 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12472 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12474 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12475 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12477 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
12478 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
12479 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
12480 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12481 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12482 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12484 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12485 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12486 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12487 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12489 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12490 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12491 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12492 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12493 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12495 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12496 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12497 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12498 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12500 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12501 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12503 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12505 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12506 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12508 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12509 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12510 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12511 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12512 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12514 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12516 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12517 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12519 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12520 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12521 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12523 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12524 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12525 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12527 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12528 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12529 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12530 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
12531 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
12532 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12533 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12535 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
12536 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12537 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
12539 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12540 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12541 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12543 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12544 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12545 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12547 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12548 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12550 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12551 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12552 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12554 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12555 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12556 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12558 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12559 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12561 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
12564 /* Errors in the source file can cause expand_expr to return const0_rtx
12565 where we expect a vector. To avoid crashing, use one of the vector
12566 clear instructions. */
/* NOTE(review): this excerpt is line-sampled; the function's return type
   ("static rtx"), the "rtx x;" parameter declaration, braces, the else
   branch and the trailing "return x;" are elided.  Comments below describe
   only the visible logic.  */
12568 safe_vector_operand (x, mode)
12570 enum machine_mode mode;
/* If X is anything other than const0_rtx, it is already a usable vector
   operand and is returned unchanged (early-exit path partially elided).  */
12572 if (x != const0_rtx)
/* Otherwise materialize a fresh pseudo of MODE and clear it with the
   cheapest vector-zeroing instruction for that register class.  */
12574 x = gen_reg_rtx (mode);
/* MMX/3DNow! modes are cleared via the DImode mmx_clrdi pattern; a SUBREG
   is used when MODE is not literally DImode.  */
12576 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
12577 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12578 : gen_rtx_SUBREG (DImode, x, 0)));
/* All other (SSE) vector modes are cleared through the V4SFmode
   sse_clrv4sf pattern, again via SUBREG when modes differ.  */
12580 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12581 : gen_rtx_SUBREG (V4SFmode, x, 0)));
12585 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* NOTE(review): sampled excerpt — the "static rtx" return type, the
   "tree arglist; rtx target;" parameter declarations, the "rtx pat;"
   local, the abort() body at 12614, and the tail that emits PAT and
   returns TARGET are elided.  Expands a two-operand builtin through the
   named insn pattern ICODE and (per the elided tail) returns the result
   register.  */
12588 ix86_expand_binop_builtin (icode, arglist, target)
12589 enum insn_code icode;
/* Pull the two arguments out of the TREE_LIST and expand them to RTL.  */
12594 tree arg0 = TREE_VALUE (arglist);
12595 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12596 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12597 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
/* Result and input operand modes come from the insn's operand table.  */
12598 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12599 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12600 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx produced by erroneous source (see
   safe_vector_operand above).  */
12602 if (VECTOR_MODE_P (mode0))
12603 op0 = safe_vector_operand (op0, mode0);
12604 if (VECTOR_MODE_P (mode1))
12605 op1 = safe_vector_operand (op1, mode1);
/* Reuse TARGET only when it exists, has the right mode and satisfies the
   output predicate; otherwise allocate a fresh pseudo (the leading
   "if (! target" condition line is elided here).  */
12608 || GET_MODE (target) != tmode
12609 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12610 target = gen_reg_rtx (tmode);
12612 /* In case the insn wants input operands in modes different from
12613 the result, abort. */
12614 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
/* Force each input into a register of the required mode if the insn's
   operand predicate rejects it as-is.  */
12617 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12618 op0 = copy_to_mode_reg (mode0, op0);
12619 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12620 op1 = copy_to_mode_reg (mode1, op1);
12622 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12623 yet one of the two must not be a memory. This is normally enforced
12624 by expanders, but we didn't bother to create one here. */
12625 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12626 op0 = copy_to_mode_reg (mode0, op0);
/* Generate the insn; the elided tail presumably checks PAT for NULL,
   emits it and returns TARGET — TODO confirm against full source.  */
12628 pat = GEN_FCN (icode) (target, op0, op1);
12635 /* In type_for_mode we restrict the ability to create TImode types
12636 to hosts with 64-bit H_W_I. So we've defined the SSE logicals
12637 to have a V4SFmode signature. Convert them in-place to TImode. */
/* NOTE(review): sampled excerpt — return type, remaining parameter
   declarations, "rtx pat;" and the emit/NULL-check tail are elided.
   Used for the SSE logical builtins (andps/andnps/orps/xorps): operands
   arrive as V4SFmode, the operation is performed in TImode, and the
   result is handed back as V4SFmode (line 12671).  */
12640 ix86_expand_timode_binop_builtin (icode, arglist, target)
12641 enum insn_code icode;
12646 tree arg0 = TREE_VALUE (arglist);
12647 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12648 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12649 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
/* Reinterpret both V4SFmode operands as TImode without moving data.  */
12651 op0 = gen_lowpart (TImode, op0);
12652 op1 = gen_lowpart (TImode, op1);
/* The incoming TARGET is ignored; the TImode result gets its own pseudo.  */
12653 target = gen_reg_rtx (TImode);
/* Force operands into registers where the insn predicates demand it.  */
12655 if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
12656 op0 = copy_to_mode_reg (TImode, op0);
12657 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
12658 op1 = copy_to_mode_reg (TImode, op1);
12660 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12661 yet one of the two must not be a memory. This is normally enforced
12662 by expanders, but we didn't bother to create one here. */
12663 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12664 op0 = copy_to_mode_reg (TImode, op0);
/* Generate and (in the elided lines) emit the TImode logical insn.  */
12666 pat = GEN_FCN (icode) (target, op0, op1);
/* Hand the TImode result back to the caller in its V4SFmode guise.  */
12671 return gen_lowpart (V4SFmode, target);
12674 /* Subroutine of ix86_expand_builtin to take care of stores. */
/* NOTE(review): sampled excerpt — return type, "tree arglist;"
   declaration, "rtx pat;" and the emit/return tail are elided.  ARG0 is
   the destination pointer, ARG1 the value to store; operand 0 of ICODE
   is the memory destination.  */
12677 ix86_expand_store_builtin (icode, arglist)
12678 enum insn_code icode;
12682 tree arg0 = TREE_VALUE (arglist);
12683 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12684 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12685 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12686 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12687 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
/* Replace an erroneous const0_rtx source value with a cleared vector.  */
12689 if (VECTOR_MODE_P (mode1))
12690 op1 = safe_vector_operand (op1, mode1);
/* Turn the pointer argument into a MEM of the destination mode, with the
   address forced into a Pmode register.  */
12692 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
/* Force the source value into a register if the predicate rejects it.  */
12694 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12695 op1 = copy_to_mode_reg (mode1, op1);
/* Generate the store insn; the elided tail presumably emits it and
   returns — TODO confirm against full source.  */
12697 pat = GEN_FCN (icode) (op0, op1);
12703 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
/* NOTE(review): sampled excerpt — return type, the "tree arglist;
   rtx target; int do_load;" declarations, "rtx pat;", the if/else that
   selects between the DO_LOAD branch (12723) and the register branch
   (12726-12730), and the emit/return tail are all elided.  Expands a
   one-operand builtin; when DO_LOAD is nonzero, OP0 is a pointer that is
   dereferenced into a MEM first.  */
12706 ix86_expand_unop_builtin (icode, arglist, target, do_load)
12707 enum insn_code icode;
12713 tree arg0 = TREE_VALUE (arglist);
12714 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12715 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12716 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Reuse TARGET only if present, correctly moded and predicate-approved
   (leading "if (! target" line elided).  */
12719 || GET_MODE (target) != tmode
12720 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12721 target = gen_reg_rtx (tmode);
/* DO_LOAD branch: treat OP0 as an address and load through it.  */
12723 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
/* Non-load branch: sanitize a const0_rtx vector and satisfy the input
   predicate.  */
12726 if (VECTOR_MODE_P (mode0))
12727 op0 = safe_vector_operand (op0, mode0);
12729 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12730 op0 = copy_to_mode_reg (mode0, op0);
/* Generate the unop insn; elided tail presumably emits PAT and returns
   TARGET — TODO confirm against full source.  */
12733 pat = GEN_FCN (icode) (target, op0);
12740 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12741 sqrtss, rsqrtss, rcpss. */
/* NOTE(review): sampled excerpt — return type, remaining declarations,
   "rtx pat;", the statement that initializes OP1 (around elided line
   12766; presumably "op1 = op0;", since these vm* patterns take the same
   vector twice — TODO confirm), and the emit/return tail are elided.  */
12744 ix86_expand_unop1_builtin (icode, arglist, target)
12745 enum insn_code icode;
12750 tree arg0 = TREE_VALUE (arglist);
12751 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12752 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12753 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Reuse TARGET only if present, correctly moded and predicate-approved
   (leading "if (! target" line elided).  */
12756 || GET_MODE (target) != tmode
12757 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12758 target = gen_reg_rtx (tmode);
/* Sanitize an erroneous const0_rtx vector operand.  */
12760 if (VECTOR_MODE_P (mode0))
12761 op0 = safe_vector_operand (op0, mode0);
/* Both inputs share MODE0; force each into a register if rejected by the
   corresponding operand predicate.  */
12763 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12764 op0 = copy_to_mode_reg (mode0, op0);
12767 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12768 op1 = copy_to_mode_reg (mode0, op1);
/* Generate the scalar-in-vector insn; elided tail presumably emits PAT
   and returns TARGET.  */
12770 pat = GEN_FCN (icode) (target, op0, op1);
12777 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* NOTE(review): sampled excerpt — return type, remaining declarations
   ("tree arglist; rtx target;", "rtx pat, op2;" presumably), the body of
   the operand-swap block (only its temporary setup at 12805-12806 is
   visible), and the emit/return tail are elided.  Expands an SSE
   cmpps/cmpss-style builtin described by D, producing a mask result.  */
12780 ix86_expand_sse_compare (d, arglist, target)
12781 const struct builtin_description *d;
12786 tree arg0 = TREE_VALUE (arglist);
12787 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12788 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12789 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
/* Modes and the comparison code all come from the builtin descriptor.  */
12791 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12792 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12793 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12794 enum rtx_code comparison = d->comparison;
/* Sanitize const0_rtx vector operands (see safe_vector_operand).  */
12796 if (VECTOR_MODE_P (mode0))
12797 op0 = safe_vector_operand (op0, mode0);
12798 if (VECTOR_MODE_P (mode1))
12799 op1 = safe_vector_operand (op1, mode1);
12801 /* Swap operands if we have a comparison that isn't available in
/* Visible fragment of the swap: copy OP1 into a scratch register;
   the actual exchange of OP0/OP1 and the comparison-code adjustment are
   in elided lines — TODO confirm against full source.  */
12805 rtx tmp = gen_reg_rtx (mode1);
12806 emit_move_insn (tmp, op1);
/* Reuse TARGET only if present, correctly moded and predicate-approved
   (leading "if (! target" line elided).  */
12812 || GET_MODE (target) != tmode
12813 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
12814 target = gen_reg_rtx (tmode);
12816 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12817 op0 = copy_to_mode_reg (mode0, op0);
12818 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12819 op1 = copy_to_mode_reg (mode1, op1);
/* Build the comparison rtx the pattern embeds as operand 3, then
   generate the insn; elided tail presumably emits PAT and returns
   TARGET.  */
12821 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12822 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12829 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* NOTE(review): sampled excerpt — return type, remaining declarations,
   the body of the operand-swap block after 12852, the NULL-PAT check and
   the emit_insn (pat) call before 12875 are elided.  Expands a
   comiss/ucomiss-style builtin: compare OP0/OP1, then materialize the
   desired flag condition into the low byte of an SImode result.  */
12832 ix86_expand_sse_comi (d, arglist, target)
12833 const struct builtin_description *d;
12838 tree arg0 = TREE_VALUE (arglist);
12839 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12840 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12841 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
/* For comi patterns the two inputs are operands 0 and 1 (no separate
   output operand in the insn template).  */
12843 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
12844 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
12845 enum rtx_code comparison = d->comparison;
/* Sanitize const0_rtx vector operands.  */
12847 if (VECTOR_MODE_P (mode0))
12848 op0 = safe_vector_operand (op0, mode0);
12849 if (VECTOR_MODE_P (mode1))
12850 op1 = safe_vector_operand (op1, mode1);
12852 /* Swap operands if we have a comparison that isn't available in
/* Result setup: zero an SImode pseudo, then view its low byte as a
   QImode SUBREG so only that byte is written below.  */
12861 target = gen_reg_rtx (SImode);
12862 emit_move_insn (target, const0_rtx);
12863 target = gen_rtx_SUBREG (QImode, target, 0);
/* Force inputs into registers where the insn predicates demand it.  */
12865 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12866 op0 = copy_to_mode_reg (mode0, op0);
12867 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12868 op1 = copy_to_mode_reg (mode1, op1);
/* Emit the flag-setting comparison insn (emit call elided).  */
12870 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12871 pat = GEN_FCN (d->icode) (op0, op1, op2);
/* Store the comparison outcome (a condition on the flags register) into
   the low byte of TARGET via STRICT_LOW_PART, leaving the upper bytes
   zero from the earlier clear.  The second operand of the inner
   comparison (presumably const0_rtx) is on an elided line.  */
12875 emit_insn (gen_rtx_SET (VOIDmode,
12876 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
12877 gen_rtx_fmt_ee (comparison, QImode,
12878 gen_rtx_REG (CCmode, FLAGS_REG),
/* Return the full SImode pseudo underlying the QImode SUBREG.  */
12881 return SUBREG_REG (target);
12884 /* Expand an expression EXP that calls a built-in function,
12885 with result going to TARGET if that's convenient
12886 (and in mode MODE if that's convenient).
12887 SUBTARGET may be used as the target for computing one of EXP's operands.
12888 IGNORE is nonzero if the value is to be ignored. */
12891 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
12894 rtx subtarget ATTRIBUTE_UNUSED;
12895 enum machine_mode mode ATTRIBUTE_UNUSED;
12896 int ignore ATTRIBUTE_UNUSED;
12898 const struct builtin_description *d;
12900 enum insn_code icode;
12901 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12902 tree arglist = TREE_OPERAND (exp, 1);
12903 tree arg0, arg1, arg2;
12904 rtx op0, op1, op2, pat;
12905 enum machine_mode tmode, mode0, mode1, mode2;
12906 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12910 case IX86_BUILTIN_EMMS:
12911 emit_insn (gen_emms ());
12914 case IX86_BUILTIN_SFENCE:
12915 emit_insn (gen_sfence ());
12918 case IX86_BUILTIN_PEXTRW:
12919 case IX86_BUILTIN_PEXTRW128:
12920 icode = (fcode == IX86_BUILTIN_PEXTRW
12921 ? CODE_FOR_mmx_pextrw
12922 : CODE_FOR_sse2_pextrw);
12923 arg0 = TREE_VALUE (arglist);
12924 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12925 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12926 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12927 tmode = insn_data[icode].operand[0].mode;
12928 mode0 = insn_data[icode].operand[1].mode;
12929 mode1 = insn_data[icode].operand[2].mode;
12931 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12932 op0 = copy_to_mode_reg (mode0, op0);
12933 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12935 /* @@@ better error message */
12936 error ("selector must be an immediate");
12937 return gen_reg_rtx (tmode);
12940 || GET_MODE (target) != tmode
12941 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12942 target = gen_reg_rtx (tmode);
12943 pat = GEN_FCN (icode) (target, op0, op1);
12949 case IX86_BUILTIN_PINSRW:
12950 case IX86_BUILTIN_PINSRW128:
12951 icode = (fcode == IX86_BUILTIN_PINSRW
12952 ? CODE_FOR_mmx_pinsrw
12953 : CODE_FOR_sse2_pinsrw);
12954 arg0 = TREE_VALUE (arglist);
12955 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12956 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12957 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12958 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12959 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12960 tmode = insn_data[icode].operand[0].mode;
12961 mode0 = insn_data[icode].operand[1].mode;
12962 mode1 = insn_data[icode].operand[2].mode;
12963 mode2 = insn_data[icode].operand[3].mode;
12965 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12966 op0 = copy_to_mode_reg (mode0, op0);
12967 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12968 op1 = copy_to_mode_reg (mode1, op1);
12969 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12971 /* @@@ better error message */
12972 error ("selector must be an immediate");
12976 || GET_MODE (target) != tmode
12977 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12978 target = gen_reg_rtx (tmode);
12979 pat = GEN_FCN (icode) (target, op0, op1, op2);
12985 case IX86_BUILTIN_MASKMOVQ:
12986 icode = (fcode == IX86_BUILTIN_MASKMOVQ
12987 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
12988 : CODE_FOR_sse2_maskmovdqu);
12989 /* Note the arg order is different from the operand order. */
12990 arg1 = TREE_VALUE (arglist);
12991 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
12992 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12993 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12994 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12995 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12996 mode0 = insn_data[icode].operand[0].mode;
12997 mode1 = insn_data[icode].operand[1].mode;
12998 mode2 = insn_data[icode].operand[2].mode;
13000 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13001 op0 = copy_to_mode_reg (mode0, op0);
13002 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13003 op1 = copy_to_mode_reg (mode1, op1);
13004 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13005 op2 = copy_to_mode_reg (mode2, op2);
13006 pat = GEN_FCN (icode) (op0, op1, op2);
13012 case IX86_BUILTIN_SQRTSS:
13013 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13014 case IX86_BUILTIN_RSQRTSS:
13015 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13016 case IX86_BUILTIN_RCPSS:
13017 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13019 case IX86_BUILTIN_ANDPS:
13020 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
13022 case IX86_BUILTIN_ANDNPS:
13023 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
13025 case IX86_BUILTIN_ORPS:
13026 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
13028 case IX86_BUILTIN_XORPS:
13029 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
13032 case IX86_BUILTIN_LOADAPS:
13033 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13035 case IX86_BUILTIN_LOADUPS:
13036 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13038 case IX86_BUILTIN_STOREAPS:
13039 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13040 case IX86_BUILTIN_STOREUPS:
13041 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13043 case IX86_BUILTIN_LOADSS:
13044 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13046 case IX86_BUILTIN_STORESS:
13047 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13049 case IX86_BUILTIN_LOADHPS:
13050 case IX86_BUILTIN_LOADLPS:
13051 case IX86_BUILTIN_LOADHPD:
13052 case IX86_BUILTIN_LOADLPD:
13053 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13054 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13055 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13056 : CODE_FOR_sse2_movlpd);
13057 arg0 = TREE_VALUE (arglist);
13058 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13059 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13060 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13061 tmode = insn_data[icode].operand[0].mode;
13062 mode0 = insn_data[icode].operand[1].mode;
13063 mode1 = insn_data[icode].operand[2].mode;
13065 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13066 op0 = copy_to_mode_reg (mode0, op0);
13067 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13069 || GET_MODE (target) != tmode
13070 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13071 target = gen_reg_rtx (tmode);
13072 pat = GEN_FCN (icode) (target, op0, op1);
13078 case IX86_BUILTIN_STOREHPS:
13079 case IX86_BUILTIN_STORELPS:
13080 case IX86_BUILTIN_STOREHPD:
13081 case IX86_BUILTIN_STORELPD:
13082 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13083 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13084 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13085 : CODE_FOR_sse2_movlpd);
13086 arg0 = TREE_VALUE (arglist);
13087 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13088 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13089 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13090 mode0 = insn_data[icode].operand[1].mode;
13091 mode1 = insn_data[icode].operand[2].mode;
13093 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13094 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13095 op1 = copy_to_mode_reg (mode1, op1);
13097 pat = GEN_FCN (icode) (op0, op0, op1);
13103 case IX86_BUILTIN_MOVNTPS:
13104 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13105 case IX86_BUILTIN_MOVNTQ:
13106 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13108 case IX86_BUILTIN_LDMXCSR:
13109 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13110 target = assign_386_stack_local (SImode, 0);
13111 emit_move_insn (target, op0);
13112 emit_insn (gen_ldmxcsr (target));
13115 case IX86_BUILTIN_STMXCSR:
13116 target = assign_386_stack_local (SImode, 0);
13117 emit_insn (gen_stmxcsr (target));
13118 return copy_to_mode_reg (SImode, target);
13120 case IX86_BUILTIN_SHUFPS:
13121 case IX86_BUILTIN_SHUFPD:
13122 icode = (fcode == IX86_BUILTIN_SHUFPS
13123 ? CODE_FOR_sse_shufps
13124 : CODE_FOR_sse2_shufpd);
13125 arg0 = TREE_VALUE (arglist);
13126 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13127 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13128 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13129 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13130 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13131 tmode = insn_data[icode].operand[0].mode;
13132 mode0 = insn_data[icode].operand[1].mode;
13133 mode1 = insn_data[icode].operand[2].mode;
13134 mode2 = insn_data[icode].operand[3].mode;
13136 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13137 op0 = copy_to_mode_reg (mode0, op0);
13138 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13139 op1 = copy_to_mode_reg (mode1, op1);
13140 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13142 /* @@@ better error message */
13143 error ("mask must be an immediate");
13144 return gen_reg_rtx (tmode);
13147 || GET_MODE (target) != tmode
13148 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13149 target = gen_reg_rtx (tmode);
13150 pat = GEN_FCN (icode) (target, op0, op1, op2);
13156 case IX86_BUILTIN_PSHUFW:
13157 case IX86_BUILTIN_PSHUFD:
13158 case IX86_BUILTIN_PSHUFHW:
13159 case IX86_BUILTIN_PSHUFLW:
13160 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13161 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13162 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13163 : CODE_FOR_mmx_pshufw);
13164 arg0 = TREE_VALUE (arglist);
13165 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13166 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13167 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13168 tmode = insn_data[icode].operand[0].mode;
13169 mode1 = insn_data[icode].operand[1].mode;
13170 mode2 = insn_data[icode].operand[2].mode;
13172 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13173 op0 = copy_to_mode_reg (mode1, op0);
13174 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13176 /* @@@ better error message */
13177 error ("mask must be an immediate");
13181 || GET_MODE (target) != tmode
13182 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13183 target = gen_reg_rtx (tmode);
13184 pat = GEN_FCN (icode) (target, op0, op1);
13190 case IX86_BUILTIN_FEMMS:
13191 emit_insn (gen_femms ());
13194 case IX86_BUILTIN_PAVGUSB:
13195 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13197 case IX86_BUILTIN_PF2ID:
13198 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13200 case IX86_BUILTIN_PFACC:
13201 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13203 case IX86_BUILTIN_PFADD:
13204 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13206 case IX86_BUILTIN_PFCMPEQ:
13207 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13209 case IX86_BUILTIN_PFCMPGE:
13210 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13212 case IX86_BUILTIN_PFCMPGT:
13213 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13215 case IX86_BUILTIN_PFMAX:
13216 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13218 case IX86_BUILTIN_PFMIN:
13219 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13221 case IX86_BUILTIN_PFMUL:
13222 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13224 case IX86_BUILTIN_PFRCP:
13225 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13227 case IX86_BUILTIN_PFRCPIT1:
13228 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13230 case IX86_BUILTIN_PFRCPIT2:
13231 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13233 case IX86_BUILTIN_PFRSQIT1:
13234 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13236 case IX86_BUILTIN_PFRSQRT:
13237 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13239 case IX86_BUILTIN_PFSUB:
13240 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13242 case IX86_BUILTIN_PFSUBR:
13243 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13245 case IX86_BUILTIN_PI2FD:
13246 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13248 case IX86_BUILTIN_PMULHRW:
13249 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13251 case IX86_BUILTIN_PF2IW:
13252 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13254 case IX86_BUILTIN_PFNACC:
13255 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13257 case IX86_BUILTIN_PFPNACC:
13258 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13260 case IX86_BUILTIN_PI2FW:
13261 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13263 case IX86_BUILTIN_PSWAPDSI:
13264 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13266 case IX86_BUILTIN_PSWAPDSF:
13267 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13269 case IX86_BUILTIN_SSE_ZERO:
13270 target = gen_reg_rtx (V4SFmode);
13271 emit_insn (gen_sse_clrv4sf (target));
13274 case IX86_BUILTIN_MMX_ZERO:
13275 target = gen_reg_rtx (DImode);
13276 emit_insn (gen_mmx_clrdi (target));
13279 case IX86_BUILTIN_SQRTSD:
13280 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13281 case IX86_BUILTIN_LOADAPD:
13282 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13283 case IX86_BUILTIN_LOADUPD:
13284 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13286 case IX86_BUILTIN_STOREAPD:
13287 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13288 case IX86_BUILTIN_STOREUPD:
13289 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13291 case IX86_BUILTIN_LOADSD:
13292 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13294 case IX86_BUILTIN_STORESD:
13295 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13297 case IX86_BUILTIN_SETPD1:
13298 target = assign_386_stack_local (DFmode, 0);
13299 arg0 = TREE_VALUE (arglist);
13300 emit_move_insn (adjust_address (target, DFmode, 0),
13301 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13302 op0 = gen_reg_rtx (V2DFmode);
13303 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13304 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13307 case IX86_BUILTIN_SETPD:
13308 target = assign_386_stack_local (V2DFmode, 0);
13309 arg0 = TREE_VALUE (arglist);
13310 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13311 emit_move_insn (adjust_address (target, DFmode, 0),
13312 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13313 emit_move_insn (adjust_address (target, DFmode, 8),
13314 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13315 op0 = gen_reg_rtx (V2DFmode);
13316 emit_insn (gen_sse2_movapd (op0, target));
13319 case IX86_BUILTIN_LOADRPD:
13320 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13321 gen_reg_rtx (V2DFmode), 1);
13322 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13325 case IX86_BUILTIN_LOADPD1:
13326 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13327 gen_reg_rtx (V2DFmode), 1);
13328 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13331 case IX86_BUILTIN_STOREPD1:
13332 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13333 case IX86_BUILTIN_STORERPD:
13334 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13336 case IX86_BUILTIN_MFENCE:
13337 emit_insn (gen_sse2_mfence ());
13339 case IX86_BUILTIN_LFENCE:
13340 emit_insn (gen_sse2_lfence ());
13343 case IX86_BUILTIN_CLFLUSH:
13344 arg0 = TREE_VALUE (arglist);
13345 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13346 icode = CODE_FOR_sse2_clflush;
13347 mode0 = insn_data[icode].operand[0].mode;
13348 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13349 op0 = copy_to_mode_reg (mode0, op0);
13351 emit_insn (gen_sse2_clflush (op0));
13354 case IX86_BUILTIN_MOVNTPD:
13355 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13356 case IX86_BUILTIN_MOVNTDQ:
13357 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13358 case IX86_BUILTIN_MOVNTI:
13359 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13365 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13366 if (d->code == fcode)
13368 /* Compares are treated specially. */
13369 if (d->icode == CODE_FOR_maskcmpv4sf3
13370 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13371 || d->icode == CODE_FOR_maskncmpv4sf3
13372 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13373 || d->icode == CODE_FOR_maskcmpv2df3
13374 || d->icode == CODE_FOR_vmmaskcmpv2df3
13375 || d->icode == CODE_FOR_maskncmpv2df3
13376 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13377 return ix86_expand_sse_compare (d, arglist, target);
13379 return ix86_expand_binop_builtin (d->icode, arglist, target);
13382 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13383 if (d->code == fcode)
13384 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13386 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13387 if (d->code == fcode)
13388 return ix86_expand_sse_comi (d, arglist, target);
13390 /* @@@ Should really do something sensible here. */
13394 /* Store OPERAND to the memory after reload is completed. This means
13395 that we can't easily use assign_stack_local. */
/* NOTE(review): this extract is gappy -- the return type, local
   declarations, several emit_insn () calls and the closing braces fall
   into the numbering gaps between the lines kept below.  Verify against
   the complete i386.c before relying on details.  */
13397 ix86_force_to_memory (mode, operand)
13398 enum machine_mode mode;
/* Only meaningful after reload; the early-out below enforces that
   (the action taken when !reload_completed is in a missing line).  */
13402 if (!reload_completed)
/* Case 1: 64-bit with a red zone -- store into the scratch area just
   below the stack pointer; no stack adjustment is needed.  */
13404 if (TARGET_64BIT && TARGET_RED_ZONE)
13406 result = gen_rtx_MEM (mode,
13407 gen_rtx_PLUS (Pmode,
13409 GEN_INT (-RED_ZONE_SIZE)));
13410 emit_move_insn (result, operand);
/* Case 2: 64-bit without a red zone -- push the value as DImode
   (pre-decrement of the stack pointer).  */
13412 else if (TARGET_64BIT && !TARGET_RED_ZONE)
13418 operand = gen_lowpart (DImode, operand);
13422 gen_rtx_SET (VOIDmode,
13423 gen_rtx_MEM (DImode,
13424 gen_rtx_PRE_DEC (DImode,
13425 stack_pointer_rtx)),
13431 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* Case 3 (32-bit DImode): split into two SImode halves and push each
   half separately.  */
13440 split_di (&operand, 1, operands, operands + 1);
13442 gen_rtx_SET (VOIDmode,
13443 gen_rtx_MEM (SImode,
13444 gen_rtx_PRE_DEC (Pmode,
13445 stack_pointer_rtx)),
13448 gen_rtx_SET (VOIDmode,
13449 gen_rtx_MEM (SImode,
13450 gen_rtx_PRE_DEC (Pmode,
13451 stack_pointer_rtx)),
13456 /* It is better to store HImodes as SImodes. */
13457 if (!TARGET_PARTIAL_REG_STALL)
13458 operand = gen_lowpart (SImode, operand);
13462 gen_rtx_SET (VOIDmode,
13463 gen_rtx_MEM (GET_MODE (operand),
13464 gen_rtx_PRE_DEC (SImode,
13465 stack_pointer_rtx)),
/* The result is the freshly pushed slot at the (decremented) SP.  */
13471 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13476 /* Free operand from the memory. */
/* NOTE(review): gappy extract -- the size variable declaration, the
   size assignments for each branch, the final GEN_INT (size) argument
   and closing braces are in missing lines.  Pairs with
   ix86_force_to_memory above: pops the temporary it pushed.  In the
   red-zone case nothing was pushed, hence the guard below.  */
13478 ix86_free_from_memory (mode)
13479 enum machine_mode mode;
13481 if (!TARGET_64BIT || !TARGET_RED_ZONE)
/* Select how many bytes to deallocate; DImode (or any 64-bit push)
   takes 8, HImode stored as HImode takes 2, otherwise 4 -- the actual
   assignments are in the missing lines.  */
13485 if (mode == DImode || TARGET_64BIT)
13487 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13491 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13492 to pop or add instruction if registers are available. */
13493 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13494 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13499 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13500 QImode must go into class Q_REGS.
13501 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
13502 movdf to do mem-to-mem moves through integer regs. */
/* NOTE(review): gappy extract -- the return type, the `rtx x'
   parameter declaration, braces and most of the `return' statements
   (e.g. the classes returned for the SSE/constant cases and the final
   fallthrough) are in missing lines.  */
13504 ix86_preferred_reload_class (x, class)
13506 enum reg_class class;
/* Non-VOIDmode CONST_DOUBLE == a floating constant.  */
13508 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13510 /* SSE can't load any constant directly yet. */
13511 if (SSE_CLASS_P (class))
13513 /* Floats can load 0 and 1. */
13514 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13516 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13517 if (MAYBE_SSE_CLASS_P (class))
13518 return (reg_class_subset_p (class, GENERAL_REGS)
13519 ? GENERAL_REGS : FLOAT_REGS);
13523 /* General regs can load everything. */
13524 if (reg_class_subset_p (class, GENERAL_REGS))
13525 return GENERAL_REGS;
13526 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13527 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
/* MMX registers cannot be loaded with constants directly.  */
13530 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
/* QImode values must end up in a class usable with byte registers.  */
13532 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13537 /* If we are copying between general and FP registers, we need a memory
13538 location. The same is true for SSE and MMX registers.
13540 The macro can't work reliably when one of the CLASSES is class containing
13541 registers from multiple units (SSE, MMX, integer). We avoid this by never
13542 combining those units in single alternative in the machine description.
13543 Ensure that this constraint holds to avoid unexpected surprises.
13545 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13546 enforce these sanity checks. */
/* NOTE(review): gappy extract -- the return type, the `int strict'
   parameter declaration, the body of the sanity-check branch (which
   presumably aborts or returns when STRICT; verify in the full source)
   and the braces are in missing lines.  */
13548 ix86_secondary_memory_needed (class1, class2, mode, strict)
13549 enum reg_class class1, class2;
13550 enum machine_mode mode;
/* Sanity check: reject "maybe" classes that mix register units.  */
13553 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13554 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13555 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13556 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13557 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13558 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
/* Memory is needed for x87<->integer always, and for SSE<->other or
   MMX<->other except in SImode (where movd can move directly).  */
13565 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13566 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13567 && (mode) != SImode)
13568 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13569 && (mode) != SImode));
13571 /* Return the cost of moving data from a register in class CLASS1 to
13572 one in class CLASS2.
13574 It is not required that the cost always equal 2 when FROM is the same as TO;
13575 on some machines it is expensive to move between registers if they are not
13576 general registers. */
/* NOTE(review): gappy extract -- the return type, the declaration of
   `add_cost' (set in a missing line when class1 needs more hard regs
   than class2), braces, and the final fallthrough return are in
   missing lines.  */
13578 ix86_register_move_cost (mode, class1, class2)
13579 enum machine_mode mode;
13580 enum reg_class class1, class2;
13582 /* In case we require secondary memory, compute cost of the store followed
13583 by load. In case of copying from general_purpose_register we may emit
13584 multiple stores followed by single load causing memory size mismatch
13585 stall. Count this as arbitrarily high cost of 20. */
13586 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
13589 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
/* Round trip through memory: store cost + load cost (+ penalty).  */
13591 return (MEMORY_MOVE_COST (mode, class1, 0)
13592 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost)
13594 /* Moves between SSE/MMX and integer unit are expensive. */
13595 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13596 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
13597 return ix86_cost->mmxsse_to_integer;
/* Intra-unit moves: use the per-processor cost tables.  */
13598 if (MAYBE_FLOAT_CLASS_P (class1))
13599 return ix86_cost->fp_move;
13600 if (MAYBE_SSE_CLASS_P (class1))
13601 return ix86_cost->sse_move;
13602 if (MAYBE_MMX_CLASS_P (class1))
13603 return ix86_cost->mmx_move;
13607 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* NOTE(review): gappy extract -- the return type, the `int regno'
   parameter declaration, braces and a few intermediate return
   statements are in missing lines.  */
13609 ix86_hard_regno_mode_ok (regno, mode)
13611 enum machine_mode mode;
13613 /* Flags and only flags can only hold CCmode values. */
13614 if (CC_REGNO_P (regno))
13615 return GET_MODE_CLASS (mode) == MODE_CC;
/* No other register may hold CC/RANDOM/PARTIAL_INT modes.  */
13616 if (GET_MODE_CLASS (mode) == MODE_CC
13617 || GET_MODE_CLASS (mode) == MODE_RANDOM
13618 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
/* Per-unit validity checks: x87, SSE, MMX each have their own set of
   acceptable modes.  */
13620 if (FP_REGNO_P (regno))
13621 return VALID_FP_MODE_P (mode);
13622 if (SSE_REGNO_P (regno))
13623 return VALID_SSE_REG_MODE (mode);
13624 if (MMX_REGNO_P (regno))
13625 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
13626 /* We handle both integer and floats in the general purpose registers.
13627 In future we should be able to handle vector modes as well. */
13628 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13630 /* Take care for QImode values - they can be in non-QI regs, but then
13631 they do cause partial register stalls. */
13632 if (regno < 4 || mode != QImode || TARGET_64BIT)
/* Allow QImode in non-QI regs only when stalls don't matter (during
   reload) or the target doesn't suffer partial-register stalls.  */
13634 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13637 /* Return the cost of moving data of mode M between a
13638 register and memory. A value of 2 is the default; this cost is
13639 relative to those in `REGISTER_MOVE_COST'.
13641 If moving between registers and memory is more expensive than
13642 between two registers, you should define this macro to express the
13643 relative cost.
13645 Model also increased moving costs of QImode registers in non
13646 Q_REGS classes. */
/* NOTE(review): gappy extract -- the return type, `int in' parameter
   declaration, the `index' computations and switch-case labels for the
   FP/SSE/MMX tables, braces and the default returns are in missing
   lines.  */
13649 ix86_memory_move_cost (mode, class, in)
13650 enum machine_mode mode;
13651 enum reg_class class;
/* x87 classes: index the fp_load/fp_store cost tables by size.  */
13654 if (FLOAT_CLASS_P (class))
13672 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE classes: sse_load/sse_store tables, indexed by mode size.  */
13674 if (SSE_CLASS_P (class))
13677 switch (GET_MODE_SIZE (mode))
13691 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
/* MMX classes: mmx_load/mmx_store tables.  */
13693 if (MMX_CLASS_P (class))
13696 switch (GET_MODE_SIZE (mode))
13707 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer classes, dispatched on mode size (1/2/4 bytes).  */
13709 switch (GET_MODE_SIZE (mode))
/* Byte loads into non-Q classes go through movzbl, which costs more;
   byte stores into non-Q classes carry a +4 penalty.  */
13713 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13714 : ix86_cost->movzbl_load);
13716 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13717 : ix86_cost->int_store[0] + 4);
13720 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13722 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
13723 if (mode == TFmode)
13725 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
13726 * (int) GET_MODE_SIZE (mode) / 4);
/* Emit an SVR3-style constructor call: push the constructor symbol's
   address so the init-section epilogue can invoke it.  Only built when
   the target uses DO_GLOBAL_CTORS_BODY with an init section.  */
13730 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
/* NOTE(review): gappy extract -- the static return type, the `rtx
   symbol' parameter declaration, braces and the function's closing
   lines (plus the matching #endif) are in missing lines.  */
13732 ix86_svr3_asm_out_constructor (symbol, priority)
13734 int priority ATTRIBUTE_UNUSED;
13737 fputs ("\tpushl $", asm_out_file);
13738 assemble_name (asm_out_file, XSTR (symbol, 0));
13739 fputc ('\n', asm_out_file);
/* Counter used to generate unique local labels (LPC$n / L<n>$lz) for
   Mach-O (Darwin) lazy-binding stubs.  */
13745 static int current_machopic_label_num;
13747 /* Given a symbol name and its associated stub, write out the
13748 definition of the stub. */
/* NOTE(review): gappy extract -- the return type, `FILE *file'
   declaration, braces, and the conditions selecting PIC vs non-PIC
   output (presumably MACHOPIC_PURE/indirect tests; confirm in the full
   source) are in missing lines.  */
13751 machopic_output_stub (file, symb, stub)
13753 const char *symb, *stub;
13755 unsigned int length;
13756 char *binder_name, *symbol_name, lazy_ptr_name[32];
13757 int label = ++current_machopic_label_num;
13759 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
13760 symb = (*targetm.strip_name_encoding) (symb);
/* Build the binder and symbol names in stack buffers (+32 slack for
   the decoration added by the GEN_* macros).  */
13762 length = strlen (stub);
13763 binder_name = alloca (length + 32);
13764 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
13766 length = strlen (symb);
13767 symbol_name = alloca (length + 32);
13768 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
13770 sprintf (lazy_ptr_name, "L%d$lz", label);
/* Pick the stub section (PIC vs non-PIC variants).  */
13773 machopic_picsymbol_stub_section ();
13775 machopic_symbol_stub_section ();
/* Stub body: jump through the lazy pointer; the PIC flavour computes
   the pointer address relative to a fetched PC (LPC$n label).  */
13777 fprintf (file, "%s:\n", stub);
13778 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
13782 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
13783 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
13784 fprintf (file, "\tjmp %%edx\n");
13787 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
/* Binder: pushes the lazy pointer address and enters dyld's
   stub-binding helper to resolve the symbol on first use.  */
13789 fprintf (file, "%s:\n", binder_name);
13793 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
13794 fprintf (file, "\tpushl %%eax\n");
13797 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
13799 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
/* Lazy pointer: initially points at the binder; dyld patches it to the
   real symbol after binding.  */
13801 machopic_lazy_symbol_ptr_section ();
13802 fprintf (file, "%s:\n", lazy_ptr_name);
13803 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
13804 fprintf (file, "\t.long %s\n", binder_name);
13806 #endif /* TARGET_MACHO */
13808 /* Order the registers for register allocator. */
/* NOTE(review): gappy extract -- the return type, the declarations of
   `i' and `pos', and braces are in missing lines.  Fills
   reg_alloc_order[] so the allocator prefers call-clobbered GPRs
   first, then the FP/SSE/MMX units in TARGET_SSE_MATH-dependent
   order.  */
13811 x86_order_regs_for_local_alloc ()
13816 /* First allocate the local general purpose registers. */
13817 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13818 if (GENERAL_REGNO_P (i) && call_used_regs[i])
13819 reg_alloc_order [pos++] = i;
13821 /* Global general purpose registers. */
13822 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13823 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
13824 reg_alloc_order [pos++] = i;
13826 /* x87 registers come first in case we are doing FP math
13827 using them. */
13828 if (!TARGET_SSE_MATH)
13829 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13830 reg_alloc_order [pos++] = i;
13832 /* SSE registers. */
13833 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
13834 reg_alloc_order [pos++] = i;
13835 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
13836 reg_alloc_order [pos++] = i;
13838 /* x87 registers. */
13839 if (TARGET_SSE_MATH)
13840 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13841 reg_alloc_order [pos++] = i;
13843 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
13844 reg_alloc_order [pos++] = i;
13846 /* Initialize the rest of array as we do not allocate some registers
13847 at all. */
13848 while (pos < FIRST_PSEUDO_REGISTER)
13849 reg_alloc_order [pos++] = 0;
/* Emit the assembly for a "this"-adjusting MI thunk: add DELTA to the
   incoming `this' pointer, then tail-jump to FUNCTION.
   NOTE(review): gappy extract -- the return type, the FILE*/delta/
   function parameter declarations, braces, and the TARGET_64BIT /
   flag_pic branch structure are in missing lines; the three jmp
   variants below belong to different (64-bit, 32-bit PIC, 32-bit
   non-PIC) paths.  */
13853 x86_output_mi_thunk (file, delta, function)
/* Walk the prototype to learn how arguments are passed (regparm).  */
13861 if (ix86_regparm > 0)
13862 parm = TYPE_ARG_TYPES (TREE_TYPE (function));
13865 for (; parm; parm = TREE_CHAIN (parm))
13866 if (TREE_VALUE (parm) == void_type_node)
13869 xops[0] = GEN_INT (delta);
/* 64-bit: `this' is in the first or second integer parameter register
   depending on whether the return value is passed via hidden pointer.  */
13872 int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
13873 xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
13874 output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
/* 64-bit PIC tail call through the GOT.  */
13877 fprintf (file, "\tjmp *");
13878 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13879 fprintf (file, "@GOTPCREL(%%rip)\n");
13883 fprintf (file, "\tjmp ");
13884 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13885 fprintf (file, "\n");
/* 32-bit: `this' is either in a register (regparm) or on the stack;
   the extra 4-byte offset accounts for the hidden aggregate-return
   pointer slot.  */
13891 xops[1] = gen_rtx_REG (SImode, 0);
13892 else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
13893 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
13895 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
13896 output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);
/* 32-bit PIC: materialize the GOT pointer in %ebx (saving it on the
   stack if %ebx is a parameter register), load FUNCTION's address into
   %ecx via the GOT, then jump through %ecx.  */
13900 xops[0] = pic_offset_table_rtx;
13901 xops[1] = gen_label_rtx ();
13902 xops[2] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
13904 if (ix86_regparm > 2)
13906 output_asm_insn ("push{l}\t%0", xops);
13907 output_asm_insn ("call\t%P1", xops);
13908 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
13909 output_asm_insn ("pop{l}\t%0", xops);
13911 ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
13912 xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
13914 ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
13915 asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
13916 asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
/* 32-bit non-PIC: plain direct tail jump.  */
13920 fprintf (file, "\tjmp ");
13921 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13922 fprintf (file, "\n");
/* Compute the alignment (in bits) to give FIELD, given the alignment
   COMPUTED by the generic code.  Caps scalar/complex-int and double
   fields at 32 bits for the traditional ia32 ABI.
   NOTE(review): gappy extract -- the return type, the field/computed
   parameter declarations, braces and the early/final `return computed'
   statements are in missing lines.  */
13928 x86_field_alignment (field, computed)
13932 enum machine_mode mode;
13933 tree type = TREE_TYPE (field);
/* x86-64 and -malign-double keep the natural alignment.  */
13935 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
/* For arrays, classify by the innermost element type.  */
13937 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
13938 ? get_inner_array_type (type) : type);
13939 if (mode == DFmode || mode == DCmode
13940 || GET_MODE_CLASS (mode) == MODE_INT
13941 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
13942 return MIN (32, computed);
13946 /* Implement machine specific optimizations.
13947 At the moment we implement single transformation: AMD Athlon works faster
13948 when RET is not destination of conditional jump or directly preceded
13949 by other jump instruction. We avoid the penalty by inserting NOP just
13950 before the RET instructions in such cases. */
/* NOTE(review): gappy extract -- the return type, the declarations of
   `e', `ret', `prev', braces, `continue's and the statements setting
   `insert = true' are in missing lines.  */
13952 x86_machine_dependent_reorg (first)
13953 rtx first ATTRIBUTE_UNUSED;
/* Only worthwhile on Athlon, and never when optimizing for size.  */
13957 if (!TARGET_ATHLON || !optimize || optimize_size)
/* Inspect every predecessor of the exit block, i.e. every block that
   ends the function.  */
13959 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
13961 basic_block bb = e->src;
13964 bool insert = false;
/* Only pad returns in hot blocks.  */
13966 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
/* Case 1: the return is a jump target (label immediately before it
   with a non-fallthru incoming edge of nonzero frequency).  */
13968 prev = prev_nonnote_insn (ret);
13969 if (prev && GET_CODE (prev) == CODE_LABEL)
13972 for (e = bb->pred; e; e = e->pred_next)
13973 if (EDGE_FREQUENCY (e) && e->src->index > 0
13974 && !(e->flags & EDGE_FALLTHRU))
/* Case 2: the return directly follows a conditional jump.  */
13979 prev = prev_real_insn (ret);
13980 if (prev && GET_CODE (prev) == JUMP_INSN
13981 && any_condjump_p (prev))
/* Pad with a NOP to break the jump/ret adjacency penalty.  */
13985 emit_insn_before (gen_nop (), ret);
13989 #include "gt-i386.h"