1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
28 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "basic-block.h"
44 #include "target-def.h"
45 #include "langhooks.h"
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
51 /* Processor costs (relative to an add) */
53 struct processor_costs size_cost = { /* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of storing fp registers */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
88 /* Processor costs (relative to an add) */
90 struct processor_costs i386_cost = { /* 386 specific costs */
91 1, /* cost of an add instruction */
92 1, /* cost of a lea instruction */
93 3, /* variable shift costs */
94 2, /* constant shift costs */
95 6, /* cost of starting a multiply */
96 1, /* cost of multiply per each bit set */
97 23, /* cost of a divide/mod */
98 3, /* cost of movsx */
99 2, /* cost of movzx */
100 15, /* "large" insn */
102 4, /* cost for loading QImode using movzbl */
103 {2, 4, 2}, /* cost of loading integer registers
104 in QImode, HImode and SImode.
105 Relative to reg-reg move (2). */
106 {2, 4, 2}, /* cost of storing integer registers */
107 2, /* cost of reg,reg fld/fst */
108 {8, 8, 8}, /* cost of loading fp registers
109 in SFmode, DFmode and XFmode */
110 {8, 8, 8}, /* cost of storing fp registers */
111 2, /* cost of moving MMX register */
112 {4, 8}, /* cost of loading MMX registers
113 in SImode and DImode */
114 {4, 8}, /* cost of storing MMX registers
115 in SImode and DImode */
116 2, /* cost of moving SSE register */
117 {4, 8, 16}, /* cost of loading SSE registers
118 in SImode, DImode and TImode */
119 {4, 8, 16}, /* cost of storing SSE registers
120 in SImode, DImode and TImode */
121 3, /* MMX or SSE register to integer */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
127 struct processor_costs i486_cost = { /* 486 specific costs */
128 1, /* cost of an add instruction */
129 1, /* cost of a lea instruction */
130 3, /* variable shift costs */
131 2, /* constant shift costs */
132 12, /* cost of starting a multiply */
133 1, /* cost of multiply per each bit set */
134 40, /* cost of a divide/mod */
135 3, /* cost of movsx */
136 2, /* cost of movzx */
137 15, /* "large" insn */
139 4, /* cost for loading QImode using movzbl */
140 {2, 4, 2}, /* cost of loading integer registers
141 in QImode, HImode and SImode.
142 Relative to reg-reg move (2). */
143 {2, 4, 2}, /* cost of storing integer registers */
144 2, /* cost of reg,reg fld/fst */
145 {8, 8, 8}, /* cost of loading fp registers
146 in SFmode, DFmode and XFmode */
147 {8, 8, 8}, /* cost of storing fp registers */
148 2, /* cost of moving MMX register */
149 {4, 8}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {4, 8}, /* cost of storing MMX registers
152 in SImode and DImode */
153 2, /* cost of moving SSE register */
154 {4, 8, 16}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {4, 8, 16}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of prefetch block */
160 0, /* number of parallel prefetches */
164 struct processor_costs pentium_cost = {
165 1, /* cost of an add instruction */
166 1, /* cost of a lea instruction */
167 4, /* variable shift costs */
168 1, /* constant shift costs */
169 11, /* cost of starting a multiply */
170 0, /* cost of multiply per each bit set */
171 25, /* cost of a divide/mod */
172 3, /* cost of movsx */
173 2, /* cost of movzx */
174 8, /* "large" insn */
176 6, /* cost for loading QImode using movzbl */
177 {2, 4, 2}, /* cost of loading integer registers
178 in QImode, HImode and SImode.
179 Relative to reg-reg move (2). */
180 {2, 4, 2}, /* cost of storing integer registers */
181 2, /* cost of reg,reg fld/fst */
182 {2, 2, 6}, /* cost of loading fp registers
183 in SFmode, DFmode and XFmode */
184 {4, 4, 6}, /* cost of storing fp registers */
185 8, /* cost of moving MMX register */
186 {8, 8}, /* cost of loading MMX registers
187 in SImode and DImode */
188 {8, 8}, /* cost of storing MMX registers
189 in SImode and DImode */
190 2, /* cost of moving SSE register */
191 {4, 8, 16}, /* cost of loading SSE registers
192 in SImode, DImode and TImode */
193 {4, 8, 16}, /* cost of storing SSE registers
194 in SImode, DImode and TImode */
195 3, /* MMX or SSE register to integer */
196 0, /* size of prefetch block */
197 0, /* number of parallel prefetches */
201 struct processor_costs pentiumpro_cost = {
202 1, /* cost of an add instruction */
203 1, /* cost of a lea instruction */
204 1, /* variable shift costs */
205 1, /* constant shift costs */
206 4, /* cost of starting a multiply */
207 0, /* cost of multiply per each bit set */
208 17, /* cost of a divide/mod */
209 1, /* cost of movsx */
210 1, /* cost of movzx */
211 8, /* "large" insn */
213 2, /* cost for loading QImode using movzbl */
214 {4, 4, 4}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 2, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {2, 2, 6}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
221 {4, 4, 6}, /* cost of storing fp registers */
222 2, /* cost of moving MMX register */
223 {2, 2}, /* cost of loading MMX registers
224 in SImode and DImode */
225 {2, 2}, /* cost of storing MMX registers
226 in SImode and DImode */
227 2, /* cost of moving SSE register */
228 {2, 2, 8}, /* cost of loading SSE registers
229 in SImode, DImode and TImode */
230 {2, 2, 8}, /* cost of storing SSE registers
231 in SImode, DImode and TImode */
232 3, /* MMX or SSE register to integer */
233 32, /* size of prefetch block */
234 6, /* number of parallel prefetches */
238 struct processor_costs k6_cost = {
239 1, /* cost of an add instruction */
240 2, /* cost of a lea instruction */
241 1, /* variable shift costs */
242 1, /* constant shift costs */
243 3, /* cost of starting a multiply */
244 0, /* cost of multiply per each bit set */
245 18, /* cost of a divide/mod */
246 2, /* cost of movsx */
247 2, /* cost of movzx */
248 8, /* "large" insn */
250 3, /* cost for loading QImode using movzbl */
251 {4, 5, 4}, /* cost of loading integer registers
252 in QImode, HImode and SImode.
253 Relative to reg-reg move (2). */
254 {2, 3, 2}, /* cost of storing integer registers */
255 4, /* cost of reg,reg fld/fst */
256 {6, 6, 6}, /* cost of loading fp registers
257 in SFmode, DFmode and XFmode */
258 {4, 4, 4}, /* cost of storing fp registers */
259 2, /* cost of moving MMX register */
260 {2, 2}, /* cost of loading MMX registers
261 in SImode and DImode */
262 {2, 2}, /* cost of storing MMX registers
263 in SImode and DImode */
264 2, /* cost of moving SSE register */
265 {2, 2, 8}, /* cost of loading SSE registers
266 in SImode, DImode and TImode */
267 {2, 2, 8}, /* cost of storing SSE registers
268 in SImode, DImode and TImode */
269 6, /* MMX or SSE register to integer */
270 32, /* size of prefetch block */
271 1, /* number of parallel prefetches */
275 struct processor_costs athlon_cost = {
276 1, /* cost of an add instruction */
277 2, /* cost of a lea instruction */
278 1, /* variable shift costs */
279 1, /* constant shift costs */
280 5, /* cost of starting a multiply */
281 0, /* cost of multiply per each bit set */
282 42, /* cost of a divide/mod */
283 1, /* cost of movsx */
284 1, /* cost of movzx */
285 8, /* "large" insn */
287 4, /* cost for loading QImode using movzbl */
288 {4, 5, 4}, /* cost of loading integer registers
289 in QImode, HImode and SImode.
290 Relative to reg-reg move (2). */
291 {2, 3, 2}, /* cost of storing integer registers */
292 4, /* cost of reg,reg fld/fst */
293 {6, 6, 20}, /* cost of loading fp registers
294 in SFmode, DFmode and XFmode */
295 {4, 4, 16}, /* cost of storing fp registers */
296 2, /* cost of moving MMX register */
297 {2, 2}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {2, 2}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {2, 2, 8}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {2, 2, 8}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
306 6, /* MMX or SSE register to integer */
307 64, /* size of prefetch block */
308 6, /* number of parallel prefetches */
312 struct processor_costs pentium4_cost = {
313 1, /* cost of an add instruction */
314 1, /* cost of a lea instruction */
315 8, /* variable shift costs */
316 8, /* constant shift costs */
317 30, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 112, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 16, /* "large" insn */
324 2, /* cost for loading QImode using movzbl */
325 {4, 5, 4}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {2, 3, 2}, /* cost of storing integer registers */
329 2, /* cost of reg,reg fld/fst */
330 {2, 2, 6}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {4, 4, 6}, /* cost of storing fp registers */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 12, /* cost of moving SSE register */
339 {12, 12, 12}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 10, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
/* Cost table currently in effect for rtx-cost queries; defaults to the
   Pentium table.  Presumably reassigned when the -mcpu option is
   processed -- confirm against override_options.  */
348 const struct processor_costs *ix86_cost = &pentium_cost;
350 /* Processor feature/optimization bitmasks. */
351 #define m_386 (1<<PROCESSOR_I386)
352 #define m_486 (1<<PROCESSOR_I486)
353 #define m_PENT (1<<PROCESSOR_PENTIUM)
354 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
355 #define m_K6 (1<<PROCESSOR_K6)
356 #define m_ATHLON (1<<PROCESSOR_ATHLON)
357 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
359 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
360 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
361 const int x86_zero_extend_with_and = m_486 | m_PENT;
362 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
363 const int x86_double_with_add = ~m_386;
364 const int x86_use_bit_test = m_386;
365 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
366 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
367 const int x86_3dnow_a = m_ATHLON;
368 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
369 const int x86_branch_hints = m_PENT4;
370 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
371 const int x86_partial_reg_stall = m_PPRO;
372 const int x86_use_loop = m_K6;
373 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
374 const int x86_use_mov0 = m_K6;
375 const int x86_use_cltd = ~(m_PENT | m_K6);
376 const int x86_read_modify_write = ~m_PENT;
377 const int x86_read_modify = ~(m_PENT | m_PPRO);
378 const int x86_split_long_moves = m_PPRO;
379 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
380 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
381 const int x86_single_stringop = m_386 | m_PENT4;
382 const int x86_qimode_math = ~(0);
383 const int x86_promote_qi_regs = 0;
384 const int x86_himode_math = ~(m_PPRO);
385 const int x86_promote_hi_regs = m_PPRO;
386 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
387 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
388 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
389 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
390 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
391 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
392 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
393 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
394 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
395 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
396 const int x86_decompose_lea = m_PENT4;
397 const int x86_shift1 = ~m_486;
398 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
400 /* In case the average insn count for a single function invocation is
401 lower than this constant, emit fast (but longer) prologue and
403 #define FAST_PROLOGUE_INSN_COUNT 30
405 /* Set by prologue expander and used by epilogue expander to determine
407 static int use_fast_prologue_epilogue;
409 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
410 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
411 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
412 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
414 /* Array of the smallest class containing reg number REGNO, indexed by
415 REGNO. Used by REGNO_REG_CLASS in i386.h. */
417 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
420 AREG, DREG, CREG, BREG,
422 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
424 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
425 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
428 /* flags, fpsr, dirflag, frame */
429 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
430 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
432 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
434 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
435 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
436 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
440 /* The "default" register map used in 32bit mode. */
442 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
444 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
445 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
446 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
447 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
448 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
449 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
450 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* GCC hard-register numbers used to pass integer arguments in the
   x86-64 ABI, in argument order.  */
453 static int const x86_64_int_parameter_registers[6] =
455 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
456 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
/* GCC hard-register numbers used to return integer values.  (Regno 1
   is RDX, per the parameter-register table above; the old comment
   saying RDI was wrong.)  */
459 static int const x86_64_int_return_registers[4] =
461 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
464 /* The "default" register map used in 64bit mode. */
465 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
467 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
468 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
469 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
470 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
471 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
472 8,9,10,11,12,13,14,15, /* extended integer registers */
473 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
476 /* Define the register numbers to be used in Dwarf debugging information.
477 The SVR4 reference port C compiler uses the following register numbers
478 in its Dwarf output code:
479 0 for %eax (gcc regno = 0)
480 1 for %ecx (gcc regno = 2)
481 2 for %edx (gcc regno = 1)
482 3 for %ebx (gcc regno = 3)
483 4 for %esp (gcc regno = 7)
484 5 for %ebp (gcc regno = 6)
485 6 for %esi (gcc regno = 4)
486 7 for %edi (gcc regno = 5)
487 The following three DWARF register numbers are never generated by
488 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
489 believes these numbers have these meanings.
490 8 for %eip (no gcc equivalent)
491 9 for %eflags (gcc regno = 17)
492 10 for %trapno (no gcc equivalent)
493 It is not at all clear how we should number the FP stack registers
494 for the x86 architecture. If the version of SDB on x86/svr4 were
495 a bit less brain dead with respect to floating-point then we would
496 have a precedent to follow with respect to DWARF register numbers
497 for x86 FP registers, but the SDB on x86/svr4 is so completely
498 broken with respect to FP registers that it is hardly worth thinking
499 of it as something to strive for compatibility with.
500 The version of x86/svr4 SDB I have at the moment does (partially)
501 seem to believe that DWARF register number 11 is associated with
502 the x86 register %st(0), but that's about all. Higher DWARF
503 register numbers don't seem to be associated with anything in
504 particular, and even for DWARF regno 11, SDB only seems to under-
505 stand that it should say that a variable lives in %st(0) (when
506 asked via an `=' command) if we said it was in DWARF regno 11,
507 but SDB still prints garbage when asked for the value of the
508 variable in question (via a `/' command).
509 (Also note that the labels SDB prints for various FP stack regs
510 when doing an `x' command are all wrong.)
511 Note that these problems generally don't affect the native SVR4
512 C compiler because it doesn't allow the use of -O with -g and
513 because when it is *not* optimizing, it allocates a memory
514 location for each floating-point variable, and the memory
515 location is what gets described in the DWARF AT_location
516 attribute for the variable in question.
517 Regardless of the severe mental illness of the x86/svr4 SDB, we
518 do something sensible here and we use the following DWARF
519 register numbers. Note that these are all stack-top-relative
521 11 for %st(0) (gcc regno = 8)
522 12 for %st(1) (gcc regno = 9)
523 13 for %st(2) (gcc regno = 10)
524 14 for %st(3) (gcc regno = 11)
525 15 for %st(4) (gcc regno = 12)
526 16 for %st(5) (gcc regno = 13)
527 17 for %st(6) (gcc regno = 14)
528 18 for %st(7) (gcc regno = 15)
530 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
532 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
533 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
534 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
535 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
536 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
537 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
538 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
541 /* Test and compare insns in i386.md store the information needed to
542 generate branch and scc insns here. */
544 rtx ix86_compare_op0 = NULL_RTX;
545 rtx ix86_compare_op1 = NULL_RTX;
547 /* The encoding characters for the four TLS models present in ELF. */
549 static char const tls_model_chars[] = " GLil";
551 #define MAX_386_STACK_LOCALS 3
552 /* Size of the register save area. */
553 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
555 /* Define the structure for the machine field in struct function. */
556 struct machine_function GTY(())
558 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
559 const char *some_ld_name;
560 int save_varrargs_registers;
561 int accesses_prev_frame;
564 #define ix86_stack_locals (cfun->machine->stack_locals)
565 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
567 /* Structure describing stack frame layout.
568 Stack grows downward:
574 saved frame pointer if frame_pointer_needed
575 <- HARD_FRAME_POINTER
581 > to_allocate <- FRAME_POINTER
593 int outgoing_arguments_size;
596 HOST_WIDE_INT to_allocate;
597 /* The offsets relative to ARG_POINTER. */
598 HOST_WIDE_INT frame_pointer_offset;
599 HOST_WIDE_INT hard_frame_pointer_offset;
600 HOST_WIDE_INT stack_pointer_offset;
603 /* Used to enable/disable debugging features. */
604 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
605 /* Code model option as passed by user. */
606 const char *ix86_cmodel_string;
608 enum cmodel ix86_cmodel;
610 const char *ix86_asm_string;
611 enum asm_dialect ix86_asm_dialect = ASM_ATT;
613 const char *ix86_tls_dialect_string;
614 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
616 /* Which unit we are generating floating point math for. */
617 enum fpmath_unit ix86_fpmath;
619 /* Which cpu are we scheduling for. */
620 enum processor_type ix86_cpu;
621 /* Which instruction set architecture to use. */
622 enum processor_type ix86_arch;
624 /* Strings to hold which cpu and instruction set architecture to use. */
625 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
626 const char *ix86_arch_string; /* for -march=<xxx> */
627 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
629 /* # of registers to use to pass arguments. */
630 const char *ix86_regparm_string;
632 /* true if sse prefetch instruction is not NOOP. */
633 int x86_prefetch_sse;
635 /* ix86_regparm_string as a number */
638 /* Alignment to use for loops and jumps: */
640 /* Power of two alignment for loops. */
641 const char *ix86_align_loops_string;
643 /* Power of two alignment for non-loop jumps. */
644 const char *ix86_align_jumps_string;
646 /* Power of two alignment for stack boundary in bytes. */
647 const char *ix86_preferred_stack_boundary_string;
649 /* Preferred alignment for stack boundary in bits. */
650 int ix86_preferred_stack_boundary;
652 /* Values 1-5: see jump.c */
653 int ix86_branch_cost;
654 const char *ix86_branch_cost_string;
656 /* Power of two alignment for functions. */
657 const char *ix86_align_funcs_string;
659 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
660 static char internal_label_prefix[16];
661 static int internal_label_prefix_len;
663 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
664 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
665 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
666 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
668 static const char *get_some_local_dynamic_name PARAMS ((void));
669 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
670 static rtx maybe_get_pool_constant PARAMS ((rtx));
671 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
672 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
674 static rtx get_thread_pointer PARAMS ((void));
675 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
676 static rtx gen_push PARAMS ((rtx));
677 static int memory_address_length PARAMS ((rtx addr));
678 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
679 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
680 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
681 static void ix86_dump_ppro_packet PARAMS ((FILE *));
682 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
683 static struct machine_function * ix86_init_machine_status PARAMS ((void));
684 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
685 static int ix86_nsaved_regs PARAMS ((void));
686 static void ix86_emit_save_regs PARAMS ((void));
687 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
688 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
689 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
690 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
691 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
692 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
693 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
694 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
695 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
696 static int ix86_issue_rate PARAMS ((void));
697 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
698 static void ix86_sched_init PARAMS ((FILE *, int, int));
699 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
700 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
701 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
702 static int ia32_multipass_dfa_lookahead PARAMS ((void));
703 static void ix86_init_mmx_sse_builtins PARAMS ((void));
707 rtx base, index, disp;
711 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
713 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
714 static const char *ix86_strip_name_encoding PARAMS ((const char *))
717 struct builtin_description;
718 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
720 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
722 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
723 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
724 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
725 static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
727 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
728 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
729 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
730 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
734 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
736 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
737 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
738 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
739 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
740 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
741 static int ix86_save_reg PARAMS ((unsigned int, int));
742 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
743 static int ix86_comp_type_attributes PARAMS ((tree, tree));
744 const struct attribute_spec ix86_attribute_table[];
745 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
746 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
747 static int ix86_value_regno PARAMS ((enum machine_mode));
749 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
750 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
753 /* Register class used for passing given 64bit part of the argument.
754 These represent classes as documented by the PS ABI, with the exception
755 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
756 use SF or DFmode move instead of DImode to avoid reformatting penalties.
758 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
759 whenever possible (upper half does contain padding).
761 enum x86_64_reg_class
764 X86_64_INTEGER_CLASS,
765 X86_64_INTEGERSI_CLASS,
774 static const char * const x86_64_reg_class_name[] =
775 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
777 #define MAX_CLASSES 4
778 static int classify_argument PARAMS ((enum machine_mode, tree,
779 enum x86_64_reg_class [MAX_CLASSES],
781 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
783 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
785 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
786 enum x86_64_reg_class));
788 /* Initialize the GCC target structure. */
789 #undef TARGET_ATTRIBUTE_TABLE
790 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
791 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
792 # undef TARGET_MERGE_DECL_ATTRIBUTES
793 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
796 #undef TARGET_COMP_TYPE_ATTRIBUTES
797 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
799 #undef TARGET_INIT_BUILTINS
800 #define TARGET_INIT_BUILTINS ix86_init_builtins
802 #undef TARGET_EXPAND_BUILTIN
803 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
805 #undef TARGET_ASM_FUNCTION_EPILOGUE
806 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
808 #undef TARGET_ASM_OPEN_PAREN
809 #define TARGET_ASM_OPEN_PAREN ""
810 #undef TARGET_ASM_CLOSE_PAREN
811 #define TARGET_ASM_CLOSE_PAREN ""
813 #undef TARGET_ASM_ALIGNED_HI_OP
814 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
815 #undef TARGET_ASM_ALIGNED_SI_OP
816 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
818 #undef TARGET_ASM_ALIGNED_DI_OP
819 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
822 #undef TARGET_ASM_UNALIGNED_HI_OP
823 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
824 #undef TARGET_ASM_UNALIGNED_SI_OP
825 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
826 #undef TARGET_ASM_UNALIGNED_DI_OP
827 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
829 #undef TARGET_SCHED_ADJUST_COST
830 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
831 #undef TARGET_SCHED_ISSUE_RATE
832 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
833 #undef TARGET_SCHED_VARIABLE_ISSUE
834 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
835 #undef TARGET_SCHED_INIT
836 #define TARGET_SCHED_INIT ix86_sched_init
837 #undef TARGET_SCHED_REORDER
838 #define TARGET_SCHED_REORDER ix86_sched_reorder
839 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
840 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
841 ia32_use_dfa_pipeline_interface
842 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
843 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
844 ia32_multipass_dfa_lookahead
847 #undef TARGET_HAVE_TLS
848 #define TARGET_HAVE_TLS true
851 struct gcc_target targetm = TARGET_INITIALIZER;
853 /* Sometimes certain combinations of command options do not make
854 sense on a particular target machine. You can define a macro
855 `OVERRIDE_OPTIONS' to take account of this. This macro, if
856 defined, is executed once just after all the command options have
859 Don't use this macro to turn on various extra optimizations for
860 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
866 /* Comes from final.c -- no real reason to change it. */
867 #define MAX_CODE_ALIGN 16
871 const struct processor_costs *cost; /* Processor costs */
872 const int target_enable; /* Target flags to enable. */
873 const int target_disable; /* Target flags to disable. */
874 const int align_loop; /* Default alignments. */
875 const int align_loop_max_skip;
876 const int align_jump;
877 const int align_jump_max_skip;
878 const int align_func;
879 const int branch_cost;
881 const processor_target_table[PROCESSOR_max] =
883 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
884 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
885 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
886 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
887 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
888 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
889 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
892 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
895 const char *const name; /* processor name or nickname. */
896 const enum processor_type processor;
902 PTA_PREFETCH_SSE = 8,
907 const processor_alias_table[] =
909 {"i386", PROCESSOR_I386, 0},
910 {"i486", PROCESSOR_I486, 0},
911 {"i586", PROCESSOR_PENTIUM, 0},
912 {"pentium", PROCESSOR_PENTIUM, 0},
913 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
914 {"i686", PROCESSOR_PENTIUMPRO, 0},
915 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
916 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
917 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
918 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
919 PTA_MMX | PTA_PREFETCH_SSE},
920 {"k6", PROCESSOR_K6, PTA_MMX},
921 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
922 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
923 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
925 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
926 | PTA_3DNOW | PTA_3DNOW_A},
927 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
928 | PTA_3DNOW_A | PTA_SSE},
929 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
930 | PTA_3DNOW_A | PTA_SSE},
931 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
932 | PTA_3DNOW_A | PTA_SSE},
935 int const pta_size = ARRAY_SIZE (processor_alias_table);
937 #ifdef SUBTARGET_OVERRIDE_OPTIONS
938 SUBTARGET_OVERRIDE_OPTIONS;
941 if (!ix86_cpu_string && ix86_arch_string)
942 ix86_cpu_string = ix86_arch_string;
943 if (!ix86_cpu_string)
944 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
945 if (!ix86_arch_string)
946 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
948 if (ix86_cmodel_string != 0)
950 if (!strcmp (ix86_cmodel_string, "small"))
951 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
953 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
954 else if (!strcmp (ix86_cmodel_string, "32"))
956 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
957 ix86_cmodel = CM_KERNEL;
958 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
959 ix86_cmodel = CM_MEDIUM;
960 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
961 ix86_cmodel = CM_LARGE;
963 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
969 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
971 if (ix86_asm_string != 0)
973 if (!strcmp (ix86_asm_string, "intel"))
974 ix86_asm_dialect = ASM_INTEL;
975 else if (!strcmp (ix86_asm_string, "att"))
976 ix86_asm_dialect = ASM_ATT;
978 error ("bad value (%s) for -masm= switch", ix86_asm_string);
980 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
981 error ("code model `%s' not supported in the %s bit mode",
982 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
983 if (ix86_cmodel == CM_LARGE)
984 sorry ("code model `large' not supported yet");
985 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
986 sorry ("%i-bit mode not compiled in",
987 (target_flags & MASK_64BIT) ? 64 : 32);
989 for (i = 0; i < pta_size; i++)
990 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
992 ix86_arch = processor_alias_table[i].processor;
993 /* Default cpu tuning to the architecture. */
994 ix86_cpu = ix86_arch;
995 if (processor_alias_table[i].flags & PTA_MMX
996 && !(target_flags & MASK_MMX_SET))
997 target_flags |= MASK_MMX;
998 if (processor_alias_table[i].flags & PTA_3DNOW
999 && !(target_flags & MASK_3DNOW_SET))
1000 target_flags |= MASK_3DNOW;
1001 if (processor_alias_table[i].flags & PTA_3DNOW_A
1002 && !(target_flags & MASK_3DNOW_A_SET))
1003 target_flags |= MASK_3DNOW_A;
1004 if (processor_alias_table[i].flags & PTA_SSE
1005 && !(target_flags & MASK_SSE_SET))
1006 target_flags |= MASK_SSE;
1007 if (processor_alias_table[i].flags & PTA_SSE2
1008 && !(target_flags & MASK_SSE2_SET))
1009 target_flags |= MASK_SSE2;
1010 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1011 x86_prefetch_sse = true;
1016 error ("bad value (%s) for -march= switch", ix86_arch_string);
1018 for (i = 0; i < pta_size; i++)
1019 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1021 ix86_cpu = processor_alias_table[i].processor;
1024 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1025 x86_prefetch_sse = true;
1027 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1030 ix86_cost = &size_cost;
1032 ix86_cost = processor_target_table[ix86_cpu].cost;
1033 target_flags |= processor_target_table[ix86_cpu].target_enable;
1034 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1036 /* Arrange to set up i386_stack_locals for all functions. */
1037 init_machine_status = ix86_init_machine_status;
1039 /* Validate -mregparm= value. */
1040 if (ix86_regparm_string)
1042 i = atoi (ix86_regparm_string);
1043 if (i < 0 || i > REGPARM_MAX)
1044 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1050 ix86_regparm = REGPARM_MAX;
1052 /* If the user has provided any of the -malign-* options,
1053 warn and use that value only if -falign-* is not set.
1054 Remove this code in GCC 3.2 or later. */
1055 if (ix86_align_loops_string)
1057 warning ("-malign-loops is obsolete, use -falign-loops");
1058 if (align_loops == 0)
1060 i = atoi (ix86_align_loops_string);
1061 if (i < 0 || i > MAX_CODE_ALIGN)
1062 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1064 align_loops = 1 << i;
1068 if (ix86_align_jumps_string)
1070 warning ("-malign-jumps is obsolete, use -falign-jumps");
1071 if (align_jumps == 0)
1073 i = atoi (ix86_align_jumps_string);
1074 if (i < 0 || i > MAX_CODE_ALIGN)
1075 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1077 align_jumps = 1 << i;
1081 if (ix86_align_funcs_string)
1083 warning ("-malign-functions is obsolete, use -falign-functions");
1084 if (align_functions == 0)
1086 i = atoi (ix86_align_funcs_string);
1087 if (i < 0 || i > MAX_CODE_ALIGN)
1088 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1090 align_functions = 1 << i;
1094 /* Default align_* from the processor table. */
1095 if (align_loops == 0)
1097 align_loops = processor_target_table[ix86_cpu].align_loop;
1098 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1100 if (align_jumps == 0)
1102 align_jumps = processor_target_table[ix86_cpu].align_jump;
1103 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1105 if (align_functions == 0)
1107 align_functions = processor_target_table[ix86_cpu].align_func;
1110 /* Validate -mpreferred-stack-boundary= value, or provide default.
1111 The default of 128 bits is for Pentium III's SSE __m128, but we
1112 don't want additional code to keep the stack aligned when
1113 optimizing for code size. */
1114 ix86_preferred_stack_boundary = (optimize_size
1115 ? TARGET_64BIT ? 64 : 32
1117 if (ix86_preferred_stack_boundary_string)
1119 i = atoi (ix86_preferred_stack_boundary_string);
1120 if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
1121 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1122 TARGET_64BIT ? 3 : 2);
1124 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1127 /* Validate -mbranch-cost= value, or provide default. */
1128 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1129 if (ix86_branch_cost_string)
1131 i = atoi (ix86_branch_cost_string);
1133 error ("-mbranch-cost=%d is not between 0 and 5", i);
1135 ix86_branch_cost = i;
1138 if (ix86_tls_dialect_string)
1140 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1141 ix86_tls_dialect = TLS_DIALECT_GNU;
1142 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1143 ix86_tls_dialect = TLS_DIALECT_SUN;
1145 error ("bad value (%s) for -mtls-dialect= switch",
1146 ix86_tls_dialect_string);
1150 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1152 /* Keep nonleaf frame pointers. */
1153 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1154 flag_omit_frame_pointer = 1;
1156 /* If we're doing fast math, we don't care about comparison order
1157 wrt NaNs. This lets us use a shorter comparison sequence. */
1158 if (flag_unsafe_math_optimizations)
1159 target_flags &= ~MASK_IEEE_FP;
1161 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1162 since the insns won't need emulation. */
1163 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1164 target_flags &= ~MASK_NO_FANCY_MATH_387;
1168 if (TARGET_ALIGN_DOUBLE)
1169 error ("-malign-double makes no sense in the 64bit mode");
1171 error ("-mrtd calling convention not supported in the 64bit mode");
1172 /* Enable by default the SSE and MMX builtins. */
1173 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1174 ix86_fpmath = FPMATH_SSE;
1177 ix86_fpmath = FPMATH_387;
1179 if (ix86_fpmath_string != 0)
1181 if (! strcmp (ix86_fpmath_string, "387"))
1182 ix86_fpmath = FPMATH_387;
1183 else if (! strcmp (ix86_fpmath_string, "sse"))
1187 warning ("SSE instruction set disabled, using 387 arithmetics");
1188 ix86_fpmath = FPMATH_387;
1191 ix86_fpmath = FPMATH_SSE;
1193 else if (! strcmp (ix86_fpmath_string, "387,sse")
1194 || ! strcmp (ix86_fpmath_string, "sse,387"))
1198 warning ("SSE instruction set disabled, using 387 arithmetics");
1199 ix86_fpmath = FPMATH_387;
1201 else if (!TARGET_80387)
1203 warning ("387 instruction set disabled, using SSE arithmetics");
1204 ix86_fpmath = FPMATH_SSE;
1207 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1210 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1213 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1217 target_flags |= MASK_MMX;
1218 x86_prefetch_sse = true;
1221 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1224 target_flags |= MASK_MMX;
1225 /* If we are targetting the Athlon architecture, enable the 3Dnow/MMX
1226 extensions it adds. */
1227 if (x86_3dnow_a & (1 << ix86_arch))
1228 target_flags |= MASK_3DNOW_A;
1230 if ((x86_accumulate_outgoing_args & CPUMASK)
1231 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
1233 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1235 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1238 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1239 p = strchr (internal_label_prefix, 'X');
1240 internal_label_prefix_len = p - internal_label_prefix;
1246 optimization_options (level, size)
1248 int size ATTRIBUTE_UNUSED;
1250 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1251 make the problem with not enough registers even worse. */
1252 #ifdef INSN_SCHEDULING
1254 flag_schedule_insns = 0;
1256 if (TARGET_64BIT && optimize >= 1)
1257 flag_omit_frame_pointer = 1;
1260 flag_pcc_struct_return = 0;
1261 flag_asynchronous_unwind_tables = 1;
1264 flag_omit_frame_pointer = 0;
1267 /* Table of valid machine attributes. */
/* Table of valid machine attributes.  */
const struct attribute_spec ix86_attribute_table[] =
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  /* DLL import/export and shared-section attributes, used only by
     PE/COFF (Windows) subtargets that define this macro.  */
  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
  /* Sentinel entry terminating the table.  */
  { NULL, 0, 0, false, false, false, NULL }
1287 /* Handle a "cdecl" or "stdcall" attribute;
1288 arguments as in struct attribute_spec.handler. */
1290 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1293 tree args ATTRIBUTE_UNUSED;
1294 int flags ATTRIBUTE_UNUSED;
1297 if (TREE_CODE (*node) != FUNCTION_TYPE
1298 && TREE_CODE (*node) != METHOD_TYPE
1299 && TREE_CODE (*node) != FIELD_DECL
1300 && TREE_CODE (*node) != TYPE_DECL)
1302 warning ("`%s' attribute only applies to functions",
1303 IDENTIFIER_POINTER (name));
1304 *no_add_attrs = true;
1309 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1310 *no_add_attrs = true;
1316 /* Handle a "regparm" attribute;
1317 arguments as in struct attribute_spec.handler. */
1319 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1323 int flags ATTRIBUTE_UNUSED;
1326 if (TREE_CODE (*node) != FUNCTION_TYPE
1327 && TREE_CODE (*node) != METHOD_TYPE
1328 && TREE_CODE (*node) != FIELD_DECL
1329 && TREE_CODE (*node) != TYPE_DECL)
1331 warning ("`%s' attribute only applies to functions",
1332 IDENTIFIER_POINTER (name));
1333 *no_add_attrs = true;
1339 cst = TREE_VALUE (args);
1340 if (TREE_CODE (cst) != INTEGER_CST)
1342 warning ("`%s' attribute requires an integer constant argument",
1343 IDENTIFIER_POINTER (name));
1344 *no_add_attrs = true;
1346 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1348 warning ("argument to `%s' attribute larger than %d",
1349 IDENTIFIER_POINTER (name), REGPARM_MAX);
1350 *no_add_attrs = true;
1357 /* Return 0 if the attributes for two types are incompatible, 1 if they
1358 are compatible, and 2 if they are nearly compatible (which causes a
1359 warning to be generated). */
1362 ix86_comp_type_attributes (type1, type2)
1366 /* Check for mismatch of non-default calling convention. */
1367 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1369 if (TREE_CODE (type1) != FUNCTION_TYPE)
1372 /* Check for mismatched return types (cdecl vs stdcall). */
1373 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1374 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1379 /* Value is the number of bytes of arguments automatically
1380 popped when returning from a subroutine call.
1381 FUNDECL is the declaration node of the function (as a tree),
1382 FUNTYPE is the data type of the function (as a tree),
1383 or for a library call it is an identifier node for the subroutine name.
1384 SIZE is the number of bytes of arguments passed on the stack.
1386 On the 80386, the RTD insn may be used to pop them if the number
1387 of args is fixed, but if the number is variable then the caller
1388 must pop them all. RTD can't be used for library calls now
1389 because the library is compiled with the Unix compiler.
1390 Use of RTD is a selectable option, since it is incompatible with
1391 standard Unix calling sequences. If the option is not selected,
1392 the caller must always pop the args.
1394 The attribute stdcall is equivalent to RTD on a per module basis. */
1397 ix86_return_pops_args (fundecl, funtype, size)
1402 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1404 /* Cdecl functions override -mrtd, and never pop the stack. */
1405 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1407 /* Stdcall functions will pop the stack if not variable args. */
1408 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1412 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1413 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1414 == void_type_node)))
1418 /* Lose any fake structure return argument if it is passed on the stack. */
1419 if (aggregate_value_p (TREE_TYPE (funtype))
1422 int nregs = ix86_regparm;
1426 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));
1429 nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1433 return GET_MODE_SIZE (Pmode);
1439 /* Argument support functions. */
1441 /* Return true when register may be used to pass function parameters. */
1443 ix86_function_arg_regno_p (regno)
1448 return (regno < REGPARM_MAX
1449 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1450 if (SSE_REGNO_P (regno) && TARGET_SSE)
1452 /* RAX is used as hidden argument to va_arg functions. */
1455 for (i = 0; i < REGPARM_MAX; i++)
1456 if (regno == x86_64_int_parameter_registers[i])
1461 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1462 for a call to a function whose data type is FNTYPE.
1463 For a library call, FNTYPE is 0. */
1466 init_cumulative_args (cum, fntype, libname)
1467 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1468 tree fntype; /* tree ptr for function decl */
1469 rtx libname; /* SYMBOL_REF of library name or 0 */
1471 static CUMULATIVE_ARGS zero_cum;
1472 tree param, next_param;
1474 if (TARGET_DEBUG_ARG)
1476 fprintf (stderr, "\ninit_cumulative_args (");
1478 fprintf (stderr, "fntype code = %s, ret code = %s",
1479 tree_code_name[(int) TREE_CODE (fntype)],
1480 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1482 fprintf (stderr, "no fntype");
1485 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1490 /* Set up the number of registers to use for passing arguments. */
1491 cum->nregs = ix86_regparm;
1492 cum->sse_nregs = SSE_REGPARM_MAX;
1493 if (fntype && !TARGET_64BIT)
1495 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1498 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1500 cum->maybe_vaarg = false;
1502 /* Determine if this function has variable arguments. This is
1503 indicated by the last argument being 'void_type_mode' if there
1504 are no variable arguments. If there are variable arguments, then
1505 we won't pass anything in registers */
1509 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1510 param != 0; param = next_param)
1512 next_param = TREE_CHAIN (param);
1513 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1517 cum->maybe_vaarg = true;
1521 if ((!fntype && !libname)
1522 || (fntype && !TYPE_ARG_TYPES (fntype)))
1523 cum->maybe_vaarg = 1;
1525 if (TARGET_DEBUG_ARG)
1526 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
/* x86-64 register passing implementation.  See the x86-64 ABI for details.
   The goal of this code is to classify each 8-byte chunk of an incoming
   argument by register class and assign registers accordingly.  */
1535 /* Return the union class of CLASS1 and CLASS2.
1536 See the x86-64 PS ABI for details. */
1538 static enum x86_64_reg_class
1539 merge_classes (class1, class2)
1540 enum x86_64_reg_class class1, class2;
1542 /* Rule #1: If both classes are equal, this is the resulting class. */
1543 if (class1 == class2)
1546 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1548 if (class1 == X86_64_NO_CLASS)
1550 if (class2 == X86_64_NO_CLASS)
1553 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1554 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1555 return X86_64_MEMORY_CLASS;
1557 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1558 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1559 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1560 return X86_64_INTEGERSI_CLASS;
1561 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1562 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1563 return X86_64_INTEGER_CLASS;
1565 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1566 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1567 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1568 return X86_64_MEMORY_CLASS;
1570 /* Rule #6: Otherwise class SSE is used. */
1571 return X86_64_SSE_CLASS;
1574 /* Classify the argument of type TYPE and mode MODE.
1575 CLASSES will be filled by the register class used to pass each word
1576 of the operand. The number of words is returned. In case the parameter
1577 should be passed in memory, 0 is returned. As a special case for zero
1578 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1580 BIT_OFFSET is used internally for handling records and specifies offset
1581 of the offset in bits modulo 256 to avoid overflow cases.
1583 See the x86-64 PS ABI for details.
1587 classify_argument (mode, type, classes, bit_offset)
1588 enum machine_mode mode;
1590 enum x86_64_reg_class classes[MAX_CLASSES];
1594 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1595 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1597 if (type && AGGREGATE_TYPE_P (type))
1601 enum x86_64_reg_class subclasses[MAX_CLASSES];
1603 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1607 for (i = 0; i < words; i++)
1608 classes[i] = X86_64_NO_CLASS;
1610 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1611 signalize memory class, so handle it as special case. */
1614 classes[0] = X86_64_NO_CLASS;
1618 /* Classify each field of record and merge classes. */
1619 if (TREE_CODE (type) == RECORD_TYPE)
1621 /* For classes first merge in the field of the subclasses. */
1622 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1624 tree bases = TYPE_BINFO_BASETYPES (type);
1625 int n_bases = TREE_VEC_LENGTH (bases);
1628 for (i = 0; i < n_bases; ++i)
1630 tree binfo = TREE_VEC_ELT (bases, i);
1632 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1633 tree type = BINFO_TYPE (binfo);
1635 num = classify_argument (TYPE_MODE (type),
1637 (offset + bit_offset) % 256);
1640 for (i = 0; i < num; i++)
1642 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1644 merge_classes (subclasses[i], classes[i + pos]);
1648 /* And now merge the fields of structure. */
1649 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1651 if (TREE_CODE (field) == FIELD_DECL)
1655 /* Bitfields are always classified as integer. Handle them
1656 early, since later code would consider them to be
1657 misaligned integers. */
1658 if (DECL_BIT_FIELD (field))
1660 for (i = int_bit_position (field) / 8 / 8;
1661 i < (int_bit_position (field)
1662 + tree_low_cst (DECL_SIZE (field), 0)
1665 merge_classes (X86_64_INTEGER_CLASS,
1670 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1671 TREE_TYPE (field), subclasses,
1672 (int_bit_position (field)
1673 + bit_offset) % 256);
1676 for (i = 0; i < num; i++)
1679 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1681 merge_classes (subclasses[i], classes[i + pos]);
1687 /* Arrays are handled as small records. */
1688 else if (TREE_CODE (type) == ARRAY_TYPE)
1691 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1692 TREE_TYPE (type), subclasses, bit_offset);
1696 /* The partial classes are now full classes. */
1697 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1698 subclasses[0] = X86_64_SSE_CLASS;
1699 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1700 subclasses[0] = X86_64_INTEGER_CLASS;
1702 for (i = 0; i < words; i++)
1703 classes[i] = subclasses[i % num];
1705 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1706 else if (TREE_CODE (type) == UNION_TYPE
1707 || TREE_CODE (type) == QUAL_UNION_TYPE)
1709 /* For classes first merge in the field of the subclasses. */
1710 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1712 tree bases = TYPE_BINFO_BASETYPES (type);
1713 int n_bases = TREE_VEC_LENGTH (bases);
1716 for (i = 0; i < n_bases; ++i)
1718 tree binfo = TREE_VEC_ELT (bases, i);
1720 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1721 tree type = BINFO_TYPE (binfo);
1723 num = classify_argument (TYPE_MODE (type),
1725 (offset + (bit_offset % 64)) % 256);
1728 for (i = 0; i < num; i++)
1730 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1732 merge_classes (subclasses[i], classes[i + pos]);
1736 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1738 if (TREE_CODE (field) == FIELD_DECL)
1741 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1742 TREE_TYPE (field), subclasses,
1746 for (i = 0; i < num; i++)
1747 classes[i] = merge_classes (subclasses[i], classes[i]);
1754 /* Final merger cleanup. */
1755 for (i = 0; i < words; i++)
1757 /* If one class is MEMORY, everything should be passed in
1759 if (classes[i] == X86_64_MEMORY_CLASS)
1762 /* The X86_64_SSEUP_CLASS should be always preceded by
1763 X86_64_SSE_CLASS. */
1764 if (classes[i] == X86_64_SSEUP_CLASS
1765 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1766 classes[i] = X86_64_SSE_CLASS;
1768 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1769 if (classes[i] == X86_64_X87UP_CLASS
1770 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1771 classes[i] = X86_64_SSE_CLASS;
1776 /* Compute alignment needed. We align all types to natural boundaries with
1777 exception of XFmode that is aligned to 64bits. */
1778 if (mode != VOIDmode && mode != BLKmode)
1780 int mode_alignment = GET_MODE_BITSIZE (mode);
1783 mode_alignment = 128;
1784 else if (mode == XCmode)
1785 mode_alignment = 256;
1786 /* Misaligned fields are always returned in memory. */
1787 if (bit_offset % mode_alignment)
1791 /* Classification of atomic types. */
1801 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1802 classes[0] = X86_64_INTEGERSI_CLASS;
1804 classes[0] = X86_64_INTEGER_CLASS;
1808 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1811 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1812 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1815 if (!(bit_offset % 64))
1816 classes[0] = X86_64_SSESF_CLASS;
1818 classes[0] = X86_64_SSE_CLASS;
1821 classes[0] = X86_64_SSEDF_CLASS;
1824 classes[0] = X86_64_X87_CLASS;
1825 classes[1] = X86_64_X87UP_CLASS;
1828 classes[0] = X86_64_X87_CLASS;
1829 classes[1] = X86_64_X87UP_CLASS;
1830 classes[2] = X86_64_X87_CLASS;
1831 classes[3] = X86_64_X87UP_CLASS;
1834 classes[0] = X86_64_SSEDF_CLASS;
1835 classes[1] = X86_64_SSEDF_CLASS;
1838 classes[0] = X86_64_SSE_CLASS;
1846 classes[0] = X86_64_SSE_CLASS;
1847 classes[1] = X86_64_SSEUP_CLASS;
1853 classes[0] = X86_64_SSE_CLASS;
1863 /* Examine the argument and return set number of register required in each
1864 class. Return 0 iff parameter should be passed in memory. */
1866 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1867 enum machine_mode mode;
1869 int *int_nregs, *sse_nregs;
1872 enum x86_64_reg_class class[MAX_CLASSES];
1873 int n = classify_argument (mode, type, class, 0);
1879 for (n--; n >= 0; n--)
1882 case X86_64_INTEGER_CLASS:
1883 case X86_64_INTEGERSI_CLASS:
1886 case X86_64_SSE_CLASS:
1887 case X86_64_SSESF_CLASS:
1888 case X86_64_SSEDF_CLASS:
1891 case X86_64_NO_CLASS:
1892 case X86_64_SSEUP_CLASS:
1894 case X86_64_X87_CLASS:
1895 case X86_64_X87UP_CLASS:
1899 case X86_64_MEMORY_CLASS:
1904 /* Construct container for the argument used by GCC interface. See
1905 FUNCTION_ARG for the detailed description. */
1907 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1908 enum machine_mode mode;
1911 int nintregs, nsseregs;
1915 enum machine_mode tmpmode;
1917 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1918 enum x86_64_reg_class class[MAX_CLASSES];
1922 int needed_sseregs, needed_intregs;
1923 rtx exp[MAX_CLASSES];
1926 n = classify_argument (mode, type, class, 0);
1927 if (TARGET_DEBUG_ARG)
1930 fprintf (stderr, "Memory class\n");
1933 fprintf (stderr, "Classes:");
1934 for (i = 0; i < n; i++)
1936 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1938 fprintf (stderr, "\n");
1943 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1945 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1948 /* First construct simple cases. Avoid SCmode, since we want to use
1949 single register to pass this type. */
1950 if (n == 1 && mode != SCmode)
1953 case X86_64_INTEGER_CLASS:
1954 case X86_64_INTEGERSI_CLASS:
1955 return gen_rtx_REG (mode, intreg[0]);
1956 case X86_64_SSE_CLASS:
1957 case X86_64_SSESF_CLASS:
1958 case X86_64_SSEDF_CLASS:
1959 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1960 case X86_64_X87_CLASS:
1961 return gen_rtx_REG (mode, FIRST_STACK_REG);
1962 case X86_64_NO_CLASS:
1963 /* Zero sized array, struct or class. */
1968 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1969 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1971 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1972 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1973 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1974 && class[1] == X86_64_INTEGER_CLASS
1975 && (mode == CDImode || mode == TImode)
1976 && intreg[0] + 1 == intreg[1])
1977 return gen_rtx_REG (mode, intreg[0]);
1979 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1980 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1981 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1983 /* Otherwise figure out the entries of the PARALLEL. */
1984 for (i = 0; i < n; i++)
1988 case X86_64_NO_CLASS:
1990 case X86_64_INTEGER_CLASS:
1991 case X86_64_INTEGERSI_CLASS:
1992 /* Merge TImodes on aligned occassions here too. */
1993 if (i * 8 + 8 > bytes)
1994 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
1995 else if (class[i] == X86_64_INTEGERSI_CLASS)
1999 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2000 if (tmpmode == BLKmode)
2002 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2003 gen_rtx_REG (tmpmode, *intreg),
2007 case X86_64_SSESF_CLASS:
2008 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2009 gen_rtx_REG (SFmode,
2010 SSE_REGNO (sse_regno)),
2014 case X86_64_SSEDF_CLASS:
2015 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2016 gen_rtx_REG (DFmode,
2017 SSE_REGNO (sse_regno)),
2021 case X86_64_SSE_CLASS:
2022 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
2023 tmpmode = TImode, i++;
2026 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2027 gen_rtx_REG (tmpmode,
2028 SSE_REGNO (sse_regno)),
2036 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2037 for (i = 0; i < nexps; i++)
2038 XVECEXP (ret, 0, i) = exp [i];
2042 /* Update the data in CUM to advance over an argument
2043 of mode MODE and data type TYPE.
2044 (TYPE is null for libcalls where that information may not be available.) */
2047 function_arg_advance (cum, mode, type, named)
2048 CUMULATIVE_ARGS *cum; /* current arg information */
2049 enum machine_mode mode; /* current arg mode */
2050 tree type; /* type of the argument or 0 if lib support */
2051 int named; /* whether or not the argument was named */
2054 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2055 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2057 if (TARGET_DEBUG_ARG)
2059 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2060 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2063 int int_nregs, sse_nregs;
2064 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2065 cum->words += words;
2066 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2068 cum->nregs -= int_nregs;
2069 cum->sse_nregs -= sse_nregs;
2070 cum->regno += int_nregs;
2071 cum->sse_regno += sse_nregs;
2074 cum->words += words;
2078 if (TARGET_SSE && mode == TImode)
2080 cum->sse_words += words;
2081 cum->sse_nregs -= 1;
2082 cum->sse_regno += 1;
2083 if (cum->sse_nregs <= 0)
2091 cum->words += words;
2092 cum->nregs -= words;
2093 cum->regno += words;
2095 if (cum->nregs <= 0)
2105 /* Define where to put the arguments to a function.
2106 Value is zero to push the argument on the stack,
2107 or a hard register in which to store the argument.
2109 MODE is the argument's machine mode.
2110 TYPE is the data type of the argument (as a tree).
2111 This is null for libcalls where that information may
2113 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2114 the preceding args and about the function being called.
2115 NAMED is nonzero if this argument is a named parameter
2116 (otherwise it is an extra parameter matching an ellipsis). */
2119 function_arg (cum, mode, type, named)
2120 CUMULATIVE_ARGS *cum; /* current arg information */
2121 enum machine_mode mode; /* current arg mode */
2122 tree type; /* type of the argument or 0 if lib support */
2123 int named; /* != 0 for normal args, == 0 for ... args */
2127 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2128 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2130 /* Handle an hidden AL argument containing number of registers for varargs
2131 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2133 if (mode == VOIDmode)
2136 return GEN_INT (cum->maybe_vaarg
2137 ? (cum->sse_nregs < 0
2145 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2146 &x86_64_int_parameter_registers [cum->regno],
2151 /* For now, pass fp/complex values on the stack. */
2160 if (words <= cum->nregs)
2161 ret = gen_rtx_REG (mode, cum->regno);
2165 ret = gen_rtx_REG (mode, cum->sse_regno);
2169 if (TARGET_DEBUG_ARG)
2172 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2173 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2176 print_simple_rtl (stderr, ret);
2178 fprintf (stderr, ", stack");
2180 fprintf (stderr, " )\n");
2186 /* Gives the alignment boundary, in bits, of an argument with the specified mode
/* Uses TYPE_ALIGN when a type is available, the mode alignment otherwise,
   and never returns less than PARM_BOUNDARY.  (Fragmentary listing: the
   return type and some guard lines are missing from this view.)  */
2190 ix86_function_arg_boundary (mode, type)
2191     enum machine_mode mode;
2196     return PARM_BOUNDARY;
2198     align = TYPE_ALIGN (type);
2200     align = GET_MODE_ALIGNMENT (mode);
2201   if (align < PARM_BOUNDARY)
2202     align = PARM_BOUNDARY;
2208 /* Return true if N is a possible register number of function value.  */
/* Two branches: presumably the first is the 64-bit case, the second the
   32-bit case -- the selecting condition is missing from this view.  */
2210 ix86_function_value_regno_p (regno)
2215     return ((regno) == 0
2216 	    || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2217 	    || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2219   return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2220 	  || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2221 	  || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2224 /* Define how to find the value returned by a function.
2225    VALTYPE is the data type of the value (as a tree).
2226    If the precise function being called is known, FUNC is its FUNCTION_DECL;
2227    otherwise, FUNC is 0.  */
2229 ix86_function_value (valtype)
/* x86-64 path: classify the return value into integer/SSE registers.  */
2234       rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2235 				     REGPARM_MAX, SSE_REGPARM_MAX,
2236 				     x86_64_int_return_registers, 0);
2237       /* For zero sized structures, construct_container returns NULL, but we need
2238 	 to keep rest of compiler happy by returning meaningful value.  */
2240 	ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
/* i386 path: single return register chosen by ix86_value_regno.  */
2244     return gen_rtx_REG (TYPE_MODE (valtype),
2245 			ix86_value_regno (TYPE_MODE (valtype)));
2248 /* Return false iff type is returned in memory.  */
2250 ix86_return_in_memory (type)
2253   int needed_intregs, needed_sseregs;
/* x86-64: in memory iff the classifier cannot place it in registers.  */
2256       return !examine_argument (TYPE_MODE (type), type, 1,
2257 				&needed_intregs, &needed_sseregs);
/* i386: BLKmode aggregates, 8-byte vectors, and large non-vector,
   non-TImode/TFmode objects go in memory.  */
2261       if (TYPE_MODE (type) == BLKmode
2262 	  || (VECTOR_MODE_P (TYPE_MODE (type))
2263 	      && int_size_in_bytes (type) == 8)
2264 	  || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2265 	      && TYPE_MODE (type) != TFmode
2266 	      && !VECTOR_MODE_P (TYPE_MODE (type))))
2272 /* Define how to find the value returned by a library function
2273    assuming the value has mode MODE.  */
/* The mode-dispatch switch lines are missing from this view; only the
   per-case returns remain (SSE reg, x87 reg, or integer reg 0).  */
2275 ix86_libcall_value (mode)
2276     enum machine_mode mode;
2286 	return gen_rtx_REG (mode, FIRST_SSE_REG);
2289 	return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2291 	return gen_rtx_REG (mode, 0);
2295     return gen_rtx_REG (mode, ix86_value_regno (mode));
2298 /* Given a mode, return the register to use for a return value.  */
/* x87 for floats (when they return in the 80387), SSE for TImode and
   vectors; the default (integer) return is missing from this view.  */
2301 ix86_value_regno (mode)
2302     enum machine_mode mode;
2304   if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2305     return FIRST_FLOAT_REG;
2306   if (mode == TImode || VECTOR_MODE_P (mode))
2307     return FIRST_SSE_REG;
2311 /* Create the va_list data type.  */
2314 ix86_build_va_list ()
2316   tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2318   /* For i386 we use plain pointer to argument area.  */
2320     return build_pointer_type (char_type_node);
/* x86-64: build struct __va_list_tag { unsigned gp_offset;
   unsigned fp_offset; void *overflow_arg_area; void *reg_save_area; }
   (pointer-field types are on lines missing from this view).  */
2322   record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2323   type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2325   f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2326 		      unsigned_type_node);
2327   f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2328 		      unsigned_type_node);
2329   f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2331   f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2334   DECL_FIELD_CONTEXT (f_gpr) = record;
2335   DECL_FIELD_CONTEXT (f_fpr) = record;
2336   DECL_FIELD_CONTEXT (f_ovf) = record;
2337   DECL_FIELD_CONTEXT (f_sav) = record;
2339   TREE_CHAIN (record) = type_decl;
2340   TYPE_NAME (record) = type_decl;
2341   TYPE_FIELDS (record) = f_gpr;
2342   TREE_CHAIN (f_gpr) = f_fpr;
2343   TREE_CHAIN (f_fpr) = f_ovf;
2344   TREE_CHAIN (f_ovf) = f_sav;
2346   layout_type (record);
2348   /* The correct type is an array type of one element.  */
2349   return build_array_type (record, build_index_type (size_zero_node));
2352 /* Perform any needed actions needed for a function that is receiving a
2353    variable number of arguments.
2357    MODE and TYPE are the mode and type of the current parameter.
2359    PRETEND_SIZE is a variable that should be set to the amount of stack
2360    that must be pushed by the prolog to pretend that our caller pushed
2363    Normally, this macro will push all remaining incoming registers on the
2364    stack and set PRETEND_SIZE to the length of the registers pushed.  */
/* NOTE(review): fragmentary listing -- early-exit guards, local declarations
   and several brace lines are missing from this view.  */
2367 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2368     CUMULATIVE_ARGS *cum;
2369     enum machine_mode mode;
2371     int *pretend_size ATTRIBUTE_UNUSED;
2375   CUMULATIVE_ARGS next_cum;
2376   rtx save_area = NULL_RTX, mem;
2389   /* Indicate to allocate space on the stack for varargs save area.  */
2390   ix86_save_varrargs_registers = 1;
2392   fntype = TREE_TYPE (current_function_decl);
2393   stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2394 	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2395 		  != void_type_node));
2397   /* For varargs, we do not want to skip the dummy va_dcl argument.
2398      For stdargs, we do want to skip the last named argument.  */
2401     function_arg_advance (&next_cum, mode, type, 1);
2404     save_area = frame_pointer_rtx;
2406   set = get_varargs_alias_set ();
/* Spill the remaining integer parameter registers into the save area.  */
2408   for (i = next_cum.regno; i < ix86_regparm; i++)
2410       mem = gen_rtx_MEM (Pmode,
2411 			 plus_constant (save_area, i * UNITS_PER_WORD));
2412       set_mem_alias_set (mem, set);
2413       emit_move_insn (mem, gen_rtx_REG (Pmode,
2414 					x86_64_int_parameter_registers[i]));
2417   if (next_cum.sse_nregs)
2419       /* Now emit code to save SSE registers.  The AX parameter contains number
2420 	 of SSE parameter registers used to call this function.  We use
2421 	 sse_prologue_save insn template that produces computed jump across
2422 	 SSE saves.  We need some preparation work to get this working.  */
2424       label = gen_label_rtx ();
2425       label_ref = gen_rtx_LABEL_REF (Pmode, label);
2427       /* Compute address to jump to :
2428          label - 5*eax + nnamed_sse_arguments*5  */
2429       tmp_reg = gen_reg_rtx (Pmode);
2430       nsse_reg = gen_reg_rtx (Pmode);
/* Zero-extend AL (the hidden SSE-register count) into a full register.  */
2431       emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2432       emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2433 			      gen_rtx_MULT (Pmode, nsse_reg,
2435       if (next_cum.sse_regno)
2438 		  gen_rtx_CONST (DImode,
2439 				 gen_rtx_PLUS (DImode,
2441 					       GEN_INT (next_cum.sse_regno * 4))));
2443 	emit_move_insn (nsse_reg, label_ref);
2444       emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2446       /* Compute address of memory block we save into.  We always use pointer
2447 	 pointing 127 bytes after first byte to store - this is needed to keep
2448 	 instruction size limited by 4 bytes.  */
2449       tmp_reg = gen_reg_rtx (Pmode);
2450       emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2451 			      plus_constant (save_area,
2452 					     8 * REGPARM_MAX + 127)));
2453       mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2454       set_mem_alias_set (mem, set);
2455       set_mem_align (mem, BITS_PER_WORD);
2457       /* And finally do the dirty job!  */
2458       emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2459 					GEN_INT (next_cum.sse_regno), label));
2464 /* Implement va_start.  */
2467 ix86_va_start (valist, nextarg)
2471   HOST_WIDE_INT words, n_gpr, n_fpr;
2472   tree f_gpr, f_fpr, f_ovf, f_sav;
2473   tree gpr, fpr, ovf, sav, t;
2475   /* Only 64bit target needs something special.  */
2478       std_expand_builtin_va_start (valist, nextarg);
/* Locate the four __va_list_tag fields built by ix86_build_va_list.  */
2482   f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2483   f_fpr = TREE_CHAIN (f_gpr);
2484   f_ovf = TREE_CHAIN (f_fpr);
2485   f_sav = TREE_CHAIN (f_ovf);
2487   valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2488   gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2489   fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2490   ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2491   sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2493   /* Count number of gp and fp argument registers used.  */
2494   words = current_function_args_info.words;
2495   n_gpr = current_function_args_info.regno;
2496   n_fpr = current_function_args_info.sse_regno;
2498   if (TARGET_DEBUG_ARG)
2499     fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2500 	     (int) words, (int) n_gpr, (int) n_fpr);
/* gp_offset = bytes of integer registers already consumed (8 each).  */
2502   t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2503 	     build_int_2 (n_gpr * 8, 0));
2504   TREE_SIDE_EFFECTS (t) = 1;
2505   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset = SSE slots used (16 bytes each) past the integer area.  */
2507   t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2508 	     build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2509   TREE_SIDE_EFFECTS (t) = 1;
2510   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2512   /* Find the overflow area.  */
2513   t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2515     t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2516 	       build_int_2 (words * UNITS_PER_WORD, 0));
2517   t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2518   TREE_SIDE_EFFECTS (t) = 1;
2519   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2521   /* Find the register save area.
2522      Prologue of the function save it right above stack frame.  */
2523   t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2524   t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2525   TREE_SIDE_EFFECTS (t) = 1;
2526   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2529 /* Implement va_arg.  */
/* NOTE(review): fragmentary listing -- many brace/declaration lines of this
   large function are missing from this view.  */
2531 ix86_va_arg (valist, type)
2534   static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2535   tree f_gpr, f_fpr, f_ovf, f_sav;
2536   tree gpr, fpr, ovf, sav, t;
2538   rtx lab_false, lab_over = NULL_RTX;
2542   /* Only 64bit target needs something special.  */
2545       return std_expand_builtin_va_arg (valist, type);
2548   f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2549   f_fpr = TREE_CHAIN (f_gpr);
2550   f_ovf = TREE_CHAIN (f_fpr);
2551   f_sav = TREE_CHAIN (f_ovf);
2553   valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2554   gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2555   fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2556   ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2557   sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2559   size = int_size_in_bytes (type);
2560   rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
/* Classify TYPE; CONTAINER describes which registers would hold it.  */
2562   container = construct_container (TYPE_MODE (type), type, 0,
2563 				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2565    * Pull the value out of the saved registers ...
2568   addr_rtx = gen_reg_rtx (Pmode);
2572       rtx int_addr_rtx, sse_addr_rtx;
2573       int needed_intregs, needed_sseregs;
2576       lab_over = gen_label_rtx ();
2577       lab_false = gen_label_rtx ();
2579       examine_argument (TYPE_MODE (type), type, 0,
2580 			&needed_intregs, &needed_sseregs);
/* Over-aligned types cannot be read in place from the save area.  */
2583       need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2584 		   || TYPE_ALIGN (type) > 128);
2586       /* In case we are passing structure, verify that it is consecutive block
2587          on the register save area.  If not we need to do moves.  */
2588       if (!need_temp && !REG_P (container))
2590 	  /* Verify that all registers are strictly consecutive  */
2591 	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2595 	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2597 		  rtx slot = XVECEXP (container, 0, i);
2598 		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2599 		      || INTVAL (XEXP (slot, 1)) != i * 16)
2607 	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2609 		  rtx slot = XVECEXP (container, 0, i);
2610 		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2611 		      || INTVAL (XEXP (slot, 1)) != i * 8)
2618 	  int_addr_rtx = addr_rtx;
2619 	  sse_addr_rtx = addr_rtx;
2623 	  int_addr_rtx = gen_reg_rtx (Pmode);
2624 	  sse_addr_rtx = gen_reg_rtx (Pmode);
2626       /* First ensure that we fit completely in registers.  */
2629 	  emit_cmp_and_jump_insns (expand_expr
2630 				   (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2631 				   GEN_INT ((REGPARM_MAX - needed_intregs +
2632 					     1) * 8), GE, const1_rtx, SImode,
2637 	  emit_cmp_and_jump_insns (expand_expr
2638 				   (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2639 				   GEN_INT ((SSE_REGPARM_MAX -
2640 					     needed_sseregs + 1) * 16 +
2641 					     REGPARM_MAX * 8), GE, const1_rtx,
2642 				   SImode, 1, lab_false);
2645       /* Compute index to start of area used for integer regs.  */
2648 	  t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2649 	  r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2650 	  if (r != int_addr_rtx)
2651 	    emit_move_insn (int_addr_rtx, r);
2655 	  t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2656 	  r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2657 	  if (r != sse_addr_rtx)
2658 	    emit_move_insn (sse_addr_rtx, r);
2665 	  /* Never use the memory itself, as it has the alias set.  */
2666 	  addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2667 	  mem = gen_rtx_MEM (BLKmode, addr_rtx);
2668 	  set_mem_alias_set (mem, get_varargs_alias_set ());
2669 	  set_mem_align (mem, BITS_PER_UNIT);
/* Copy each register slot from the save area into the temporary.  */
2671 	  for (i = 0; i < XVECLEN (container, 0); i++)
2673 	      rtx slot = XVECEXP (container, 0, i);
2674 	      rtx reg = XEXP (slot, 0);
2675 	      enum machine_mode mode = GET_MODE (reg);
2681 	      if (SSE_REGNO_P (REGNO (reg)))
2683 		  src_addr = sse_addr_rtx;
2684 		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2688 		  src_addr = int_addr_rtx;
2689 		  src_offset = REGNO (reg) * 8;
2691 	      src_mem = gen_rtx_MEM (mode, src_addr);
2692 	      set_mem_alias_set (src_mem, get_varargs_alias_set ());
2693 	      src_mem = adjust_address (src_mem, mode, src_offset);
2694 	      dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2695 	      emit_move_insn (dest_mem, src_mem);
/* Advance gp_offset / fp_offset past what we consumed.  */
2702 	    build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2703 		   build_int_2 (needed_intregs * 8, 0));
2704 	  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2705 	  TREE_SIDE_EFFECTS (t) = 1;
2706 	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2711 	    build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2712 		   build_int_2 (needed_sseregs * 16, 0));
2713 	  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2714 	  TREE_SIDE_EFFECTS (t) = 1;
2715 	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2718       emit_jump_insn (gen_jump (lab_over));
2720       emit_label (lab_false);
2723   /* ... otherwise out of the overflow area.  */
2725   /* Care for on-stack alignment if needed.  */
2726   if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2730       HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2731       t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2732       t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2736   r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2738     emit_move_insn (addr_rtx, r);
/* Bump the overflow pointer past this argument.  */
2741     build (PLUS_EXPR, TREE_TYPE (t), t,
2742 	   build_int_2 (rsize * UNITS_PER_WORD, 0));
2743   t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2744   TREE_SIDE_EFFECTS (t) = 1;
2745   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2748     emit_label (lab_over);
2753 /* Return nonzero if OP is general operand representable on x86_64.  */
/* On 32-bit targets this degenerates to general_operand; constants must
   fit in a sign-extended 32-bit immediate on x86-64.  */
2756 x86_64_general_operand (op, mode)
2758     enum machine_mode mode;
2761     return general_operand (op, mode);
2762   if (nonimmediate_operand (op, mode))
2764   return x86_64_sign_extended_value (op);
2767 /* Return nonzero if OP is general operand representable on x86_64
2768    as either sign extended or zero extended constant.  */
2771 x86_64_szext_general_operand (op, mode)
2773     enum machine_mode mode;
2776     return general_operand (op, mode);
2777   if (nonimmediate_operand (op, mode))
/* Accept both 32-bit-sign-extendable and 32-bit-zero-extendable constants.  */
2779   return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2782 /* Return nonzero if OP is nonmemory operand representable on x86_64.  */
2785 x86_64_nonmemory_operand (op, mode)
2787     enum machine_mode mode;
2790     return nonmemory_operand (op, mode);
2791   if (register_operand (op, mode))
/* Constants are only valid if they fit a sign-extended 32-bit immediate.  */
2793   return x86_64_sign_extended_value (op);
2796 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns.  */
2799 x86_64_movabs_operand (op, mode)
2801     enum machine_mode mode;
2803   if (!TARGET_64BIT || !flag_pic)
2804     return nonmemory_operand (op, mode);
2805   if (register_operand (op, mode) || x86_64_sign_extended_value (op))
/* Under PIC, 64-bit absolute constants are fine only if no symbol is
   mentioned (symbols would need relocation through the GOT).  */
2807   if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2812 /* Return nonzero if OP is nonmemory operand representable on x86_64
   as either a register or a sign- or zero-extendable 32-bit constant.  */
2815 x86_64_szext_nonmemory_operand (op, mode)
2817     enum machine_mode mode;
2820     return nonmemory_operand (op, mode);
2821   if (register_operand (op, mode))
2823   return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2826 /* Return nonzero if OP is immediate operand representable on x86_64
   (i.e. fits in a sign-extended 32-bit immediate).  */
2829 x86_64_immediate_operand (op, mode)
2831     enum machine_mode mode;
2834     return immediate_operand (op, mode);
2835   return x86_64_sign_extended_value (op);
2838 /* Return nonzero if OP is a zero-extendable immediate operand on x86_64.  */
2841 x86_64_zext_immediate_operand (op, mode)
2843     enum machine_mode mode ATTRIBUTE_UNUSED;
2845   return x86_64_zero_extended_value (op);
2848 /* Return nonzero if OP is (const_int 1), else return zero.  */
2851 const_int_1_operand (op, mode)
2853     enum machine_mode mode ATTRIBUTE_UNUSED;
/* MODE is ignored: a CONST_INT carries no mode of its own.  */
2855   return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2858 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2859    for shift & compare patterns, as shifting by 0 does not change flags),
2860    else return zero.  */
2863 const_int_1_31_operand (op, mode)
2865     enum machine_mode mode ATTRIBUTE_UNUSED;
2867   return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
2870 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2871    reference and a constant.  */
/* NOTE(review): the switch head and some case labels are missing from this
   view; the visible code handles the CONST case by unwrapping PLUS/UNSPEC.  */
2874 symbolic_operand (op, mode)
2876     enum machine_mode mode ATTRIBUTE_UNUSED;
2878   switch (GET_CODE (op))
/* Bare symbol, label, or GOT/GOTOFF/GOTPCREL unspec is symbolic.  */
2886       if (GET_CODE (op) == SYMBOL_REF
2887 	  || GET_CODE (op) == LABEL_REF
2888 	  || (GET_CODE (op) == UNSPEC
2889 	      && (XINT (op, 1) == UNSPEC_GOT
2890 		  || XINT (op, 1) == UNSPEC_GOTOFF
2891 		  || XINT (op, 1) == UNSPEC_GOTPCREL)))
2893       if (GET_CODE (op) != PLUS
2894 	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
2898       if (GET_CODE (op) == SYMBOL_REF
2899 	  || GET_CODE (op) == LABEL_REF)
2901       /* Only @GOTOFF gets offsets.  */
2902       if (GET_CODE (op) != UNSPEC
2903 	  || XINT (op, 1) != UNSPEC_GOTOFF)
2906       op = XVECEXP (op, 0, 0);
2907       if (GET_CODE (op) == SYMBOL_REF
2908 	  || GET_CODE (op) == LABEL_REF)
2917 /* Return true if the operand contains a @GOT or @GOTOFF reference.  */
/* Only CONST wrappers are considered; the code then looks for an UNSPEC,
   possibly offset by a CONST_INT via PLUS.  Some branch lines are missing
   from this view.  */
2920 pic_symbolic_operand (op, mode)
2922     enum machine_mode mode ATTRIBUTE_UNUSED;
2924   if (GET_CODE (op) != CONST)
2929       if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2934       if (GET_CODE (op) == UNSPEC)
2936       if (GET_CODE (op) != PLUS
2937 	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
2940       if (GET_CODE (op) == UNSPEC)
2946 /* Return true if OP is a symbolic operand that resolves locally.  */
2949 local_symbolic_operand (op, mode)
2951     enum machine_mode mode ATTRIBUTE_UNUSED;
2953   if (GET_CODE (op) == LABEL_REF)
/* Strip a CONST (PLUS symbol const_int) wrapper down to the symbol.  */
2956   if (GET_CODE (op) == CONST
2957       && GET_CODE (XEXP (op, 0)) == PLUS
2958       && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2959     op = XEXP (XEXP (op, 0), 0);
2961   if (GET_CODE (op) != SYMBOL_REF)
2964   /* These we've been told are local by varasm and encode_section_info
2966   if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2969   /* There is, however, a not insubstantial body of code in the rest of
2970      the compiler that assumes it can just stick the results of
2971      ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done.  */
2972   /* ??? This is a hack.  Should update the body of the compiler to
2973      always create a DECL an invoke targetm.encode_section_info.  */
2974   if (strncmp (XSTR (op, 0), internal_label_prefix,
2975 	       internal_label_prefix_len) == 0)
2981 /* Test for various thread-local symbols.  See ix86_encode_section_info.  */
/* TLS symbols are tagged with a leading '%' plus a model character; the
   return value is the index of that character in tls_model_chars.  */
2984 tls_symbolic_operand (op, mode)
2986     enum machine_mode mode ATTRIBUTE_UNUSED;
2988   const char *symbol_str;
2990   if (GET_CODE (op) != SYMBOL_REF)
2992   symbol_str = XSTR (op, 0);
2994   if (symbol_str[0] != '%')
2996   return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
/* Helper: true if OP is a TLS symbol encoded with model KIND.  */
3000 tls_symbolic_operand_1 (op, kind)
3002     enum tls_model kind;
3004   const char *symbol_str;
3006   if (GET_CODE (op) != SYMBOL_REF)
3008   symbol_str = XSTR (op, 0);
3010   return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
/* Predicate: symbol uses the global-dynamic TLS model.  */
3014 global_dynamic_symbolic_operand (op, mode)
3016     enum machine_mode mode ATTRIBUTE_UNUSED;
3018   return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
/* Predicate: symbol uses the local-dynamic TLS model.  */
3022 local_dynamic_symbolic_operand (op, mode)
3024     enum machine_mode mode ATTRIBUTE_UNUSED;
3026   return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
/* Predicate: symbol uses the initial-exec TLS model.  */
3030 initial_exec_symbolic_operand (op, mode)
3032     enum machine_mode mode ATTRIBUTE_UNUSED;
3034   return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
/* Predicate: symbol uses the local-exec TLS model.  */
3038 local_exec_symbolic_operand (op, mode)
3040     enum machine_mode mode ATTRIBUTE_UNUSED;
3042   return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3045 /* Test for a valid operand for a call instruction.  Don't allow the
3046    arg pointer register or virtual regs since they may decay into
3047    reg + const, which the patterns can't handle.  */
3050 call_insn_operand (op, mode)
3052     enum machine_mode mode ATTRIBUTE_UNUSED;
3054   /* Disallow indirect through a virtual register.  This leads to
3055      compiler aborts when trying to eliminate them.  */
3056   if (GET_CODE (op) == REG
3057       && (op == arg_pointer_rtx
3058 	  || op == frame_pointer_rtx
3059 	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3060 	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3063   /* Disallow `call 1234'.  Due to varying assembler lameness this
3064      gets either rejected or translated to `call .+1234'.  */
3065   if (GET_CODE (op) == CONST_INT)
3068   /* Explicitly allow SYMBOL_REF even if pic.  */
3069   if (GET_CODE (op) == SYMBOL_REF)
3072   /* Otherwise we can allow any general_operand in the address.  */
3073   return general_operand (op, Pmode);
/* True if OP is a constant call address: a SYMBOL_REF, possibly wrapped
   in CONST (PLUS symbol const_int).  */
3077 constant_call_address_operand (op, mode)
3079     enum machine_mode mode ATTRIBUTE_UNUSED;
3081   if (GET_CODE (op) == CONST
3082       && GET_CODE (XEXP (op, 0)) == PLUS
3083       && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3084     op = XEXP (XEXP (op, 0), 0);
3085   return GET_CODE (op) == SYMBOL_REF;
3088 /* Match exactly zero and one.  */
/* Matches the canonical zero of MODE (works for FP zero as well).  */
3091 const0_operand (op, mode)
3093     enum machine_mode mode;
3095   return op == CONST0_RTX (mode);
/* Match exactly (const_int 1).  */
3099 const1_operand (op, mode)
3101     enum machine_mode mode ATTRIBUTE_UNUSED;
3103   return op == const1_rtx;
3106 /* Match 2, 4, or 8.  Used for leal multiplicands.  */
3109 const248_operand (op, mode)
3111     enum machine_mode mode ATTRIBUTE_UNUSED;
/* These are the only scale factors the x86 addressing modes support.  */
3113   return (GET_CODE (op) == CONST_INT
3114 	  && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3117 /* True if this is a constant appropriate for an increment or decrement.  */
3120 incdec_operand (op, mode)
3122     enum machine_mode mode ATTRIBUTE_UNUSED;
3124   /* On Pentium4, the inc and dec operations causes extra dependency on flag
3125      registers, since carry flag is not set.  */
3126   if (TARGET_PENTIUM4 && !optimize_size)
3128   return op == const1_rtx || op == constm1_rtx;
3131 /* Return nonzero if OP is acceptable as operand of DImode shift
   (the selecting condition -- presumably TARGET_64BIT -- is missing
   from this view).  */
3135 shiftdi_operand (op, mode)
3137     enum machine_mode mode ATTRIBUTE_UNUSED;
3140     return nonimmediate_operand (op, mode);
3142     return register_operand (op, mode);
3145 /* Return false if this is the stack pointer, or any other fake
3146    register eliminable to the stack pointer.  Otherwise, this is
3149    This is used to prevent esp from being used as an index reg.
3150    Which would only happen in pathological cases.  */
3153 reg_no_sp_operand (op, mode)
3155     enum machine_mode mode;
/* Look through a SUBREG so a wrapped stack pointer is still rejected.  */
3158   if (GET_CODE (t) == SUBREG)
3160   if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3163   return register_operand (op, mode);
/* True if OP is an MMX hard register.  */
3167 mmx_reg_operand (op, mode)
3169     enum machine_mode mode ATTRIBUTE_UNUSED;
3171   return MMX_REG_P (op);
3174 /* Return false if this is any eliminable register.  Otherwise
   general_operand.  */
3178 general_no_elim_operand (op, mode)
3180     enum machine_mode mode;
3183   if (GET_CODE (t) == SUBREG)
3185   if (t == arg_pointer_rtx || t == frame_pointer_rtx
3186       || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3187       || t == virtual_stack_dynamic_rtx)
/* Also reject any other virtual register in the eliminable range.  */
3190       && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3191       && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3194   return general_operand (op, mode);
3197 /* Return false if this is any eliminable register.  Otherwise
3198    register_operand or const_int.  */
3201 nonmemory_no_elim_operand (op, mode)
3203     enum machine_mode mode;
3206   if (GET_CODE (t) == SUBREG)
3208   if (t == arg_pointer_rtx || t == frame_pointer_rtx
3209       || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3210       || t == virtual_stack_dynamic_rtx)
3213   return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3216 /* Return true if op is a Q_REGS class register.  */
3219 q_regs_operand (op, mode)
3221     enum machine_mode mode;
3223   if (mode != VOIDmode && GET_MODE (op) != mode)
3225   if (GET_CODE (op) == SUBREG)
3226     op = SUBREG_REG (op);
/* Q_REGS = registers with addressable low byte (a/b/c/d on ia32).  */
3227   return ANY_QI_REG_P (op);
3230 /* Return true if op is a NON_Q_REGS class register.  */
3233 non_q_regs_operand (op, mode)
3235     enum machine_mode mode;
3237   if (mode != VOIDmode && GET_MODE (op) != mode)
3239   if (GET_CODE (op) == SUBREG)
3240     op = SUBREG_REG (op);
3241   return NON_QI_REG_P (op);
3244 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
   instructions.  (The switch head and directly-supported case labels are
   missing from this view.)  */
3247 sse_comparison_operator (op, mode)
3249     enum machine_mode mode ATTRIBUTE_UNUSED;
3251   enum rtx_code code = GET_CODE (op);
3254     /* Operations supported directly.  */
3264     /* These are equivalent to ones above in non-IEEE comparisons.  */
3271       return !TARGET_IEEE_FP;
3276 /* Return 1 if OP is a valid comparison operator in valid mode.  */
3278 ix86_comparison_operator (op, mode)
3280     enum machine_mode mode;
3282   enum machine_mode inmode;
3283   enum rtx_code code = GET_CODE (op);
3284   if (mode != VOIDmode && GET_MODE (op) != mode)
3286   if (GET_RTX_CLASS (code) != '<')
3288   inmode = GET_MODE (XEXP (op, 0));
/* FP compares: valid only if expressible as a single jump (no bypass
   or secondary code needed).  */
3290   if (inmode == CCFPmode || inmode == CCFPUmode)
3292       enum rtx_code second_code, bypass_code;
3293       ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3294       return (bypass_code == NIL && second_code == NIL);
/* Integer compares: which CC modes are acceptable depends on CODE;
   the case labels here are partly missing from this view.  */
3301       if (inmode == CCmode || inmode == CCGCmode
3302 	  || inmode == CCGOCmode || inmode == CCNOmode)
3305     case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3306       if (inmode == CCmode)
3310       if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3318 /* Return 1 if OP is a comparison operator that can be issued by fcmov.  */
3321 fcmov_comparison_operator (op, mode)
3323     enum machine_mode mode;
3325   enum machine_mode inmode;
3326   enum rtx_code code = GET_CODE (op);
3327   if (mode != VOIDmode && GET_MODE (op) != mode)
3329   if (GET_RTX_CLASS (code) != '<')
3331   inmode = GET_MODE (XEXP (op, 0));
3332   if (inmode == CCFPmode || inmode == CCFPUmode)
3334       enum rtx_code second_code, bypass_code;
3335       ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3336       if (bypass_code != NIL || second_code != NIL)
/* Map the FP comparison onto the integer condition fcmov understands.  */
3338       code = ix86_fp_compare_code_to_integer (code);
3340   /* i387 supports just limited amount of conditional codes.  */
3343     case LTU: case GTU: case LEU: case GEU:
3344       if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3347     case ORDERED: case UNORDERED:
3355 /* Return 1 if OP is a binary operator that can be promoted to wider mode.  */
3358 promotable_binary_operator (op, mode)
3360     enum machine_mode mode ATTRIBUTE_UNUSED;
3362   switch (GET_CODE (op))
3365       /* Modern CPUs have same latency for HImode and SImode multiply,
3366          but 386 and 486 do HImode multiply faster.  */
3367       return ix86_cpu > PROCESSOR_I486;
3379 /* Nearly general operand, but accept any const_double, since we wish
3380    to be able to drop them into memory rather than have them get pulled
   into a register at expand time.  */
3384 cmp_fp_expander_operand (op, mode)
3386     enum machine_mode mode;
3388   if (mode != VOIDmode && mode != GET_MODE (op))
3390   if (GET_CODE (op) == CONST_DOUBLE)
3392   return general_operand (op, mode);
3395 /* Match an SI or HImode register for a zero_extract.  */
3398 ext_register_operand (op, mode)
3400     enum machine_mode mode ATTRIBUTE_UNUSED;
3403   if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3404       && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3407   if (!register_operand (op, VOIDmode))
3410   /* Be careful to accept only registers having upper parts.  */
/* Hard regs 0..3 (a/b/c/d) have %ah-style high-byte parts; pseudos
   (regno > LAST_VIRTUAL_REGISTER) are accepted and constrained later.  */
3411   regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3412   return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3415 /* Return 1 if this is a valid binary floating-point operation.
3416    OP is the expression matched, and MODE is its mode.  */
3419 binary_fp_operator (op, mode)
3421     enum machine_mode mode;
3423   if (mode != VOIDmode && mode != GET_MODE (op))
3426   switch (GET_CODE (op))
/* Accepted codes (case labels missing from this view) must be in a
   floating-point mode.  */
3432       return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
/* Match a MULT rtx.  */
3440 mult_operator (op, mode)
3442     enum machine_mode mode ATTRIBUTE_UNUSED;
3444   return GET_CODE (op) == MULT;
/* Match a DIV rtx.  */
3448 div_operator (op, mode)
3450     enum machine_mode mode ATTRIBUTE_UNUSED;
3452   return GET_CODE (op) == DIV;
/* Match any commutative ('c') or binary ('2') arithmetic/logical operator
   in MODE.  */
3456 arith_or_logical_operator (op, mode)
3458     enum machine_mode mode;
3460   return ((mode == VOIDmode || GET_MODE (op) == mode)
3461           && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3462               || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3465 /* Returns 1 if OP is memory operand with a displacement.  */
3468 memory_displacement_operand (op, mode)
3470     enum machine_mode mode;
3472   struct ix86_address parts;
3474   if (! memory_operand (op, mode))
/* Decompose the address; a failure here means it is not a valid
   ia32 address at all.  */
3477   if (! ix86_decompose_address (XEXP (op, 0), &parts))
3480   return parts.disp != NULL_RTX;
3483 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3484    re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3486    ??? It seems likely that this will only work because cmpsi is an
3487    expander, and no actual insns use this.  */
3490 cmpsi_operand (op, mode)
3492     enum machine_mode mode;
3494   if (nonimmediate_operand (op, mode))
/* Also accept (and (zero_extract X 8 8) const_int) -- the form produced
   for %ah-style byte tests.  */
3497   if (GET_CODE (op) == AND
3498       && GET_MODE (op) == SImode
3499       && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3500       && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3501       && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3502       && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3503       && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3504       && GET_CODE (XEXP (op, 1)) == CONST_INT)
3510 /* Returns 1 if OP is memory operand that can not be represented by the
   shortest addressing form (i.e. its encoded address length is nonzero).  */
3514 long_memory_operand (op, mode)
3516     enum machine_mode mode;
3518   if (! memory_operand (op, mode))
3521   return memory_address_length (op) != 0;
3524 /* Return nonzero if the rtx is known aligned.  */
3527 aligned_operand (op, mode)
3529     enum machine_mode mode;
3531   struct ix86_address parts;
3533   if (!general_operand (op, mode))
3536   /* Registers and immediate operands are always "aligned".  */
3537   if (GET_CODE (op) != MEM)
3540   /* Don't even try to do any aligned optimizations with volatiles.  */
3541   if (MEM_VOLATILE_P (op))
3546   /* Pushes and pops are only valid on the stack pointer.  */
3547   if (GET_CODE (op) == PRE_DEC
3548       || GET_CODE (op) == POST_INC)
3551   /* Decode the address.  */
3552   if (! ix86_decompose_address (op, &parts))
3555   if (parts.base && GET_CODE (parts.base) == SUBREG)
3556     parts.base = SUBREG_REG (parts.base);
3557   if (parts.index && GET_CODE (parts.index) == SUBREG)
3558     parts.index = SUBREG_REG (parts.index);
3560   /* Look for some component that isn't known to be aligned.  */
/* Index and base registers must be known 4-byte aligned, and any
   displacement must be a multiple of 4.  */
3564 	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3569       if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3574       if (GET_CODE (parts.disp) != CONST_INT
3575 	  || (INTVAL (parts.disp) & 3) != 0)
3579   /* Didn't find one -- this must be an aligned address.  */
3583 /* Return true if the constant is something that can be loaded with
3584    a special instruction.  Only handle 0.0 and 1.0; others are less
3588 standard_80387_constant_p (x)
     /* Only floating-point CONST_DOUBLEs qualify.  */
3591   if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3593   /* Note that on the 80387, other constants, such as pi, that we should support
3594      too.  On some machines, these are much slower to load as standard constant,
3595      than to load from doubles in memory.  */
     /* 0.0 loads via fldz, 1.0 via fld1 -- the two constants the x87
	can materialize without a memory reference.  */
3596   if (x == CONST0_RTX (GET_MODE (x)))
3598   if (x == CONST1_RTX (GET_MODE (x)))
3603 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3606 standard_sse_constant_p (x)
3609   if (GET_CODE (x) != CONST_DOUBLE)
     /* Only all-zeros can be synthesized (via xorps/pxor).  */
3611   return (x == CONST0_RTX (GET_MODE (x)));
3614 /* Returns 1 if OP contains a symbol reference */
3617 symbolic_reference_mentioned_p (op)
3620   register const char *fmt;
3623   if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
     /* Walk all sub-expressions recursively, using the rtx format string
	to distinguish expression ('e') and vector ('E') operands.  */
3626   fmt = GET_RTX_FORMAT (GET_CODE (op));
3627   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3633 	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3634 	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3638       else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3645 /* Return 1 if it is appropriate to emit `ret' instructions in the
3646    body of a function.  Do this only if the epilogue is simple, needing a
3647    couple of insns.  Prior to reloading, we can't tell how many registers
3648    must be saved, so return 0 then.  Return 0 if there is no frame
3649    marker to de-allocate.
3651    If NON_SAVING_SETJMP is defined and true, then it is not possible
3652    for the epilogue to be simple, so return 0.  This is a special case
3653    since NON_SAVING_SETJMP will not cause regs_ever_live to change
3654    until final, but jump_optimize may need to know sooner if a
3658 ix86_can_use_return_insn_p ()
3660   struct ix86_frame frame;
3662 #ifdef NON_SAVING_SETJMP
3663   if (NON_SAVING_SETJMP && current_function_calls_setjmp)
     /* Before reload we cannot know the save set; with a frame pointer
	the epilogue is never just `ret'.  */
3667   if (! reload_completed || frame_pointer_needed)
3670   /* Don't allow more than 32 pop, since that's all we can do
3671      with one instruction.  */
3672   if (current_function_pops_args
3673       && current_function_args_size >= 32768)
     /* A bare `ret' works only when nothing needs deallocating or
	restoring.  */
3676   ix86_compute_frame_layout (&frame);
3677   return frame.to_allocate == 0 && frame.nregs == 0;
3680 /* Return 1 if VALUE can be stored in the sign extended immediate field.  */
3682 x86_64_sign_extended_value (value)
3685   switch (GET_CODE (value))
3687       /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3688 	 to be at least 32 and this all acceptable constants are
3689 	 represented as CONST_INT.  */
     /* CONST_INT: on a 32-bit host every CONST_INT already fits in 32
	bits; otherwise check that the value survives a DImode->SImode
	sign-extension round trip.  */
3691       if (HOST_BITS_PER_WIDE_INT == 32)
3695 	  HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3696 	  return trunc_int_for_mode (val, SImode) == val;
3700       /* For certain code models, the symbolic references are known to fit.  */
3702       return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3704       /* For certain code models, the code is near as well.  */
3706       return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3708       /* We also may accept the offsetted memory references in certain special
     /* CONST: a GOT-relative unspec is always OK; otherwise look for
	SYMBOL_REF/LABEL_REF plus a constant offset.  */
3711       if (GET_CODE (XEXP (value, 0)) == UNSPEC
3712 	  && XINT (XEXP (value, 0), 1) == UNSPEC_GOTPCREL)
3714       else if (GET_CODE (XEXP (value, 0)) == PLUS)
3716 	  rtx op1 = XEXP (XEXP (value, 0), 0);
3717 	  rtx op2 = XEXP (XEXP (value, 0), 1);
3718 	  HOST_WIDE_INT offset;
3720 	  if (ix86_cmodel == CM_LARGE)
3722 	  if (GET_CODE (op2) != CONST_INT)
3724 	  offset = trunc_int_for_mode (INTVAL (op2), DImode);
3726 	  switch (GET_CODE (op1))
3728 	      /* For CM_SMALL assume that latest object is 1MB before
3729 		 end of 31bits boundary.  We may also accept pretty
3730 		 large negative constants knowing that all objects are
3731 		 in the positive half of address space.  */
3732 	      if (ix86_cmodel == CM_SMALL
3733 		  && offset < 1024*1024*1024
3734 		  && trunc_int_for_mode (offset, SImode) == offset)
3736 	      /* For CM_KERNEL we know that all object resist in the
3737 		 negative half of 32bits address space.  We may not
3738 		 accept negative offsets, since they may be just off
3739 		 and we may accept pretty large positive ones.  */
3740 	      if (ix86_cmodel == CM_KERNEL
3742 		  && trunc_int_for_mode (offset, SImode) == offset)
3746 	      /* These conditions are similar to SYMBOL_REF ones, just the
3747 		 constraints for code models differ.  */
3748 	      if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3749 		  && offset < 1024*1024*1024
3750 		  && trunc_int_for_mode (offset, SImode) == offset)
3752 	      if (ix86_cmodel == CM_KERNEL
3754 		  && trunc_int_for_mode (offset, SImode) == offset)
3767 /* Return 1 if VALUE can be stored in the zero extended immediate field.  */
3769 x86_64_zero_extended_value (value)
3772   switch (GET_CODE (value))
     /* CONST_DOUBLE on a 32-bit host: acceptable only when the high
	word is zero (the value fits in 32 unsigned bits).  */
3775       if (HOST_BITS_PER_WIDE_INT == 32)
3776 	return  (GET_MODE (value) == VOIDmode
3777 		 && !CONST_DOUBLE_HIGH (value));
     /* CONST_INT: nonnegative on a 32-bit host; otherwise the top 32
	bits must be clear.  */
3781       if (HOST_BITS_PER_WIDE_INT == 32)
3782 	return INTVAL (value) >= 0;
3784 	return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3787       /* For certain code models, the symbolic references are known to fit.  */
3789       return ix86_cmodel == CM_SMALL;
3791       /* For certain code models, the code is near as well.  */
3793       return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3795       /* We also may accept the offsetted memory references in certain special
     /* CONST: SYMBOL_REF/LABEL_REF plus a constant offset, with
	per-code-model bounds on the offset.  */
3798       if (GET_CODE (XEXP (value, 0)) == PLUS)
3800 	  rtx op1 = XEXP (XEXP (value, 0), 0);
3801 	  rtx op2 = XEXP (XEXP (value, 0), 1);
3803 	  if (ix86_cmodel == CM_LARGE)
3805 	  switch (GET_CODE (op1))
3809 	      /* For small code model we may accept pretty large positive
3810 		 offsets, since one bit is available for free.  Negative
3811 		 offsets are limited by the size of NULL pointer area
3812 		 specified by the ABI.  */
3813 	      if (ix86_cmodel == CM_SMALL
3814 		  && GET_CODE (op2) == CONST_INT
3815 		  && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3816 		  && (trunc_int_for_mode (INTVAL (op2), SImode)
3819 	      /* ??? For the kernel, we may accept adjustment of
3820 		 -0x10000000, since we know that it will just convert
3821 		 negative address space to positive, but perhaps this
3822 		 is not worthwhile.  */
3825 	      /* These conditions are similar to SYMBOL_REF ones, just the
3826 		 constraints for code models differ.  */
3827 	      if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3828 		  && GET_CODE (op2) == CONST_INT
3829 		  && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3830 		  && (trunc_int_for_mode (INTVAL (op2), SImode)
3844 /* Value should be nonzero if functions must have frame pointers.
3845    Zero means the frame pointer need not be set up (and parms may
3846    be accessed via the stack pointer) in functions that seem suitable.  */
3849 ix86_frame_pointer_required ()
3851   /* If we accessed previous frames, then the generated code expects
3852      to be able to access the saved ebp value in our frame.  */
3853   if (cfun->machine->accesses_prev_frame)
3856   /* Several x86 os'es need a frame pointer for other reasons,
3857      usually pertaining to setjmp.  */
3858   if (SUBTARGET_FRAME_POINTER_REQUIRED)
3861   /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3862      the frame pointer by default.  Turn it back on now if we've not
3863      got a leaf function.  */
     /* Profiling also counts as "not a leaf" for this purpose.  */
3864   if (TARGET_OMIT_LEAF_FRAME_POINTER
3865       && (!current_function_is_leaf || current_function_profile))
3871 /* Record that the current function accesses previous call frames.  */
3874 ix86_setup_frame_addresses ()
     /* Forces ix86_frame_pointer_required to keep the frame pointer.  */
3876   cfun->machine->accesses_prev_frame = 1;
/* PC-thunk emission support: use a hidden COMDAT function when the
   assembler/linker support it, else a local label per register.  */
3879 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3880 # define USE_HIDDEN_LINKONCE 1
3882 # define USE_HIDDEN_LINKONCE 0
/* Bitmask of registers for which a pc thunk has been requested;
   bit N set means a thunk for register N must be emitted at file end.  */
3885 static int pic_labels_used;
3887 /* Fills in the label name that should be used for a pc thunk for
3888    the given register.  */
3891 get_pc_thunk_name (name, regno)
     /* COMDAT thunks get a well-known public name keyed by register;
	otherwise generate an internal "LPR<regno>" label.  */
3895   if (USE_HIDDEN_LINKONCE)
3896     sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3898     ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3902 /* This function generates code for -fpic that loads %ebx with
3903    the return address of the caller and then returns.  */
3906 ix86_asm_file_end (file)
     /* Emit one pc-load thunk per general register that was marked as
	used in pic_labels_used during compilation.  */
3912   for (regno = 0; regno < 8; ++regno)
3916       if (! ((pic_labels_used >> regno) & 1))
3919       get_pc_thunk_name (name, regno);
3921       if (USE_HIDDEN_LINKONCE)
     /* Build a public, one-only, hidden FUNCTION_DECL so duplicate
	thunks from different objects are merged by the linker.  */
3925 	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
3927 	  TREE_PUBLIC (decl) = 1;
3928 	  TREE_STATIC (decl) = 1;
3929 	  DECL_ONE_ONLY (decl) = 1;
3931 	  (*targetm.asm_out.unique_section) (decl, 0);
3932 	  named_section (decl, NULL, 0);
3934 	  (*targetm.asm_out.globalize_label) (file, name);
3935 	  fputs ("\t.hidden\t", file);
3936 	  assemble_name (file, name);
3938 	  ASM_DECLARE_FUNCTION_NAME (file, name, decl);
3943 	  ASM_OUTPUT_LABEL (file, name);
     /* Thunk body: mov (%esp), %reg; ret -- loads the caller's return
	address into the register.  */
3946       xops[0] = gen_rtx_REG (SImode, regno);
3947       xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3948       output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3949       output_asm_insn ("ret", xops);
3953 /* Emit code for the SET_GOT patterns.  */
3956 output_set_got (dest)
3962   xops[1] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
     /* Without deep branch prediction (or without PIC) use the classic
	call/pop sequence to get the pc.  */
3964   if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
3966       xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
3969 	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3971 	output_asm_insn ("call\t%a2", xops);
3974       /* Output the "canonical" label name ("Lxx$pb") here too.  This
3975          is what will be referred to by the Mach-O PIC subsystem.  */
3976       ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
3978       ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
3979 				 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
3982 	output_asm_insn ("pop{l}\t%0", xops);
     /* Otherwise call the per-register pc thunk and record that the
	thunk for DEST must be emitted at end of file.  */
3987       get_pc_thunk_name (name, REGNO (dest));
3988       pic_labels_used |= 1 << REGNO (dest);
3990       xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
3991       xops[2] = gen_rtx_MEM (QImode, xops[2]);
3992       output_asm_insn ("call\t%X2", xops);
     /* Finally add the GOT base offset to the loaded pc.  */
3995   if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
3996     output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
3997   else if (!TARGET_MACHO)
3998     output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4003 /* Generate an "push" pattern for input ARG.  */
     /* Represented as (set (mem (pre_dec sp)) arg).  */
4009   return gen_rtx_SET (VOIDmode,
4011 				      gen_rtx_PRE_DEC (Pmode,
4012 						       stack_pointer_rtx)),
4016 /* Return >= 0 if there is an unused call-clobbered register available
4017    for the entire function.  */
4020 ix86_select_alt_pic_regnum ()
     /* Only safe in a leaf, non-profiled function, where no call can
	clobber the chosen register.  */
4022   if (current_function_is_leaf && !current_function_profile)
     /* Scan eax/edx/ecx (regnos 2..0) for one never used.  */
4025       for (i = 2; i >= 0; --i)
4026 	if (!regs_ever_live[i])
4030   return INVALID_REGNUM;
4033 /* Return 1 if we need to save REGNO.  */
4035 ix86_save_reg (regno, maybe_eh_return)
4037      int maybe_eh_return;
     /* The PIC register must be saved when it is live, unless an unused
	alternate register can hold the PIC base instead.  */
4039   if (pic_offset_table_rtx
4040       && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4041       && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4042 	  || current_function_profile
4043 	  || current_function_calls_eh_return))
4045       if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
     /* eh_return data registers count as saved when requested.  */
4050   if (current_function_calls_eh_return && maybe_eh_return)
4055 	  unsigned test = EH_RETURN_DATA_REGNO (i);
4056 	  if (test == INVALID_REGNUM)
     /* Ordinary case: live, callee-saved, not fixed, and not the frame
	pointer when one is in use (it is saved separately).  */
4063   return (regs_ever_live[regno]
4064 	  && !call_used_regs[regno]
4065 	  && !fixed_regs[regno]
4066 	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4069 /* Return number of registers to be saved on the stack.  */
     /* Count registers for which ix86_save_reg is true (including the
	eh_return data registers).  */
4077   for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4078     if (ix86_save_reg (regno, true))
4083 /* Return the offset between two registers, one to be eliminated, and the other
4084    its replacement, at the start of a routine.  */
4087 ix86_initial_elimination_offset (from, to)
4091   struct ix86_frame frame;
4092   ix86_compute_frame_layout (&frame);
     /* All offsets are read from the frame layout just computed.  */
4094   if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4095     return frame.hard_frame_pointer_offset;
4096   else if (from == FRAME_POINTER_REGNUM
4097 	   && to == HARD_FRAME_POINTER_REGNUM)
4098     return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4101       if (to != STACK_POINTER_REGNUM)
4103       else if (from == ARG_POINTER_REGNUM)
4104 	return frame.stack_pointer_offset;
4105       else if (from != FRAME_POINTER_REGNUM)
4108 	return frame.stack_pointer_offset - frame.frame_pointer_offset;
4112 /* Fill structure ix86_frame about frame of currently computed function.  */
4115 ix86_compute_frame_layout (frame)
4116      struct ix86_frame *frame;
4118   HOST_WIDE_INT total_size;
4119   int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4121   int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4122   HOST_WIDE_INT size = get_frame_size ();
4124   frame->nregs = ix86_nsaved_regs ();
4127   /* Skip return address and saved base pointer.  */
4128   offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4130   frame->hard_frame_pointer_offset = offset;
4132   /* Do some sanity checking of stack_alignment_needed and
4133      preferred_alignment, since i386 port is the only using those features
4134      that may break easily.  */
4136   if (size && !stack_alignment_needed)
4138   if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4140   if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4142   if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4145   if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4146     stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4148   /* Register save area */
4149   offset += frame->nregs * UNITS_PER_WORD;
     /* Va-arg register save area (x86-64 only).  */
4152   if (ix86_save_varrargs_registers)
4154       offset += X86_64_VARARGS_SIZE;
4155       frame->va_arg_size = X86_64_VARARGS_SIZE;
4158     frame->va_arg_size = 0;
4160   /* Align start of frame for local function.  */
4161   frame->padding1 = ((offset + stack_alignment_needed - 1)
4162 		     & -stack_alignment_needed) - offset;
4164   offset += frame->padding1;
4166   /* Frame pointer points here.  */
4167   frame->frame_pointer_offset = offset;
4171   /* Add outgoing arguments area.  Can be skipped if we eliminated
4172      all the function calls as dead code.  */
4173   if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4175       offset += current_function_outgoing_args_size;
4176       frame->outgoing_arguments_size = current_function_outgoing_args_size;
4179     frame->outgoing_arguments_size = 0;
4181   /* Align stack boundary.  Only needed if we're calling another function
4183   if (!current_function_is_leaf || current_function_calls_alloca)
4184     frame->padding2 = ((offset + preferred_alignment - 1)
4185 		       & -preferred_alignment) - offset;
4187     frame->padding2 = 0;
4189   offset += frame->padding2;
4191   /* We've reached end of stack frame.  */
4192   frame->stack_pointer_offset = offset;
4194   /* Size prologue needs to allocate.  */
4195   frame->to_allocate =
4196     (size + frame->padding1 + frame->padding2
4197      + frame->outgoing_arguments_size + frame->va_arg_size);
     /* x86-64 red zone: a leaf function with an unchanging sp may use up
	to RED_ZONE_SIZE - RED_ZONE_RESERVE bytes below sp without
	allocating them.  */
4199   if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4200       && current_function_is_leaf)
4202       frame->red_zone_size = frame->to_allocate;
4203       if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4204 	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4207     frame->red_zone_size = 0;
4208   frame->to_allocate -= frame->red_zone_size;
4209   frame->stack_pointer_offset -= frame->red_zone_size;
     /* Debug dump of the computed layout (NOTE: appears to be in a
	normally-disabled debug section -- confirm in full source).  */
4211   fprintf (stderr, "nregs: %i\n", frame->nregs);
4212   fprintf (stderr, "size: %i\n", size);
4213   fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4214   fprintf (stderr, "padding1: %i\n", frame->padding1);
4215   fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4216   fprintf (stderr, "padding2: %i\n", frame->padding2);
4217   fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4218   fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4219   fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4220   fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4221 	   frame->hard_frame_pointer_offset);
4222   fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4226 /* Emit code to save registers in the prologue.  */
4229 ix86_emit_save_regs ()
     /* Push each register that needs saving, highest regno first, and
	mark each push as frame-related for unwind/debug info.  */
4234   for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4235     if (ix86_save_reg (regno, true))
4237 	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4238 	RTX_FRAME_RELATED_P (insn) = 1;
4242 /* Emit code to save registers using MOV insns.  First register
4243    is restored from POINTER + OFFSET.  */
4245 ix86_emit_save_regs_using_mov (pointer, offset)
4247      HOST_WIDE_INT offset;
     /* Store each saved register at successive word offsets from
	POINTER, marking each store frame-related.  */
4252   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4253     if (ix86_save_reg (regno, true))
4255 	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4257 			       gen_rtx_REG (Pmode, regno));
4258 	RTX_FRAME_RELATED_P (insn) = 1;
4259 	offset += UNITS_PER_WORD;
4263 /* Expand the prologue into a bunch of separate insns.  */
4266 ix86_expand_prologue ()
4270   struct ix86_frame frame;
4272   HOST_WIDE_INT allocate;
     /* Decide between push-based and mov-based saves: mov allows better
	scheduling but is only worthwhile in cheap (fast) prologues.  */
4276   use_fast_prologue_epilogue
4277      = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4278   if (TARGET_PROLOGUE_USING_MOVE)
4279     use_mov = use_fast_prologue_epilogue;
4281   ix86_compute_frame_layout (&frame);
4283   /* Note: AT&T enter does NOT have reversed args.  Enter is probably
4284      slower on all targets.  Also sdb doesn't like it.  */
4286   if (frame_pointer_needed)
4288       insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4289       RTX_FRAME_RELATED_P (insn) = 1;
4291       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4292       RTX_FRAME_RELATED_P (insn) = 1;
4295   allocate = frame.to_allocate;
4296   /* In case we are dealing only with single register and empty frame,
4297      push is equivalent of the mov+add sequence.  */
4298   if (allocate == 0 && frame.nregs <= 1)
4302     ix86_emit_save_regs ();
     /* When saving with mov, fold the register area into the single
	stack adjustment below.  */
4304     allocate += frame.nregs * UNITS_PER_WORD;
4308   else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4310       insn = emit_insn (gen_pro_epilogue_adjust_stack
4311 			(stack_pointer_rtx, stack_pointer_rtx,
4312 			 GEN_INT (-allocate)));
4313       RTX_FRAME_RELATED_P (insn) = 1;
4317       /* ??? Is this only valid for Win32?  */
     /* Large allocation with stack probing: call _alloca with the size
	in eax so the pages are touched in order.  */
4324       arg0 = gen_rtx_REG (SImode, 0);
4325       emit_move_insn (arg0, GEN_INT (allocate));
4327       sym = gen_rtx_MEM (FUNCTION_MODE,
4328 			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4329       insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4331       CALL_INSN_FUNCTION_USAGE (insn)
4332 	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4333 			     CALL_INSN_FUNCTION_USAGE (insn));
     /* Mov-based saves go relative to esp when no frame pointer (or
	nothing was allocated), else below the hard frame pointer.  */
4337       if (!frame_pointer_needed || !frame.to_allocate)
4338 	ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4340 	ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4341 				       -frame.nregs * UNITS_PER_WORD);
4344 #ifdef SUBTARGET_PROLOGUE
4348   pic_reg_used = false;
     /* Materialize the GOT pointer if the function needs it, preferring
	an otherwise-unused call-clobbered register when available.  */
4349   if (pic_offset_table_rtx
4350       && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4351 	  || current_function_profile))
4353       unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4355       if (alt_pic_reg_used != INVALID_REGNUM)
4356 	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4358       pic_reg_used = true;
4363       insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4365       /* Even with accurate pre-reload life analysis, we can wind up
4366 	 deleting all references to the pic register after reload.
4367 	 Consider if cross-jumping unifies two sides of a branch
4368 	 controled by a comparison vs the only read from a global.
4369 	 In which case, allow the set_got to be deleted, though we're
4370 	 too late to do anything about the ebx save in the prologue.  */
4371       REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4374   /* Prevent function calls from be scheduled before the call to mcount.
4375      In the pic_reg_used case, make sure that the got load isn't deleted.  */
4376   if (current_function_profile)
4377     emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4380 /* Emit code to restore saved registers using MOV insns.  First register
4381    is restored from POINTER + OFFSET.  */
4383 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4386      int maybe_eh_return;
     /* Load each saved register back from successive word offsets.
	MAYBE_EH_RETURN controls whether the eh_return data registers
	are included in the save set.  */
4390   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4391     if (ix86_save_reg (regno, maybe_eh_return))
4393 	emit_move_insn (gen_rtx_REG (Pmode, regno),
4394 			adjust_address (gen_rtx_MEM (Pmode, pointer),
4396 	offset += UNITS_PER_WORD;
4400 /* Restore function stack, frame, and registers.  */
4403 ix86_expand_epilogue (style)
     /* sp is usable for addressing unless a frame pointer exists and sp
	may have moved (e.g. alloca).  */
4407   int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4408   struct ix86_frame frame;
4409   HOST_WIDE_INT offset;
4411   ix86_compute_frame_layout (&frame);
4413   /* Calculate start of saved registers relative to ebp.  Special care
4414      must be taken for the normal return case of a function using
4415      eh_return: the eax and edx registers are marked as saved, but not
4416      restored along this path.  */
4417   offset = frame.nregs;
4418   if (current_function_calls_eh_return && style != 2)
4420   offset *= -UNITS_PER_WORD;
4422   /* If we're only restoring one register and sp is not valid then
4423      using a move instruction to restore the register since it's
4424      less work than reloading sp and popping the register.
4426      The default code result in stack adjustment using add/lea instruction,
4427      while this code results in LEAVE instruction (or discrete equivalent),
4428      so it is profitable in some other cases as well.  Especially when there
4429      are no registers to restore.  We also use this code when TARGET_USE_LEAVE
4430      and there is exactly one register to pop.  This heruistic may need some
4431      tuning in future.  */
4432   if ((!sp_valid && frame.nregs <= 1)
4433       || (TARGET_EPILOGUE_USING_MOVE
4434 	  && use_fast_prologue_epilogue
4435 	  && (frame.nregs > 1 || frame.to_allocate))
4436       || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4437       || (frame_pointer_needed && TARGET_USE_LEAVE
4438 	  && use_fast_prologue_epilogue && frame.nregs == 1)
4439       || current_function_calls_eh_return)
4441       /* Restore registers.  We can use ebp or esp to address the memory
4442 	 locations.  If both are available, default to ebp, since offsets
4443 	 are known to be small.  Only exception is esp pointing directly to the
4444 	 end of block of saved registers, where we may simplify addressing
4447       if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4448 	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4449 					  frame.to_allocate, style == 2);
4451 	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4452 					  offset, style == 2);
4454       /* eh_return epilogues need %ecx added to the stack pointer.  */
4457 	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4459 	  if (frame_pointer_needed)
     /* With a frame pointer: fold the stackadj into ebp, reload the
	saved ebp, then point sp just past it.  */
4461 	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4462 	      tmp = plus_constant (tmp, UNITS_PER_WORD);
4463 	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4465 	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4466 	      emit_move_insn (hard_frame_pointer_rtx, tmp);
4468 	      emit_insn (gen_pro_epilogue_adjust_stack
4469 			 (stack_pointer_rtx, sa, const0_rtx));
     /* Without a frame pointer: add frame size plus stackadj to sp.  */
4473 	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4474 	      tmp = plus_constant (tmp, (frame.to_allocate
4475 					 + frame.nregs * UNITS_PER_WORD));
4476 	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4479       else if (!frame_pointer_needed)
4480 	emit_insn (gen_pro_epilogue_adjust_stack
4481 		   (stack_pointer_rtx, stack_pointer_rtx,
4482 		    GEN_INT (frame.to_allocate
4483 			     + frame.nregs * UNITS_PER_WORD)));
4484       /* If not an i386, mov & pop is faster than "leave".  */
4485       else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4486 	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4489 	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4490 						    hard_frame_pointer_rtx,
4493 	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4495 	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4500       /* First step is to deallocate the stack frame so that we can
4501 	 pop the registers.  */
4504       if (!frame_pointer_needed)
4506 	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4507 						    hard_frame_pointer_rtx,
4510       else if (frame.to_allocate)
4511 	emit_insn (gen_pro_epilogue_adjust_stack
4512 		   (stack_pointer_rtx, stack_pointer_rtx,
4513 		    GEN_INT (frame.to_allocate)));
     /* Pop saved registers (eh data regs excluded on this path).  */
4515       for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4516 	if (ix86_save_reg (regno, false))
4519 	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4521 	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4523       if (frame_pointer_needed)
4525 	  /* Leave results in shorter dependency chains on CPUs that are
4526 	     able to grok it fast.  */
4527 	  if (TARGET_USE_LEAVE)
4528 	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4529 	  else if (TARGET_64BIT)
4530 	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4532 	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4536   /* Sibcall epilogues don't want a return instruction.  */
     /* Emit the return, honoring any callee-pop byte count.  */
4540   if (current_function_pops_args && current_function_args_size)
4542       rtx popc = GEN_INT (current_function_pops_args);
4544       /* i386 can only pop 64K bytes.  If asked to pop more, pop
4545 	 return address, do explicit add, and jump indirectly to the
4548       if (current_function_pops_args >= 65536)
4550 	  rtx ecx = gen_rtx_REG (SImode, 2);
4552 	  /* There are is no "pascal" calling convention in 64bit ABI.  */
4556 	  emit_insn (gen_popsi1 (ecx));
4557 	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4558 	  emit_jump_insn (gen_return_indirect_internal (ecx));
4561 	emit_jump_insn (gen_return_pop_internal (popc));
4564     emit_jump_insn (gen_return_internal ());
4567 /* Reset from the function's potential modifications.  */
4570 ix86_output_function_epilogue (file, size)
4571      FILE *file ATTRIBUTE_UNUSED;
4572      HOST_WIDE_INT size ATTRIBUTE_UNUSED;
     /* Undo any alternate PIC register selection made in the prologue so
	the next function starts from the canonical PIC register.  */
4574   if (pic_offset_table_rtx)
4575     REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4578 /* Extract the parts of an RTL expression that is a valid memory address
4579    for an instruction.  Return 0 if the structure of the address is
4580    grossly off.  Return -1 if the address contains ASHIFT, so it is not
4581    strictly valid, but still used for computing length of lea instruction.
4585 ix86_decompose_address (addr, out)
4587      struct ix86_address *out;
4589   rtx base = NULL_RTX;
4590   rtx index = NULL_RTX;
4591   rtx disp = NULL_RTX;
4592   HOST_WIDE_INT scale = 1;
4593   rtx scale_rtx = NULL_RTX;
     /* Classify ADDR by top-level code and pull out base, index, scale
	and displacement.  */
4596   if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4598   else if (GET_CODE (addr) == PLUS)
4600       rtx op0 = XEXP (addr, 0);
4601       rtx op1 = XEXP (addr, 1);
4602       enum rtx_code code0 = GET_CODE (op0);
4603       enum rtx_code code1 = GET_CODE (op1);
4605       if (code0 == REG || code0 == SUBREG)
4607 	  if (code1 == REG || code1 == SUBREG)
4608 	    index = op0, base = op1;	/* index + base */
4610 	    base = op0, disp = op1;	/* base + displacement */
4612       else if (code0 == MULT)
4614 	  index = XEXP (op0, 0);
4615 	  scale_rtx = XEXP (op0, 1);
4616 	  if (code1 == REG || code1 == SUBREG)
4617 	    base = op1;		/* index*scale + base */
4619 	    disp = op1;		/* index*scale + disp */
4621       else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4623 	  index = XEXP (XEXP (op0, 0), 0);	/* index*scale + base + disp */
4624 	  scale_rtx = XEXP (XEXP (op0, 0), 1);
4625 	  base = XEXP (op0, 1);
4628       else if (code0 == PLUS)
4630 	  index = XEXP (op0, 0);	/* index + base + disp */
4631 	  base = XEXP (op0, 1);
4637   else if (GET_CODE (addr) == MULT)
4639       index = XEXP (addr, 0);		/* index*scale */
4640       scale_rtx = XEXP (addr, 1);
4642   else if (GET_CODE (addr) == ASHIFT)
4646       /* We're called for lea too, which implements ashift on occasion.  */
4647       index = XEXP (addr, 0);
4648       tmp = XEXP (addr, 1);
4649       if (GET_CODE (tmp) != CONST_INT)
4651       scale = INTVAL (tmp);
     /* Shift count must be 0..3 (scale 1/2/4/8).  */
4652       if ((unsigned HOST_WIDE_INT) scale > 3)
4658     disp = addr;			/* displacement */
4660   /* Extract the integral value of scale.  */
4663       if (GET_CODE (scale_rtx) != CONST_INT)
4665       scale = INTVAL (scale_rtx);
4668   /* Allow arg pointer and stack pointer as index if there is not scaling */
     /* (esp cannot be an index in hardware; swap it into the base.)  */
4669   if (base && index && scale == 1
4670       && (index == arg_pointer_rtx || index == frame_pointer_rtx
4671           || index == stack_pointer_rtx))
4678   /* Special case: %ebp cannot be encoded as a base without a displacement.  */
4679   if ((base == hard_frame_pointer_rtx
4680        || base == frame_pointer_rtx
4681        || base == arg_pointer_rtx) && !disp)
4684   /* Special case: on K6, [%esi] makes the instruction vector decoded.
4685      Avoid this by transforming to [%esi+0].  */
4686   if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4687       && base && !index && !disp
4689       && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4692   /* Special case: encode reg+reg instead of reg*2.  */
4693   if (!base && index && scale && scale == 2)
4694     base = index, scale = 1;
4696   /* Special case: scaling cannot be encoded without base or displacement.  */
4697   if (!base && !disp && index && scale != 1)
4708 /* Return cost of the memory address x.
4709    For i386, it is better to use a complex address than let gcc copy
4710    the address into a reg and make a new pseudo.  But not if the address
4711    requires to two regs - that would mean more pseudos with longer
4714 ix86_address_cost (x)
4717   struct ix86_address parts;
4720   if (!ix86_decompose_address (x, &parts))
     /* Strip SUBREG wrappers so REG_P/REGNO tests below work.  */
4723   if (parts.base && GET_CODE (parts.base) == SUBREG)
4724     parts.base = SUBREG_REG (parts.base);
4725   if (parts.index && GET_CODE (parts.index) == SUBREG)
4726     parts.index = SUBREG_REG (parts.index);
4728   /* More complex memory references are better.  */
4729   if (parts.disp && parts.disp != const0_rtx)
4732   /* Attempt to minimize number of registers in the address.  */
     /* Penalize addresses needing pseudo (not-yet-allocated) registers;
	two distinct pseudos is worse than one.  */
4734       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4736 	  && (!REG_P (parts.index)
4737 	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4741       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4743       && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4744       && parts.base != parts.index)
4747   /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
4748      since it's predecode logic can't detect the length of instructions
4749      and it degenerates to vector decoded.  Increase cost of such
4750      addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
4751      to split such addresses or even refuse such addresses at all.
4753      Following addressing modes are affected:
4758      The first and last case  may be avoidable by explicitly coding the zero in
4759      memory address, but I don't have AMD-K6 machine handy to check this
4763       && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4764 	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4765 	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4771 /* If X is a machine specific address (i.e. a symbol or label being
4772    referenced as a displacement from the GOT implemented using an
4773    UNSPEC), then return the base term.  Otherwise return X.  */
4776 ix86_find_base_term (x)
     /* 64-bit case: look through (const (plus (unspec GOTPCREL) off)).  */
4783       if (GET_CODE (x) != CONST)
4786       if (GET_CODE (term) == PLUS
4787 	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
4788 	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4789 	term = XEXP (term, 0);
4790       if (GET_CODE (term) != UNSPEC
4791 	  || XINT (term, 1) != UNSPEC_GOTPCREL)
4794       term = XVECEXP (term, 0, 0);
4796       if (GET_CODE (term) != SYMBOL_REF
4797 	  && GET_CODE (term) != LABEL_REF)
     /* 32-bit case: look through (plus pic_reg (const ... UNSPEC_GOTOFF)).  */
4803   if (GET_CODE (x) != PLUS
4804       || XEXP (x, 0) != pic_offset_table_rtx
4805       || GET_CODE (XEXP (x, 1)) != CONST)
4808   term = XEXP (XEXP (x, 1), 0);
4810   if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4811     term = XEXP (term, 0);
4813   if (GET_CODE (term) != UNSPEC
4814       || XINT (term, 1) != UNSPEC_GOTOFF)
4817   term = XVECEXP (term, 0, 0);
4819   if (GET_CODE (term) != SYMBOL_REF
4820       && GET_CODE (term) != LABEL_REF)
/* NOTE(review): partial listing -- interior lines (case labels, returns,
   closing braces) are elided; only comments added, code untouched.  */
4826 /* Determine if a given RTX is a valid constant.  We already know this
4827    satisfies CONSTANT_P.  */
4830 legitimate_constant_p (x)
4835   switch (GET_CODE (x))
4838       /* TLS symbols are not constant.  */
4839       if (tls_symbolic_operand (x, Pmode))
/* CONST case: look inside the wrapper.  */
4844       inner = XEXP (x, 0);
4846       /* Offsets of TLS symbols are never valid.
4847 	 Discourage CSE from creating them.  */
4848       if (GET_CODE (inner) == PLUS
4849 	  && tls_symbolic_operand (XEXP (inner, 0), Pmode))
4852       /* Only some unspecs are valid as "constants".  */
4853       if (GET_CODE (inner) == UNSPEC)
4854 	switch (XINT (inner, 1))
/* Presumably the UNSPEC_TPOFF/NTPOFF-style case -- the case label is
   elided in this listing, TODO confirm.  */
4857 	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4867   /* Otherwise we handle everything else in the move patterns.  */
/* NOTE(review): partial listing -- case labels and returns between the
   visible lines are elided; only comments added, code untouched.  */
4871 /* Determine if a given RTX is a valid constant address.  */
4874 constant_address_p (x)
4877   switch (GET_CODE (x))
/* Whatever codes precede this return (elided) are only constant
   addresses in 64-bit mode.  */
4884       return TARGET_64BIT;
4887       /* For Mach-O, really believe the CONST.  */
4890       /* Otherwise fall through.  */
/* Default: symbolic constants are direct addresses only without PIC.  */
4892       return !flag_pic && legitimate_constant_p (x);
/* NOTE(review): partial listing -- interior lines elided; only comments
   added, code untouched.  */
4899 /* Nonzero if the constant value X is a legitimate general operand
4900    when generating PIC code.  It is given that flag_pic is on and
4901    that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
4904 legitimate_pic_operand_p (x)
4909   switch (GET_CODE (x))
4912       inner = XEXP (x, 0);
4914       /* Only some unspecs are valid as "constants".  */
4915       if (GET_CODE (inner) == UNSPEC)
4916 	switch (XINT (inner, 1))
/* Presumably the local-exec TLS unspec case (label elided).  */
4919 	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
/* SYMBOL_REF/LABEL_REF (preceding case labels elided): defer to the
   displacement validator.  */
4927       return legitimate_pic_address_disp_p (x);
/* NOTE(review): partial listing -- gaps in the embedded line numbers mark
   elided source lines; only comments are added, code untouched.  */
4934 /* Determine if a given CONST RTX is a valid memory displacement
4938 legitimate_pic_address_disp_p (disp)
4943   /* In 64bit mode we can allow direct addresses of symbols and labels
4944      when they are not dynamic symbols.  */
4948       if (GET_CODE (disp) == CONST)
4950       /* ??? Handle PIC code models */
/* Accept sym+offset when the offset stays inside the small-PIC +/-1GB
   window.  */
4951       if (GET_CODE (x) == PLUS
4952 	  && (GET_CODE (XEXP (x, 1)) == CONST_INT
4953 	      && ix86_cmodel == CM_SMALL_PIC
4954 	      && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4955 	      && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4957       if (local_symbolic_operand (x, Pmode))
4960       if (GET_CODE (disp) != CONST)
4962       disp = XEXP (disp, 0);
/* 64-bit path (guard elided): only a bare @GOTPCREL of a symbol/label
   is acceptable -- no PLUS around it.  */
4966       /* We are unsafe to allow PLUS expressions.  This limit allowed distance
4967          of GOT tables.  We should not need these anyway.  */
4968       if (GET_CODE (disp) != UNSPEC
4969 	  || XINT (disp, 1) != UNSPEC_GOTPCREL)
4972       if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4973 	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit path: strip an outer constant offset before classifying the
   unspec.  */
4979   if (GET_CODE (disp) == PLUS)
4981       if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4983       disp = XEXP (disp, 0);
4987   /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O.  */
4988   if (TARGET_MACHO && GET_CODE (disp) == MINUS)
4990       if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
4991           || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
4992 	if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
4994 	    const char *sym_name = XSTR (XEXP (disp, 1), 0);
/* Mach-O pic-base symbols are recognized by a "$pb" substring.  */
4995 	    if (strstr (sym_name, "$pb") != 0)
5000   if (GET_CODE (disp) != UNSPEC)
/* Classify the relocation unspec; several case labels are elided in
   this listing.  */
5003   switch (XINT (disp, 1))
5008       return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5010       return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5011     case UNSPEC_GOTTPOFF:
5014       return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5016       /* ??? Could support offset here.  */
5019       return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5021       /* ??? Could support offset here.  */
5024       return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
/* NOTE(review): partial listing -- gaps in the embedded line numbers mark
   elided source lines (braces, gotos to the error label, etc.); only
   comments are added, code untouched.  */
5030 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5031    memory address for an instruction.  The MODE argument is the machine mode
5032    for the MEM expression that wants to use this address.
5034    It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
5035    convert common non-canonical forms to canonical form so that they will
5039 legitimate_address_p (mode, addr, strict)
5040      enum machine_mode mode;
5044   struct ix86_address parts;
5045   rtx base, index, disp;
5046   HOST_WIDE_INT scale;
/* On rejection, REASON/REASON_RTX feed the debug dump near the end.  */
5047   const char *reason = NULL;
5048   rtx reason_rtx = NULL_RTX;
5050   if (TARGET_DEBUG_ADDR)
5053 	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5054 	       GET_MODE_NAME (mode), strict);
/* The bare thread-pointer unspec is accepted as-is.  */
5058   if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5060       if (TARGET_DEBUG_ADDR)
5061 	fprintf (stderr, "Success.\n");
5065   if (ix86_decompose_address (addr, &parts) <= 0)
5067       reason = "decomposition failed";
5072   index = parts.index;
5074   scale = parts.scale;
5076   /* Validate base register.
5078      Don't allow SUBREG's here, it can lead to spill failures when the base
5079      is one word out of a two word structure, which is represented internally
5087       if (GET_CODE (base) == SUBREG)
5088 	reg = SUBREG_REG (base);
5092       if (GET_CODE (reg) != REG)
5094 	  reason = "base is not a register";
5098       if (GET_MODE (base) != Pmode)
5100 	  reason = "base is not in Pmode";
5104       if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5105 	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5107 	  reason = "base is not valid";
5112   /* Validate index register.
5114      Don't allow SUBREG's here, it can lead to spill failures when the index
5115      is one word out of a two word structure, which is represented internally
5123       if (GET_CODE (index) == SUBREG)
5124 	reg = SUBREG_REG (index);
5128       if (GET_CODE (reg) != REG)
5130 	  reason = "index is not a register";
5134       if (GET_MODE (index) != Pmode)
5136 	  reason = "index is not in Pmode";
5140       if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5141 	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5143 	  reason = "index is not valid";
5148   /* Validate scale factor.  */
5151       reason_rtx = GEN_INT (scale);
/* Guard condition (scale != 1 with no index, presumably) is elided.  */
5154 	  reason = "scale without index";
5158       if (scale != 2 && scale != 4 && scale != 8)
5160 	  reason = "scale is not a valid multiplier";
5165   /* Validate displacement.  */
/* 64-bit guard (elided): displacement must fit a sign-extended imm32.  */
5172 	  if (!x86_64_sign_extended_value (disp))
5174 	      reason = "displacement is out of range";
5180 	  if (GET_CODE (disp) == CONST_DOUBLE)
5182 	      reason = "displacement is a const_double";
/* TLS/GOT unspec displacements: only certain unspecs are acceptable.  */
5187       if (GET_CODE (disp) == CONST
5188 	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5189 	switch (XINT (XEXP (disp, 0), 1))
5193 	  case UNSPEC_GOTPCREL:
5196 	    goto is_legitimate_pic;
5198 	  case UNSPEC_GOTTPOFF:
5204 	    reason = "invalid address unspec";
5208       else if (flag_pic && (SYMBOLIC_CONST (disp)
5210 			    && !machopic_operand_p (disp)
5215 	  if (TARGET_64BIT && (index || base))
/* RIP-relative addressing cannot combine with base/index registers.  */
5217 	      reason = "non-constant pic memory reference";
5220 	  if (! legitimate_pic_address_disp_p (disp))
5222 	      reason = "displacement is an invalid pic construct";
5226 	  /* This code used to verify that a symbolic pic displacement
5227 	     includes the pic_offset_table_rtx register.
5229 	     While this is good idea, unfortunately these constructs may
5230 	     be created by "adds using lea" optimization for incorrect
5239 	     This code is nonsensical, but results in addressing
5240 	     GOT table with pic_offset_table_rtx base.  We can't
5241 	     just refuse it easily, since it gets matched by
5242 	     "addsi3" pattern, that later gets split to lea in the
5243 	     case output register differs from input.  While this
5244 	     can be handled by separate addsi pattern for this case
5245 	     that never results in lea, this seems to be easier and
5246 	     correct fix for crash to disable this test.  */
5248       else if (!CONSTANT_ADDRESS_P (disp))
5250 	  reason = "displacement is not constant";
5255   /* Everything looks valid.  */
5256   if (TARGET_DEBUG_ADDR)
5257     fprintf (stderr, "Success.\n");
/* Shared failure exit (label elided): dump the reason when debugging.  */
5261   if (TARGET_DEBUG_ADDR)
5263       fprintf (stderr, "Error: %s\n", reason);
5264       debug_rtx (reason_rtx);
/* NOTE(review): partial listing -- the lazy-init guard and return are
   elided; only comments added, code untouched.  Shape is a classic
   once-only lazy initializer keyed on the -1 sentinel.  */
5269 /* Return an unique alias set for the GOT.  */
5271 static HOST_WIDE_INT
5272 ix86_GOT_alias_set ()
5274   static HOST_WIDE_INT set = -1;
5276     set = new_alias_set ();
/* NOTE(review): partial listing -- gaps in the embedded line numbers mark
   elided source lines (returns of NEW, braces, TARGET_64BIT guards);
   only comments are added, code untouched.  */
5280 /* Return a legitimate reference for ORIG (an address) using the
5281    register REG.  If REG is 0, a new pseudo is generated.
5283    There are two types of references that must be handled:
5285    1. Global data references must load the address from the GOT, via
5286       the PIC reg.  An insn is emitted to do this load, and the reg is
5289    2. Static data references, constant pool addresses, and code labels
5290       compute the address as an offset from the GOT, whose base is in
5291       the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
5292       differentiate them from global data objects.  The returned
5293       address is the PIC reg + an unspec constant.
5295    GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5296    reg also appears in the address.  */
5299 legitimize_pic_address (orig, reg)
5309 	reg = gen_reg_rtx (Pmode);
5310       /* Use the generic Mach-O PIC machinery.  */
5311       return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5314   if (local_symbolic_operand (addr, Pmode))
5316       /* In 64bit mode we can address such objects directly.  */
5321 	  /* This symbol may be referenced via a displacement from the PIC
5322 	     base address (@GOTOFF).  */
/* Mark the PIC register live when called during reload, since reload
   will not otherwise notice the new use.  */
5324 	  if (reload_in_progress)
5325 	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5326 	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5327 	  new = gen_rtx_CONST (Pmode, new);
5328 	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5332 	      emit_move_insn (reg, new);
5337   else if (GET_CODE (addr) == SYMBOL_REF)
/* 64-bit guard (elided): global symbol via RIP-relative @GOTPCREL load.  */
5341 	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5342 	  new = gen_rtx_CONST (Pmode, new);
5343 	  new = gen_rtx_MEM (Pmode, new);
5344 	  RTX_UNCHANGING_P (new) = 1;
5345 	  set_mem_alias_set (new, ix86_GOT_alias_set ());
5348 	    reg = gen_reg_rtx (Pmode);
5349 	  /* Use directly gen_movsi, otherwise the address is loaded
5350 	     into register for CSE.  We don't want to CSE this addresses,
5351 	     instead we CSE addresses from the GOT table, so skip this.  */
5352 	  emit_insn (gen_movsi (reg, new));
5357 	  /* This symbol must be referenced via a load from the
5358 	     Global Offset Table (@GOT).  */
5360 	  if (reload_in_progress)
5361 	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5362 	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5363 	  new = gen_rtx_CONST (Pmode, new);
5364 	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5365 	  new = gen_rtx_MEM (Pmode, new);
5366 	  RTX_UNCHANGING_P (new) = 1;
5367 	  set_mem_alias_set (new, ix86_GOT_alias_set ());
5370 	    reg = gen_reg_rtx (Pmode);
5371 	  emit_move_insn (reg, new);
5377       if (GET_CODE (addr) == CONST)
5379 	  addr = XEXP (addr, 0);
5381 	  /* We must match stuff we generate before.  Assume the only
5382 	     unspecs that can get here are ours.  Not that we could do
5383 	     anything with them anyway...  */
5384 	  if (GET_CODE (addr) == UNSPEC
5385 	      || (GET_CODE (addr) == PLUS
5386 		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5388 	  if (GET_CODE (addr) != PLUS)
5391       if (GET_CODE (addr) == PLUS)
5393 	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5395 	  /* Check first to see if this is a constant offset from a @GOTOFF
5396 	     symbol reference.  */
5397 	  if (local_symbolic_operand (op0, Pmode)
5398 	      && GET_CODE (op1) == CONST_INT)
5402 		  if (reload_in_progress)
5403 		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5404 		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5406 		  new = gen_rtx_PLUS (Pmode, new, op1);
5407 		  new = gen_rtx_CONST (Pmode, new);
5408 		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5412 		      emit_move_insn (reg, new);
5418 		      /* ??? We need to limit offsets here.  */
/* General PLUS: legitimize both halves recursively, then recombine,
   folding constant results and re-associating a constant tail.  */
5423 	      base = legitimize_pic_address (XEXP (addr, 0), reg);
5424 	      new  = legitimize_pic_address (XEXP (addr, 1),
5425 					     base == reg ? NULL_RTX : reg);
5427 	      if (GET_CODE (new) == CONST_INT)
5428 		new = plus_constant (base, INTVAL (new));
5431 		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5433 		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5434 		      new = XEXP (new, 1);
5436 		  new = gen_rtx_PLUS (Pmode, base, new);
/* NOTE(review): partial listing -- gaps in the embedded line numbers mark
   elided source lines; only comments are added, code untouched.
   Purpose (from visible code): record per-symbol facts in the SYMBOL_REF
   -- locality for PIC, and a TLS-model prefix character for TLS vars.  */
5445 ix86_encode_section_info (decl, first)
5447      int first ATTRIBUTE_UNUSED;
5449   bool local_p = (*targetm.binds_local_p) (decl);
5452   rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5453   if (GET_CODE (rtl) != MEM)
5455   symbol = XEXP (rtl, 0);
5456   if (GET_CODE (symbol) != SYMBOL_REF)
5459   /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5460      symbol so that we may access it directly in the GOT.  */
5463     SYMBOL_REF_FLAG (symbol) = local_p;
5465   /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5466      "local dynamic", "initial exec" or "local exec" TLS models
5469   if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5471       const char *symbol_str;
5474       enum tls_model kind;
/* Pick the cheapest TLS model allowed; the guarding conditions for the
   first two choices are elided in this listing.  */
5479 	kind = TLS_MODEL_LOCAL_EXEC;
5481 	kind = TLS_MODEL_INITIAL_EXEC;
5483       /* Local dynamic is inefficient when we're not combining the
5484 	 parts of the address.  */
5485       else if (optimize && local_p)
5486 	kind = TLS_MODEL_LOCAL_DYNAMIC;
5488 	kind = TLS_MODEL_GLOBAL_DYNAMIC;
/* -ftls-model can force a more general (higher-numbered) model.  */
5489       if (kind < flag_tls_default)
5490 	kind = flag_tls_default;
5492       symbol_str = XSTR (symbol, 0);
/* Already encoded?  Bail out if the model matches (mismatch handling
   is elided here).  */
5494       if (symbol_str[0] == '%')
5496 	  if (symbol_str[1] == tls_model_chars[kind])
/* Prepend "%<model-char>" to the symbol name.  */
5500       len = strlen (symbol_str) + 1;
5501       newstr = alloca (len + 2);
5504       newstr[1] = tls_model_chars[kind];
5505       memcpy (newstr + 2, symbol_str, len);
5507       XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
/* NOTE(review): only the comment and signature line survive in this
   listing; the body (which strips the "%<model>" prefix added by
   ix86_encode_section_info, presumably) is elided -- TODO confirm
   against full source.  */
5511 /* Undo the above when printing symbol names.  */
5514 ix86_strip_name_encoding (str)
/* NOTE(review): partial listing -- declaration/braces/return elided;
   only comments added, code untouched.  */
5524 /* Load the thread pointer into a register.  */
5527 get_thread_pointer ()
/* Build a MEM wrapping the UNSPEC_TP thread-pointer reference; marked
   unchanging and placed in the GOT alias set, then forced to a reg.  */
5531   tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5532   tp = gen_rtx_MEM (Pmode, tp);
5533   RTX_UNCHANGING_P (tp) = 1;
5534   set_mem_alias_set (tp, ix86_GOT_alias_set ());
5535   tp = force_reg (Pmode, tp);
/* NOTE(review): partial listing -- gaps in the embedded line numbers mark
   elided source lines (the TLS switch header, returns, braces, the
   `changed' flag updates); only comments are added, code untouched.  */
5540 /* Try machine-dependent ways of modifying an illegitimate address
5541    to be legitimate.  If we find one, return the new, valid address.
5542    This macro is used in only one place: `memory_address' in explow.c.
5544    OLDX is the address as it was before break_out_memory_refs was called.
5545    In some cases it is useful to look at this to decide what needs to be done.
5547    MODE and WIN are passed so that this macro can use
5548    GO_IF_LEGITIMATE_ADDRESS.
5550    It is always safe for this macro to do nothing.  It exists to recognize
5551    opportunities to optimize the output.
5553    For the 80386, we handle X+REG by loading X into a register R and
5554    using R+REG.  R will go in a general reg and indexing will be used.
5555    However, if REG is a broken-out memory address or multiplication,
5556    nothing needs to be done because REG can certainly go in a general reg.
5558    When -fpic is used, special handling is needed for symbolic references.
5559    See comments by legitimize_pic_address in i386.c for details.  */
5562 legitimize_address (x, oldx, mode)
5564      register rtx oldx ATTRIBUTE_UNUSED;
5565      enum machine_mode mode;
5570   if (TARGET_DEBUG_ADDR)
5572       fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5573 	       GET_MODE_NAME (mode));
/* TLS references get model-specific expansion (switch header elided).  */
5577   log = tls_symbolic_operand (x, mode);
5580       rtx dest, base, off, pic;
5584 	case TLS_MODEL_GLOBAL_DYNAMIC:
5585 	  dest = gen_reg_rtx (Pmode);
5586 	  emit_insn (gen_tls_global_dynamic (dest, x));
5589 	case TLS_MODEL_LOCAL_DYNAMIC:
5590 	  base = gen_reg_rtx (Pmode);
5591 	  emit_insn (gen_tls_local_dynamic_base (base));
5593 	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5594 	  off = gen_rtx_CONST (Pmode, off);
5596 	  return gen_rtx_PLUS (Pmode, base, off);
5598 	case TLS_MODEL_INITIAL_EXEC:
/* Reuse the PIC register if available; else materialize the GOT base.  */
5601 	      if (reload_in_progress)
5602 		regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5603 	      pic = pic_offset_table_rtx;
5607 	      pic = gen_reg_rtx (Pmode);
5608 	      emit_insn (gen_set_got (pic));
5611 	  base = get_thread_pointer ();
5613 	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_GOTTPOFF);
5614 	  off = gen_rtx_CONST (Pmode, off);
5615 	  off = gen_rtx_PLUS (Pmode, pic, off);
5616 	  off = gen_rtx_MEM (Pmode, off);
5617 	  RTX_UNCHANGING_P (off) = 1;
5618 	  set_mem_alias_set (off, ix86_GOT_alias_set ());
5620 	  /* Damn Sun for specifing a set of dynamic relocations without
5621 	     considering the two-operand nature of the architecture!
5622 	     We'd be much better off with a "GOTNTPOFF" relocation that
5623 	     already contained the negated constant.  */
5624 	  /* ??? Using negl and reg+reg addressing appears to be a lose
5625 	     size-wise.  The negl is two bytes, just like the extra movl
5626 	     incurred by the two-operand subl, but reg+reg addressing
5627 	     uses the two-byte modrm form, unlike plain reg.  */
5629 	  dest = gen_reg_rtx (Pmode);
5630 	  emit_insn (gen_subsi3 (dest, base, off));
5633 	case TLS_MODEL_LOCAL_EXEC:
5634 	  base = get_thread_pointer ();
5636 	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5637 				TARGET_GNU_TLS ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5638 	  off = gen_rtx_CONST (Pmode, off);
5641 	      return gen_rtx_PLUS (Pmode, base, off);
5644 	      dest = gen_reg_rtx (Pmode);
5645 	      emit_insn (gen_subsi3 (dest, base, off));
/* Non-TLS path starts here.  */
5656   if (flag_pic && SYMBOLIC_CONST (x))
5657     return legitimize_pic_address (x, 0);
5659   /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5660   if (GET_CODE (x) == ASHIFT
5661       && GET_CODE (XEXP (x, 1)) == CONST_INT
5662       && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5665       x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5666 			GEN_INT (1 << log));
5669   if (GET_CODE (x) == PLUS)
5671       /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
5673       if (GET_CODE (XEXP (x, 0)) == ASHIFT
5674 	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5675 	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5678 	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
5679 				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5680 				      GEN_INT (1 << log));
5683       if (GET_CODE (XEXP (x, 1)) == ASHIFT
5684 	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5685 	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5688 	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
5689 				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5690 				      GEN_INT (1 << log));
5693       /* Put multiply first if it isn't already.  */
5694       if (GET_CODE (XEXP (x, 1)) == MULT)
5696 	  rtx tmp = XEXP (x, 0);
5697 	  XEXP (x, 0) = XEXP (x, 1);
5702       /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5703 	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
5704 	 created by virtual register instantiation, register elimination, and
5705 	 similar optimizations.  */
5706       if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5709 	  x = gen_rtx_PLUS (Pmode,
5710 			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
5711 					  XEXP (XEXP (x, 1), 0)),
5712 			    XEXP (XEXP (x, 1), 1));
5716 	      (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5717 	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
5718       else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5719 	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5720 	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5721 	       && CONSTANT_P (XEXP (x, 1)))
5724 	  rtx other = NULL_RTX;
5726 	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5728 	      constant = XEXP (x, 1);
5729 	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5731 	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5733 	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5734 	      other = XEXP (x, 1);
5742 	      x = gen_rtx_PLUS (Pmode,
5743 				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5744 					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
5745 				plus_constant (other, INTVAL (constant)));
/* After canonicalization, retest legitimacy before forcing operands.  */
5749       if (changed && legitimate_address_p (mode, x, FALSE))
5752       if (GET_CODE (XEXP (x, 0)) == MULT)
5755 	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5758       if (GET_CODE (XEXP (x, 1)) == MULT)
5761 	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5765 	  && GET_CODE (XEXP (x, 1)) == REG
5766 	  && GET_CODE (XEXP (x, 0)) == REG)
5769       if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5772 	  x = legitimize_pic_address (x, 0);
5775       if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: force the non-REG half into a fresh pseudo.  */
5778       if (GET_CODE (XEXP (x, 0)) == REG)
5780 	  register rtx temp = gen_reg_rtx (Pmode);
5781 	  register rtx val  = force_operand (XEXP (x, 1), temp);
5783 	    emit_move_insn (temp, val);
5789       else if (GET_CODE (XEXP (x, 1)) == REG)
5791 	  register rtx temp = gen_reg_rtx (Pmode);
5792 	  register rtx val  = force_operand (XEXP (x, 0), temp);
5794 	    emit_move_insn (temp, val);
/* NOTE(review): partial listing -- case labels, breaks and operator
   output lines are elided; only comments added, code untouched.  */
5804 /* Print an integer constant expression in assembler syntax.  Addition
5805    and subtraction are the only arithmetic that may appear in these
5806    expressions.  FILE is the stdio stream to write to, X is the rtx, and
5807    CODE is the operand print code from the output string.  */
5810 output_pic_addr_const (file, x, code)
5817   switch (GET_CODE (x))
5827       assemble_name (file, XSTR (x, 0));
/* 'P' requests a PLT reference for non-local symbols.  */
5828       if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
5829 	fputs ("@PLT", file);
5836       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5837       assemble_name (asm_out_file, buf);
5841       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5845       /* This used to output parentheses around the expression,
5846 	 but that does not work on the 386 (either ATT or BSD assembler).  */
5847       output_pic_addr_const (file, XEXP (x, 0), code);
5851       if (GET_MODE (x) == VOIDmode)
5853 	  /* We can use %d if the number is <32 bits and positive.  */
5854 	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5855 	    fprintf (file, "0x%lx%08lx",
5856 		     (unsigned long) CONST_DOUBLE_HIGH (x),
5857 		     (unsigned long) CONST_DOUBLE_LOW (x));
5859 	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5862 	/* We can't handle floating point constants;
5863 	   PRINT_OPERAND must handle them.  */
5864 	output_operand_lossage ("floating constant misused");
5868       /* Some assemblers need integer constants to appear first.  */
5869       if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5871 	  output_pic_addr_const (file, XEXP (x, 0), code);
5873 	  output_pic_addr_const (file, XEXP (x, 1), code);
5875       else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5877 	  output_pic_addr_const (file, XEXP (x, 1), code);
5879 	  output_pic_addr_const (file, XEXP (x, 0), code);
/* MINUS case (label elided): Intel dialect brackets, AT&T uses [].  */
5887       putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5888       output_pic_addr_const (file, XEXP (x, 0), code);
5890       output_pic_addr_const (file, XEXP (x, 1), code);
5892       putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC case: emit the operand followed by its relocation suffix.  */
5896       if (XVECLEN (x, 0) != 1)
5898       output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5899       switch (XINT (x, 1))
5902 	  fputs ("@GOT", file);
5905 	  fputs ("@GOTOFF", file);
5907 	case UNSPEC_GOTPCREL:
5908 	  fputs ("@GOTPCREL(%rip)", file);
5910 	case UNSPEC_GOTTPOFF:
5911 	  fputs ("@GOTTPOFF", file);
5914 	  fputs ("@TPOFF", file);
5917 	  fputs ("@NTPOFF", file);
5920 	  fputs ("@DTPOFF", file);
5923 	  output_operand_lossage ("invalid UNSPEC as operand");
5929       output_operand_lossage ("invalid expression as operand");
/* NOTE(review): partial listing -- guards and terminator output elided;
   only comments added, code untouched.  */
5933 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5934    We need to handle our special PIC relocations.  */
5937 i386_dwarf_output_addr_const (file, x)
/* Pointer-sized directive: .quad on 64-bit, .long otherwise.  */
5942   fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
5946     fprintf (file, "%s", ASM_LONG);
/* PIC expressions need the relocation-aware printer (guard elided).  */
5949     output_pic_addr_const (file, x, '\0');
5951     output_addr_const (file, x);
/* NOTE(review): partial listing -- returns of ORIG_X/assignments to Y and
   braces are elided; only comments added, code untouched.  */
5955 /* In the name of slightly smaller debug output, and to cater to
5956    general assembler losage, recognize PIC+GOTOFF and turn it back
5957    into a direct symbol reference.  */
5960 i386_simplify_dwarf_addr (orig_x)
5965   if (GET_CODE (x) == MEM)
/* 64-bit form (guard elided): a MEM of @GOTPCREL collapses to its symbol.  */
5970       if (GET_CODE (x) != CONST
5971 	  || GET_CODE (XEXP (x, 0)) != UNSPEC
5972 	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
5973 	  || GET_CODE (orig_x) != MEM)
5975       return XVECEXP (XEXP (x, 0), 0, 0);
5978   if (GET_CODE (x) != PLUS
5979       || GET_CODE (XEXP (x, 1)) != CONST)
/* Identify the non-PIC-register part Y of the address.  */
5982   if (GET_CODE (XEXP (x, 0)) == REG
5983       && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5984     /* %ebx + GOT/GOTOFF */
5986   else if (GET_CODE (XEXP (x, 0)) == PLUS)
5988       /* %ebx + %reg * scale + GOT/GOTOFF */
5990       if (GET_CODE (XEXP (y, 0)) == REG
5991 	  && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5993       else if (GET_CODE (XEXP (y, 1)) == REG
5994 	       && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5998       if (GET_CODE (y) != REG
5999 	  && GET_CODE (y) != MULT
6000 	  && GET_CODE (y) != ASHIFT)
/* @GOT only makes sense inside a MEM; @GOTOFF only outside one.  */
6006   x = XEXP (XEXP (x, 1), 0);
6007   if (GET_CODE (x) == UNSPEC
6008       && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6009 	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6012 	return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6013       return XVECEXP (x, 0, 0);
/* Same, but with an extra constant offset around the unspec.  */
6016   if (GET_CODE (x) == PLUS
6017       && GET_CODE (XEXP (x, 0)) == UNSPEC
6018       && GET_CODE (XEXP (x, 1)) == CONST_INT
6019       && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6020 	  || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6021 	      && GET_CODE (orig_x) != MEM)))
6023       x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6025 	return gen_rtx_PLUS (Pmode, y, x);
/* NOTE(review): partial listing -- most case labels and suffix
   assignments are elided; only comments added, code untouched.
   Visible purpose: write the condition-code suffix (e.g. "e", "a",
   "nb") for a setcc/cmov/jcc to FILE, honoring REVERSE and FP forms.  */
6033 put_condition_code (code, mode, reverse, fp, file)
6035      enum machine_mode mode;
6041   if (mode == CCFPmode || mode == CCFPUmode)
6043       enum rtx_code second_code, bypass_code;
6044       ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
/* Only single-instruction FP comparisons can be printed here.  */
6045       if (bypass_code != NIL || second_code != NIL)
6047       code = ix86_fp_compare_code_to_integer (code);
6051     code = reverse_condition (code);
6062       if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6067       /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6068 	 Those same assemblers have the same but opposite losage on cmov.  */
6071       suffix = fp ? "nbe" : "a";
6074       if (mode == CCNOmode || mode == CCGOCmode)
6076       else if (mode == CCmode || mode == CCGCmode)
6087       if (mode == CCNOmode || mode == CCGOCmode)
6089       else if (mode == CCmode || mode == CCGCmode)
6098       suffix = fp ? "nb" : "ae";
6101       if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6111       suffix = fp ? "u" : "p";
6114       suffix = fp ? "nu" : "np";
6119   fputs (suffix, file);
/* NOTE(review): partial listing -- the abort calls, '%' prefix output
   and several case labels are elided; only comments added, code
   untouched.  Visible purpose: print register X's assembler name to
   FILE, sized per CODE ('b'/'w'/'k'/'q'/'h'/'y' overrides).  */
6123 print_reg (x, code, file)
/* Internal-only registers must never reach assembly output.  */
6128   if (REGNO (x) == ARG_POINTER_REGNUM
6129       || REGNO (x) == FRAME_POINTER_REGNUM
6130       || REGNO (x) == FLAGS_REG
6131       || REGNO (x) == FPSR_REG)
6134   if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
/* Map the size-override code to a byte width; default comes from the
   operand's machine mode.  */
6137   if (code == 'w' || MMX_REG_P (x))
6139   else if (code == 'b')
6141   else if (code == 'k')
6143   else if (code == 'q')
6145   else if (code == 'y')
6147   else if (code == 'h')
6150     code = GET_MODE_SIZE (GET_MODE (x));
6152   /* Irritatingly, AMD extended registers use different naming convention
6153      from the normal registers.  */
6154   if (REX_INT_REG_P (x))
6161 	  error ("extended registers have no high halves");
6164 	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6167 	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6170 	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6173 	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6176 	  error ("unsupported operand size for extended register");
6184       if (STACK_TOP_P (x))
6186 	  fputs ("st(0)", file);
/* Word-size registers get the 'e' (32-bit) or 'r' (64-bit) prefix.  */
6193       if (! ANY_FP_REG_P (x))
6194 	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6198       fputs (hi_reg_name[REGNO (x)], file);
6201       fputs (qi_reg_name[REGNO (x)], file);
6204       fputs (qi_high_reg_name[REGNO (x)], file);
/* NOTE(review): partial listing -- declaration of INSN, the INSN_P test
   and the final abort/return are elided; only comments added, code
   untouched.  */
6211 /* Locate some local-dynamic symbol still in use by this function
6212    so that we can print its name in some tls_local_dynamic_base
6216 get_some_local_dynamic_name ()
/* Cached from a previous call?  */
6220   if (cfun->machine->some_ld_name)
6221     return cfun->machine->some_ld_name;
/* Scan insns; the walker callback stores the name as a side effect.  */
6223   for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6225 	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6226       return cfun->machine->some_ld_name;
/* NOTE(review): partial listing -- the *PX dereference and the return
   values (nonzero to stop the for_each_rtx walk) are elided; only
   comments added, code untouched.  for_each_rtx callback: remember the
   first local-dynamic TLS symbol seen.  */
6232 get_some_local_dynamic_name_1 (px, data)
6234      void *data ATTRIBUTE_UNUSED;
6238   if (GET_CODE (x) == SYMBOL_REF
6239       && local_dynamic_symbolic_operand (x, Pmode))
6241       cfun->machine->some_ld_name = XSTR (x, 0);
6249 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6250 C -- print opcode suffix for set/cmov insn.
6251 c -- like C, but print reversed condition
6252 F,f -- likewise, but for floating-point.
6253 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6255 R -- print the prefix for register names.
6256 z -- print the opcode suffix for the size of the current operand.
6257 * -- print a star (in certain assembler syntax)
6258 A -- print an absolute memory reference.
6259 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6260 s -- print a shift double count, followed by the assemblers argument
6262 b -- print the QImode name of the register for the indicated operand.
6263 %b0 would print %al if operands[0] is reg 0.
6264 w -- likewise, print the HImode name of the register.
6265 k -- likewise, print the SImode name of the register.
6266 q -- likewise, print the DImode name of the register.
6267 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6268 y -- print "st(0)" instead of "st" as a register.
6269 D -- print condition for SSE cmp instruction.
6270 P -- if PIC, print an @PLT suffix.
6271 X -- don't print any sort of PIC '@' suffix for a symbol.
6272 & -- print some in-use local-dynamic symbol name.
6276 print_operand (file, x, code)
6286 if (ASSEMBLER_DIALECT == ASM_ATT)
6291 assemble_name (file, get_some_local_dynamic_name ());
6295 if (ASSEMBLER_DIALECT == ASM_ATT)
6297 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6299 /* Intel syntax. For absolute addresses, registers should not
6300 be surrounded by braces. */
6301 if (GET_CODE (x) != REG)
6304 PRINT_OPERAND (file, x, 0);
6312 PRINT_OPERAND (file, x, 0);
6317 if (ASSEMBLER_DIALECT == ASM_ATT)
6322 if (ASSEMBLER_DIALECT == ASM_ATT)
6327 if (ASSEMBLER_DIALECT == ASM_ATT)
6332 if (ASSEMBLER_DIALECT == ASM_ATT)
6337 if (ASSEMBLER_DIALECT == ASM_ATT)
6342 if (ASSEMBLER_DIALECT == ASM_ATT)
6347 /* 387 opcodes don't get size suffixes if the operands are
6349 if (STACK_REG_P (x))
6352 /* Likewise if using Intel opcodes. */
6353 if (ASSEMBLER_DIALECT == ASM_INTEL)
6356 /* This is the size of op from size of operand. */
6357 switch (GET_MODE_SIZE (GET_MODE (x)))
6360 #ifdef HAVE_GAS_FILDS_FISTS
6366 if (GET_MODE (x) == SFmode)
6381 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6383 #ifdef GAS_MNEMONICS
6409 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6411 PRINT_OPERAND (file, x, 0);
6417 /* Little bit of braindamage here. The SSE compare instructions
6418 does use completely different names for the comparisons that the
6419 fp conditional moves. */
6420 switch (GET_CODE (x))
6435 fputs ("unord", file);
6439 fputs ("neq", file);
6443 fputs ("nlt", file);
6447 fputs ("nle", file);
6450 fputs ("ord", file);
6458 #ifdef CMOV_SUN_AS_SYNTAX
6459 if (ASSEMBLER_DIALECT == ASM_ATT)
6461 switch (GET_MODE (x))
6463 case HImode: putc ('w', file); break;
6465 case SFmode: putc ('l', file); break;
6467 case DFmode: putc ('q', file); break;
6475 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6478 #ifdef CMOV_SUN_AS_SYNTAX
6479 if (ASSEMBLER_DIALECT == ASM_ATT)
6482 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6485 /* Like above, but reverse condition */
6487 /* Check to see if argument to %c is really a constant
6488 and not a condition code which needs to be reversed. */
6489 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
6491 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6494 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6497 #ifdef CMOV_SUN_AS_SYNTAX
6498 if (ASSEMBLER_DIALECT == ASM_ATT)
6501 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6507 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6510 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6513 int pred_val = INTVAL (XEXP (x, 0));
6515 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6516 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6518 int taken = pred_val > REG_BR_PROB_BASE / 2;
6519 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6521 /* Emit hints only in the case default branch prediction
6522 heruistics would fail. */
6523 if (taken != cputaken)
6525 /* We use 3e (DS) prefix for taken branches and
6526 2e (CS) prefix for not taken branches. */
6528 fputs ("ds ; ", file);
6530 fputs ("cs ; ", file);
6537 output_operand_lossage ("invalid operand code `%c'", code);
6541 if (GET_CODE (x) == REG)
6543 PRINT_REG (x, code, file);
6546 else if (GET_CODE (x) == MEM)
6548 /* No `byte ptr' prefix for call instructions. */
6549 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6552 switch (GET_MODE_SIZE (GET_MODE (x)))
6554 case 1: size = "BYTE"; break;
6555 case 2: size = "WORD"; break;
6556 case 4: size = "DWORD"; break;
6557 case 8: size = "QWORD"; break;
6558 case 12: size = "XWORD"; break;
6559 case 16: size = "XMMWORD"; break;
6564 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6567 else if (code == 'w')
6569 else if (code == 'k')
6573 fputs (" PTR ", file);
6577 if (flag_pic && CONSTANT_ADDRESS_P (x))
6578 output_pic_addr_const (file, x, code);
6579 /* Avoid (%rip) for call operands. */
6580 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
6581 && GET_CODE (x) != CONST_INT)
6582 output_addr_const (file, x);
6583 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6584 output_operand_lossage ("invalid constraints for operand");
6589 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6594 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6595 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6597 if (ASSEMBLER_DIALECT == ASM_ATT)
6599 fprintf (file, "0x%lx", l);
6602 /* These float cases don't actually occur as immediate operands. */
6603 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6608 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6609 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6610 fprintf (file, "%s", dstr);
6613 else if (GET_CODE (x) == CONST_DOUBLE
6614 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
6619 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6620 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6621 fprintf (file, "%s", dstr);
6628 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6630 if (ASSEMBLER_DIALECT == ASM_ATT)
6633 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6634 || GET_CODE (x) == LABEL_REF)
6636 if (ASSEMBLER_DIALECT == ASM_ATT)
6639 fputs ("OFFSET FLAT:", file);
6642 if (GET_CODE (x) == CONST_INT)
6643 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6645 output_pic_addr_const (file, x, code);
6647 output_addr_const (file, x);
6651 /* Print a memory operand whose address is ADDR. */
6654 print_operand_address (file, addr)
6658   struct ix86_address parts;
6659   rtx base, index, disp;
/* A bare UNSPEC_TP is the TLS thread pointer: it is printed as a literal
   %gs-relative zero offset instead of being decomposed.  */
6662   if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
6664       if (ASSEMBLER_DIALECT == ASM_INTEL)
6665 	fputs ("DWORD PTR ", file);
6666       if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6668       fputs ("gs:0", file);
6672   if (! ix86_decompose_address (addr, &parts))
6676   index = parts.index;
6678   scale = parts.scale;
/* Address with neither base nor index register: a bare displacement.  */
6680   if (!base && !index)
6682       /* Displacement only requires special attention. */
6684       if (GET_CODE (disp) == CONST_INT)
6686 	  if (ASSEMBLER_DIALECT == ASM_INTEL)
6688 	      if (USER_LABEL_PREFIX[0] == 0)
6690 	      fputs ("ds:", file);
6692 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr))
6695 	output_pic_addr_const (file, addr, 0);
6697 	output_addr_const (file, addr);
6699       /* Use one byte shorter RIP relative addressing for 64bit mode. */
6701 	  && (GET_CODE (addr) == SYMBOL_REF
6702 	      || GET_CODE (addr) == LABEL_REF
6703 	      || (GET_CODE (addr) == CONST
6704 		  && GET_CODE (XEXP (addr, 0)) == PLUS
6705 		  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
6706 		  && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
6707 	fputs ("(%rip)", file);
/* AT&T syntax: disp(base,index,scale).  */
6711       if (ASSEMBLER_DIALECT == ASM_ATT)
6716 	    output_pic_addr_const (file, disp, 0);
6717 	  else if (GET_CODE (disp) == LABEL_REF)
6718 	    output_asm_label (disp);
6720 	    output_addr_const (file, disp);
6725 	    PRINT_REG (base, 0, file);
6729 	      PRINT_REG (index, 0, file);
6731 	      fprintf (file, ",%d", scale);
/* Intel syntax: [base+index*scale+disp]; the symbol part and the numeric
   offset are printed separately.  */
6737 	  rtx offset = NULL_RTX;
6741 	  /* Pull out the offset of a symbol; print any symbol itself. */
6742 	  if (GET_CODE (disp) == CONST
6743 	      && GET_CODE (XEXP (disp, 0)) == PLUS
6744 	      && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6746 	      offset = XEXP (XEXP (disp, 0), 1);
6747 	      disp = gen_rtx_CONST (VOIDmode,
6748 				    XEXP (XEXP (disp, 0), 0));
6752 	    output_pic_addr_const (file, disp, 0);
6753 	  else if (GET_CODE (disp) == LABEL_REF)
6754 	    output_asm_label (disp);
6755 	  else if (GET_CODE (disp) == CONST_INT)
6758 	    output_addr_const (file, disp);
6764 	    PRINT_REG (base, 0, file);
6767 	      if (INTVAL (offset) >= 0)
6769 	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6773 	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6780 	      PRINT_REG (index, 0, file);
6782 	      fprintf (file, "*%d", scale);
/* Target hook: print target-specific constant address pieces.  Handles the
   TLS relocation UNSPECs by printing the wrapped symbol followed by the
   matching relocation suffix (@GOTTPOFF, @TPOFF, @NTPOFF, @DTPOFF).  */
6790 output_addr_const_extra (file, x)
6796   if (GET_CODE (x) != UNSPEC)
6799   op = XVECEXP (x, 0, 0);
6800   switch (XINT (x, 1))
6802     case UNSPEC_GOTTPOFF:
6803       output_addr_const (file, op);
6804       fputs ("@GOTTPOFF", file);
6807       output_addr_const (file, op);
6808       fputs ("@TPOFF", file);
6811       output_addr_const (file, op);
6812       fputs ("@NTPOFF", file);
6815       output_addr_const (file, op);
6816       fputs ("@DTPOFF", file);
6826 /* Split one or more DImode RTL references into pairs of SImode
6827    references.  The RTL can be REG, offsettable MEM, integer constant, or
6828    CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
6829    split and "num" is its length.  lo_half and hi_half are output arrays
6830    that parallel "operands". */
6833 split_di (operands, num, lo_half, hi_half)
6836      rtx lo_half[], hi_half[];
6840       rtx op = operands[num];
6842       /* simplify_subreg refuses to split volatile memory addresses,
6843 	 but we still have to handle it. */
6844       if (GET_CODE (op) == MEM)
/* MEM case: take byte offsets 0 and 4 directly so volatile accesses work.  */
6846 	  lo_half[num] = adjust_address (op, SImode, 0);
6847 	  hi_half[num] = adjust_address (op, SImode, 4);
/* Non-MEM case: let simplify_gen_subreg pick the halves.  A VOIDmode
   operand (e.g. CONST_INT) is treated as DImode.  */
6851 	  lo_half[num] = simplify_gen_subreg (SImode, op,
6852 					      GET_MODE (op) == VOIDmode
6853 					      ? DImode : GET_MODE (op), 0);
6854 	  hi_half[num] = simplify_gen_subreg (SImode, op,
6855 					      GET_MODE (op) == VOIDmode
6856 					      ? DImode : GET_MODE (op), 4);
6860 /* Split one or more TImode RTL references into pairs of DImode
6861    references.  The RTL can be REG, offsettable MEM, integer constant, or
6862    CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
6863    split and "num" is its length.  lo_half and hi_half are output arrays
6864    that parallel "operands". */
6867 split_ti (operands, num, lo_half, hi_half)
6870      rtx lo_half[], hi_half[];
6874       rtx op = operands[num];
6876       /* simplify_subreg refuses to split volatile memory addresses, but we
6877 	 still have to handle it. */
6878       if (GET_CODE (op) == MEM)
/* As in split_di, but the halves are DImode at byte offsets 0 and 8.  */
6880 	  lo_half[num] = adjust_address (op, DImode, 0);
6881 	  hi_half[num] = adjust_address (op, DImode, 8);
6885 	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6886 	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6891 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6892    MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
6893    is the expression of the binary operation.  The output may either be
6894    emitted here, or returned to the caller, like all output_* functions.
6896    There is no guarantee that the operands are the same mode, as they
6897    might be within FLOAT or FLOAT_EXTEND expressions. */
6899 #ifndef SYSV386_COMPAT
6900 /* Set to 1 for compatibility with brain-damaged assemblers.  No-one
6901    wants to fix the assemblers because that causes incompatibility
6902    with gcc.  No-one wants to fix gcc because that causes
6903    incompatibility with assemblers...  You can use the option of
6904    -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6905 #define SYSV386_COMPAT 1
6909 output_387_binary_op (insn, operands)
6913   static char buf[30];
/* Any SSE register among the operands selects the two-operand SSE
   templates (addss/addsd style) instead of 387 stack code.  */
6916   int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
6918 #ifdef ENABLE_CHECKING
6919   /* Even if we do not want to check the inputs, this documents input
6920      constraints.  Which helps in understanding the following code. */
6921   if (STACK_REG_P (operands[0])
6922       && ((REG_P (operands[1])
6923 	   && REGNO (operands[0]) == REGNO (operands[1])
6924 	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6925 	  || (REG_P (operands[2])
6926 	      && REGNO (operands[0]) == REGNO (operands[2])
6927 	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6928       && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
/* Pick the mnemonic stem from the operation code; an integer-mode source
   operand selects the fi* (integer operand) form.  */
6934   switch (GET_CODE (operands[3]))
6937       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6938 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6946       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6947 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6955       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6956 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6964       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6965 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
/* SSE: append the ss/sd suffix and a simple two-operand template.  */
6979       if (GET_MODE (operands[0]) == SFmode)
6980 	strcat (buf, "ss\t{%2, %0|%0, %2}");
6982 	strcat (buf, "sd\t{%2, %0|%0, %2}");
6987   switch (GET_CODE (operands[3]))
/* Commutative ops: canonicalize so operands[0] matches operands[1].  */
6991       if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6993 	  rtx temp = operands[2];
6994 	  operands[2] = operands[1];
6998       /* know operands[0] == operands[1].  */
7000       if (GET_CODE (operands[2]) == MEM)
7006       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7008 	  if (STACK_TOP_P (operands[0]))
7009 	    /* How is it that we are storing to a dead operand[2]?
7010 	       Well, presumably operands[1] is dead too.  We can't
7011 	       store the result to st(0) as st(0) gets popped on this
7012 	       instruction.  Instead store to operands[2] (which I
7013 	       think has to be st(1)).  st(1) will be popped later.
7014 	       gcc <= 2.8.1 didn't have this check and generated
7015 	       assembly code that the Unixware assembler rejected. */
7016 	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
7018 	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
7022       if (STACK_TOP_P (operands[0]))
7023 	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
7025 	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (fsub/fdiv): the reversed r-forms and the
   SYSV386_COMPAT quirk below must be accounted for.  */
7030       if (GET_CODE (operands[1]) == MEM)
7036       if (GET_CODE (operands[2]) == MEM)
7042       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7045 	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7046 	     derived assemblers, confusingly reverse the direction of
7047 	     the operation for fsub{r} and fdiv{r} when the
7048 	     destination register is not st(0).  The Intel assembler
7049 	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
7050 	     figure out what the hardware really does. */
7051 	  if (STACK_TOP_P (operands[0]))
7052 	    p = "{p\t%0, %2|rp\t%2, %0}";
7054 	    p = "{rp\t%2, %0|p\t%0, %2}";
7056 	  if (STACK_TOP_P (operands[0]))
7057 	    /* As above for fmul/fadd, we can't store to st(0). */
7058 	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
7060 	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
7065       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7068 	  if (STACK_TOP_P (operands[0]))
7069 	    p = "{rp\t%0, %1|p\t%1, %0}";
7071 	    p = "{p\t%1, %0|rp\t%0, %1}";
7073 	  if (STACK_TOP_P (operands[0]))
7074 	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
7076 	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
7081       if (STACK_TOP_P (operands[0]))
7083 	  if (STACK_TOP_P (operands[1]))
7084 	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
7086 	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
7089       else if (STACK_TOP_P (operands[1]))
7092 	    p = "{\t%1, %0|r\t%0, %1}";
7094 	    p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
7100 	    p = "{r\t%2, %0|\t%0, %2}";
7102 	    p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
7115 /* Output code to initialize control word copies used by
7116    trunc?f?i patterns.  NORMAL is set to current control word, while ROUND_DOWN
7117    is set to control word rounding downwards. */
7119 emit_i387_cw_initialization (normal, round_down)
7120      rtx normal, round_down;
7122   rtx reg = gen_reg_rtx (HImode);
/* Save the live control word with fnstcw, then derive a copy with the
   rounding-control bits (0xc00) set for truncation toward zero.  */
7124   emit_insn (gen_x86_fnstcw_1 (normal));
7125   emit_move_insn (reg, normal);
7126   if (!TARGET_PARTIAL_REG_STALL && !optimize_size
/* On targets without partial-register stalls a byte insert of 0xc into the
   high byte is cheaper than the 16-bit OR below.  */
7128     emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7130     emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7131   emit_move_insn (round_down, reg);
7134 /* Output code for INSN to convert a float to a signed int.  OPERANDS
7135    are the insn operands.  The output may be [HSD]Imode and the input
7136    operand may be [SDX]Fmode. */
7139 output_fix_trunc (insn, operands)
7143   int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7144   int dimode_p = GET_MODE (operands[0]) == DImode;
7146   /* Jump through a hoop or two for DImode, since the hardware has no
7147      non-popping instruction.  We used to do this a different way, but
7148      that was somewhat fragile and broke with post-reload splitters. */
7149   if (dimode_p && !stack_top_dies)
/* Duplicate st(0) so the popping fistp below leaves the value live.  */
7150     output_asm_insn ("fld\t%y1", operands);
7152   if (!STACK_TOP_P (operands[1]))
7155   if (GET_CODE (operands[0]) != MEM)
/* Switch to the round-toward-zero control word (%3), store, restore (%2).  */
7158   output_asm_insn ("fldcw\t%3", operands);
7159   if (stack_top_dies || dimode_p)
7160     output_asm_insn ("fistp%z0\t%0", operands);
7162     output_asm_insn ("fist%z0\t%0", operands);
7163   output_asm_insn ("fldcw\t%2", operands);
7168 /* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
7169    should be used and 2 when fnstsw should be used.  UNORDERED_P is true
7170    when fucom should be used. */
7173 output_fp_compare (insn, operands, eflags_p, unordered_p)
7176      int eflags_p, unordered_p;
7179   rtx cmp_op0 = operands[0];
7180   rtx cmp_op1 = operands[1];
7181   int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7186       cmp_op1 = operands[2];
/* SSE comparisons: [u]comiss for SFmode, [u]comisd for DFmode.  The
   ordered templates previously ended in "%y" with no operand digit,
   which is a malformed operand-modifier use; "%1" matches the
   unordered variants.  */
7190       if (GET_MODE (operands[0]) == SFmode)
7192 	  return "ucomiss\t{%1, %0|%0, %1}";
7194 	  return "comiss\t{%1, %0|%0, %1}";
7197 	  return "ucomisd\t{%1, %0|%0, %1}";
7199 	  return "comisd\t{%1, %0|%0, %1}";
/* 387 comparisons require op0 to be st(0).  */
7202   if (! STACK_TOP_P (cmp_op0))
7205   stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7207   if (STACK_REG_P (cmp_op1)
7209       && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7210       && REGNO (cmp_op1) != FIRST_STACK_REG)
7212       /* If both the top of the 387 stack dies, and the other operand
7213 	 is also a stack register that dies, then this must be a
7214 	 `fcompp' float compare */
7218 	  /* There is no double popping fcomi variant.  Fortunately,
7219 	     eflags is immune from the fstp's cc clobbering. */
7221 	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7223 	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7231 	    return "fucompp\n\tfnstsw\t%0";
7233 	    return "fcompp\n\tfnstsw\t%0";
7246       /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7248       static const char * const alt[24] =
7260 	"fcomi\t{%y1, %0|%0, %y1}",
7261 	"fcomip\t{%y1, %0|%0, %y1}",
7262 	"fucomi\t{%y1, %0|%0, %y1}",
7263 	"fucomip\t{%y1, %0|%0, %y1}",
7270 	"fcom%z2\t%y2\n\tfnstsw\t%0",
7271 	"fcomp%z2\t%y2\n\tfnstsw\t%0",
7272 	"fucom%z2\t%y2\n\tfnstsw\t%0",
7273 	"fucomp%z2\t%y2\n\tfnstsw\t%0",
7275 	"ficom%z2\t%y2\n\tfnstsw\t%0",
7276 	"ficomp%z2\t%y2\n\tfnstsw\t%0",
/* Build the 4-bit index into the template table.  */
7284       mask  = eflags_p << 3;
7285       mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7286       mask |= unordered_p << 1;
7287       mask |= stack_top_dies;
/* Output an entry of an absolute jump-table: a .long (or .quad when the
   target needs 64-bit entries) referencing local label VALUE.  */
7300 ix86_output_addr_vec_elt (file, value)
7304   const char *directive = ASM_LONG;
7309     directive = ASM_QUAD;
7315   fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Output an entry of a relative (PIC) jump-table: the difference between
   label VALUE and base label REL, or a @GOTOFF / GOT-relative form
   depending on assembler and target capabilities.  */
7319 ix86_output_addr_diff_elt (file, value, rel)
7324     fprintf (file, "%s%s%d-%s%d\n",
7325 	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
7326   else if (HAVE_AS_GOTOFF_IN_DATA)
7327     fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7329   else if (TARGET_MACHO)
7330     fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7331 	     machopic_function_base_name () + 1);
7334     asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
7335 		 ASM_LONG, LPREFIX, value);
7338 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7342 ix86_expand_clear (dest)
7347   /* We play register width games, which are only valid after reload. */
7348   if (!reload_completed)
7351   /* Avoid HImode and its attendant prefix byte. */
7352   if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7353     dest = gen_rtx_REG (SImode, REGNO (dest));
7355   tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7357   /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7358   if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
/* The xor form clobbers the flags register (hard reg 17), so wrap the set
   in a PARALLEL with an explicit CLOBBER.  */
7360       rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7361       tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7367 /* X is an unchanging MEM.  If it is a constant pool reference, return
7368    the constant pool rtx, else NULL. */
7371 maybe_get_pool_constant (x)
/* Peel the PIC form: (plus pic_offset_table_rtx (const (unspec [sym]
   UNSPEC_GOTOFF))) down to the bare SYMBOL_REF before checking.  */
7378   if (GET_CODE (x) != PLUS)
7380   if (XEXP (x, 0) != pic_offset_table_rtx)
7383   if (GET_CODE (x) != CONST)
7386   if (GET_CODE (x) != UNSPEC)
7388   if (XINT (x, 1) != UNSPEC_GOTOFF)
7390   x = XVECEXP (x, 0, 0);
7393   if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7394     return get_pool_constant (x);
/* Expand a move of MODE between operands[0] and operands[1], massaging the
   operands (TLS, PIC, push, constant-pool cases) into forms the insn
   patterns accept.  */
7400 ix86_expand_move (mode, operands)
7401      enum machine_mode mode;
7404   int strict = (reload_in_progress || reload_completed);
7405   rtx insn, op0, op1, tmp;
7410   /* ??? We have a slight problem.  We need to say that tls symbols are
7411      not legitimate constants so that reload does not helpfully reload
7412      these constants from a REG_EQUIV, which we cannot handle.  (Recall
7413      that general- and local-dynamic address resolution requires a
7416      However, if we say that tls symbols are not legitimate constants,
7417      then emit_move_insn helpfully drops them into the constant pool.
7419      It is far easier to work around emit_move_insn than reload.  Recognize
7420      the MEM that we would have created and extract the symbol_ref. */
7423       && GET_CODE (op1) == MEM
7424       && RTX_UNCHANGING_P (op1))
7426       tmp = maybe_get_pool_constant (op1);
7427       /* Note that we only care about symbolic constants here, which
7428 	 unlike CONST_INT will always have a proper mode. */
7429       if (tmp && GET_MODE (tmp) == Pmode)
7433   if (tls_symbolic_operand (op1, Pmode))
/* TLS source: legitimize the address; if the destination is memory, go
   through a fresh register first.  */
7435       op1 = legitimize_address (op1, op1, VOIDmode);
7436       if (GET_CODE (op0) == MEM)
7438 	  tmp = gen_reg_rtx (mode);
7439 	  emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7443   else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
/* Darwin (Mach-O) PIC has its own indirection/legitimization path.  */
7448 	  rtx temp = ((reload_in_progress
7449 		       || ((op0 && GET_CODE (op0) == REG)
7451 		      ? op0 : gen_reg_rtx (Pmode));
7452 	  op1 = machopic_indirect_data_reference (op1, temp);
7453 	  op1 = machopic_legitimize_pic_address (op1, mode,
7454 						 temp == op1 ? 0 : temp);
7458       if (MACHOPIC_INDIRECT)
7459 	op1 = machopic_indirect_data_reference (op1, 0);
7463 	  insn = gen_rtx_SET (VOIDmode, op0, op1);
7467 #endif /* TARGET_MACHO */
7468       if (GET_CODE (op0) == MEM)
7469 	op1 = force_reg (Pmode, op1);
7473 	  if (GET_CODE (temp) != REG)
7474 	    temp = gen_reg_rtx (Pmode);
7475 	  temp = legitimize_pic_address (op1, temp);
/* Two MEM operands (other than a plain push) are not allowed: load the
   source into a register first.  */
7483       if (GET_CODE (op0) == MEM
7484 	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7485 	      || !push_operand (op0, mode))
7486 	  && GET_CODE (op1) == MEM)
7487 	op1 = force_reg (mode, op1);
7489       if (push_operand (op0, mode)
7490 	  && ! general_no_elim_operand (op1, mode))
7491 	op1 = copy_to_mode_reg (mode, op1);
7493       /* Force large constants in 64bit compilation into register
7494 	 to get them CSEed. */
7495       if (TARGET_64BIT && mode == DImode
7496 	  && immediate_operand (op1, mode)
7497 	  && !x86_64_zero_extended_value (op1)
7498 	  && !register_operand (op0, mode)
7499 	  && optimize && !reload_completed && !reload_in_progress)
7500 	op1 = copy_to_mode_reg (mode, op1);
7502       if (FLOAT_MODE_P (mode))
7504 	  /* If we are loading a floating point constant to a register,
7505 	     force the value to memory now, since we'll get better code
7506 	     out the back end. */
7510 	  else if (GET_CODE (op1) == CONST_DOUBLE
7511 		   && register_operand (op0, mode))
7512 	    op1 = validize_mem (force_const_mem (mode, op1));
7516   insn = gen_rtx_SET (VOIDmode, op0, op1);
/* Expand a vector-mode move, forcing awkward operands (non-zero constants,
   MEM-to-MEM) into register/memory forms the vector patterns accept.  */
7522 ix86_expand_vector_move (mode, operands)
7523      enum machine_mode mode;
7526   /* Force constants other than zero into memory.  We do not know how
7527      the instructions used to build constants modify the upper 64 bits
7528      of the register, once we have that information we may be able
7529      to handle some of them more efficiently. */
7530   if ((reload_in_progress | reload_completed) == 0
7531       && register_operand (operands[0], mode)
7532       && CONSTANT_P (operands[1]))
7534       rtx addr = gen_reg_rtx (Pmode);
7535       emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
7536       operands[1] = gen_rtx_MEM (mode, addr);
7539   /* Make operand1 a register if it isn't already. */
7540   if ((reload_in_progress | reload_completed) == 0
7541       && !register_operand (operands[0], mode)
7542       && !register_operand (operands[1], mode)
7543       && operands[1] != CONST0_RTX (mode))
7545       rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
7546       emit_move_insn (operands[0], temp);
7550   emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
7553 /* Attempt to expand a binary operator.  Make the expansion closer to the
7554    actual machine, then just general_operand, which will allow 3 separate
7555    memory references (one output, two input) in a single insn. */
7558 ix86_expand_binary_operator (code, mode, operands)
7560      enum machine_mode mode;
7563   int matching_memory;
7564   rtx src1, src2, dst, op, clob;
7570   /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7571   if (GET_RTX_CLASS (code) == 'c'
7572       && (rtx_equal_p (dst, src2)
7573 	  || immediate_operand (src1, mode)))
7580   /* If the destination is memory, and we do not have matching source
7581      operands, do things in registers. */
7582   matching_memory = 0;
7583   if (GET_CODE (dst) == MEM)
7585       if (rtx_equal_p (dst, src1))
7586 	matching_memory = 1;
7587       else if (GET_RTX_CLASS (code) == 'c'
7588 	       && rtx_equal_p (dst, src2))
7589 	matching_memory = 2;
7591 	dst = gen_reg_rtx (mode);
7594   /* Both source operands cannot be in memory. */
7595   if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7597       if (matching_memory != 2)
7598 	src2 = force_reg (mode, src2);
7600 	src1 = force_reg (mode, src1);
7603   /* If the operation is not commutable, source 1 cannot be a constant
7604      or non-matching memory. */
7605   if ((CONSTANT_P (src1)
7606        || (!matching_memory && GET_CODE (src1) == MEM))
7607       && GET_RTX_CLASS (code) != 'c')
7608     src1 = force_reg (mode, src1);
7610   /* If optimizing, copy to regs to improve CSE */
7611   if (optimize && ! no_new_pseudos)
7613       if (GET_CODE (dst) == MEM)
7614 	dst = gen_reg_rtx (mode);
7615       if (GET_CODE (src1) == MEM)
7616 	src1 = force_reg (mode, src1);
7617       if (GET_CODE (src2) == MEM)
7618 	src2 = force_reg (mode, src2);
7621   /* Emit the instruction. */
7623   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7624   if (reload_in_progress)
7626       /* Reload doesn't know about the flags register, and doesn't know that
7627 	 it doesn't want to clobber it.  We can only do this with PLUS. */
7634       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7635       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7638   /* Fix up the destination if needed. */
7639   if (dst != operands[0])
7640     emit_move_insn (operands[0], dst);
7643 /* Return TRUE or FALSE depending on whether the binary operator meets the
7644    appropriate constraints. */
7647 ix86_binary_operator_ok (code, mode, operands)
7649      enum machine_mode mode ATTRIBUTE_UNUSED;
7652   /* Both source operands cannot be in memory. */
7653   if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7655   /* If the operation is not commutable, source 1 cannot be a constant. */
7656   if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
7658   /* If the destination is memory, we must have a matching source operand. */
7659   if (GET_CODE (operands[0]) == MEM
7660       && ! (rtx_equal_p (operands[0], operands[1])
7661 	    || (GET_RTX_CLASS (code) == 'c'
7662 		&& rtx_equal_p (operands[0], operands[2]))))
7664   /* If the operation is not commutable and the source 1 is memory, we must
7665      have a matching destination. */
7666   if (GET_CODE (operands[1]) == MEM
7667       && GET_RTX_CLASS (code) != 'c'
7668       && ! rtx_equal_p (operands[0], operands[1]))
7673 /* Attempt to expand a unary operator.  Make the expansion closer to the
7674    actual machine, then just general_operand, which will allow 2 separate
7675    memory references (one output, one input) in a single insn. */
7678 ix86_expand_unary_operator (code, mode, operands)
7680      enum machine_mode mode;
7683   int matching_memory;
7684   rtx src, dst, op, clob;
7689   /* If the destination is memory, and we do not have matching source
7690      operands, do things in registers. */
7691   matching_memory = 0;
7692   if (GET_CODE (dst) == MEM)
7694       if (rtx_equal_p (dst, src))
7695 	matching_memory = 1;
7697 	dst = gen_reg_rtx (mode);
7700   /* When source operand is memory, destination must match. */
7701   if (!matching_memory && GET_CODE (src) == MEM)
7702     src = force_reg (mode, src);
7704   /* If optimizing, copy to regs to improve CSE */
7705   if (optimize && ! no_new_pseudos)
7707       if (GET_CODE (dst) == MEM)
7708 	dst = gen_reg_rtx (mode);
7709       if (GET_CODE (src) == MEM)
7710 	src = force_reg (mode, src);
7713   /* Emit the instruction. */
7715   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7716   if (reload_in_progress || code == NOT)
7718       /* Reload doesn't know about the flags register, and doesn't know that
7719 	 it doesn't want to clobber it. */
7726       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7727       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7730   /* Fix up the destination if needed. */
7731   if (dst != operands[0])
7732     emit_move_insn (operands[0], dst);
7735 /* Return TRUE or FALSE depending on whether the unary operator meets the
7736    appropriate constraints. */
7739 ix86_unary_operator_ok (code, mode, operands)
7740      enum rtx_code code ATTRIBUTE_UNUSED;
7741      enum machine_mode mode ATTRIBUTE_UNUSED;
7742      rtx operands[2] ATTRIBUTE_UNUSED;
7744   /* If one of operands is memory, source and destination must match. */
7745   if ((GET_CODE (operands[0]) == MEM
7746        || GET_CODE (operands[1]) == MEM)
7747       && ! rtx_equal_p (operands[0], operands[1]))
7752 /* Return TRUE or FALSE depending on whether the first SET in INSN
7753    has source and destination with matching CC modes, and that the
7754    CC mode is at least as constrained as REQ_MODE. */
7757 ix86_match_ccmode (insn, req_mode)
7759      enum machine_mode req_mode;
7762   enum machine_mode set_mode;
7764   set = PATTERN (insn);
7765   if (GET_CODE (set) == PARALLEL)
7766     set = XVECEXP (set, 0, 0);
7767   if (GET_CODE (set) != SET)
7769   if (GET_CODE (SET_SRC (set)) != COMPARE)
7772   set_mode = GET_MODE (SET_DEST (set));
/* Dispatch on the CC mode of the destination; each case accepts REQ_MODEs
   no more constrained than itself.  */
7776       if (req_mode != CCNOmode
7777 	  && (req_mode != CCmode
7778 	      || XEXP (SET_SRC (set), 1) != const0_rtx))
7782       if (req_mode == CCGCmode)
7786       if (req_mode == CCGOCmode || req_mode == CCNOmode)
7790       if (req_mode == CCZmode)
7800   return (GET_MODE (SET_SRC (set)) == set_mode);
7803 /* Generate insn patterns to do an integer compare of OPERANDS. */
7806 ix86_expand_int_compare (code, op0, op1)
7810   enum machine_mode cmpmode;
7813   cmpmode = SELECT_CC_MODE (code, op0, op1);
7814   flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7816   /* This is very simple, but making the interface the same as in the
7817      FP case makes the rest of the code easier. */
7818   tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7819   emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7821   /* Return the test that should be put into the flags user, i.e.
7822      the bcc, scc, or cmov instruction. */
7823   return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7826 /* Figure out whether to use ordered or unordered fp comparisons.
7827    Return the appropriate mode to use. */
7830 ix86_fp_compare_mode (code)
7831      enum rtx_code code ATTRIBUTE_UNUSED;
7833   /* ??? In order to make all comparisons reversible, we do all comparisons
7834      non-trapping when compiling for IEEE.  Once gcc is able to distinguish
7835      all forms trapping and nontrapping comparisons, we can make inequality
7836      comparisons trapping again, since it results in better code when using
7837      FCOM based compares. */
7838   return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the condition-code mode needed to represent comparison CODE of
   OP0 and OP1, so later passes can share flag-setting instructions.  */
7842 ix86_cc_mode (code, op0, op1)
7846   if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7847     return ix86_fp_compare_mode (code);
7850       /* Only zero flag is needed. */
7852     case NE:			/* ZF!=0 */
7854       /* Codes needing carry flag. */
7855     case GEU:			/* CF=0 */
7856     case GTU:			/* CF=0 & ZF=0 */
7857     case LTU:			/* CF=1 */
7858     case LEU:			/* CF=1 | ZF=1 */
7860       /* Codes possibly doable only with sign flag when
7861          comparing against zero. */
7862     case GE:			/* SF=OF   or   SF=0 */
7863     case LT:			/* SF<>OF  or   SF=1 */
7864       if (op1 == const0_rtx)
7867 	/* For other cases Carry flag is not required. */
7869       /* Codes doable only with sign flag when comparing
7870          against zero, but we miss jump instruction for it
7871          so we need to use relational tests against overflow
7872          that thus needs to be zero. */
7873     case GT:			/* ZF=0 & SF=OF */
7874     case LE:			/* ZF=1 | SF<>OF */
7875       if (op1 == const0_rtx)
7879       /* strcmp pattern do (use flags) and combine may ask us for proper
7888 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7891 ix86_use_fcomi_compare (code)
7892      enum rtx_code code ATTRIBUTE_UNUSED;
7894   enum rtx_code swapped_code = swap_condition (code);
/* FCOMI wins when it is no more expensive than the best strategy for either
   the comparison or its swapped form.  */
7895   return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7896 	  || (ix86_fp_comparison_cost (swapped_code)
7897 	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
7900 /* Swap, force into registers, or otherwise massage the two operands
7901    to a fp comparison.  The operands are updated in place; the new
7902    comparison code is returned. */
7904 static enum rtx_code
7905 ix86_prepare_fp_compare_args (code, pop0, pop1)
7909   enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7910   rtx op0 = *pop0, op1 = *pop1;
7911   enum machine_mode op_mode = GET_MODE (op0);
7912   int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7914   /* All of the unordered compare instructions only work on registers.
7915      The same is true of the XFmode compare instructions.  The same is
7916      true of the fcomi compare instructions. */
7919       && (fpcmp_mode == CCFPUmode
7920 	  || op_mode == XFmode
7921 	  || op_mode == TFmode
7922 	  || ix86_use_fcomi_compare (code)))
7924       op0 = force_reg (op_mode, op0);
7925       op1 = force_reg (op_mode, op1);
7929       /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
7930 	 things around if they appear profitable, otherwise force op0
7933       if (standard_80387_constant_p (op0) == 0
7934 	  || (GET_CODE (op0) == MEM
7935 	      && ! (standard_80387_constant_p (op1) == 0
7936 		    || GET_CODE (op1) == MEM)))
7939 	  tmp = op0, op0 = op1, op1 = tmp;
7940 	  code = swap_condition (code);
7943       if (GET_CODE (op0) != REG)
7944 	op0 = force_reg (op_mode, op0);
7946       if (CONSTANT_P (op1))
/* Constants the 387 can materialize directly (fldz/fld1 etc.) go in a
   register; anything else goes through the constant pool.  */
7948 	  if (standard_80387_constant_p (op1))
7949 	    op1 = force_reg (op_mode, op1);
7951 	    op1 = validize_mem (force_const_mem (op_mode, op1));
7955   /* Try to rearrange the comparison to make it cheaper. */
7956   if (ix86_fp_comparison_cost (code)
7957       > ix86_fp_comparison_cost (swap_condition (code))
7958       && (GET_CODE (op1) == REG || !no_new_pseudos))
7961       tmp = op0, op0 = op1, op1 = tmp;
7962       code = swap_condition (code);
7963       if (GET_CODE (op0) != REG)
7964 	op0 = force_reg (op_mode, op0);
7972 /* Convert comparison codes we use to represent FP comparison to integer
7973    code that will result in proper branch.  Return UNKNOWN if no such code
7975 static enum rtx_code
7976 ix86_fp_compare_code_to_integer (code)
8006 /* Split comparison code CODE into comparisons we can do using branch
8007    instructions.  BYPASS_CODE is comparison code for branch that will
8008    branch around FIRST_CODE and SECOND_CODE.  If some of branches
8009    is not required, set value to NIL.
8010    We never require more than two branches. */
8012 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8013      enum rtx_code code, *bypass_code, *first_code, *second_code;
8019   /* The fcomi comparison sets flags as follows:
/* Codes that map directly onto one flags test need no extra branch.  */
8029     case GT:			/* GTU - CF=0 & ZF=0 */
8030     case GE:			/* GEU - CF=0 */
8031     case ORDERED:		/* PF=0 */
8032     case UNORDERED:		/* PF=1 */
8033     case UNEQ:			/* EQ - ZF=1 */
8034     case UNLT:			/* LTU - CF=1 */
8035     case UNLE:			/* LEU - CF=1 | ZF=1 */
8036     case LTGT:			/* EQ - ZF=0 */
/* Ordered signed codes mis-test on unordered inputs: branch around them
   with UNORDERED, or add a second UNORDERED branch after.  */
8038     case LT:			/* LTU - CF=1 - fails on unordered */
8040       *bypass_code = UNORDERED;
8042     case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
8044       *bypass_code = UNORDERED;
8046     case EQ:			/* EQ - ZF=1 - fails on unordered */
8048       *bypass_code = UNORDERED;
8050     case NE:			/* NE - ZF=0 - fails on unordered */
8052       *second_code = UNORDERED;
8054     case UNGE:			/* GEU - CF=0 - fails on unordered */
8056       *second_code = UNORDERED;
8058     case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
8060       *second_code = UNORDERED;
8065   if (!TARGET_IEEE_FP)
8072 /* Return cost of comparison done fcom + arithmetics operations on AX.
8073 All following functions use the number of instructions as a cost metric.
8074 In future this should be tweaked to compute bytes for optimize_size and
8075 take into account performance of various instructions on various CPUs. */
/* NOTE(review): interior lines elided in this excerpt (the per-code cost
   switch is not visible).  */
8077 ix86_fp_comparison_arithmetics_cost (code)
8080 if (!TARGET_IEEE_FP)
8082 /* The cost of code output by ix86_expand_fp_compare. */
8110 /* Return cost of comparison done using fcomi operation.
8111 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8113 ix86_fp_comparison_fcomi_cost (code)
8116 enum rtx_code bypass_code, first_code, second_code;
8117 /* Return arbitrarily high cost when instruction is not supported - this
8118 prevents gcc from using it. */
8121 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
     /* Base cost 2 (fcomi + jump); extra branches needed for unordered
	handling add 1.  */
8122 return (bypass_code != NIL || second_code != NIL) + 2;
8125 /* Return cost of comparison done using sahf operation.
8126 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8128 ix86_fp_comparison_sahf_cost (code)
8131 enum rtx_code bypass_code, first_code, second_code;
8132 /* Return arbitrarily high cost when instruction is not preferred - this
8133 keeps gcc from using it. */
8134 if (!TARGET_USE_SAHF && !optimize_size)
8136 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
     /* Base cost 3 (fnstsw + sahf + jump); extra branches add 1.  */
8137 return (bypass_code != NIL || second_code != NIL) + 3;
8140 /* Compute cost of the comparison done using any method.
8141 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* Takes the minimum of the arithmetic, sahf and fcomi strategies; some
   interior lines are elided in this excerpt.  */
8143 ix86_fp_comparison_cost (code)
8146 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8149 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8150 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8152 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8153 if (min > sahf_cost)
8155 if (min > fcomi_cost)
8160 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* Returns the flags-register test rtx (for bcc/scc/cmov); may set
   *SECOND_TEST / *BYPASS_TEST when the condition needs two branches or a
   bypass around the main one.  NOTE(review): interior lines are elided in
   this excerpt.  */
8163 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8165 rtx op0, op1, scratch;
8169 enum machine_mode fpcmp_mode, intcmp_mode;
8171 int cost = ix86_fp_comparison_cost (code);
8172 enum rtx_code bypass_code, first_code, second_code;
8174 fpcmp_mode = ix86_fp_compare_mode (code);
8175 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8178 *second_test = NULL_RTX;
8180 *bypass_test = NULL_RTX;
8182 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8184 /* Do fcomi/sahf based test when profitable. */
8185 if ((bypass_code == NIL || bypass_test)
8186 && (second_code == NIL || second_test)
8187 && ix86_fp_comparison_arithmetics_cost (code) > cost)
     /* fcomi path: compare straight into the flags register.  */
8191 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8192 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
     /* sahf path: fnstsw into a HImode scratch, then sahf loads AH into
	EFLAGS.  */
8198 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8199 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8201 scratch = gen_reg_rtx (HImode);
8202 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8203 emit_insn (gen_x86_sahf_1 (scratch));
8206 /* The FP codes work out to act like unsigned. */
8207 intcmp_mode = fpcmp_mode;
8209 if (bypass_code != NIL)
8210 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8211 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8213 if (second_code != NIL)
8214 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8215 gen_rtx_REG (intcmp_mode, FLAGS_REG),
     /* Arithmetic fallback: fnstsw then test/and/cmp the status bits in AH
	directly.  */
8220 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8221 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8222 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8224 scratch = gen_reg_rtx (HImode);
8225 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8227 /* In the unordered case, we have to check C2 for NaN's, which
8228 doesn't happen to work out to anything nice combination-wise.
8229 So do some bit twiddling on the value we've got in AH to come
8230 up with an appropriate set of condition codes. */
     /* 0x45 masks C0|C2|C3 of the x87 status word's high byte; 0x01 is C0,
	0x40 is C3, 0x04 is C2 (the "unordered" bit).  */
8232 intcmp_mode = CCNOmode;
8237 if (code == GT || !TARGET_IEEE_FP)
8239 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8244 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8245 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8246 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8247 intcmp_mode = CCmode;
8253 if (code == LT && TARGET_IEEE_FP)
8255 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8256 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8257 intcmp_mode = CCmode;
8262 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8268 if (code == GE || !TARGET_IEEE_FP)
8270 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8275 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8276 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8283 if (code == LE && TARGET_IEEE_FP)
8285 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8286 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8287 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8288 intcmp_mode = CCmode;
8293 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8299 if (code == EQ && TARGET_IEEE_FP)
8301 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8302 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8303 intcmp_mode = CCmode;
8308 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8315 if (code == NE && TARGET_IEEE_FP)
8317 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8318 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8324 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8330 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8334 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8343 /* Return the test that should be put into the flags user, i.e.
8344 the bcc, scc, or cmov instruction. */
8345 return gen_rtx_fmt_ee (code, VOIDmode,
8346 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand a comparison of ix86_compare_op0/op1, dispatching to the FP or
   integer expander based on the mode of op0.  Clears *SECOND_TEST and
   *BYPASS_TEST before expanding.  NOTE(review): some interior lines are
   elided in this excerpt.  */
8351 ix86_expand_compare (code, second_test, bypass_test)
8353 rtx *second_test, *bypass_test;
8356 op0 = ix86_compare_op0;
8357 op1 = ix86_compare_op1;
8360 *second_test = NULL_RTX;
8362 *bypass_test = NULL_RTX;
8364 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8365 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8366 second_test, bypass_test);
8368 ret = ix86_expand_int_compare (code, op0, op1);
8373 /* Return true if the CODE will result in nontrivial jump sequence. */
/* I.e. the FP condition needs a bypass branch or a second branch.  */
8375 ix86_fp_jump_nontrivial_p (code)
8378 enum rtx_code bypass_code, first_code, second_code;
8381 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8382 return bypass_code != NIL || second_code != NIL;
/* Expand a conditional branch on CODE to LABEL, using the global
   ix86_compare_op0/op1.  Dispatches on the mode of op0: simple integer
   modes, FP modes, and DImode (split into word compares on 32-bit).
   NOTE(review): interior lines are elided in this excerpt.  */
8386 ix86_expand_branch (code, label)
8392 switch (GET_MODE (ix86_compare_op0))
     /* Simple case: one compare + one conditional jump.  */
8398 tmp = ix86_expand_compare (code, NULL, NULL);
8399 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8400 gen_rtx_LABEL_REF (VOIDmode, label),
8402 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
     /* Floating point modes.  */
8412 enum rtx_code bypass_code, first_code, second_code;
8414 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8417 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8419 /* Check whether we will use the natural sequence with one jump. If
8420 so, we can expand jump early. Otherwise delay expansion by
8421 creating compound insn to not confuse optimizers. */
8422 if (bypass_code == NIL && second_code == NIL
8425 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8426 gen_rtx_LABEL_REF (VOIDmode, label),
     /* Otherwise emit a compound fp-branch parallel with the clobbers the
	eventual splitter will need (flags regs 17/18, HImode scratch when
	not using fcomi).  */
8431 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8432 ix86_compare_op0, ix86_compare_op1);
8433 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8434 gen_rtx_LABEL_REF (VOIDmode, label),
8436 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8438 use_fcomi = ix86_use_fcomi_compare (code);
8439 vec = rtvec_alloc (3 + !use_fcomi);
8440 RTVEC_ELT (vec, 0) = tmp;
8442 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8444 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8447 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8449 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8457 /* Expand DImode branch into multiple compare+branch. */
8459 rtx lo[2], hi[2], label2;
8460 enum rtx_code code1, code2, code3;
     /* Canonicalize: constant goes in op1.  */
8462 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8464 tmp = ix86_compare_op0;
8465 ix86_compare_op0 = ix86_compare_op1;
8466 ix86_compare_op1 = tmp;
8467 code = swap_condition (code);
8469 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8470 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8472 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8473 avoid two branches. This costs one extra insn, so disable when
8474 optimizing for size. */
8476 if ((code == EQ || code == NE)
8478 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8483 if (hi[1] != const0_rtx)
8484 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8485 NULL_RTX, 0, OPTAB_WIDEN);
8488 if (lo[1] != const0_rtx)
8489 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8490 NULL_RTX, 0, OPTAB_WIDEN);
8492 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8493 NULL_RTX, 0, OPTAB_WIDEN);
     /* Recurse: now it's a 32-bit compare with zero.  */
8495 ix86_compare_op0 = tmp;
8496 ix86_compare_op1 = const0_rtx;
8497 ix86_expand_branch (code, label);
8501 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8502 op1 is a constant and the low word is zero, then we can just
8503 examine the high word. */
8505 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8508 case LT: case LTU: case GE: case GEU:
8509 ix86_compare_op0 = hi[0];
8510 ix86_compare_op1 = hi[1];
8511 ix86_expand_branch (code, label);
8517 /* Otherwise, we need two or three jumps. */
8519 label2 = gen_label_rtx ();
8522 code2 = swap_condition (code);
8523 code3 = unsigned_condition (code);
     /* Decompose the 64-bit relation into high-word (signed/original) and
	low-word (unsigned) sub-comparisons.  */
8527 case LT: case GT: case LTU: case GTU:
8530 case LE: code1 = LT; code2 = GT; break;
8531 case GE: code1 = GT; code2 = LT; break;
8532 case LEU: code1 = LTU; code2 = GTU; break;
8533 case GEU: code1 = GTU; code2 = LTU; break;
8535 case EQ: code1 = NIL; code2 = NE; break;
8536 case NE: code2 = NIL; break;
8544 * if (hi(a) < hi(b)) goto true;
8545 * if (hi(a) > hi(b)) goto false;
8546 * if (lo(a) < lo(b)) goto true;
8550 ix86_compare_op0 = hi[0];
8551 ix86_compare_op1 = hi[1];
8554 ix86_expand_branch (code1, label);
8556 ix86_expand_branch (code2, label2);
8558 ix86_compare_op0 = lo[0];
8559 ix86_compare_op1 = lo[1];
8560 ix86_expand_branch (code3, label);
8563 emit_label (label2);
8572 /* Split branch based on floating point condition. */
/* Emits up to three conditional jumps (bypass, main, second), propagating
   branch probabilities via REG_BR_PROB notes.  NOTE(review): interior lines
   are elided in this excerpt.  */
8574 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
8576 rtx op1, op2, target1, target2, tmp;
8579 rtx label = NULL_RTX;
8581 int bypass_probability = -1, second_probability = -1, probability = -1;
     /* Canonicalize so the fall-through target is target2.  */
8584 if (target2 != pc_rtx)
8587 code = reverse_condition_maybe_unordered (code);
8592 condition = ix86_expand_fp_compare (code, op1, op2,
8593 tmp, &second, &bypass);
8595 if (split_branch_probability >= 0)
8597 /* Distribute the probabilities across the jumps.
8598 Assume the BYPASS and SECOND to be always test
   for UNORDERED (rare in practice).  */
8600 probability = split_branch_probability;
8602 /* Value of 1 is low enough to make no need for probability
8603 to be updated. Later we may run some experiments and see
8604 if unordered values are more frequent in practice. */
8606 bypass_probability = 1;
8608 second_probability = 1;
     /* Bypass jump skips the main comparison on unordered operands.  */
8610 if (bypass != NULL_RTX)
8612 label = gen_label_rtx ();
8613 i = emit_jump_insn (gen_rtx_SET
8615 gen_rtx_IF_THEN_ELSE (VOIDmode,
8617 gen_rtx_LABEL_REF (VOIDmode,
8620 if (bypass_probability >= 0)
8622 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8623 GEN_INT (bypass_probability),
     /* The main conditional jump.  */
8626 i = emit_jump_insn (gen_rtx_SET
8628 gen_rtx_IF_THEN_ELSE (VOIDmode,
8629 condition, target1, target2)));
8630 if (probability >= 0)
8632 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8633 GEN_INT (probability),
     /* Second jump catches the remaining (e.g. unordered) cases.  */
8635 if (second != NULL_RTX)
8637 i = emit_jump_insn (gen_rtx_SET
8639 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8641 if (second_probability >= 0)
8643 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8644 GEN_INT (second_probability),
8647 if (label != NULL_RTX)
/* Expand a setcc of CODE into DEST (QImode).  Returns 1 on success, 0 for
   FAIL (e.g. DImode compares on 32-bit).  When the FP condition needs two
   tests, combines them with and/or.  NOTE(review): interior lines are
   elided in this excerpt.  */
8652 ix86_expand_setcc (code, dest)
8656 rtx ret, tmp, tmpreg;
8657 rtx second_test, bypass_test;
8659 if (GET_MODE (ix86_compare_op0) == DImode
8661 return 0; /* FAIL */
8663 if (GET_MODE (dest) != QImode)
8666 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8667 PUT_MODE (ret, QImode);
8672 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
8673 if (bypass_test || second_test)
8675 rtx test = second_test;
8677 rtx tmp2 = gen_reg_rtx (QImode);
     /* A bypass test is combined inverted (and), a second test as-is (or).  */
8684 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8686 PUT_MODE (test, QImode);
8687 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8690 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8692 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8695 return 1; /* DONE */
/* Expand an integer conditional move (operands[0] = operands[1] ?
   operands[2] : operands[3]).  Returns 1 for DONE, 0 for FAIL.  Tries a
   cascade of branchless tricks (sbb, setcc+lea, setcc+and/add) before
   falling back to a real cmov.  NOTE(review): interior lines are elided
   in this excerpt.  */
8699 ix86_expand_int_movcc (operands)
8702 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8703 rtx compare_seq, compare_op;
8704 rtx second_test, bypass_test;
8705 enum machine_mode mode = GET_MODE (operands[0]);
8707 /* When the compare code is not LTU or GEU, we can not use sbbl case.
8708 In case comparison is done with immediate, we can convert it to LTU or
8709 GEU by altering the integer. */
8711 if ((code == LEU || code == GTU)
8712 && GET_CODE (ix86_compare_op1) == CONST_INT
8714 && INTVAL (ix86_compare_op1) != -1
8715 /* For x86-64, the immediate field in the instruction is 32-bit
8716 signed, so we can't increment a DImode value above 0x7fffffff. */
8718 || GET_MODE (ix86_compare_op0) != DImode
8719 || INTVAL (ix86_compare_op1) != 0x7fffffff)
8720 && GET_CODE (operands[2]) == CONST_INT
8721 && GET_CODE (operands[3]) == CONST_INT)
     /* LEU x  ==  LTU x+1;  GTU x  ==  GEU x+1.  */
8727 ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8728 GET_MODE (ix86_compare_op0));
8732 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8733 compare_seq = get_insns ();
8736 compare_code = GET_CODE (compare_op);
8738 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8739 HImode insns, we'd be swallowed in word prefix ops. */
8742 && (mode != DImode || TARGET_64BIT)
8743 && GET_CODE (operands[2]) == CONST_INT
8744 && GET_CODE (operands[3]) == CONST_INT)
8746 rtx out = operands[0];
8747 HOST_WIDE_INT ct = INTVAL (operands[2]);
8748 HOST_WIDE_INT cf = INTVAL (operands[3]);
     /* sbb trick: LTU/GEU compare materializes 0 / -1 via carry.  */
8751 if ((compare_code == LTU || compare_code == GEU)
8752 && !second_test && !bypass_test)
8754 /* Detect overlap between destination and compare sources. */
8757 /* To simplify rest of code, restrict to the GEU case. */
8758 if (compare_code == LTU)
8763 compare_code = reverse_condition (compare_code);
8764 code = reverse_condition (code);
8768 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8769 || reg_overlap_mentioned_p (out, ix86_compare_op1))
8770 tmp = gen_reg_rtx (mode);
8772 emit_insn (compare_seq);
8774 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8776 emit_insn (gen_x86_movsicc_0_m1 (tmp));
     /* Shape the 0/-1 mask into the requested ct/cf pair.  */
8788 tmp = expand_simple_binop (mode, PLUS,
8790 tmp, 1, OPTAB_DIRECT);
8801 tmp = expand_simple_binop (mode, IOR,
8803 tmp, 1, OPTAB_DIRECT);
8805 else if (diff == -1 && ct)
8815 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8817 tmp = expand_simple_binop (mode, PLUS,
8819 tmp, 1, OPTAB_DIRECT);
8827 * andl cf - ct, dest
8837 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8840 tmp = expand_simple_binop (mode, AND,
8842 gen_int_mode (cf - ct, mode),
8843 tmp, 1, OPTAB_DIRECT);
8845 tmp = expand_simple_binop (mode, PLUS,
8847 tmp, 1, OPTAB_DIRECT);
8851 emit_move_insn (out, tmp);
8853 return 1; /* DONE */
     /* Canonicalize so ct/cf suit the chosen code; may need to reverse the
	condition.  */
8860 tmp = ct, ct = cf, cf = tmp;
8862 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8864 /* We may be reversing unordered compare to normal compare, that
8865 is not valid in general (we may convert non-trapping condition
8866 to trapping one), however on i386 we currently emit all
8867 comparisons unordered. */
8868 compare_code = reverse_condition_maybe_unordered (compare_code);
8869 code = reverse_condition_maybe_unordered (code);
8873 compare_code = reverse_condition (compare_code);
8874 code = reverse_condition (code);
     /* Recognize compares against 0 / -1 that a sign-mask (sar) can serve.  */
8879 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
8880 && GET_CODE (ix86_compare_op1) == CONST_INT)
8882 if (ix86_compare_op1 == const0_rtx
8883 && (code == LT || code == GE))
8884 compare_code = code;
8885 else if (ix86_compare_op1 == constm1_rtx)
8889 else if (code == GT)
8894 /* Optimize dest = (op0 < 0) ? -1 : cf. */
8895 if (compare_code != NIL
8896 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
8897 && (cf == -1 || ct == -1))
8899 /* If lea code below could be used, only optimize
8900 if it results in a 2 insn sequence. */
8902 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
8903 || diff == 3 || diff == 5 || diff == 9)
8904 || (compare_code == LT && ct == -1)
8905 || (compare_code == GE && cf == -1))
8908 * notl op1 (if necessary)
8916 code = reverse_condition (code);
8919 out = emit_store_flag (out, code, ix86_compare_op0,
8920 ix86_compare_op1, VOIDmode, 0, -1);
8922 out = expand_simple_binop (mode, IOR,
8924 out, 1, OPTAB_DIRECT);
8925 if (out != operands[0])
8926 emit_move_insn (operands[0], out);
8928 return 1; /* DONE */
     /* setcc + lea trick: diff of the two constants is an lea scale.  */
8932 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
8933 || diff == 3 || diff == 5 || diff == 9)
8934 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
8940 * lea cf(dest*(ct-cf)),dest
8944 * This also catches the degenerate setcc-only case.
8950 out = emit_store_flag (out, code, ix86_compare_op0,
8951 ix86_compare_op1, VOIDmode, 0, 1);
8954 /* On x86_64 the lea instruction operates on Pmode, so we need
8955 to get arithmetics done in proper mode to match. */
8962 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
8966 tmp = gen_rtx_PLUS (mode, tmp, out1);
8972 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
8976 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
     /* lea may clobber flags, so wrap with an explicit CLOBBER.  */
8982 clob = gen_rtx_REG (CCmode, FLAGS_REG);
8983 clob = gen_rtx_CLOBBER (VOIDmode, clob);
8985 tmp = gen_rtx_SET (VOIDmode, out, tmp);
8986 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8990 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8992 if (out != operands[0])
8993 emit_move_insn (operands[0], out);
8995 return 1; /* DONE */
8999 * General case: Jumpful:
9000 * xorl dest,dest cmpl op1, op2
9001 * cmpl op1, op2 movl ct, dest
9003 * decl dest movl cf, dest
9004 * andl (cf-ct),dest 1:
9009 * This is reasonably steep, but branch mispredict costs are
9010 * high on modern cpus, so consider failing only if optimizing
9013 * %%% Parameterize branch_cost on the tuning architecture, then
9014 * use that. The 80386 couldn't care less about mispredicts.
9017 if (!optimize_size && !TARGET_CMOVE)
9023 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9024 /* We may be reversing unordered compare to normal compare,
9025 that is not valid in general (we may convert non-trapping
9026 condition to trapping one), however on i386 we currently
9027 emit all comparisons unordered. */
9028 code = reverse_condition_maybe_unordered (code);
9031 code = reverse_condition (code);
9032 if (compare_code != NIL)
9033 compare_code = reverse_condition (compare_code);
9037 if (compare_code != NIL)
9039 /* notl op1 (if needed)
9044 For x < 0 (resp. x <= -1) there will be no notl,
9045 so if possible swap the constants to get rid of the
9047 True/false will be -1/0 while code below (store flag
9048 followed by decrement) is 0/-1, so the constants need
9049 to be exchanged once more. */
9051 if (compare_code == GE || !cf)
9053 code = reverse_condition (code);
9058 HOST_WIDE_INT tmp = cf;
     /* General branchless sequence: setcc; dec; and (cf-ct); add ct.  */
9063 out = emit_store_flag (out, code, ix86_compare_op0,
9064 ix86_compare_op1, VOIDmode, 0, -1);
9068 out = emit_store_flag (out, code, ix86_compare_op0,
9069 ix86_compare_op1, VOIDmode, 0, 1);
9071 out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
9072 out, 1, OPTAB_DIRECT);
9075 out = expand_simple_binop (mode, AND, out,
9076 gen_int_mode (cf - ct, mode),
9077 out, 1, OPTAB_DIRECT);
9079 out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
9080 out, 1, OPTAB_DIRECT);
9081 if (out != operands[0])
9082 emit_move_insn (operands[0], out);
9084 return 1; /* DONE */
9090 /* Try a few things more with specific constants and a variable. */
9093 rtx var, orig_out, out, tmp;
9096 return 0; /* FAIL */
9098 /* If one of the two operands is an interesting constant, load a
9099 constant with the above and mask it in with a logical operation. */
9101 if (GET_CODE (operands[2]) == CONST_INT)
9104 if (INTVAL (operands[2]) == 0)
9105 operands[3] = constm1_rtx, op = and_optab;
9106 else if (INTVAL (operands[2]) == -1)
9107 operands[3] = const0_rtx, op = ior_optab;
9109 return 0; /* FAIL */
9111 else if (GET_CODE (operands[3]) == CONST_INT)
9114 if (INTVAL (operands[3]) == 0)
9115 operands[2] = constm1_rtx, op = and_optab;
9116 else if (INTVAL (operands[3]) == -1)
9117 operands[2] = const0_rtx, op = ior_optab;
9119 return 0; /* FAIL */
9122 return 0; /* FAIL */
9124 orig_out = operands[0];
9125 tmp = gen_reg_rtx (mode);
9128 /* Recurse to get the constant loaded. */
9129 if (ix86_expand_int_movcc (operands) == 0)
9130 return 0; /* FAIL */
9132 /* Mask in the interesting variable. */
9133 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9135 if (out != orig_out)
9136 emit_move_insn (orig_out, out);
9138 return 1; /* DONE */
9142 * For comparison with above,
     (fall through to a real cmov; force operands into registers first)  */
9152 if (! nonimmediate_operand (operands[2], mode))
9153 operands[2] = force_reg (mode, operands[2]);
9154 if (! nonimmediate_operand (operands[3], mode))
9155 operands[3] = force_reg (mode, operands[3]);
9157 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9159 rtx tmp = gen_reg_rtx (mode);
9160 emit_move_insn (tmp, operands[3]);
9163 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9165 rtx tmp = gen_reg_rtx (mode);
9166 emit_move_insn (tmp, operands[2]);
9169 if (! register_operand (operands[2], VOIDmode)
9170 && ! register_operand (operands[3], VOIDmode))
9171 operands[2] = force_reg (mode, operands[2]);
9173 emit_insn (compare_seq);
9174 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9175 gen_rtx_IF_THEN_ELSE (mode,
9176 compare_op, operands[2],
9179 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9180 gen_rtx_IF_THEN_ELSE (mode,
9185 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9186 gen_rtx_IF_THEN_ELSE (mode,
9191 return 1; /* DONE */
/* Expand a floating point conditional move.  Tries SSE min/max and SSE
   cmov patterns first, then falls back to x87 fcmov.  NOTE(review):
   interior lines are elided in this excerpt.  */
9195 ix86_expand_fp_movcc (operands)
9200 rtx compare_op, second_test, bypass_test;
9202 /* For SF/DFmode conditional moves based on comparisons
9203 in same mode, we may want to use SSE min/max instructions. */
9204 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9205 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9206 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9207 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
9209 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9210 /* We may be called from the post-reload splitter. */
9211 && (!REG_P (operands[0])
9212 || SSE_REG_P (operands[0])
9213 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9215 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9216 code = GET_CODE (operands[1]);
9218 /* See if we have (cross) match between comparison operands and
9219 conditional move operands. */
9220 if (rtx_equal_p (operands[2], op1))
9225 code = reverse_condition_maybe_unordered (code);
9227 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9229 /* Check for min operation. */
9232 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9233 if (memory_operand (op0, VOIDmode))
9234 op0 = force_reg (GET_MODE (operands[0]), op0);
9235 if (GET_MODE (operands[0]) == SFmode)
9236 emit_insn (gen_minsf3 (operands[0], op0, op1));
9238 emit_insn (gen_mindf3 (operands[0], op0, op1));
9241 /* Check for max operation. */
9244 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9245 if (memory_operand (op0, VOIDmode))
9246 op0 = force_reg (GET_MODE (operands[0]), op0);
9247 if (GET_MODE (operands[0]) == SFmode)
9248 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9250 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9254 /* Manage condition to be sse_comparison_operator. In case we are
9255 in non-ieee mode, try to canonicalize the destination operand
9256 to be first in the comparison - this helps reload to avoid extra
   moves.  */
9258 if (!sse_comparison_operator (operands[1], VOIDmode)
9259 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9261 rtx tmp = ix86_compare_op0;
9262 ix86_compare_op0 = ix86_compare_op1;
9263 ix86_compare_op1 = tmp;
9264 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9265 VOIDmode, ix86_compare_op0,
9268 /* Similarly try to manage result to be first operand of conditional
9269 move. We also don't support the NE comparison on SSE, so try to
   avoid it.  */
9271 if ((rtx_equal_p (operands[0], operands[3])
9272 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9273 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9275 rtx tmp = operands[2];
9276 operands[2] = operands[3];
9278 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9279 (GET_CODE (operands[1])),
9280 VOIDmode, ix86_compare_op0,
9283 if (GET_MODE (operands[0]) == SFmode)
9284 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9285 operands[2], operands[3],
9286 ix86_compare_op0, ix86_compare_op1));
9288 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9289 operands[2], operands[3],
9290 ix86_compare_op0, ix86_compare_op1));
9294 /* The floating point conditional move instructions don't directly
9295 support conditions resulting from a signed integer comparison. */
9297 code = GET_CODE (operands[1]);
9298 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9300 /* The floating point conditional move instructions don't directly
9301 support signed integer comparisons. */
9303 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9305 if (second_test != NULL || bypass_test != NULL)
     /* Reduce to a setcc against zero, which fcmov can handle.  */
9307 tmp = gen_reg_rtx (QImode);
9308 ix86_expand_setcc (code, tmp);
9310 ix86_compare_op0 = tmp;
9311 ix86_compare_op1 = const0_rtx;
9312 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9314 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9316 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9317 emit_move_insn (tmp, operands[3]);
9320 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9322 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9323 emit_move_insn (tmp, operands[2]);
     /* Emit main fcmov plus optional bypass/second fcmovs.  */
9327 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9328 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9333 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9334 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9339 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9340 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9348 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9349 works for floating point parameters and nonoffsetable memories.
9350 For pushes, it returns just stack offsets; the values will be saved
9351 in the right order. Maximally three parts are generated. */
/* NOTE(review): interior lines are elided in this excerpt; presumably
   returns the part count (size) -- confirm against the full source.  */
9354 ix86_split_to_parts (operand, parts, mode)
9357 enum machine_mode mode;
     /* 32-bit: 4-byte parts (TFmode uses 3); 64-bit: 8-byte parts.  */
9362 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9364 size = (GET_MODE_SIZE (mode) + 4) / 8;
9366 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9368 if (size < 2 || size > 3)
9371 /* Optimize constant pool reference to immediates. This is used by fp
9372 moves, that force all constants to memory to allow combining. */
9373 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9375 rtx tmp = maybe_get_pool_constant (operand);
9380 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9382 /* The only non-offsetable memories we handle are pushes. */
9383 if (! push_operand (operand, VOIDmode))
9386 operand = copy_rtx (operand);
9387 PUT_MODE (operand, Pmode);
9388 parts[0] = parts[1] = parts[2] = operand;
9390 else if (!TARGET_64BIT)
9393 split_di (&operand, 1, &parts[0], &parts[1]);
9396 if (REG_P (operand))
     /* Hard-register splitting only valid after reload.  */
9398 if (!reload_completed)
9400 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9401 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9403 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9405 else if (offsettable_memref_p (operand))
9407 operand = adjust_address (operand, SImode, 0);
9409 parts[1] = adjust_address (operand, SImode, 4);
9411 parts[2] = adjust_address (operand, SImode, 8);
9413 else if (GET_CODE (operand) == CONST_DOUBLE)
9418 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9423 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9424 parts[2] = gen_int_mode (l[2], SImode);
9427 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9432 parts[1] = gen_int_mode (l[1], SImode);
9433 parts[0] = gen_int_mode (l[0], SImode);
     /* TARGET_64BIT: split into DImode (plus SImode tail for XF/TF).  */
9442 split_ti (&operand, 1, &parts[0], &parts[1]);
9443 if (mode == XFmode || mode == TFmode)
9445 if (REG_P (operand))
9447 if (!reload_completed)
9449 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9450 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9452 else if (offsettable_memref_p (operand))
9454 operand = adjust_address (operand, DImode, 0);
9456 parts[1] = adjust_address (operand, SImode, 8);
9458 else if (GET_CODE (operand) == CONST_DOUBLE)
9463 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9464 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9465 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9466 if (HOST_BITS_PER_WIDE_INT >= 64)
9469 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9470 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9473 parts[0] = immed_double_const (l[0], l[1], DImode);
9474 parts[1] = gen_int_mode (l[2], SImode);
9484 /* Emit insns to perform a move or push of DI, DF, and XF values.
9485 Return false when normal moves are needed; true when all required
9486 insns have been emitted. Operands 2-4 contain the input values
9487 in the correct order; operands 5-7 contain the output values.
NOTE(review): this listing has elided lines (declarations, braces and
some else-arms are missing); code below is annotated as-is. */
9490 ix86_split_long_move (operands)
9497 enum machine_mode mode = GET_MODE (operands[0]);
9499 /* The DFmode expanders may ask us to move double.
9500 For 64bit target this is single move. By hiding the fact
9501 here we simplify i386.md splitters. */
9502 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9504 /* Optimize constant pool reference to immediates. This is used by
9505 fp moves, that force all constants to memory to allow combining. */
9507 if (GET_CODE (operands[1]) == MEM
9508 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9509 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9510 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9511 if (push_operand (operands[0], VOIDmode))
9513 operands[0] = copy_rtx (operands[0]);
9514 PUT_MODE (operands[0], Pmode);
9517 operands[0] = gen_lowpart (DImode, operands[0]);
9518 operands[1] = gen_lowpart (DImode, operands[1]);
9519 emit_move_insn (operands[0], operands[1]);
9523 /* The only non-offsettable memory we handle is push. */
9524 if (push_operand (operands[0], VOIDmode))
9526 else if (GET_CODE (operands[0]) == MEM
9527 && ! offsettable_memref_p (operands[0]))
/* Split both operands into word-sized parts; nparts is 2 or 3. */
9530 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9531 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9533 /* When emitting push, take care for source operands on the stack. */
9534 if (push && GET_CODE (operands[1]) == MEM
9535 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
/* Rebase the higher source parts on the lower parts' addresses so the
pushes (which move the stack pointer) read the right memory. */
9538 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9539 XEXP (part[1][2], 0));
9540 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9541 XEXP (part[1][1], 0));
9544 /* We need to do copy in the right order in case an address register
9545 of the source overlaps the destination. */
9546 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
/* Count how many destination parts collide with the source address. */
9548 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9550 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9553 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9556 /* Collision in the middle part can be handled by reordering. */
9557 if (collisions == 1 && nparts == 3
9558 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9561 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9562 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9565 /* If there are more collisions, we can't handle it by reordering.
9566 Do an lea to the last part and use only one colliding move. */
9567 else if (collisions > 1)
9570 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
9571 XEXP (part[1][0], 0)));
9572 part[1][0] = change_address (part[1][0],
9573 TARGET_64BIT ? DImode : SImode,
9574 part[0][nparts - 1]);
9575 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
9577 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
9587 /* We use only first 12 bytes of TFmode value, but for pushing we
9588 are required to adjust stack as if we were pushing real 16byte
value (presumably the elided line completes this sentence). */
9590 if (mode == TFmode && !TARGET_64BIT)
9591 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
9593 emit_move_insn (part[0][2], part[1][2]);
9598 /* In 64bit mode we don't have 32bit push available. In case this is
9599 register, it is OK - we will just use larger counterpart. We also
9600 retype memory - these comes from attempt to avoid REX prefix on
9601 moving of second half of TFmode value. */
9602 if (GET_MODE (part[1][1]) == SImode)
9604 if (GET_CODE (part[1][1]) == MEM)
9605 part[1][1] = adjust_address (part[1][1], DImode, 0);
9606 else if (REG_P (part[1][1]))
9607 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9610 if (GET_MODE (part[1][0]) == SImode)
9611 part[1][0] = part[1][1];
/* Emit the push moves, high part first. */
9614 emit_move_insn (part[0][1], part[1][1]);
9615 emit_move_insn (part[0][0], part[1][0]);
9619 /* Choose correct order to not overwrite the source before it is copied. */
9620 if ((REG_P (part[0][0])
9621 && REG_P (part[1][1])
9622 && (REGNO (part[0][0]) == REGNO (part[1][1])
9624 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9626 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Overlap detected: copy high-to-low (operands 2..4 = dest parts in
reverse, 5..7 = matching source parts). */
9630 operands[2] = part[0][2];
9631 operands[3] = part[0][1];
9632 operands[4] = part[0][0];
9633 operands[5] = part[1][2];
9634 operands[6] = part[1][1];
9635 operands[7] = part[1][0];
9639 operands[2] = part[0][1];
9640 operands[3] = part[0][0];
9641 operands[5] = part[1][1];
9642 operands[6] = part[1][0];
/* No overlap: copy low-to-high in natural order. */
9649 operands[2] = part[0][0];
9650 operands[3] = part[0][1];
9651 operands[4] = part[0][2];
9652 operands[5] = part[1][0];
9653 operands[6] = part[1][1];
9654 operands[7] = part[1][2];
9658 operands[2] = part[0][0];
9659 operands[3] = part[0][1];
9660 operands[5] = part[1][0];
9661 operands[6] = part[1][1];
/* Emit the part-wise moves chosen above; the third move presumably
runs only when nparts == 3 (guard elided in this listing). */
9664 emit_move_insn (operands[2], operands[5]);
9665 emit_move_insn (operands[3], operands[6]);
9667 emit_move_insn (operands[4], operands[7]);
/* Split a 64bit left shift into SImode insns on a 32bit target.
OPERANDS: 0 = dest, 1 = src, 2 = shift count; SCRATCH is an optional
spare SImode register used after reload.
NOTE(review): this listing has elided lines (braces/else-arms missing). */
9673 ix86_split_ashldi (operands, scratch)
9674 rtx *operands, scratch;
9676 rtx low[2], high[2];
9679 if (GET_CODE (operands[2]) == CONST_INT
/* Constant shift count: emit the exact sequence directly. */
9681 split_di (operands, 2, low, high);
9682 count = INTVAL (operands[2]) & 63;
/* count >= 32: result low word is zero, high word is low src shifted. */
9686 emit_move_insn (high[0], low[1]);
9687 emit_move_insn (low[0], const0_rtx);
9690 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
/* count < 32: shld the high word, shl the low word. */
9694 if (!rtx_equal_p (operands[0], operands[1]))
9695 emit_move_insn (operands[0], operands[1]);
9696 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9697 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)))
/* Variable shift count: shift as if count < 32, then fix up for the
count >= 32 case at run time. */
9702 if (!rtx_equal_p (operands[0], operands[1]))
9703 emit_move_insn (operands[0], operands[1]);
9705 split_di (operands, 1, low, high);
9707 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9708 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
9710 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
/* With cmov, conditionally swap low into high and zero low. */
9712 if (! no_new_pseudos)
9713 scratch = force_reg (SImode, const0_rtx);
9715 emit_move_insn (scratch, const0_rtx);
9717 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
/* Without cmov, use the branching adjustment pattern. */
9721 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a 64bit arithmetic right shift into SImode insns on a 32bit
target. OPERANDS: 0 = dest, 1 = src, 2 = shift count; SCRATCH is an
optional spare SImode register.
NOTE(review): this listing has elided lines (braces/else-arms missing). */
9726 ix86_split_ashrdi (operands, scratch)
9727 rtx *operands, scratch;
9729 rtx low[2], high[2];
9732 if (GET_CODE (operands[2]) == CONST_INT)
9734 split_di (operands, 2, low, high);
9735 count = INTVAL (operands[2]) & 63;
/* count >= 32: low result = high src shifted; high result = sign bits. */
9739 emit_move_insn (low[0], high[1]);
9741 if (! reload_completed)
9742 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
/* After reload we cannot use a fresh register; reuse high[0]. */
9745 emit_move_insn (high[0], low[0]);
9746 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9750 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
/* count < 32: shrd the low word, sar the high word. */
9754 if (!rtx_equal_p (operands[0], operands[1]))
9755 emit_move_insn (operands[0], operands[1]);
9756 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9757 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count: shift as if count < 32, then fix up at run time. */
9762 if (!rtx_equal_p (operands[0], operands[1]))
9763 emit_move_insn (operands[0], operands[1]);
9765 split_di (operands, 1, low, high);
9767 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9768 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9770 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
/* With cmov: materialize the sign word in SCRATCH for the fixup. */
9772 if (! no_new_pseudos)
9773 scratch = gen_reg_rtx (SImode);
9774 emit_move_insn (scratch, high[0]);
9775 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9776 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
/* Without cmov, use the branching adjustment pattern. */
9780 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a 64bit logical right shift into SImode insns on a 32bit
target. OPERANDS: 0 = dest, 1 = src, 2 = shift count; SCRATCH is an
optional spare SImode register.
NOTE(review): this listing has elided lines (braces/else-arms missing). */
9785 ix86_split_lshrdi (operands, scratch)
9786 rtx *operands, scratch;
9788 rtx low[2], high[2];
9791 if (GET_CODE (operands[2]) == CONST_INT)
9793 split_di (operands, 2, low, high);
9794 count = INTVAL (operands[2]) & 63;
/* count >= 32: low result = high src shifted; high result = 0. */
9798 emit_move_insn (low[0], high[1]);
9799 emit_move_insn (high[0], const0_rtx);
9802 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
/* count < 32: shrd the low word, shr the high word. */
9806 if (!rtx_equal_p (operands[0], operands[1]))
9807 emit_move_insn (operands[0], operands[1]);
9808 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9809 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count: shift as if count < 32, then fix up at run time. */
9814 if (!rtx_equal_p (operands[0], operands[1]))
9815 emit_move_insn (operands[0], operands[1]);
9817 split_di (operands, 1, low, high);
9819 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9820 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
9822 /* Heh. By reversing the arguments, we can reuse this pattern. */
9823 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9825 if (! no_new_pseudos)
9826 scratch = force_reg (SImode, const0_rtx);
9828 emit_move_insn (scratch, const0_rtx);
9830 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
/* Without cmov, use the branching adjustment pattern. */
9834 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
9838 /* Helper function for the string operations below. Test VARIABLE whether
9839 it is aligned to VALUE bytes. If true, jump to the label.
Returns the freshly created label so the caller can emit it later.
NOTE(review): this listing has elided lines (return statement etc.). */
9841 ix86_expand_aligntest (variable, value)
9845 rtx label = gen_label_rtx ();
9846 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
/* Mask off the low bits; mode-specific AND pattern. */
9847 if (GET_MODE (variable) == DImode)
9848 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
9850 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
/* Jump to LABEL when the masked bits are zero (i.e. aligned). */
9851 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
9856 /* Adjust COUNTER by the VALUE, i.e. subtract VALUE from COUNTREG,
using the add pattern matching the counter's mode. */
9858 ix86_adjust_counter (countreg, value)
9860 HOST_WIDE_INT value;
9862 if (GET_MODE (countreg) == DImode)
9863 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
9865 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
9868 /* Zero extend possibly SImode EXP to Pmode register.
Returns a fresh Pmode register holding the (extended) value; VOIDmode
constants are simply forced into a Pmode register.
NOTE(review): the final `return r;` is elided in this listing. */
9870 ix86_zero_extend_to_Pmode (exp)
9874 if (GET_MODE (exp) == VOIDmode)
9875 return force_reg (Pmode, exp);
9876 if (GET_MODE (exp) == Pmode)
9877 return copy_to_mode_reg (Pmode, exp);
/* SImode value on a 64bit target: explicit zero extension needed. */
9878 r = gen_reg_rtx (Pmode);
9879 emit_insn (gen_zero_extendsidi2 (r, exp));
9883 /* Expand string move (memcpy) operation. Use i386 string operations when
9884 profitable. expand_clrstr contains similar code.
DST/SRC are MEMs, COUNT_EXP the byte count, ALIGN_EXP the known
alignment. Returns nonzero when the copy was expanded inline
(return statements elided in this listing).
NOTE(review): this listing has elided lines (braces/else-arms missing). */
9886 ix86_expand_movstr (dst, src, count_exp, align_exp)
9887 rtx dst, src, count_exp, align_exp;
9889 rtx srcreg, destreg, countreg;
9890 enum machine_mode counter_mode;
9891 HOST_WIDE_INT align = 0;
9892 unsigned HOST_WIDE_INT count = 0;
9897 if (GET_CODE (align_exp) == CONST_INT)
9898 align = INTVAL (align_exp);
9900 /* This simple hack avoids all inlining code and simplifies code below. */
9901 if (!TARGET_ALIGN_STRINGOPS)
9904 if (GET_CODE (count_exp) == CONST_INT)
9905 count = INTVAL (count_exp);
9907 /* Figure out proper mode for counter. For 32bits it is always SImode,
9908 for 64bits use SImode when possible, otherwise DImode.
9909 Set count to number of bytes copied when known at compile time. */
9910 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9911 || x86_64_zero_extended_value (count_exp))
9912 counter_mode = SImode;
9914 counter_mode = DImode;
9916 if (counter_mode != SImode && counter_mode != DImode)
9919 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9920 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* String insns require the direction flag cleared. */
9922 emit_insn (gen_cld ());
9924 /* When optimizing for size emit simple rep ; movsb instruction for
9925 counts not divisible by 4. */
9927 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9929 countreg = ix86_zero_extend_to_Pmode (count_exp);
9931 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
9932 destreg, srcreg, countreg));
9934 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
9935 destreg, srcreg, countreg));
9938 /* For constant aligned (or small unaligned) copies use rep movsl
9939 followed by code copying the rest. For PentiumPro ensure 8 byte
9940 alignment to allow rep movsl acceleration. */
9944 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9945 || optimize_size || count < (unsigned int) 64))
/* size is the word chunk moved by rep movs{si,di}. */
9947 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9948 if (count & ~(size - 1))
9950 countreg = copy_to_mode_reg (counter_mode,
9951 GEN_INT ((count >> (size == 4 ? 2 : 3))
9952 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9953 countreg = ix86_zero_extend_to_Pmode (countreg);
9957 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
9958 destreg, srcreg, countreg));
9960 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
9961 destreg, srcreg, countreg));
9964 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
9965 destreg, srcreg, countreg));
/* Copy the remaining 1..7 tail bytes with single string moves. */
9967 if (size == 8 && (count & 0x04))
9968 emit_insn (gen_strmovsi (destreg, srcreg));
9970 emit_insn (gen_strmovhi (destreg, srcreg));
9972 emit_insn (gen_strmovqi (destreg, srcreg));
9974 /* The generic code based on the glibc implementation:
9975 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9976 allowing accelerated copying there)
9977 - copy the data using rep movsl
9983 int desired_alignment = (TARGET_PENTIUMPRO
9984 && (count == 0 || count >= (unsigned int) 260)
9985 ? 8 : UNITS_PER_WORD);
9987 /* In case we don't know anything about the alignment, default to
9988 library version, since it is usually equally fast and result in
smaller code (sentence completion elided in this listing). */
9990 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9996 if (TARGET_SINGLE_STRINGOP)
9997 emit_insn (gen_cld ());
9999 countreg2 = gen_reg_rtx (Pmode);
10000 countreg = copy_to_mode_reg (counter_mode, count_exp);
10002 /* We don't use loops to align destination and to copy parts smaller
10003 than 4 bytes, because gcc is able to optimize such code better (in
10004 the case the destination or the count really is aligned, gcc is often
10005 able to predict the branches) and also it is friendlier to the
10006 hardware branch prediction.
10008 Using loops is benefical for generic case, because we can
10009 handle small counts using the loops. Many CPUs (such as Athlon)
10010 have large REP prefix setup costs.
10012 This is quite costy. Maybe we can revisit this decision later or
10013 add some customizability to this code. */
10015 if (count == 0 && align < desired_alignment)
10017 label = gen_label_rtx ();
10018 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10019 LEU, 0, counter_mode, 1, label);
/* Align destination byte/word/long at a time, adjusting the counter. */
10023 rtx label = ix86_expand_aligntest (destreg, 1);
10024 emit_insn (gen_strmovqi (destreg, srcreg));
10025 ix86_adjust_counter (countreg, 1);
10026 emit_label (label);
10027 LABEL_NUSES (label) = 1;
10031 rtx label = ix86_expand_aligntest (destreg, 2);
10032 emit_insn (gen_strmovhi (destreg, srcreg));
10033 ix86_adjust_counter (countreg, 2);
10034 emit_label (label);
10035 LABEL_NUSES (label) = 1;
10037 if (align <= 4 && desired_alignment > 4)
10039 rtx label = ix86_expand_aligntest (destreg, 4);
10040 emit_insn (gen_strmovsi (destreg, srcreg));
10041 ix86_adjust_counter (countreg, 4);
10042 emit_label (label);
10043 LABEL_NUSES (label) = 1;
10046 if (label && desired_alignment > 4 && !TARGET_64BIT)
10048 emit_label (label);
10049 LABEL_NUSES (label) = 1;
10052 if (!TARGET_SINGLE_STRINGOP)
10053 emit_insn (gen_cld ());
/* Bulk copy: rep movs with count scaled to word units. */
10056 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10058 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10059 destreg, srcreg, countreg2));
10063 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10064 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10065 destreg, srcreg, countreg2));
10070 emit_label (label);
10071 LABEL_NUSES (label) = 1;
/* Copy the remaining tail (4/2/1 bytes), testing the counter when the
compile-time count is unknown. */
10073 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10074 emit_insn (gen_strmovsi (destreg, srcreg));
10075 if ((align <= 4 || count == 0) && TARGET_64BIT)
10077 rtx label = ix86_expand_aligntest (countreg, 4);
10078 emit_insn (gen_strmovsi (destreg, srcreg));
10079 emit_label (label);
10080 LABEL_NUSES (label) = 1;
10082 if (align > 2 && count != 0 && (count & 2))
10083 emit_insn (gen_strmovhi (destreg, srcreg));
10084 if (align <= 2 || count == 0)
10086 rtx label = ix86_expand_aligntest (countreg, 2);
10087 emit_insn (gen_strmovhi (destreg, srcreg));
10088 emit_label (label);
10089 LABEL_NUSES (label) = 1;
10091 if (align > 1 && count != 0 && (count & 1))
10092 emit_insn (gen_strmovqi (destreg, srcreg));
10093 if (align <= 1 || count == 0)
10095 rtx label = ix86_expand_aligntest (countreg, 1);
10096 emit_insn (gen_strmovqi (destreg, srcreg));
10097 emit_label (label);
10098 LABEL_NUSES (label) = 1;
/* Attach memory attributes to all emitted insns. */
10102 insns = get_insns ();
10105 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10110 /* Expand string clear operation (bzero). Use i386 string operations when
10111 profitable. expand_movstr contains similar code.
SRC is the destination MEM (naming kept from caller), COUNT_EXP the
byte count, ALIGN_EXP the known alignment.
NOTE(review): this listing has elided lines (braces/else-arms/returns). */
10113 ix86_expand_clrstr (src, count_exp, align_exp)
10114 rtx src, count_exp, align_exp;
10116 rtx destreg, zeroreg, countreg;
10117 enum machine_mode counter_mode;
10118 HOST_WIDE_INT align = 0;
10119 unsigned HOST_WIDE_INT count = 0;
10121 if (GET_CODE (align_exp) == CONST_INT)
10122 align = INTVAL (align_exp);
10124 /* This simple hack avoids all inlining code and simplifies code below. */
10125 if (!TARGET_ALIGN_STRINGOPS)
10128 if (GET_CODE (count_exp) == CONST_INT)
10129 count = INTVAL (count_exp);
10130 /* Figure out proper mode for counter. For 32bits it is always SImode,
10131 for 64bits use SImode when possible, otherwise DImode.
10132 Set count to number of bytes copied when known at compile time. */
10133 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10134 || x86_64_zero_extended_value (count_exp))
10135 counter_mode = SImode;
10137 counter_mode = DImode;
10139 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* String insns require the direction flag cleared. */
10141 emit_insn (gen_cld ());
10143 /* When optimizing for size emit simple rep ; movsb instruction for
10144 counts not divisible by 4. */
10146 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10148 countreg = ix86_zero_extend_to_Pmode (count_exp);
10149 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10151 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10152 destreg, countreg));
10154 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10155 destreg, countreg));
10157 else if (count != 0
10159 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10160 || optimize_size || count < (unsigned int) 64))
/* size is the word chunk stored by rep stos{si,di}. */
10162 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10163 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10164 if (count & ~(size - 1))
10166 countreg = copy_to_mode_reg (counter_mode,
10167 GEN_INT ((count >> (size == 4 ? 2 : 3))
10168 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10169 countreg = ix86_zero_extend_to_Pmode (countreg);
10173 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10174 destreg, countreg));
10176 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10177 destreg, countreg));
10180 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10181 destreg, countreg));
/* Clear the remaining 1..7 tail bytes with single string stores. */
10183 if (size == 8 && (count & 0x04))
10184 emit_insn (gen_strsetsi (destreg,
10185 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10187 emit_insn (gen_strsethi (destreg,
10188 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10190 emit_insn (gen_strsetqi (destreg,
10191 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10197 /* Compute desired alignment of the string operation. */
10198 int desired_alignment = (TARGET_PENTIUMPRO
10199 && (count == 0 || count >= (unsigned int) 260)
10200 ? 8 : UNITS_PER_WORD);
10202 /* In case we don't know anything about the alignment, default to
10203 library version, since it is usually equally fast and result in
smaller code (sentence completion elided in this listing). */
10205 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10208 if (TARGET_SINGLE_STRINGOP)
10209 emit_insn (gen_cld ());
10211 countreg2 = gen_reg_rtx (Pmode);
10212 countreg = copy_to_mode_reg (counter_mode, count_exp);
10213 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10215 if (count == 0 && align < desired_alignment)
10217 label = gen_label_rtx ();
10218 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10219 LEU, 0, counter_mode, 1, label);
/* Align destination byte/word/long at a time, adjusting the counter. */
10223 rtx label = ix86_expand_aligntest (destreg, 1);
10224 emit_insn (gen_strsetqi (destreg,
10225 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10226 ix86_adjust_counter (countreg, 1);
10227 emit_label (label);
10228 LABEL_NUSES (label) = 1;
10232 rtx label = ix86_expand_aligntest (destreg, 2);
10233 emit_insn (gen_strsethi (destreg,
10234 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10235 ix86_adjust_counter (countreg, 2);
10236 emit_label (label);
10237 LABEL_NUSES (label) = 1;
10239 if (align <= 4 && desired_alignment > 4)
10241 rtx label = ix86_expand_aligntest (destreg, 4);
10242 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10243 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10245 ix86_adjust_counter (countreg, 4);
10246 emit_label (label);
10247 LABEL_NUSES (label) = 1;
10250 if (label && desired_alignment > 4 && !TARGET_64BIT)
10252 emit_label (label);
10253 LABEL_NUSES (label) = 1;
10257 if (!TARGET_SINGLE_STRINGOP)
10258 emit_insn (gen_cld ());
/* Bulk clear: rep stos with count scaled to word units. */
10261 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10263 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10264 destreg, countreg2));
10268 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10269 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10270 destreg, countreg2));
10274 emit_label (label);
10275 LABEL_NUSES (label) = 1;
/* Clear the remaining tail (4/2/1 bytes), testing the counter when the
compile-time count is unknown. */
10278 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10279 emit_insn (gen_strsetsi (destreg,
10280 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10281 if (TARGET_64BIT && (align <= 4 || count == 0))
10283 rtx label = ix86_expand_aligntest (countreg, 4);
10284 emit_insn (gen_strsetsi (destreg,
10285 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10286 emit_label (label);
10287 LABEL_NUSES (label) = 1;
10289 if (align > 2 && count != 0 && (count & 2))
10290 emit_insn (gen_strsethi (destreg,
10291 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10292 if (align <= 2 || count == 0)
10294 rtx label = ix86_expand_aligntest (countreg, 2);
10295 emit_insn (gen_strsethi (destreg,
10296 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10297 emit_label (label);
10298 LABEL_NUSES (label) = 1;
10300 if (align > 1 && count != 0 && (count & 1))
10301 emit_insn (gen_strsetqi (destreg,
10302 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10303 if (align <= 1 || count == 0)
10305 rtx label = ix86_expand_aligntest (countreg, 1);
10306 emit_insn (gen_strsetqi (destreg,
10307 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10308 emit_label (label);
10309 LABEL_NUSES (label) = 1;
10314 /* Expand strlen. OUT receives the length; SRC is the string MEM;
EOSCHAR is the terminator (const0_rtx for plain strlen); ALIGN the
known alignment. Uses the unrolled SImode scanner when profitable,
otherwise repnz scasb via the strlenqi patterns.
NOTE(review): this listing has elided lines (braces/returns missing). */
10316 ix86_expand_strlen (out, src, eoschar, align)
10317 rtx out, src, eoschar, align;
10319 rtx addr, scratch1, scratch2, scratch3, scratch4;
10321 /* The generic case of strlen expander is long. Avoid it's
10322 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
10324 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10325 && !TARGET_INLINE_ALL_STRINGOPS
10327 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10330 addr = force_reg (Pmode, XEXP (src, 0));
10331 scratch1 = gen_reg_rtx (Pmode);
10333 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10336 /* Well it seems that some optimizer does not combine a call like
10337 foo(strlen(bar), strlen(bar));
10338 when the move and the subtraction is done here. It does calculate
10339 the length just once when these instructions are done inside of
10340 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10341 often used and I use one fewer register for the lifetime of
10342 output_strlen_unroll() this is better. */
10344 emit_move_insn (out, addr);
10346 ix86_expand_strlensi_unroll_1 (out, align);
10348 /* strlensi_unroll_1 returns the address of the zero at the end of
10349 the string, like memchr(), so compute the length by subtracting
10350 the start address. */
10352 emit_insn (gen_subdi3 (out, out, addr));
10354 emit_insn (gen_subsi3 (out, out, addr));
/* repnz scasb path: scan for EOSCHAR, then length = ~count - 1. */
10358 scratch2 = gen_reg_rtx (Pmode);
10359 scratch3 = gen_reg_rtx (Pmode);
10360 scratch4 = force_reg (Pmode, constm1_rtx);
10362 emit_move_insn (scratch3, addr);
10363 eoschar = force_reg (QImode, eoschar);
10365 emit_insn (gen_cld ());
10368 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10369 align, scratch4, scratch3));
10370 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10371 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10375 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10376 align, scratch4, scratch3));
10377 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10378 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10384 /* Expand the appropriate insns for doing strlen if not just doing
repnz; scasb (lead-in elided in this listing)
10387 out = result, initialized with the start address
10388 align_rtx = alignment of the address.
10389 scratch = scratch register, initialized with the startaddress when
10390 not aligned, otherwise undefined
10392 This is just the body. It needs the initialisations mentioned above and
10393 some address computing at the end. These things are done in i386.md.
NOTE(review): this listing has elided lines (braces/else-arms missing). */
10396 ix86_expand_strlensi_unroll_1 (out, align_rtx)
10397 rtx out, align_rtx;
10401 rtx align_2_label = NULL_RTX;
10402 rtx align_3_label = NULL_RTX;
10403 rtx align_4_label = gen_label_rtx ();
10404 rtx end_0_label = gen_label_rtx ();
10406 rtx tmpreg = gen_reg_rtx (SImode);
10407 rtx scratch = gen_reg_rtx (SImode);
10410 if (GET_CODE (align_rtx) == CONST_INT)
10411 align = INTVAL (align_rtx);
10413 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10415 /* Is there a known alignment and is it less than 4? */
10418 rtx scratch1 = gen_reg_rtx (Pmode);
10419 emit_move_insn (scratch1, out);
10420 /* Is there a known alignment and is it not 2? */
10423 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10424 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10426 /* Leave just the 3 lower bits. */
10427 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10428 NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on (addr & 3): 0 -> aligned, 2 -> 2-byte, >2 -> 3-byte. */
10430 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10431 Pmode, 1, align_4_label);
10432 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
10433 Pmode, 1, align_2_label);
10434 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
10435 Pmode, 1, align_3_label);
10439 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10440 check if is aligned to 4 - byte. */
10442 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
10443 NULL_RTX, 0, OPTAB_WIDEN);
10445 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10446 Pmode, 1, align_4_label);
10449 mem = gen_rtx_MEM (QImode, out);
10451 /* Now compare the bytes. */
10453 /* Compare the first n unaligned byte on a byte per byte basis. */
10454 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
10455 QImode, 1, end_0_label);
10457 /* Increment the address. */
10459 emit_insn (gen_adddi3 (out, out, const1_rtx));
10461 emit_insn (gen_addsi3 (out, out, const1_rtx));
10463 /* Not needed with an alignment of 2 */
10466 emit_label (align_2_label);
10468 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10472 emit_insn (gen_adddi3 (out, out, const1_rtx));
10474 emit_insn (gen_addsi3 (out, out, const1_rtx));
10476 emit_label (align_3_label);
10479 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10483 emit_insn (gen_adddi3 (out, out, const1_rtx));
10485 emit_insn (gen_addsi3 (out, out, const1_rtx));
10488 /* Generate loop to check 4 bytes at a time. It is not a good idea to
10489 align this loop. It gives only huge programs, but does not help to
10491 emit_label (align_4_label);
10493 mem = gen_rtx_MEM (SImode, out);
10494 emit_move_insn (scratch, mem);
10496 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10498 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
10500 /* This formula yields a nonzero result iff one of the bytes is zero.
10501 This saves three branches inside loop and many cycles. */
10503 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10504 emit_insn (gen_one_cmplsi2 (scratch, scratch));
10505 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
10506 emit_insn (gen_andsi3 (tmpreg, tmpreg,
10507 gen_int_mode (0x80808080, SImode)));
10508 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* Found a zero byte within the word: locate it without branches
using cmov where available. */
10513 rtx reg = gen_reg_rtx (SImode);
10514 rtx reg2 = gen_reg_rtx (Pmode);
10515 emit_move_insn (reg, tmpreg);
10516 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10518 /* If zero is not in the first two bytes, move two bytes forward. */
10519 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10520 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10521 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10522 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10523 gen_rtx_IF_THEN_ELSE (SImode, tmp,
10526 /* Emit lea manually to avoid clobbering of flags. */
10527 emit_insn (gen_rtx_SET (SImode, reg2,
10528 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
10530 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10531 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10532 emit_insn (gen_rtx_SET (VOIDmode, out,
10533 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* No cmov: fall back to an explicit conditional jump. */
10540 rtx end_2_label = gen_label_rtx ();
10541 /* Is zero in the first two bytes? */
10543 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10544 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10545 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10546 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10547 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10549 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10550 JUMP_LABEL (tmp) = end_2_label;
10552 /* Not in the first two. Move two bytes forward. */
10553 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
10555 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
10557 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
10559 emit_label (end_2_label);
10563 /* Avoid branch in fixing the byte. */
10564 tmpreg = gen_lowpart (QImode, tmpreg);
10565 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
10567 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
10569 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
10571 emit_label (end_0_label);
/* Emit a call. RETVAL is the value register (or NULL for void calls),
FNADDR the callee MEM, CALLARG1/CALLARG2 ABI bookkeeping operands,
POP the number of bytes the callee pops (for stdcall-style calls).
NOTE(review): this listing has elided lines (braces/Darwin ifdefs). */
10575 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
10576 rtx retval, fnaddr, callarg1, callarg2, pop;
10578 rtx use = NULL, call;
10580 if (pop == const0_rtx)
10582 if (TARGET_64BIT && pop)
/* Darwin only: redirect through the machopic indirection stub. */
10586 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
10587 fnaddr = machopic_indirect_call_target (fnaddr);
10589 /* Static functions and indirect calls don't need the pic register. */
10590 if (! TARGET_64BIT && flag_pic
10591 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
10592 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
10593 use_reg (&use, pic_offset_table_rtx)
/* x86-64 varargs ABI: AL carries the number of SSE registers used. */
10595 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
10597 rtx al = gen_rtx_REG (QImode, 0);
10598 emit_move_insn (al, callarg2);
10599 use_reg (&use, al);
10601 #endif /* TARGET_MACHO */
10603 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
10605 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10606 fnaddr = gen_rtx_MEM (QImode, fnaddr);
10609 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
10611 call = gen_rtx_SET (VOIDmode, retval, call);
/* Model callee-pop as a parallel with a stack-pointer adjustment. */
10614 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
10615 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
10616 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
10619 call = emit_call_insn (call);
10621 CALL_INSN_FUNCTION_USAGE (call) = use;
10625 /* Clear stack slot assignments remembered from previous functions.
10626 This is called from INIT_EXPANDERS once before RTL is emitted for each
function (sentence completion elided in this listing). */
10629 static struct machine_function *
10630 ix86_init_machine_status ()
/* Allocate a zeroed, GC-tracked per-function structure. */
10632 return ggc_alloc_cleared (sizeof (struct machine_function));
10635 /* Return a MEM corresponding to a stack slot with mode MODE.
10636 Allocate a new slot if necessary.
10638 The RTL for a function can have several slots available: N is
10639 which slot to use. */
10642 assign_386_stack_local (mode, n)
10643 enum machine_mode mode;
/* Out-of-range slot index is a compiler bug (abort elided here). */
10646 if (n < 0 || n >= MAX_386_STACK_LOCALS)
/* Lazily allocate and cache the slot per (mode, n). */
10649 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
10650 ix86_stack_locals[(int) mode][n]
10651 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10653 return ix86_stack_locals[(int) mode][n];
10656 /* Construct the SYMBOL_REF for the tls_get_addr function, caching it
in a GC-rooted static so it is built only once. GNU TLS uses the
triple-underscore variant. */
10658 static GTY(()) rtx ix86_tls_symbol;
10660 ix86_tls_get_addr ()
10663 if (!ix86_tls_symbol)
10665 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, (TARGET_GNU_TLS
10666 ? "___tls_get_addr"
10667 : "__tls_get_addr"));
10670 return ix86_tls_symbol;
10673 /* Calculate the length of the memory address in the instruction
10674 encoding. Does not include the one-byte modrm, opcode, or prefix.
NOTE(review): this listing has elided lines (len accumulation/returns). */
10677 memory_address_length (addr)
10680 struct ix86_address parts;
10681 rtx base, index, disp;
/* Auto-inc/dec addresses encode no extra bytes beyond modrm. */
10684 if (GET_CODE (addr) == PRE_DEC
10685 || GET_CODE (addr) == POST_INC
10686 || GET_CODE (addr) == PRE_MODIFY
10687 || GET_CODE (addr) == POST_MODIFY)
10690 if (! ix86_decompose_address (addr, &parts))
10694 index = parts.index;
10698 /* Register Indirect. */
10699 if (base && !index && !disp)
10701 /* Special cases: ebp and esp need the two-byte modrm form. */
10702 if (addr == stack_pointer_rtx
10703 || addr == arg_pointer_rtx
10704 || addr == frame_pointer_rtx
10705 || addr == hard_frame_pointer_rtx)
10709 /* Direct Addressing. */
10710 else if (disp && !base && !index)
10715 /* Find the length of the displacement constant. */
10718 if (GET_CODE (disp) == CONST_INT
10719 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
10725 /* An index requires the two-byte modrm form. */
10733 /* Compute default value for "length_immediate" attribute. When SHORTFORM
10734 is set, expect that insn have 8bit immediate alternative.
Scans the (cached) extracted operands for constants and sizes the
immediate by the insn's mode attribute.
NOTE(review): this listing has elided lines (switch cases/returns). */
10736 ix86_attr_length_immediate_default (insn, shortform)
10742 extract_insn_cached (insn);
10743 for (i = recog_data.n_operands - 1; i >= 0; --i)
10744 if (CONSTANT_P (recog_data.operand[i]))
/* 'K' constants fit in a signed 8bit immediate (short form). */
10749 && GET_CODE (recog_data.operand[i]) == CONST_INT
10750 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
10754 switch (get_attr_mode (insn))
10765 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
10770 fatal_insn ("unknown insn mode", insn);
10776 /* Compute default value for "length_address" attribute. */
/* Returns the encoded length of the first MEM operand's address;
   the fall-through return for insns without a MEM is missing here.  */
10778 ix86_attr_length_address_default (insn)
10782 extract_insn_cached (insn);
10783 for (i = recog_data.n_operands - 1; i >= 0; --i)
10784 if (GET_CODE (recog_data.operand[i]) == MEM)
10786 return memory_address_length (XEXP (recog_data.operand[i], 0));
10792 /* Return the maximum number of instructions a cpu can issue. */
/* NOTE(review): only the case labels of the per-CPU switch survive in this
   view; the function header and the per-case return values are missing.  */
10799 case PROCESSOR_PENTIUM:
10803 case PROCESSOR_PENTIUMPRO:
10804 case PROCESSOR_PENTIUM4:
10805 case PROCESSOR_ATHLON:
10813 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
10814 by DEP_INSN and nothing set by DEP_INSN. */
10817 ix86_flags_dependant (insn, dep_insn, insn_type)
10818 rtx insn, dep_insn;
10819 enum attr_type insn_type;
10823 /* Simplify the test for uninteresting insns. */
10824 if (insn_type != TYPE_SETCC
10825 && insn_type != TYPE_ICMOV
10826 && insn_type != TYPE_FCMOV
10827 && insn_type != TYPE_IBR)
10830 if ((set = single_set (dep_insn)) != 0)
10832 set = SET_DEST (set);
10835 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
10836 && XVECLEN (PATTERN (dep_insn), 0) == 2
10837 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
10838 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
10840 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10841 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10846 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
10849 /* This test is true if the dependent insn reads the flags but
10850 not any other potentially set register. */
10851 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
10854 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
10860 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
10861 address with operands set by DEP_INSN. */
10864 ix86_agi_dependant (insn, dep_insn, insn_type)
10865 rtx insn, dep_insn;
10866 enum attr_type insn_type;
/* LEA computes an address in the ALU, so its source expression is treated
   as the "address" directly; otherwise scan operands for a MEM.  Several
   lines (guards, early returns) are missing from this view.  */
10870 if (insn_type == TYPE_LEA
10873 addr = PATTERN (insn);
10874 if (GET_CODE (addr) == SET)
10876 else if (GET_CODE (addr) == PARALLEL
10877 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
10878 addr = XVECEXP (addr, 0, 0);
10881 addr = SET_SRC (addr);
10886 extract_insn_cached (insn);
10887 for (i = recog_data.n_operands - 1; i >= 0; --i)
10888 if (GET_CODE (recog_data.operand[i]) == MEM)
10890 addr = XEXP (recog_data.operand[i], 0);
/* True iff DEP_INSN writes something the address expression reads.  */
10897 return modified_in_p (addr, dep_insn);
/* Scheduler ADJUST_COST hook: tweak the latency COST of the dependence
   LINK between DEP_INSN and INSN for the current -mcpu.  NOTE(review):
   many lines (returns, braces, cost adjustments) are missing from this
   extracted view; the structure is a per-processor switch.  */
10901 ix86_adjust_cost (insn, link, dep_insn, cost)
10902 rtx insn, link, dep_insn;
10905 enum attr_type insn_type, dep_insn_type;
10906 enum attr_memory memory, dep_memory;
10908 int dep_insn_code_number;
10910 /* Anti and output dependencies have zero cost on all CPUs. */
10911 if (REG_NOTE_KIND (link) != 0)
10914 dep_insn_code_number = recog_memoized (dep_insn);
10916 /* If we can't recognize the insns, we can't really do anything. */
10917 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
10920 insn_type = get_attr_type (insn);
10921 dep_insn_type = get_attr_type (dep_insn);
10925 case PROCESSOR_PENTIUM:
10926 /* Address Generation Interlock adds a cycle of latency. */
10927 if (ix86_agi_dependant (insn, dep_insn, insn_type))
10930 /* ??? Compares pair with jump/setcc. */
10931 if (ix86_flags_dependant (insn, dep_insn, insn_type))
10934 /* Floating point stores require value to be ready one cycle earlier. */
10935 if (insn_type == TYPE_FMOV
10936 && get_attr_memory (insn) == MEMORY_STORE
10937 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10941 case PROCESSOR_PENTIUMPRO:
10942 memory = get_attr_memory (insn);
10943 dep_memory = get_attr_memory (dep_insn);
10945 /* Since we can't represent delayed latencies of load+operation,
10946 increase the cost here for non-imov insns. */
10947 if (dep_insn_type != TYPE_IMOV
10948 && dep_insn_type != TYPE_FMOV
10949 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
10952 /* INT->FP conversion is expensive. */
10953 if (get_attr_fp_int_src (dep_insn))
10956 /* There is one cycle extra latency between an FP op and a store. */
10957 if (insn_type == TYPE_FMOV
10958 && (set = single_set (dep_insn)) != NULL_RTX
10959 && (set2 = single_set (insn)) != NULL_RTX
10960 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
10961 && GET_CODE (SET_DEST (set2)) == MEM)
10964 /* Show ability of reorder buffer to hide latency of load by executing
10965 in parallel with previous instruction in case
10966 previous instruction is not needed to compute the address. */
10967 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10968 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10970 /* Claim moves to take one cycle, as core can issue one load
10971 at time and the next load can start cycle later. */
10972 if (dep_insn_type == TYPE_IMOV
10973 || dep_insn_type == TYPE_FMOV)
/* NOTE(review): the case label for this next section (presumably a K6-class
   processor) is among the missing lines.  */
10981 memory = get_attr_memory (insn);
10982 dep_memory = get_attr_memory (dep_insn);
10983 /* The esp dependency is resolved before the instruction is really
10985 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
10986 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
10989 /* Since we can't represent delayed latencies of load+operation,
10990 increase the cost here for non-imov insns. */
10991 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10992 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
10994 /* INT->FP conversion is expensive. */
10995 if (get_attr_fp_int_src (dep_insn))
10998 /* Show ability of reorder buffer to hide latency of load by executing
10999 in parallel with previous instruction in case
11000 previous instruction is not needed to compute the address. */
11001 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11002 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11004 /* Claim moves to take one cycle, as core can issue one load
11005 at time and the next load can start cycle later. */
11006 if (dep_insn_type == TYPE_IMOV
11007 || dep_insn_type == TYPE_FMOV)
11016 case PROCESSOR_ATHLON:
11017 memory = get_attr_memory (insn);
11018 dep_memory = get_attr_memory (dep_insn);
11020 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11022 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
11027 /* Show ability of reorder buffer to hide latency of load by executing
11028 in parallel with previous instruction in case
11029 previous instruction is not needed to compute the address. */
11030 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11031 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11033 /* Claim moves to take one cycle, as core can issue one load
11034 at time and the next load can start cycle later. */
11035 if (dep_insn_type == TYPE_IMOV
11036 || dep_insn_type == TYPE_FMOV)
11038 else if (cost >= 3)
/* Per-cycle scheduling state for the PPro model; the `decode[3]` slot
   array referenced elsewhere is among the lines missing from this view.  */
11053 struct ppro_sched_data
/* Number of insns issued so far in the current cycle.  */
11056 int issued_this_cycle;
/* Return the uop classification of INSN, defaulting to "many" (complex)
   when the insn is not recognizable.  */
11060 static enum attr_ppro_uops
11061 ix86_safe_ppro_uops (insn)
11064 if (recog_memoized (insn) >= 0)
11065 return get_attr_ppro_uops (insn);
11067 return PPRO_UOPS_MANY;
/* Dump the current PPro decoder packet (up to three insn UIDs) to the
   scheduling dump file.  */
11071 ix86_dump_ppro_packet (dump)
11074 if (ix86_sched_data.ppro.decode[0])
11076 fprintf (dump, "PPRO packet: %d",
11077 INSN_UID (ix86_sched_data.ppro.decode[0]))
11078 if (ix86_sched_data.ppro.decode[1])
11079 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11080 if (ix86_sched_data.ppro.decode[2])
11081 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11082 fputc ('\n', dump);
11086 /* We're beginning a new block. Initialize data structures as necessary. */
/* Scheduler init hook: simply clears the per-block scheduling state.  */
11089 ix86_sched_init (dump, sched_verbose, veclen)
11090 FILE *dump ATTRIBUTE_UNUSED;
11091 int sched_verbose ATTRIBUTE_UNUSED;
11092 int veclen ATTRIBUTE_UNUSED;
11094 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11097 /* Shift INSN to SLOT, and shift everything else down. */
/* Rotates the insn at *INSNP up to *SLOT within the ready array, sliding
   the intermediate entries down one position.  */
11100 ix86_reorder_insn (insnp, slot)
11107 insnp[0] = insnp[1];
11108 while (++insnp != slot);
/* Reorder the ready queue READY..E_READY so the PPro's 4-1-1 decoders are
   kept busy: one complex/medium insn in slot 0, single-uop insns in slots
   1 and 2.  NOTE(review): loop headers and braces are missing from this
   extracted view.  */
11114 ix86_sched_reorder_ppro (ready, e_ready)
11119 enum attr_ppro_uops cur_uops;
11120 int issued_this_cycle;
11124 /* At this point .ppro.decode contains the state of the three
11125 decoders from last "cycle". That is, those insns that were
11126 actually independent. But here we're scheduling for the
11127 decoder, and we may find things that are decodable in the
11130 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11131 issued_this_cycle = 0;
11134 cur_uops = ix86_safe_ppro_uops (*insnp);
11136 /* If the decoders are empty, and we've a complex insn at the
11137 head of the priority queue, let it issue without complaint. */
11138 if (decode[0] == NULL)
11140 if (cur_uops == PPRO_UOPS_MANY)
11142 decode[0] = *insnp;
11146 /* Otherwise, search for a 2-4 uop insn to issue. */
11147 while (cur_uops != PPRO_UOPS_FEW)
11149 if (insnp == ready)
11151 cur_uops = ix86_safe_ppro_uops (*--insnp);
11154 /* If so, move it to the head of the line. */
11155 if (cur_uops == PPRO_UOPS_FEW)
11156 ix86_reorder_insn (insnp, e_ready);
11158 /* Issue the head of the queue. */
11159 issued_this_cycle = 1;
11160 decode[0] = *e_ready--;
11163 /* Look for simple insns to fill in the other two slots. */
11164 for (i = 1; i < 3; ++i)
11165 if (decode[i] == NULL)
11167 if (ready > e_ready)
11171 cur_uops = ix86_safe_ppro_uops (*insnp);
11172 while (cur_uops != PPRO_UOPS_ONE)
11174 if (insnp == ready)
11176 cur_uops = ix86_safe_ppro_uops (*--insnp);
11179 /* Found one. Move it to the head of the queue and issue it. */
11180 if (cur_uops == PPRO_UOPS_ONE)
11182 ix86_reorder_insn (insnp, e_ready);
11183 decode[i] = *e_ready--;
11184 issued_this_cycle++;
11188 /* ??? Didn't find one. Ideally, here we would do a lazy split
11189 of 2-uop insns, issue one and queue the other. */
/* Always report at least one insn issued so the scheduler makes progress.  */
11193 if (issued_this_cycle == 0)
11194 issued_this_cycle = 1;
11195 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11198 /* We are about to begin issuing insns for this clock cycle.
11199 Override the default sort algorithm to better slot instructions. */
11201 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11202 FILE *dump ATTRIBUTE_UNUSED;
11203 int sched_verbose ATTRIBUTE_UNUSED;
11206 int clock_var ATTRIBUTE_UNUSED;
11208 int n_ready = *n_readyp;
11209 rtx *e_ready = ready + n_ready - 1;
11211 /* Make sure to go ahead and initialize key items in
11212 ix86_sched_data if we are not going to bother trying to
11213 reorder the ready queue. */
11216 ix86_sched_data.ppro.issued_this_cycle = 1;
/* Only the PPro model actually reorders; other CPUs fall through to the
   default order (switch frame missing from this view).  */
11225 case PROCESSOR_PENTIUMPRO:
11226 ix86_sched_reorder_ppro (ready, e_ready);
11231 return ix86_issue_rate ();
11234 /* We are about to issue INSN. Return the number of insns left on the
11235 ready queue that can be issued this cycle. */
11238 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11242 int can_issue_more;
/* Default (non-PPro) CPUs: just count down.  */
11248 return can_issue_more - 1;
11250 case PROCESSOR_PENTIUMPRO:
11252 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
/* A complex insn occupies the whole decode group by itself.  */
11254 if (uops == PPRO_UOPS_MANY)
11257 ix86_dump_ppro_packet (dump);
11258 ix86_sched_data.ppro.decode[0] = insn;
11259 ix86_sched_data.ppro.decode[1] = NULL;
11260 ix86_sched_data.ppro.decode[2] = NULL;
11262 ix86_dump_ppro_packet (dump);
11263 ix86_sched_data.ppro.decode[0] = NULL;
11265 else if (uops == PPRO_UOPS_FEW)
11268 ix86_dump_ppro_packet (dump);
11269 ix86_sched_data.ppro.decode[0] = insn;
11270 ix86_sched_data.ppro.decode[1] = NULL;
11271 ix86_sched_data.ppro.decode[2] = NULL;
/* Single-uop insn: drop it into the first free decoder slot; flush the
   packet when all three slots fill (braces missing from this view).  */
11275 for (i = 0; i < 3; ++i)
11276 if (ix86_sched_data.ppro.decode[i] == NULL)
11278 ix86_sched_data.ppro.decode[i] = insn;
11286 ix86_dump_ppro_packet (dump);
11287 ix86_sched_data.ppro.decode[0] = NULL;
11288 ix86_sched_data.ppro.decode[1] = NULL;
11289 ix86_sched_data.ppro.decode[2] = NULL;
11293 return --ix86_sched_data.ppro.issued_this_cycle;
/* Target hook: whether to use the DFA pipeline description.  Only the
   Pentium test survives in this view; the return values are missing.  */
11298 ia32_use_dfa_pipeline_interface ()
11300 if (ix86_cpu == PROCESSOR_PENTIUM)
11305 /* How many alternative schedules to try. This should be as wide as the
11306 scheduling freedom in the DFA, but no wider. Making this value too
11307 large results in extra work for the scheduler. */
11310 ia32_multipass_dfa_lookahead ()
11312 if (ix86_cpu == PROCESSOR_PENTIUM)
11319 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11320 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
/* ... (tail of the comment is missing from this view).  Applies the helper
   below to every insn pattern in the sequence.  */
11324 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11326 rtx dstref, srcref, dstreg, srcreg;
11330 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11332 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11336 /* Subroutine of above to actually do the updating by recursively walking
the rtx X.  Copies MEM attributes from DSTREF/SRCREF onto any MEM whose
address is exactly DSTREG/SRCREG.  */
11340 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11342 rtx dstref, srcref, dstreg, srcreg;
11344 enum rtx_code code = GET_CODE (x);
11345 const char *format_ptr = GET_RTX_FORMAT (code);
11348 if (code == MEM && XEXP (x, 0) == dstreg)
11349 MEM_COPY_ATTRIBUTES (x, dstref);
11350 else if (code == MEM && XEXP (x, 0) == srcreg)
11351 MEM_COPY_ATTRIBUTES (x, srcref);
/* Recurse into sub-expressions ('e') and expression vectors ('E').  */
11353 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11355 if (*format_ptr == 'e')
11356 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11358 else if (*format_ptr == 'E')
11359 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11360 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
11365 /* Compute the alignment given to a constant that is being placed in memory.
11366 EXP is the constant and ALIGN is the alignment that the object would
11368 The value of this function is used instead of that alignment to align
/* ... the object (comment tail and the return statements are missing from
   this view).  Doubles get 64-bit, 128-bit-mode values get 128-bit, and
   long strings get raised alignment.  */
11372 ix86_constant_alignment (exp, align)
11376 if (TREE_CODE (exp) == REAL_CST)
11378 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11380 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11383 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
11390 /* Compute the alignment for a static variable.
11391 TYPE is the data type, and ALIGN is the alignment that
11392 the object would ordinarily have. The value of this function is used
11393 instead of that alignment to align the object. */
/* NOTE(review): the return statements are missing from this extracted view;
   each matched clause presumably returns the raised alignment.  */
11396 ix86_data_alignment (type, align)
/* Large aggregates (>= 256 bits, or with high word set) get 256-bit
   alignment for cache-friendly layout.  */
11400 if (AGGREGATE_TYPE_P (type)
11401 && TYPE_SIZE (type)
11402 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11403 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11404 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11407 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11408 to 16byte boundary. */
11411 if (AGGREGATE_TYPE_P (type)
11412 && TYPE_SIZE (type)
11413 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11414 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11415 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11419 if (TREE_CODE (type) == ARRAY_TYPE)
11421 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11423 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11426 else if (TREE_CODE (type) == COMPLEX_TYPE)
11429 if (TYPE_MODE (type) == DCmode && align < 64)
11431 if (TYPE_MODE (type) == XCmode && align < 128)
11434 else if ((TREE_CODE (type) == RECORD_TYPE
11435 || TREE_CODE (type) == UNION_TYPE
11436 || TREE_CODE (type) == QUAL_UNION_TYPE)
11437 && TYPE_FIELDS (type))
11439 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11441 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11444 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11445 || TREE_CODE (type) == INTEGER_TYPE)
11447 if (TYPE_MODE (type) == DFmode && align < 64)
11449 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11456 /* Compute the alignment for a local variable.
11457 TYPE is the data type, and ALIGN is the alignment that
11458 the object would ordinarily have. The value of this macro is used
11459 instead of that alignment to align the object. */
/* Mirrors ix86_data_alignment but for stack locals; return statements are
   missing from this extracted view.  */
11462 ix86_local_alignment (type, align)
11466 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11467 to 16byte boundary. */
11470 if (AGGREGATE_TYPE_P (type)
11471 && TYPE_SIZE (type)
11472 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11473 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11474 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11477 if (TREE_CODE (type) == ARRAY_TYPE)
11479 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11481 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11484 else if (TREE_CODE (type) == COMPLEX_TYPE)
11486 if (TYPE_MODE (type) == DCmode && align < 64)
11488 if (TYPE_MODE (type) == XCmode && align < 128)
11491 else if ((TREE_CODE (type) == RECORD_TYPE
11492 || TREE_CODE (type) == UNION_TYPE
11493 || TREE_CODE (type) == QUAL_UNION_TYPE)
11494 && TYPE_FIELDS (type))
11496 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11498 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11501 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11502 || TREE_CODE (type) == INTEGER_TYPE)
11505 if (TYPE_MODE (type) == DFmode && align < 64)
11507 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11513 /* Emit RTL insns to initialize the variable parts of a trampoline.
11514 FNADDR is an RTX for the address of the function's pure code.
11515 CXT is an RTX for the static chain value for the function. */
/* NOTE(review): the TARGET_64BIT test, the `offset` declaration and some
   operand lines are missing from this extracted view.  32-bit layout:
   movl $CXT, %ecx (0xb9 imm32); jmp rel32 (0xe9 disp32).  */
11517 x86_initialize_trampoline (tramp, fnaddr, cxt)
11518 rtx tramp, fnaddr, cxt;
11522 /* Compute offset from the end of the jmp to the target function. */
11523 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11524 plus_constant (tramp, 10),
11525 NULL_RTX, 1, OPTAB_DIRECT);
11526 emit_move_insn (gen_rtx_MEM (QImode, tramp),
11527 gen_int_mode (0xb9, QImode));
11528 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11529 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
11530 gen_int_mode (0xe9, QImode));
11531 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11536 /* Try to load address using shorter movl instead of movabs.
11537 We may want to support movq for kernel mode, but kernel does not use
11538 trampolines at the moment. */
11539 if (x86_64_zero_extended_value (fnaddr))
11541 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11542 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11543 gen_int_mode (0xbb41, HImode));
11544 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11545 gen_lowpart (SImode, fnaddr));
11550 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11551 gen_int_mode (0xbb49, HImode));
11552 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11556 /* Load static chain using movabs to r10. */
11557 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11558 gen_int_mode (0xba49, HImode));
11559 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11562 /* Jump to r11. */
11563 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11564 gen_int_mode (0xff49, HImode));
11565 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
11566 gen_int_mode (0xe3, QImode));
/* Sanity check: the emitted code must fit in TRAMPOLINE_SIZE bytes.  */
11568 if (offset > TRAMPOLINE_SIZE)
/* Register builtin NAME with signature TYPE and code CODE, but only when
   the ISA bits in MASK are enabled in target_flags.  */
11573 #define def_builtin(MASK, NAME, TYPE, CODE) \
11575 if ((MASK) & target_flags) \
11576 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11577 NULL, NULL_TREE); \
/* One table row per builtin: enabling mask, insn pattern, user-visible
   name (0 for internal-only entries), builtin code, and for comparisons
   the rtx code plus an operand-swap flag.  */
11580 struct builtin_description
11582 const unsigned int mask;
11583 const enum insn_code icode;
11584 const char *const name;
11585 const enum ix86_builtins code;
11586 const enum rtx_code comparison;
11587 const unsigned int flag;
11590 /* Used for builtins that are enabled both by -msse and -msse2. */
11591 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
/* (U)COMISS/(U)COMISD scalar compare builtins.  GT/GE are encoded as
   LT/LE with the swap flag set (last field = 1), since the hardware
   only provides the one direction.  */
11593 static const struct builtin_description bdesc_comi[] =
11595 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
11596 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
11597 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
11598 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
11599 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
11600 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
11601 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
11602 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
11603 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
11604 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
11605 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
11606 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
11607 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
11608 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
11609 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
11610 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
11611 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
11612 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
11613 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
11614 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
11615 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
11616 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
11617 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
11618 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
11621 static const struct builtin_description bdesc_2arg[] =
11624 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11625 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11626 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11627 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11628 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11629 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11630 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11631 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11633 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11634 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11635 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11636 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11637 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11638 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11639 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11640 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11641 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11642 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11643 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11644 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11645 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11646 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11647 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11648 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
11649 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
11650 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11651 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11652 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11653 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11654 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
11655 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
11656 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11658 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11659 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11660 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11661 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11663 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11664 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11665 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11666 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11667 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
11670 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11671 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11672 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11673 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11674 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11675 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11677 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11678 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11679 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11680 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11681 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11682 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11683 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11684 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11686 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11687 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
11688 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
11690 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11691 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11692 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11693 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11695 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11696 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
11698 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11699 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11700 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11701 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11702 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11703 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11705 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11706 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11707 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11708 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
11710 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11711 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11712 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11713 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11714 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11715 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
11718 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11719 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11720 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11722 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11723 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
11725 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11726 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11727 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11728 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11729 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
11730 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
11732 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
11733 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
11734 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
11735 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
11736 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
11737 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
11739 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
11740 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
11741 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
11742 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
11744 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
11745 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
  /* SSE2 packed double-precision arithmetic.  */
  { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
  /* Scalar (low-element) variants use the vm* patterns.  */
  { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },

  /* SSE2 packed double compares.  The last two fields are the RTX
     comparison code and a swap-operands flag: CMPGT/CMPGE have no
     direct insn form, so they are emitted as LT/LE with the operands
     swapped (flag = 1).  */
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
  /* Negated compares use the maskn* patterns.  */
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
  /* Scalar double compares, same encoding scheme as above.  */
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },

  /* Packed and scalar double min/max.  */
  { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },

  /* Double-precision logical operations.  */
  { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },

  /* Double-precision move and interleave.  */
  { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
11797 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
11798 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
11799 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
11800 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
11801 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
11802 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
11803 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
11804 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
11806 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
11807 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
11808 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
11809 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
11810 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
11811 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
11812 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
11813 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
  /* SSE2 multiplies: 16-bit low/high parts and unsigned 32x32->64.  */
  { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },

  /* SSE2 full-width logical operations.  */
  { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },

  /* Unsigned averages.  */
  { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },

  /* Integer equality and signed greater-than compares.  */
  { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },

  /* Unsigned byte / signed word min and max.  */
  { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },

  /* 128-bit interleave (unpack) operations.  */
  { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },

  /* 128-bit pack (narrowing) operations.  */
  { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },

  /* High-part unsigned multiply and sum of absolute differences.  */
  { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },

  /* SSE2 shifts.  The *_ti patterns presumably take the shift count
     in an xmm operand while the plain patterns take an immediate
     count (matching the WI-suffixed builtins) -- the patterns are
     defined in the machine description, not visible here.  */
  { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },

  /* Multiply-add and scalar conversions.  */
  { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
11880 static const struct builtin_description bdesc_1arg[] =
  /* Move-mask operations: extract the sign bits of each element.  */
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  /* Single-precision square root and reciprocal approximations.  */
  { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  /* SSE float-to-int conversions; the TT variants truncate.  */
  { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },

  /* SSE2 move-mask, MMX->XMM register move and square root.  */
  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },

  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },

  /* SSE2 conversions between int/float vector formats.  */
  { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
11920 ix86_init_builtins ()
11923 ix86_init_mmx_sse_builtins ();
11926 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
11927 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
11930 ix86_init_mmx_sse_builtins ()
11932 const struct builtin_description * d;
11935 tree pchar_type_node = build_pointer_type (char_type_node);
11936 tree pfloat_type_node = build_pointer_type (float_type_node);
11937 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
11938 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
11939 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
11942 tree int_ftype_v4sf_v4sf
11943 = build_function_type_list (integer_type_node,
11944 V4SF_type_node, V4SF_type_node, NULL_TREE);
11945 tree v4si_ftype_v4sf_v4sf
11946 = build_function_type_list (V4SI_type_node,
11947 V4SF_type_node, V4SF_type_node, NULL_TREE);
11948 /* MMX/SSE/integer conversions. */
11949 tree int_ftype_v4sf
11950 = build_function_type_list (integer_type_node,
11951 V4SF_type_node, NULL_TREE);
11952 tree int_ftype_v8qi
11953 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
11954 tree v4sf_ftype_v4sf_int
11955 = build_function_type_list (V4SF_type_node,
11956 V4SF_type_node, integer_type_node, NULL_TREE);
11957 tree v4sf_ftype_v4sf_v2si
11958 = build_function_type_list (V4SF_type_node,
11959 V4SF_type_node, V2SI_type_node, NULL_TREE);
11960 tree int_ftype_v4hi_int
11961 = build_function_type_list (integer_type_node,
11962 V4HI_type_node, integer_type_node, NULL_TREE);
11963 tree v4hi_ftype_v4hi_int_int
11964 = build_function_type_list (V4HI_type_node, V4HI_type_node,
11965 integer_type_node, integer_type_node,
11967 /* Miscellaneous. */
11968 tree v8qi_ftype_v4hi_v4hi
11969 = build_function_type_list (V8QI_type_node,
11970 V4HI_type_node, V4HI_type_node, NULL_TREE);
11971 tree v4hi_ftype_v2si_v2si
11972 = build_function_type_list (V4HI_type_node,
11973 V2SI_type_node, V2SI_type_node, NULL_TREE);
11974 tree v4sf_ftype_v4sf_v4sf_int
11975 = build_function_type_list (V4SF_type_node,
11976 V4SF_type_node, V4SF_type_node,
11977 integer_type_node, NULL_TREE);
11978 tree v2si_ftype_v4hi_v4hi
11979 = build_function_type_list (V2SI_type_node,
11980 V4HI_type_node, V4HI_type_node, NULL_TREE);
11981 tree v4hi_ftype_v4hi_int
11982 = build_function_type_list (V4HI_type_node,
11983 V4HI_type_node, integer_type_node, NULL_TREE);
11984 tree v4hi_ftype_v4hi_di
11985 = build_function_type_list (V4HI_type_node,
11986 V4HI_type_node, long_long_unsigned_type_node,
11988 tree v2si_ftype_v2si_di
11989 = build_function_type_list (V2SI_type_node,
11990 V2SI_type_node, long_long_unsigned_type_node,
11992 tree void_ftype_void
11993 = build_function_type (void_type_node, void_list_node);
11994 tree void_ftype_unsigned
11995 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
11996 tree unsigned_ftype_void
11997 = build_function_type (unsigned_type_node, void_list_node);
11999 = build_function_type (long_long_unsigned_type_node, void_list_node);
12000 tree v4sf_ftype_void
12001 = build_function_type (V4SF_type_node, void_list_node);
12002 tree v2si_ftype_v4sf
12003 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12004 /* Loads/stores. */
12005 tree void_ftype_v8qi_v8qi_pchar
12006 = build_function_type_list (void_type_node,
12007 V8QI_type_node, V8QI_type_node,
12008 pchar_type_node, NULL_TREE);
12009 tree v4sf_ftype_pfloat
12010 = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
12011 /* @@@ the type is bogus */
12012 tree v4sf_ftype_v4sf_pv2si
12013 = build_function_type_list (V4SF_type_node,
12014 V4SF_type_node, pv2di_type_node, NULL_TREE);
12015 tree void_ftype_pv2si_v4sf
12016 = build_function_type_list (void_type_node,
12017 pv2di_type_node, V4SF_type_node, NULL_TREE);
12018 tree void_ftype_pfloat_v4sf
12019 = build_function_type_list (void_type_node,
12020 pfloat_type_node, V4SF_type_node, NULL_TREE);
12021 tree void_ftype_pdi_di
12022 = build_function_type_list (void_type_node,
12023 pdi_type_node, long_long_unsigned_type_node,
12025 tree void_ftype_pv2di_v2di
12026 = build_function_type_list (void_type_node,
12027 pv2di_type_node, V2DI_type_node, NULL_TREE);
12028 /* Normal vector unops. */
12029 tree v4sf_ftype_v4sf
12030 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12032 /* Normal vector binops. */
12033 tree v4sf_ftype_v4sf_v4sf
12034 = build_function_type_list (V4SF_type_node,
12035 V4SF_type_node, V4SF_type_node, NULL_TREE);
12036 tree v8qi_ftype_v8qi_v8qi
12037 = build_function_type_list (V8QI_type_node,
12038 V8QI_type_node, V8QI_type_node, NULL_TREE);
12039 tree v4hi_ftype_v4hi_v4hi
12040 = build_function_type_list (V4HI_type_node,
12041 V4HI_type_node, V4HI_type_node, NULL_TREE);
12042 tree v2si_ftype_v2si_v2si
12043 = build_function_type_list (V2SI_type_node,
12044 V2SI_type_node, V2SI_type_node, NULL_TREE);
12045 tree di_ftype_di_di
12046 = build_function_type_list (long_long_unsigned_type_node,
12047 long_long_unsigned_type_node,
12048 long_long_unsigned_type_node, NULL_TREE);
12050 tree v2si_ftype_v2sf
12051 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12052 tree v2sf_ftype_v2si
12053 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12054 tree v2si_ftype_v2si
12055 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12056 tree v2sf_ftype_v2sf
12057 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12058 tree v2sf_ftype_v2sf_v2sf
12059 = build_function_type_list (V2SF_type_node,
12060 V2SF_type_node, V2SF_type_node, NULL_TREE);
12061 tree v2si_ftype_v2sf_v2sf
12062 = build_function_type_list (V2SI_type_node,
12063 V2SF_type_node, V2SF_type_node, NULL_TREE);
12064 tree pint_type_node = build_pointer_type (integer_type_node);
12065 tree pdouble_type_node = build_pointer_type (double_type_node);
12066 tree int_ftype_v2df_v2df
12067 = build_function_type_list (integer_type_node,
12068 V2DF_type_node, V2DF_type_node, NULL_TREE);
12071 = build_function_type (intTI_type_node, void_list_node);
12072 tree ti_ftype_ti_ti
12073 = build_function_type_list (intTI_type_node,
12074 intTI_type_node, intTI_type_node, NULL_TREE);
12075 tree void_ftype_pvoid
12076 = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
12078 = build_function_type_list (V2DI_type_node,
12079 long_long_unsigned_type_node, NULL_TREE);
12080 tree v4sf_ftype_v4si
12081 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12082 tree v4si_ftype_v4sf
12083 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12084 tree v2df_ftype_v4si
12085 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12086 tree v4si_ftype_v2df
12087 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12088 tree v2si_ftype_v2df
12089 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12090 tree v4sf_ftype_v2df
12091 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12092 tree v2df_ftype_v2si
12093 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12094 tree v2df_ftype_v4sf
12095 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12096 tree int_ftype_v2df
12097 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12098 tree v2df_ftype_v2df_int
12099 = build_function_type_list (V2DF_type_node,
12100 V2DF_type_node, integer_type_node, NULL_TREE);
12101 tree v4sf_ftype_v4sf_v2df
12102 = build_function_type_list (V4SF_type_node,
12103 V4SF_type_node, V2DF_type_node, NULL_TREE);
12104 tree v2df_ftype_v2df_v4sf
12105 = build_function_type_list (V2DF_type_node,
12106 V2DF_type_node, V4SF_type_node, NULL_TREE);
12107 tree v2df_ftype_v2df_v2df_int
12108 = build_function_type_list (V2DF_type_node,
12109 V2DF_type_node, V2DF_type_node,
12112 tree v2df_ftype_v2df_pv2si
12113 = build_function_type_list (V2DF_type_node,
12114 V2DF_type_node, pv2si_type_node, NULL_TREE);
12115 tree void_ftype_pv2si_v2df
12116 = build_function_type_list (void_type_node,
12117 pv2si_type_node, V2DF_type_node, NULL_TREE);
12118 tree void_ftype_pdouble_v2df
12119 = build_function_type_list (void_type_node,
12120 pdouble_type_node, V2DF_type_node, NULL_TREE);
12121 tree void_ftype_pint_int
12122 = build_function_type_list (void_type_node,
12123 pint_type_node, integer_type_node, NULL_TREE);
12124 tree void_ftype_v16qi_v16qi_pchar
12125 = build_function_type_list (void_type_node,
12126 V16QI_type_node, V16QI_type_node,
12127 pchar_type_node, NULL_TREE);
12128 tree v2df_ftype_pdouble
12129 = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
12130 tree v2df_ftype_v2df_v2df
12131 = build_function_type_list (V2DF_type_node,
12132 V2DF_type_node, V2DF_type_node, NULL_TREE);
12133 tree v16qi_ftype_v16qi_v16qi
12134 = build_function_type_list (V16QI_type_node,
12135 V16QI_type_node, V16QI_type_node, NULL_TREE);
12136 tree v8hi_ftype_v8hi_v8hi
12137 = build_function_type_list (V8HI_type_node,
12138 V8HI_type_node, V8HI_type_node, NULL_TREE);
12139 tree v4si_ftype_v4si_v4si
12140 = build_function_type_list (V4SI_type_node,
12141 V4SI_type_node, V4SI_type_node, NULL_TREE);
12142 tree v2di_ftype_v2di_v2di
12143 = build_function_type_list (V2DI_type_node,
12144 V2DI_type_node, V2DI_type_node, NULL_TREE);
12145 tree v2di_ftype_v2df_v2df
12146 = build_function_type_list (V2DI_type_node,
12147 V2DF_type_node, V2DF_type_node, NULL_TREE);
12148 tree v2df_ftype_v2df
12149 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12150 tree v2df_ftype_double
12151 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12152 tree v2df_ftype_double_double
12153 = build_function_type_list (V2DF_type_node,
12154 double_type_node, double_type_node, NULL_TREE);
12155 tree int_ftype_v8hi_int
12156 = build_function_type_list (integer_type_node,
12157 V8HI_type_node, integer_type_node, NULL_TREE);
12158 tree v8hi_ftype_v8hi_int_int
12159 = build_function_type_list (V8HI_type_node,
12160 V8HI_type_node, integer_type_node,
12161 integer_type_node, NULL_TREE);
12162 tree v2di_ftype_v2di_int
12163 = build_function_type_list (V2DI_type_node,
12164 V2DI_type_node, integer_type_node, NULL_TREE);
12165 tree v4si_ftype_v4si_int
12166 = build_function_type_list (V4SI_type_node,
12167 V4SI_type_node, integer_type_node, NULL_TREE);
12168 tree v8hi_ftype_v8hi_int
12169 = build_function_type_list (V8HI_type_node,
12170 V8HI_type_node, integer_type_node, NULL_TREE);
12171 tree v8hi_ftype_v8hi_v2di
12172 = build_function_type_list (V8HI_type_node,
12173 V8HI_type_node, V2DI_type_node, NULL_TREE);
12174 tree v4si_ftype_v4si_v2di
12175 = build_function_type_list (V4SI_type_node,
12176 V4SI_type_node, V2DI_type_node, NULL_TREE);
12177 tree v4si_ftype_v8hi_v8hi
12178 = build_function_type_list (V4SI_type_node,
12179 V8HI_type_node, V8HI_type_node, NULL_TREE);
12180 tree di_ftype_v8qi_v8qi
12181 = build_function_type_list (long_long_unsigned_type_node,
12182 V8QI_type_node, V8QI_type_node, NULL_TREE);
12183 tree v2di_ftype_v16qi_v16qi
12184 = build_function_type_list (V2DI_type_node,
12185 V16QI_type_node, V16QI_type_node, NULL_TREE);
12186 tree int_ftype_v16qi
12187 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12189 /* Add all builtins that are more or less simple operations on two
12191 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12193 /* Use one of the operands; the target can have a different mode for
12194 mask-generating compares. */
12195 enum machine_mode mode;
12200 mode = insn_data[d->icode].operand[1].mode;
12205 type = v16qi_ftype_v16qi_v16qi;
12208 type = v8hi_ftype_v8hi_v8hi;
12211 type = v4si_ftype_v4si_v4si;
12214 type = v2di_ftype_v2di_v2di;
12217 type = v2df_ftype_v2df_v2df;
12220 type = ti_ftype_ti_ti;
12223 type = v4sf_ftype_v4sf_v4sf;
12226 type = v8qi_ftype_v8qi_v8qi;
12229 type = v4hi_ftype_v4hi_v4hi;
12232 type = v2si_ftype_v2si_v2si;
12235 type = di_ftype_di_di;
12242 /* Override for comparisons. */
12243 if (d->icode == CODE_FOR_maskcmpv4sf3
12244 || d->icode == CODE_FOR_maskncmpv4sf3
12245 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12246 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12247 type = v4si_ftype_v4sf_v4sf;
12249 if (d->icode == CODE_FOR_maskcmpv2df3
12250 || d->icode == CODE_FOR_maskncmpv2df3
12251 || d->icode == CODE_FOR_vmmaskcmpv2df3
12252 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12253 type = v2di_ftype_v2df_v2df;
12255 def_builtin (d->mask, d->name, type, d->code);
12258 /* Add the remaining MMX insns with somewhat more complicated types. */
12259 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12260 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12261 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12262 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12263 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12264 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12265 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12267 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12268 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12269 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12271 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12272 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12274 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12275 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12277 /* comi/ucomi insns. */
12278 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12279 if (d->mask == MASK_SSE2)
12280 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12282 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12284 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12285 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12286 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12288 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12289 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12290 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12291 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12292 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12293 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12295 def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
12296 def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
12297 def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
12298 def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
12300 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12301 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12303 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12305 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12306 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12307 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12308 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12309 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12310 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12312 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12313 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12314 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12315 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12317 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12318 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12319 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12320 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12322 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12324 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12326 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12327 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12328 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12329 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12330 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12331 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12333 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12335 /* Original 3DNow! */
12336 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12337 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12338 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12339 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12340 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12341 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12342 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12343 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12344 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12345 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12346 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12347 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12348 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12349 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12350 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12351 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12352 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12353 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12354 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12355 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12357 /* 3DNow! extension as used in the Athlon CPU. */
12358 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12359 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12360 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12361 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12362 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12363 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12365 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12368 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12369 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12371 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12372 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12374 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
12375 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
12376 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
12377 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12378 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12379 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12381 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12382 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12383 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12384 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12386 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12387 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12388 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12389 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12390 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12392 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12393 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12394 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12395 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12397 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12398 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12400 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12402 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12403 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12405 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12406 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12407 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12408 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12409 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12411 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12413 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12414 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12416 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12417 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12418 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12420 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12421 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12422 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12424 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12425 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12426 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12427 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
12428 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
12429 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12430 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12432 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
12433 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12434 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
12436 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12437 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12438 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12440 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12441 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12442 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12444 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12445 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12447 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12448 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12449 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12451 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12452 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12453 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12455 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12456 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12458 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
12461 /* Errors in the source file can cause expand_expr to return const0_rtx
12462 where we expect a vector. To avoid crashing, use one of the vector
12463 clear instructions. */
12465 safe_vector_operand (x, mode)
12467 enum machine_mode mode;
12469 if (x != const0_rtx)
12471 x = gen_reg_rtx (mode);
12473 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
12474 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12475 : gen_rtx_SUBREG (DImode, x, 0)));
12477 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12478 : gen_rtx_SUBREG (V4SFmode, x, 0)));
12482 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
12485 ix86_expand_binop_builtin (icode, arglist, target)
12486 enum insn_code icode;
12491 tree arg0 = TREE_VALUE (arglist);
12492 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12493 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12494 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12495 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12496 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12497 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12499 if (VECTOR_MODE_P (mode0))
12500 op0 = safe_vector_operand (op0, mode0);
12501 if (VECTOR_MODE_P (mode1))
12502 op1 = safe_vector_operand (op1, mode1);
12505 || GET_MODE (target) != tmode
12506 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12507 target = gen_reg_rtx (tmode);
12509 /* In case the insn wants input operands in modes different from
12510 the result, abort. */
12511 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
12514 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12515 op0 = copy_to_mode_reg (mode0, op0);
12516 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12517 op1 = copy_to_mode_reg (mode1, op1);
12519 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12520 yet one of the two must not be a memory. This is normally enforced
12521 by expanders, but we didn't bother to create one here. */
12522 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12523 op0 = copy_to_mode_reg (mode0, op0);
12525 pat = GEN_FCN (icode) (target, op0, op1);
12532 /* In type_for_mode we restrict the ability to create TImode types
12533 to hosts with 64-bit H_W_I. So we've defined the SSE logicals
12534 to have a V4SFmode signature. Convert them in-place to TImode. */
12537 ix86_expand_timode_binop_builtin (icode, arglist, target)
12538 enum insn_code icode;
12543 tree arg0 = TREE_VALUE (arglist);
12544 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12545 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12546 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12548 op0 = gen_lowpart (TImode, op0);
12549 op1 = gen_lowpart (TImode, op1);
12550 target = gen_reg_rtx (TImode);
12552 if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
12553 op0 = copy_to_mode_reg (TImode, op0);
12554 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
12555 op1 = copy_to_mode_reg (TImode, op1);
12557 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12558 yet one of the two must not be a memory. This is normally enforced
12559 by expanders, but we didn't bother to create one here. */
12560 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12561 op0 = copy_to_mode_reg (TImode, op0);
12563 pat = GEN_FCN (icode) (target, op0, op1);
12568 return gen_lowpart (V4SFmode, target);
12571 /* Subroutine of ix86_expand_builtin to take care of stores. */
12574 ix86_expand_store_builtin (icode, arglist)
12575 enum insn_code icode;
12579 tree arg0 = TREE_VALUE (arglist);
12580 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12581 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12582 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12583 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12584 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12586 if (VECTOR_MODE_P (mode1))
12587 op1 = safe_vector_operand (op1, mode1);
12589 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12591 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12592 op1 = copy_to_mode_reg (mode1, op1);
12594 pat = GEN_FCN (icode) (op0, op1);
12600 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12603 ix86_expand_unop_builtin (icode, arglist, target, do_load)
12604 enum insn_code icode;
12610 tree arg0 = TREE_VALUE (arglist);
12611 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12612 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12613 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12616 || GET_MODE (target) != tmode
12617 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12618 target = gen_reg_rtx (tmode);
12620 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12623 if (VECTOR_MODE_P (mode0))
12624 op0 = safe_vector_operand (op0, mode0);
12626 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12627 op0 = copy_to_mode_reg (mode0, op0);
12630 pat = GEN_FCN (icode) (target, op0);
12637 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12638 sqrtss, rsqrtss, rcpss. */
12641 ix86_expand_unop1_builtin (icode, arglist, target)
12642 enum insn_code icode;
12647 tree arg0 = TREE_VALUE (arglist);
12648 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12649 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12650 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12653 || GET_MODE (target) != tmode
12654 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12655 target = gen_reg_rtx (tmode);
12657 if (VECTOR_MODE_P (mode0))
12658 op0 = safe_vector_operand (op0, mode0);
12660 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12661 op0 = copy_to_mode_reg (mode0, op0);
12664 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12665 op1 = copy_to_mode_reg (mode0, op1);
12667 pat = GEN_FCN (icode) (target, op0, op1);
12674 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12677 ix86_expand_sse_compare (d, arglist, target)
12678 const struct builtin_description *d;
12683 tree arg0 = TREE_VALUE (arglist);
12684 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12685 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12686 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12688 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12689 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12690 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12691 enum rtx_code comparison = d->comparison;
12693 if (VECTOR_MODE_P (mode0))
12694 op0 = safe_vector_operand (op0, mode0);
12695 if (VECTOR_MODE_P (mode1))
12696 op1 = safe_vector_operand (op1, mode1);
12698 /* Swap operands if we have a comparison that isn't available in
12702 rtx tmp = gen_reg_rtx (mode1);
12703 emit_move_insn (tmp, op1);
12709 || GET_MODE (target) != tmode
12710 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
12711 target = gen_reg_rtx (tmode);
12713 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12714 op0 = copy_to_mode_reg (mode0, op0);
12715 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12716 op1 = copy_to_mode_reg (mode1, op1);
12718 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12719 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12726 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
12729 ix86_expand_sse_comi (d, arglist, target)
12730 const struct builtin_description *d;
12735 tree arg0 = TREE_VALUE (arglist);
12736 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12737 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12738 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12740 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
12741 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
12742 enum rtx_code comparison = d->comparison;
12744 if (VECTOR_MODE_P (mode0))
12745 op0 = safe_vector_operand (op0, mode0);
12746 if (VECTOR_MODE_P (mode1))
12747 op1 = safe_vector_operand (op1, mode1);
12749 /* Swap operands if we have a comparison that isn't available in
12758 target = gen_reg_rtx (SImode);
12759 emit_move_insn (target, const0_rtx);
12760 target = gen_rtx_SUBREG (QImode, target, 0);
12762 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12763 op0 = copy_to_mode_reg (mode0, op0);
12764 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12765 op1 = copy_to_mode_reg (mode1, op1);
12767 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12768 pat = GEN_FCN (d->icode) (op0, op1, op2);
12772 emit_insn (gen_rtx_SET (VOIDmode,
12773 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
12774 gen_rtx_fmt_ee (comparison, QImode,
12775 gen_rtx_REG (CCmode, FLAGS_REG),
12778 return SUBREG_REG (target);
12781 /* Expand an expression EXP that calls a built-in function,
12782 with result going to TARGET if that's convenient
12783 (and in mode MODE if that's convenient).
12784 SUBTARGET may be used as the target for computing one of EXP's operands.
12785 IGNORE is nonzero if the value is to be ignored. */
12788 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
12791 rtx subtarget ATTRIBUTE_UNUSED;
12792 enum machine_mode mode ATTRIBUTE_UNUSED;
12793 int ignore ATTRIBUTE_UNUSED;
12795 const struct builtin_description *d;
12797 enum insn_code icode;
12798 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12799 tree arglist = TREE_OPERAND (exp, 1);
12800 tree arg0, arg1, arg2;
12801 rtx op0, op1, op2, pat;
12802 enum machine_mode tmode, mode0, mode1, mode2;
12803 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12807 case IX86_BUILTIN_EMMS:
12808 emit_insn (gen_emms ());
12811 case IX86_BUILTIN_SFENCE:
12812 emit_insn (gen_sfence ());
12815 case IX86_BUILTIN_PEXTRW:
12816 case IX86_BUILTIN_PEXTRW128:
12817 icode = (fcode == IX86_BUILTIN_PEXTRW
12818 ? CODE_FOR_mmx_pextrw
12819 : CODE_FOR_sse2_pextrw);
12820 arg0 = TREE_VALUE (arglist);
12821 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12822 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12823 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12824 tmode = insn_data[icode].operand[0].mode;
12825 mode0 = insn_data[icode].operand[1].mode;
12826 mode1 = insn_data[icode].operand[2].mode;
12828 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12829 op0 = copy_to_mode_reg (mode0, op0);
12830 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12832 /* @@@ better error message */
12833 error ("selector must be an immediate");
12834 return gen_reg_rtx (tmode);
12837 || GET_MODE (target) != tmode
12838 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12839 target = gen_reg_rtx (tmode);
12840 pat = GEN_FCN (icode) (target, op0, op1);
12846 case IX86_BUILTIN_PINSRW:
12847 case IX86_BUILTIN_PINSRW128:
12848 icode = (fcode == IX86_BUILTIN_PINSRW
12849 ? CODE_FOR_mmx_pinsrw
12850 : CODE_FOR_sse2_pinsrw);
12851 arg0 = TREE_VALUE (arglist);
12852 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12853 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12854 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12855 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12856 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12857 tmode = insn_data[icode].operand[0].mode;
12858 mode0 = insn_data[icode].operand[1].mode;
12859 mode1 = insn_data[icode].operand[2].mode;
12860 mode2 = insn_data[icode].operand[3].mode;
12862 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12863 op0 = copy_to_mode_reg (mode0, op0);
12864 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12865 op1 = copy_to_mode_reg (mode1, op1);
12866 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12868 /* @@@ better error message */
12869 error ("selector must be an immediate");
12873 || GET_MODE (target) != tmode
12874 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12875 target = gen_reg_rtx (tmode);
12876 pat = GEN_FCN (icode) (target, op0, op1, op2);
12882 case IX86_BUILTIN_MASKMOVQ:
12883 icode = (fcode == IX86_BUILTIN_MASKMOVQ
12884 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
12885 : CODE_FOR_sse2_maskmovdqu);
12886 /* Note the arg order is different from the operand order. */
12887 arg1 = TREE_VALUE (arglist);
12888 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
12889 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12890 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12891 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12892 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12893 mode0 = insn_data[icode].operand[0].mode;
12894 mode1 = insn_data[icode].operand[1].mode;
12895 mode2 = insn_data[icode].operand[2].mode;
12897 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12898 op0 = copy_to_mode_reg (mode0, op0);
12899 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12900 op1 = copy_to_mode_reg (mode1, op1);
12901 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
12902 op2 = copy_to_mode_reg (mode2, op2);
12903 pat = GEN_FCN (icode) (op0, op1, op2);
12909 case IX86_BUILTIN_SQRTSS:
12910 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
12911 case IX86_BUILTIN_RSQRTSS:
12912 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
12913 case IX86_BUILTIN_RCPSS:
12914 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
12916 case IX86_BUILTIN_ANDPS:
12917 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
12919 case IX86_BUILTIN_ANDNPS:
12920 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
12922 case IX86_BUILTIN_ORPS:
12923 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
12925 case IX86_BUILTIN_XORPS:
12926 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
12929 case IX86_BUILTIN_LOADAPS:
12930 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
12932 case IX86_BUILTIN_LOADUPS:
12933 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
12935 case IX86_BUILTIN_STOREAPS:
12936 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
12937 case IX86_BUILTIN_STOREUPS:
12938 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
12940 case IX86_BUILTIN_LOADSS:
12941 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
12943 case IX86_BUILTIN_STORESS:
12944 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
12946 case IX86_BUILTIN_LOADHPS:
12947 case IX86_BUILTIN_LOADLPS:
12948 case IX86_BUILTIN_LOADHPD:
12949 case IX86_BUILTIN_LOADLPD:
12950 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
12951 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
12952 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
12953 : CODE_FOR_sse2_movlpd);
12954 arg0 = TREE_VALUE (arglist);
12955 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12956 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12957 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12958 tmode = insn_data[icode].operand[0].mode;
12959 mode0 = insn_data[icode].operand[1].mode;
12960 mode1 = insn_data[icode].operand[2].mode;
12962 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12963 op0 = copy_to_mode_reg (mode0, op0);
12964 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
12966 || GET_MODE (target) != tmode
12967 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12968 target = gen_reg_rtx (tmode);
12969 pat = GEN_FCN (icode) (target, op0, op1);
12975 case IX86_BUILTIN_STOREHPS:
12976 case IX86_BUILTIN_STORELPS:
12977 case IX86_BUILTIN_STOREHPD:
12978 case IX86_BUILTIN_STORELPD:
12979 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
12980 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
12981 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
12982 : CODE_FOR_sse2_movlpd);
12983 arg0 = TREE_VALUE (arglist);
12984 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12985 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12986 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12987 mode0 = insn_data[icode].operand[1].mode;
12988 mode1 = insn_data[icode].operand[2].mode;
12990 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12991 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12992 op1 = copy_to_mode_reg (mode1, op1);
12994 pat = GEN_FCN (icode) (op0, op0, op1);
13000 case IX86_BUILTIN_MOVNTPS:
13001 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13002 case IX86_BUILTIN_MOVNTQ:
13003 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13005 case IX86_BUILTIN_LDMXCSR:
13006 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13007 target = assign_386_stack_local (SImode, 0);
13008 emit_move_insn (target, op0);
13009 emit_insn (gen_ldmxcsr (target));
13012 case IX86_BUILTIN_STMXCSR:
13013 target = assign_386_stack_local (SImode, 0);
13014 emit_insn (gen_stmxcsr (target));
13015 return copy_to_mode_reg (SImode, target);
13017 case IX86_BUILTIN_SHUFPS:
13018 case IX86_BUILTIN_SHUFPD:
13019 icode = (fcode == IX86_BUILTIN_SHUFPS
13020 ? CODE_FOR_sse_shufps
13021 : CODE_FOR_sse2_shufpd);
13022 arg0 = TREE_VALUE (arglist);
13023 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13024 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13025 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13026 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13027 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13028 tmode = insn_data[icode].operand[0].mode;
13029 mode0 = insn_data[icode].operand[1].mode;
13030 mode1 = insn_data[icode].operand[2].mode;
13031 mode2 = insn_data[icode].operand[3].mode;
13033 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13034 op0 = copy_to_mode_reg (mode0, op0);
13035 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13036 op1 = copy_to_mode_reg (mode1, op1);
13037 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13039 /* @@@ better error message */
13040 error ("mask must be an immediate");
13041 return gen_reg_rtx (tmode);
13044 || GET_MODE (target) != tmode
13045 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13046 target = gen_reg_rtx (tmode);
13047 pat = GEN_FCN (icode) (target, op0, op1, op2);
13053 case IX86_BUILTIN_PSHUFW:
13054 case IX86_BUILTIN_PSHUFD:
13055 case IX86_BUILTIN_PSHUFHW:
13056 case IX86_BUILTIN_PSHUFLW:
13057 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13058 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13059 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13060 : CODE_FOR_mmx_pshufw);
13061 arg0 = TREE_VALUE (arglist);
13062 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13063 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13064 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13065 tmode = insn_data[icode].operand[0].mode;
13066 mode1 = insn_data[icode].operand[1].mode;
13067 mode2 = insn_data[icode].operand[2].mode;
13069 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13070 op0 = copy_to_mode_reg (mode1, op0);
13071 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13073 /* @@@ better error message */
13074 error ("mask must be an immediate");
13078 || GET_MODE (target) != tmode
13079 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13080 target = gen_reg_rtx (tmode);
13081 pat = GEN_FCN (icode) (target, op0, op1);
13087 case IX86_BUILTIN_FEMMS:
13088 emit_insn (gen_femms ());
13091 case IX86_BUILTIN_PAVGUSB:
13092 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13094 case IX86_BUILTIN_PF2ID:
13095 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13097 case IX86_BUILTIN_PFACC:
13098 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13100 case IX86_BUILTIN_PFADD:
13101 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13103 case IX86_BUILTIN_PFCMPEQ:
13104 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13106 case IX86_BUILTIN_PFCMPGE:
13107 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13109 case IX86_BUILTIN_PFCMPGT:
13110 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13112 case IX86_BUILTIN_PFMAX:
13113 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13115 case IX86_BUILTIN_PFMIN:
13116 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13118 case IX86_BUILTIN_PFMUL:
13119 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13121 case IX86_BUILTIN_PFRCP:
13122 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13124 case IX86_BUILTIN_PFRCPIT1:
13125 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13127 case IX86_BUILTIN_PFRCPIT2:
13128 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13130 case IX86_BUILTIN_PFRSQIT1:
13131 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13133 case IX86_BUILTIN_PFRSQRT:
13134 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13136 case IX86_BUILTIN_PFSUB:
13137 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13139 case IX86_BUILTIN_PFSUBR:
13140 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13142 case IX86_BUILTIN_PI2FD:
13143 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13145 case IX86_BUILTIN_PMULHRW:
13146 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13148 case IX86_BUILTIN_PF2IW:
13149 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13151 case IX86_BUILTIN_PFNACC:
13152 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13154 case IX86_BUILTIN_PFPNACC:
13155 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13157 case IX86_BUILTIN_PI2FW:
13158 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13160 case IX86_BUILTIN_PSWAPDSI:
13161 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13163 case IX86_BUILTIN_PSWAPDSF:
13164 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13166 case IX86_BUILTIN_SSE_ZERO:
13167 target = gen_reg_rtx (V4SFmode);
13168 emit_insn (gen_sse_clrv4sf (target));
13171 case IX86_BUILTIN_MMX_ZERO:
13172 target = gen_reg_rtx (DImode);
13173 emit_insn (gen_mmx_clrdi (target));
13176 case IX86_BUILTIN_SQRTSD:
13177 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13178 case IX86_BUILTIN_LOADAPD:
13179 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13180 case IX86_BUILTIN_LOADUPD:
13181 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13183 case IX86_BUILTIN_STOREAPD:
13184 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13185 case IX86_BUILTIN_STOREUPD:
13186 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13188 case IX86_BUILTIN_LOADSD:
13189 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13191 case IX86_BUILTIN_STORESD:
13192 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13194 case IX86_BUILTIN_SETPD1:
13195 target = assign_386_stack_local (DFmode, 0);
13196 arg0 = TREE_VALUE (arglist);
13197 emit_move_insn (adjust_address (target, DFmode, 0),
13198 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13199 op0 = gen_reg_rtx (V2DFmode);
13200 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13201 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13204 case IX86_BUILTIN_SETPD:
13205 target = assign_386_stack_local (V2DFmode, 0);
13206 arg0 = TREE_VALUE (arglist);
13207 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13208 emit_move_insn (adjust_address (target, DFmode, 0),
13209 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13210 emit_move_insn (adjust_address (target, DFmode, 8),
13211 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13212 op0 = gen_reg_rtx (V2DFmode);
13213 emit_insn (gen_sse2_movapd (op0, target));
13216 case IX86_BUILTIN_LOADRPD:
13217 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13218 gen_reg_rtx (V2DFmode), 1);
13219 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13222 case IX86_BUILTIN_LOADPD1:
13223 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13224 gen_reg_rtx (V2DFmode), 1);
13225 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13228 case IX86_BUILTIN_STOREPD1:
13229 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13230 case IX86_BUILTIN_STORERPD:
13231 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13233 case IX86_BUILTIN_MFENCE:
13234 emit_insn (gen_sse2_mfence ());
13236 case IX86_BUILTIN_LFENCE:
13237 emit_insn (gen_sse2_lfence ());
13240 case IX86_BUILTIN_CLFLUSH:
13241 arg0 = TREE_VALUE (arglist);
13242 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13243 icode = CODE_FOR_sse2_clflush;
13244 mode0 = insn_data[icode].operand[0].mode;
13245 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13246 op0 = copy_to_mode_reg (mode0, op0);
13248 emit_insn (gen_sse2_clflush (op0));
13251 case IX86_BUILTIN_MOVNTPD:
13252 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13253 case IX86_BUILTIN_MOVNTDQ:
13254 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13255 case IX86_BUILTIN_MOVNTI:
13256 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13262 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13263 if (d->code == fcode)
13265 /* Compares are treated specially. */
13266 if (d->icode == CODE_FOR_maskcmpv4sf3
13267 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13268 || d->icode == CODE_FOR_maskncmpv4sf3
13269 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13270 || d->icode == CODE_FOR_maskcmpv2df3
13271 || d->icode == CODE_FOR_vmmaskcmpv2df3
13272 || d->icode == CODE_FOR_maskncmpv2df3
13273 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13274 return ix86_expand_sse_compare (d, arglist, target);
13276 return ix86_expand_binop_builtin (d->icode, arglist, target);
13279 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13280 if (d->code == fcode)
13281 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13283 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13284 if (d->code == fcode)
13285 return ix86_expand_sse_comi (d, arglist, target);
13287 /* @@@ Should really do something sensible here. */
13291 /* Store OPERAND to the memory after reload is completed. This means
13292 that we can't easily use assign_stack_local. */
/* NOTE(review): this view of the function is elided -- the return type,
   local declarations (result, operands[]) and several statements are
   missing.  Comments below describe only the visible code; presumably the
   function returns a MEM rtx addressing the stored value -- confirm
   against the full source.  */
13294 ix86_force_to_memory (mode, operand)
13295 enum machine_mode mode;
/* Guard: this helper is only valid once reload has finished (see the
   header comment); the consequent of this test is elided here.  */
13299 if (!reload_completed)
/* Case 1: 64-bit target with a red zone -- store below the stack
   pointer instead of pushing, so the stack pointer is not changed.  */
13301 if (TARGET_64BIT && TARGET_RED_ZONE)
13303 result = gen_rtx_MEM (mode,
13304 gen_rtx_PLUS (Pmode,
13306 GEN_INT (-RED_ZONE_SIZE)));
13307 emit_move_insn (result, operand);
/* Case 2: 64-bit target without a red zone -- push the value (widened
   to DImode) using a pre-decrement of the stack pointer.  */
13309 else if (TARGET_64BIT && !TARGET_RED_ZONE)
13315 operand = gen_lowpart (DImode, operand);
13319 gen_rtx_SET (VOIDmode,
13320 gen_rtx_MEM (DImode,
13321 gen_rtx_PRE_DEC (DImode,
13322 stack_pointer_rtx)),
13328 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit DImode case (the enclosing switch is elided): split the value
   into two SImode halves and push each with its own pre-decrement.  */
13337 split_di (&operand, 1, operands, operands + 1);
13339 gen_rtx_SET (VOIDmode,
13340 gen_rtx_MEM (SImode,
13341 gen_rtx_PRE_DEC (Pmode,
13342 stack_pointer_rtx)),
13345 gen_rtx_SET (VOIDmode,
13346 gen_rtx_MEM (SImode,
13347 gen_rtx_PRE_DEC (Pmode,
13348 stack_pointer_rtx)),
/* HImode stores cause partial-register traffic; widen to SImode unless
   the target has no partial register stall penalty.  */
13353 /* It is better to store HImodes as SImodes. */
13354 if (!TARGET_PARTIAL_REG_STALL)
13355 operand = gen_lowpart (SImode, operand);
13359 gen_rtx_SET (VOIDmode,
13360 gen_rtx_MEM (GET_MODE (operand),
13361 gen_rtx_PRE_DEC (SImode,
13362 stack_pointer_rtx)),
/* Result addresses the freshly pushed slot at the new stack top.  */
13368 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13373 /* Free operand from the memory. */
/* NOTE(review): return type, the size computation and closing brace are
   elided in this view.  Presumably this pairs with ix86_force_to_memory
   above, releasing the stack slot it pushed -- confirm with callers.  */
13375 ix86_free_from_memory (mode)
13376 enum machine_mode mode;
/* Nothing was pushed when the 64-bit red zone was used, so deallocation
   is only needed in the remaining configurations.  */
13378 if (!TARGET_64BIT || !TARGET_RED_ZONE)
/* Slot size selection (assignments elided): DImode or any 64-bit push
   takes one size, HImode with partial-register stalls another.  */
13382 if (mode == DImode || TARGET_64BIT)
13384 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13388 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13389 to pop or add instruction if registers are available. */
13390 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13391 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13396 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13397 QImode must go into class Q_REGS.
13398 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
13399 movdf to do mem-to-mem moves through integer regs. */
/* NOTE(review): return type, the declaration of X, and several return
   statements are elided in this view; comments on elided branches are
   inferred from context and should be confirmed against the full file.  */
13401 ix86_preferred_reload_class (x, class)
13403 enum reg_class class;
/* Floating-point constant: decide where (if anywhere) it may live.  */
13405 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13407 /* SSE can't load any constant directly yet. */
13408 if (SSE_CLASS_P (class))
13410 /* Floats can load 0 and 1 (via fldz/fld1; return value elided). */
13411 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13413 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13414 if (MAYBE_SSE_CLASS_P (class))
13415 return (reg_class_subset_p (class, GENERAL_REGS)
13416 ? GENERAL_REGS : FLOAT_REGS);
13420 /* General regs can load everything. */
13421 if (reg_class_subset_p (class, GENERAL_REGS))
13422 return GENERAL_REGS;
13423 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13424 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
/* MMX registers cannot load constants either (return elided).  */
13427 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
/* QImode needs byte-addressable registers -- narrow to Q_REGS
   (return elided).  */
13429 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13434 /* If we are copying between general and FP registers, we need a memory
13435 location. The same is true for SSE and MMX registers.
13437 The macro can't work reliably when one of the CLASSES is a class containing
13438 registers from multiple units (SSE, MMX, integer). We avoid this by never
13439 combining those units in a single alternative in the machine description.
13440 Ensure that this constraint holds to avoid unexpected surprises.
13442 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13443 enforce these sanity checks. */
/* NOTE(review): return type, the STRICT parameter declaration and the
   failure path of the sanity check below are elided in this view.  */
13445 ix86_secondary_memory_needed (class1, class2, mode, strict)
13446 enum reg_class class1, class2;
13447 enum machine_mode mode;
/* Sanity check: a "maybe" unit predicate must agree with the exact one,
   i.e. neither class may mix units (action when violated is elided).  */
13450 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13451 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13452 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13453 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13454 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13455 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
/* Memory is needed when the classes straddle the x87/general boundary,
   or the SSE/other and MMX/other boundaries -- except that SImode can
   move directly between SSE or MMX and integer registers (movd).  */
13462 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13463 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13464 && (mode) != SImode)
13465 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13466 && (mode) != SImode));
13468 /* Return the cost of moving data from a register in class CLASS1 to
13469 one in class CLASS2.
13471 It is not required that the cost always equal 2 when FROM is the same as TO;
13472 on some machines it is expensive to move between registers if they are not
13473 general registers. */
/* NOTE(review): the return type, the declaration/initialization of
   add_cost and the final general-register return are elided here.  */
13475 ix86_register_move_cost (mode, class1, class2)
13476 enum machine_mode mode;
13477 enum reg_class class1, class2;
13479 /* In case we require secondary memory, compute cost of the store followed
13480 by load. In case of copying from general_purpose_register we may emit
13481 multiple stores followed by single load causing memory size mismatch
13482 stall. Count this as arbitrarily high cost of 20. */
13483 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
/* The mismatch-stall penalty applies when the source class needs more
   hard registers than the destination (penalty assignment elided).  */
13486 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
13488 return (MEMORY_MOVE_COST (mode, class1, 0)
13489 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
13491 /* Moves between SSE/MMX and integer unit are expensive. */
13492 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13493 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
13494 return ix86_cost->mmxsse_to_integer;
/* Same-unit moves: use the per-processor cost table.  */
13495 if (MAYBE_FLOAT_CLASS_P (class1))
13496 return ix86_cost->fp_move;
13497 if (MAYBE_SSE_CLASS_P (class1))
13498 return ix86_cost->sse_move;
13499 if (MAYBE_MMX_CLASS_P (class1))
13500 return ix86_cost->mmx_move;
13504 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* NOTE(review): return type, the REGNO parameter declaration and several
   return statements are elided in this view.  */
13506 ix86_hard_regno_mode_ok (regno, mode)
13508 enum machine_mode mode;
13510 /* Only the flags register can hold CCmode values, and it holds nothing
13511 else.  (Comment reworded; code unchanged.) */
13511 if (CC_REGNO_P (regno))
13512 return GET_MODE_CLASS (mode) == MODE_CC;
/* CC, MODE_RANDOM and partial-int modes are rejected for every other
   register (the return for this test is elided).  */
13513 if (GET_MODE_CLASS (mode) == MODE_CC
13514 || GET_MODE_CLASS (mode) == MODE_RANDOM
13515 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
/* Per-unit validity checks: x87 stack, SSE, and MMX registers each
   accept only their own mode sets.  */
13517 if (FP_REGNO_P (regno))
13518 return VALID_FP_MODE_P (mode);
13519 if (SSE_REGNO_P (regno))
13520 return VALID_SSE_REG_MODE (mode);
13521 if (MMX_REGNO_P (regno))
13522 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
13523 /* We handle both integer and floats in the general purpose registers.
13524 In future we should be able to handle vector modes as well. */
13525 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13527 /* Take care for QImode values - they can be in non-QI regs, but then
13528 they do cause partial register stalls. */
13529 if (regno < 4 || mode != QImode || TARGET_64BIT)
/* Allow QImode in non-byte-addressable regs only when reload is active
   (it has no choice) or the target does not suffer the stall.  */
13531 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13534 /* Return the cost of moving data of mode M between a
13535 register and memory. A value of 2 is the default; this cost is
13536 relative to those in `REGISTER_MOVE_COST'.
13538 If moving between registers and memory is more expensive than
13539 between two registers, you should define this macro to express the
13542 Model also increased moving costs of QImode registers in non
/* NOTE(review): the return type, the IN parameter declaration, the
   index/size switch bodies and their case labels are heavily elided in
   this view; comments describe only the visible dispatch structure.  */
13546 ix86_memory_move_cost (mode, class, in)
13547 enum machine_mode mode;
13548 enum reg_class class;
/* x87 classes: cost comes from the fp_load/fp_store tables, indexed by
   operand size (index computation elided).  */
13551 if (FLOAT_CLASS_P (class))
13569 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE classes: sse_load/sse_store tables, again indexed by size.  */
13571 if (SSE_CLASS_P (class))
13574 switch (GET_MODE_SIZE (mode))
13588 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
/* MMX classes: mmx_load/mmx_store tables.  */
13590 if (MMX_CLASS_P (class))
13593 switch (GET_MODE_SIZE (mode))
13604 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* General-purpose registers: dispatch on operand size.  */
13606 switch (GET_MODE_SIZE (mode))
/* Byte loads from non-Q classes go through movzbl, which has its own
   cost; byte stores to non-Q classes carry a fixed +4 penalty.  */
13610 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13611 : ix86_cost->movzbl_load);
13613 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13614 : ix86_cost->int_store[0] + 4);
13617 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13619 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
13620 if (mode == TFmode)
13622 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
13623 * (int) GET_MODE_SIZE (mode) / 4);
13627 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
/* SVR3 constructor hook: emit a "pushl $symbol" so DO_GLOBAL_CTORS_BODY
   can pop and call each constructor.  NOTE(review): the return type,
   the SYMBOL parameter declaration and the closing brace/#endif for this
   section are elided in this view.  */
13629 ix86_svr3_asm_out_constructor (symbol, priority)
13631 int priority ATTRIBUTE_UNUSED;
13634 fputs ("\tpushl $", asm_out_file);
13635 assemble_name (asm_out_file, XSTR (symbol, 0));
13636 fputc ('\n', asm_out_file);
/* Counter used to generate unique local labels (L<n>$lz, LPC$<n>) for
   each Mach-O symbol stub emitted below.  */
13642 static int current_machopic_label_num;
13644 /* Given a symbol name and its associated stub, write out the
13645 definition of the stub. */
/* NOTE(review): the return type, the FILE parameter declaration and the
   MACHOPIC_PURE conditionals selecting between the PIC and non-PIC
   bodies are elided in this view.  */
13648 machopic_output_stub (file, symb, stub)
13650 const char *symb, *stub;
13652 unsigned int length;
13653 char *binder_name, *symbol_name, lazy_ptr_name[32];
13654 int label = ++current_machopic_label_num;
13656 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
13657 symb = (*targetm.strip_name_encoding) (symb);
/* Derive the binder and symbol names into stack buffers (+32 leaves
   room for the decoration the GEN_* macros append).  */
13659 length = strlen (stub);
13660 binder_name = alloca (length + 32);
13661 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
13663 length = strlen (symb);
13664 symbol_name = alloca (length + 32);
13665 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
13667 sprintf (lazy_ptr_name, "L%d$lz", label);
/* Pick the stub section (the condition choosing between these two
   calls is elided -- presumably MACHOPIC_PURE).  */
13670 machopic_picsymbol_stub_section ();
13672 machopic_symbol_stub_section ();
/* Stub body: jump indirect through the lazy pointer.  The PIC variant
   computes the pointer address relative to LPC$<n>.  */
13674 fprintf (file, "%s:\n", stub);
13675 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
13679 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
13680 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
13681 fprintf (file, "\tjmp %%edx\n");
13684 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
/* Binder fragment: push the lazy pointer address, then enter dyld's
   stub binding helper to resolve the symbol on first use.  */
13686 fprintf (file, "%s:\n", binder_name);
13690 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
13691 fprintf (file, "\tpushl %%eax\n");
13694 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
13696 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
/* Lazy pointer: initially points at the binder so the first call
   triggers resolution; dyld later rewrites it to the real symbol.  */
13698 machopic_lazy_symbol_ptr_section ();
13699 fprintf (file, "%s:\n", lazy_ptr_name);
13700 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
13701 fprintf (file, "\t.long %s\n", binder_name);
13703 #endif /* TARGET_MACHO */
13705 /* Order the registers for register allocator. */
/* NOTE(review): return type and the declarations of i/pos are elided in
   this view.  Fills reg_alloc_order[] so the allocator prefers
   call-clobbered general regs first, then picks the FP unit order based
   on whether SSE does the FP math.  */
13708 x86_order_regs_for_local_alloc ()
13713 /* First allocate the local general purpose registers. */
13714 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13715 if (GENERAL_REGNO_P (i) && call_used_regs[i])
13716 reg_alloc_order [pos++] = i;
13718 /* Global general purpose registers. */
13719 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13720 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
13721 reg_alloc_order [pos++] = i;
13723 /* x87 registers come first in case we are doing FP math
/* ...i.e. when TARGET_SSE_MATH is off, the x87 stack is the primary FP
   unit and should be preferred over SSE registers.  */
13725 if (!TARGET_SSE_MATH)
13726 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13727 reg_alloc_order [pos++] = i;
13729 /* SSE registers. */
13730 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
13731 reg_alloc_order [pos++] = i;
13732 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
13733 reg_alloc_order [pos++] = i;
13735 /* x87 registers. */
13736 if (TARGET_SSE_MATH)
13737 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13738 reg_alloc_order [pos++] = i;
/* MMX registers last among the real units.  */
13740 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
13741 reg_alloc_order [pos++] = i;
13743 /* Initialize the rest of array as we do not allocate some registers
13745 while (pos < FIRST_PSEUDO_REGISTER)
13746 reg_alloc_order [pos++] = 0;
/* Emit the assembly for a "this"-adjusting thunk: add DELTA to the
   incoming object pointer, then tail-jump to FUNCTION.
   NOTE(review): the return type, parameter declarations, the xops/parm
   locals and the TARGET_64BIT / PIC conditionals that select between
   the branches below are elided in this view.  */
13750 x86_output_mi_thunk (file, delta, function)
/* Walk the prototype to find where "this" lives -- relevant only when
   regparm passes leading args in registers.  */
13758 if (ix86_regparm > 0)
13759 parm = TYPE_ARG_TYPES (TREE_TYPE (function));
13762 for (; parm; parm = TREE_CHAIN (parm))
13763 if (TREE_VALUE (parm) == void_type_node)
13766 xops[0] = GEN_INT (delta);
/* 64-bit: "this" is in the first (or second, when a hidden aggregate
   return pointer occupies the first) integer parameter register.  */
13769 int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
13770 xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
13771 output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
/* 64-bit PIC tail call through the GOT.  */
13774 fprintf (file, "\tjmp *");
13775 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13776 fprintf (file, "@GOTPCREL(%%rip)\n");
/* 64-bit non-PIC direct tail jump.  */
13780 fprintf (file, "\tjmp ");
13781 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13782 fprintf (file, "\n");
/* 32-bit: "this" is either in %eax (regparm) or on the stack at +4,
   or +8 when a hidden aggregate-return pointer precedes it.  */
13788 xops[1] = gen_rtx_REG (SImode, 0);
13789 else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
13790 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
13792 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
13793 output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);
/* 32-bit PIC: materialize the GOT pointer with the call/pop idiom,
   load FUNCTION's address from the GOT into %ecx, and jump there.  */
13797 xops[0] = pic_offset_table_rtx;
13798 xops[1] = gen_label_rtx ();
13799 xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
/* With regparm > 2 the PIC register cannot be used freely here
   (consequence of this test is elided).  */
13801 if (ix86_regparm > 2)
13803 output_asm_insn ("push{l}\t%0", xops);
13804 output_asm_insn ("call\t%P1", xops);
13805 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
13806 output_asm_insn ("pop{l}\t%0", xops);
13808 ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
13809 xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
13811 ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
13812 asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
13813 asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
/* 32-bit non-PIC direct tail jump.  */
13817 fprintf (file, "\tjmp ");
13818 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13819 fprintf (file, "\n");
/* ADJUST_FIELD_ALIGN worker: cap the alignment of struct fields.
   NOTE(review): the return type, parameter declarations and the final
   return (for fields not matching the test below) are elided here.  */
13825 x86_field_alignment (field, computed)
13829 enum machine_mode mode;
13830 tree type = TREE_TYPE (field);
/* On 64-bit or with -malign-double the natural alignment is kept
   (the early return for this case is elided).  */
13832 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
/* For arrays, classify by the element type.  */
13834 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
13835 ? get_inner_array_type (type) : type);
/* ix86 ABI: doubles and 64-bit ints inside structs are only 4-byte
   aligned, so clamp the computed alignment to 32 bits.  */
13836 if (mode == DFmode || mode == DCmode
13837 || GET_MODE_CLASS (mode) == MODE_INT
13838 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
13839 return MIN (32, computed);
13843 /* Implement machine specific optimizations.
13844 At the moment we implement single transformation: AMD Athlon works faster
13845 when RET is not destination of conditional jump or directly preceded
13846 by other jump instruction. We avoid the penalty by inserting NOP just
13847 before the RET instructions in such cases. */
/* NOTE(review): the return type, local declarations (e, ret, prev) and
   some control-flow lines (braces, continue/break) are elided here.  */
13849 x86_machine_dependent_reorg (first)
13850 rtx first ATTRIBUTE_UNUSED;
/* The workaround only pays off on Athlon and when optimizing for
   speed.  */
13854 if (!TARGET_ATHLON || !optimize || optimize_size)
/* Inspect every block that can exit the function.  */
13856 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
13858 basic_block bb = e->src;
13861 bool insert = false;
/* Only hot blocks whose last insn really is a return matter.  */
13863 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
13865 prev = prev_nonnote_insn (ret);
/* RET right after a label: a jump may target it directly; check the
   predecessors for a taken (non-fallthru) edge with real frequency.  */
13866 if (prev && GET_CODE (prev) == CODE_LABEL)
13869 for (e = bb->pred; e; e = e->pred_next)
13870 if (EDGE_FREQUENCY (e) && e->src->index > 0
13871 && !(e->flags & EDGE_FALLTHRU))
/* RET directly preceded by a conditional jump also triggers the
   Athlon penalty.  */
13876 prev = prev_real_insn (ret);
13877 if (prev && GET_CODE (prev) == JUMP_INSN
13878 && any_condjump_p (prev))
/* Pad with a NOP so the RET is no longer a branch target/successor.  */
13882 emit_insn_before (gen_nop (), ret);
13886 #include "gt-i386.h"