1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
28 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "basic-block.h"
44 #include "target-def.h"
45 #include "langhooks.h"
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
51 /* Processor costs (relative to an add) */
53 struct processor_costs size_cost = { /* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of storing fp registers */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
88 /* Processor costs (relative to an add) */
90 struct processor_costs i386_cost = { /* 386 specific costs */
91 1, /* cost of an add instruction */
92 1, /* cost of a lea instruction */
93 3, /* variable shift costs */
94 2, /* constant shift costs */
95 6, /* cost of starting a multiply */
96 1, /* cost of multiply per each bit set */
97 23, /* cost of a divide/mod */
98 3, /* cost of movsx */
99 2, /* cost of movzx */
100 15, /* "large" insn */
102 4, /* cost for loading QImode using movzbl */
103 {2, 4, 2}, /* cost of loading integer registers
104 in QImode, HImode and SImode.
105 Relative to reg-reg move (2). */
106 {2, 4, 2}, /* cost of storing integer registers */
107 2, /* cost of reg,reg fld/fst */
108 {8, 8, 8}, /* cost of loading fp registers
109 in SFmode, DFmode and XFmode */
110 {8, 8, 8}, /* cost of storing fp registers */
111 2, /* cost of moving MMX register */
112 {4, 8}, /* cost of loading MMX registers
113 in SImode and DImode */
114 {4, 8}, /* cost of storing MMX registers
115 in SImode and DImode */
116 2, /* cost of moving SSE register */
117 {4, 8, 16}, /* cost of loading SSE registers
118 in SImode, DImode and TImode */
119 {4, 8, 16}, /* cost of storing SSE registers
120 in SImode, DImode and TImode */
121 3, /* MMX or SSE register to integer */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
127 struct processor_costs i486_cost = { /* 486 specific costs */
128 1, /* cost of an add instruction */
129 1, /* cost of a lea instruction */
130 3, /* variable shift costs */
131 2, /* constant shift costs */
132 12, /* cost of starting a multiply */
133 1, /* cost of multiply per each bit set */
134 40, /* cost of a divide/mod */
135 3, /* cost of movsx */
136 2, /* cost of movzx */
137 15, /* "large" insn */
139 4, /* cost for loading QImode using movzbl */
140 {2, 4, 2}, /* cost of loading integer registers
141 in QImode, HImode and SImode.
142 Relative to reg-reg move (2). */
143 {2, 4, 2}, /* cost of storing integer registers */
144 2, /* cost of reg,reg fld/fst */
145 {8, 8, 8}, /* cost of loading fp registers
146 in SFmode, DFmode and XFmode */
147 {8, 8, 8}, /* cost of storing fp registers */
148 2, /* cost of moving MMX register */
149 {4, 8}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {4, 8}, /* cost of storing MMX registers
152 in SImode and DImode */
153 2, /* cost of moving SSE register */
154 {4, 8, 16}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {4, 8, 16}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of prefetch block */
160 0, /* number of parallel prefetches */
164 struct processor_costs pentium_cost = {
165 1, /* cost of an add instruction */
166 1, /* cost of a lea instruction */
167 4, /* variable shift costs */
168 1, /* constant shift costs */
169 11, /* cost of starting a multiply */
170 0, /* cost of multiply per each bit set */
171 25, /* cost of a divide/mod */
172 3, /* cost of movsx */
173 2, /* cost of movzx */
174 8, /* "large" insn */
176 6, /* cost for loading QImode using movzbl */
177 {2, 4, 2}, /* cost of loading integer registers
178 in QImode, HImode and SImode.
179 Relative to reg-reg move (2). */
180 {2, 4, 2}, /* cost of storing integer registers */
181 2, /* cost of reg,reg fld/fst */
182 {2, 2, 6}, /* cost of loading fp registers
183 in SFmode, DFmode and XFmode */
184 {4, 4, 6}, /* cost of storing fp registers */
185 8, /* cost of moving MMX register */
186 {8, 8}, /* cost of loading MMX registers
187 in SImode and DImode */
188 {8, 8}, /* cost of storing MMX registers
189 in SImode and DImode */
190 2, /* cost of moving SSE register */
191 {4, 8, 16}, /* cost of loading SSE registers
192 in SImode, DImode and TImode */
193 {4, 8, 16}, /* cost of storing SSE registers
194 in SImode, DImode and TImode */
195 3, /* MMX or SSE register to integer */
196 0, /* size of prefetch block */
197 0, /* number of parallel prefetches */
201 struct processor_costs pentiumpro_cost = {
202 1, /* cost of an add instruction */
203 1, /* cost of a lea instruction */
204 1, /* variable shift costs */
205 1, /* constant shift costs */
206 4, /* cost of starting a multiply */
207 0, /* cost of multiply per each bit set */
208 17, /* cost of a divide/mod */
209 1, /* cost of movsx */
210 1, /* cost of movzx */
211 8, /* "large" insn */
213 2, /* cost for loading QImode using movzbl */
214 {4, 4, 4}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 2, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {2, 2, 6}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
221 {4, 4, 6}, /* cost of storing fp registers */
222 2, /* cost of moving MMX register */
223 {2, 2}, /* cost of loading MMX registers
224 in SImode and DImode */
225 {2, 2}, /* cost of storing MMX registers
226 in SImode and DImode */
227 2, /* cost of moving SSE register */
228 {2, 2, 8}, /* cost of loading SSE registers
229 in SImode, DImode and TImode */
230 {2, 2, 8}, /* cost of storing SSE registers
231 in SImode, DImode and TImode */
232 3, /* MMX or SSE register to integer */
233 32, /* size of prefetch block */
234 6, /* number of parallel prefetches */
238 struct processor_costs k6_cost = {
239 1, /* cost of an add instruction */
240 2, /* cost of a lea instruction */
241 1, /* variable shift costs */
242 1, /* constant shift costs */
243 3, /* cost of starting a multiply */
244 0, /* cost of multiply per each bit set */
245 18, /* cost of a divide/mod */
246 2, /* cost of movsx */
247 2, /* cost of movzx */
248 8, /* "large" insn */
250 3, /* cost for loading QImode using movzbl */
251 {4, 5, 4}, /* cost of loading integer registers
252 in QImode, HImode and SImode.
253 Relative to reg-reg move (2). */
254 {2, 3, 2}, /* cost of storing integer registers */
255 4, /* cost of reg,reg fld/fst */
256 {6, 6, 6}, /* cost of loading fp registers
257 in SFmode, DFmode and XFmode */
258 {4, 4, 4}, /* cost of storing fp registers */
259 2, /* cost of moving MMX register */
260 {2, 2}, /* cost of loading MMX registers
261 in SImode and DImode */
262 {2, 2}, /* cost of storing MMX registers
263 in SImode and DImode */
264 2, /* cost of moving SSE register */
265 {2, 2, 8}, /* cost of loading SSE registers
266 in SImode, DImode and TImode */
267 {2, 2, 8}, /* cost of storing SSE registers
268 in SImode, DImode and TImode */
269 6, /* MMX or SSE register to integer */
270 32, /* size of prefetch block */
271 1, /* number of parallel prefetches */
275 struct processor_costs athlon_cost = {
276 1, /* cost of an add instruction */
277 2, /* cost of a lea instruction */
278 1, /* variable shift costs */
279 1, /* constant shift costs */
280 5, /* cost of starting a multiply */
281 0, /* cost of multiply per each bit set */
282 42, /* cost of a divide/mod */
283 1, /* cost of movsx */
284 1, /* cost of movzx */
285 8, /* "large" insn */
287 4, /* cost for loading QImode using movzbl */
288 {4, 5, 4}, /* cost of loading integer registers
289 in QImode, HImode and SImode.
290 Relative to reg-reg move (2). */
291 {2, 3, 2}, /* cost of storing integer registers */
292 4, /* cost of reg,reg fld/fst */
293 {6, 6, 20}, /* cost of loading fp registers
294 in SFmode, DFmode and XFmode */
295 {4, 4, 16}, /* cost of storing fp registers */
296 2, /* cost of moving MMX register */
297 {2, 2}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {2, 2}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {2, 2, 8}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {2, 2, 8}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
306 6, /* MMX or SSE register to integer */
307 64, /* size of prefetch block */
308 6, /* number of parallel prefetches */
312 struct processor_costs pentium4_cost = {
313 1, /* cost of an add instruction */
314 1, /* cost of a lea instruction */
315 8, /* variable shift costs */
316 8, /* constant shift costs */
317 30, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 112, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 16, /* "large" insn */
324 2, /* cost for loading QImode using movzbl */
325 {4, 5, 4}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {2, 3, 2}, /* cost of storing integer registers */
329 2, /* cost of reg,reg fld/fst */
330 {2, 2, 6}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {4, 4, 6}, /* cost of storing fp registers */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 12, /* cost of moving SSE register */
339 {12, 12, 12}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 10, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
348 const struct processor_costs *ix86_cost = &pentium_cost; /* cost table in use; defaults to Pentium — presumably reselected from the -mcpu option later (TODO confirm) */
350 /* Processor feature/optimization bitmasks.  Each m_* macro below is the
   bit for one processor type; the x86_* tuning constants OR these bits
   together, and a tuning feature is in effect when the bit of the
   selected target processor is set in the corresponding constant.  */
351 #define m_386 (1<<PROCESSOR_I386)
352 #define m_486 (1<<PROCESSOR_I486)
353 #define m_PENT (1<<PROCESSOR_PENTIUM)
354 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
355 #define m_K6 (1<<PROCESSOR_K6)
356 #define m_ATHLON (1<<PROCESSOR_ATHLON)
357 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
359 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
360 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
361 const int x86_zero_extend_with_and = m_486 | m_PENT;
362 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
363 const int x86_double_with_add = ~m_386;
364 const int x86_use_bit_test = m_386;
365 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
366 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
367 const int x86_3dnow_a = m_ATHLON;
368 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
369 const int x86_branch_hints = m_PENT4;
370 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
371 const int x86_partial_reg_stall = m_PPRO;
372 const int x86_use_loop = m_K6;
373 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
374 const int x86_use_mov0 = m_K6;
375 const int x86_use_cltd = ~(m_PENT | m_K6);
376 const int x86_read_modify_write = ~m_PENT;
377 const int x86_read_modify = ~(m_PENT | m_PPRO);
378 const int x86_split_long_moves = m_PPRO;
379 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
380 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
381 const int x86_single_stringop = m_386 | m_PENT4;
382 const int x86_qimode_math = ~(0);
383 const int x86_promote_qi_regs = 0;
384 const int x86_himode_math = ~(m_PPRO);
385 const int x86_promote_hi_regs = m_PPRO;
386 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
387 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
388 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
389 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
390 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
391 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
392 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
393 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
394 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
395 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
396 const int x86_decompose_lea = m_PENT4;
397 const int x86_shift1 = ~m_486;
398 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
400 /* In case the average insn count for single function invocation is
401 lower than this constant, emit fast (but longer) prologue and
403 #define FAST_PROLOGUE_INSN_COUNT 30
405 /* Set by prologue expander and used by epilogue expander to determine
407 static int use_fast_prologue_epilogue;
409 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
410 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
411 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
412 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
414 /* Array of the smallest class containing reg number REGNO, indexed by
415 REGNO. Used by REGNO_REG_CLASS in i386.h. */
417 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
420 AREG, DREG, CREG, BREG,
422 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
424 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
425 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
428 /* flags, fpsr, dirflag, frame */
429 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
430 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
432 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
434 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
435 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
436 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
440 /* The "default" register map used in 32bit mode. */
442 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
444 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
445 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
446 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
447 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
448 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
449 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
450 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
453 static int const x86_64_int_parameter_registers[6] =
455 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
456 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
459 static int const x86_64_int_return_registers[4] =
461 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
464 /* The "default" register map used in 64bit mode. */
465 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
467 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
468 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
469 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
470 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
471 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
472 8,9,10,11,12,13,14,15, /* extended integer registers */
473 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
476 /* Define the register numbers to be used in Dwarf debugging information.
477 The SVR4 reference port C compiler uses the following register numbers
478 in its Dwarf output code:
479 0 for %eax (gcc regno = 0)
480 1 for %ecx (gcc regno = 2)
481 2 for %edx (gcc regno = 1)
482 3 for %ebx (gcc regno = 3)
483 4 for %esp (gcc regno = 7)
484 5 for %ebp (gcc regno = 6)
485 6 for %esi (gcc regno = 4)
486 7 for %edi (gcc regno = 5)
487 The following three DWARF register numbers are never generated by
488 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
489 believes these numbers have these meanings.
490 8 for %eip (no gcc equivalent)
491 9 for %eflags (gcc regno = 17)
492 10 for %trapno (no gcc equivalent)
493 It is not at all clear how we should number the FP stack registers
494 for the x86 architecture. If the version of SDB on x86/svr4 were
495 a bit less brain dead with respect to floating-point then we would
496 have a precedent to follow with respect to DWARF register numbers
497 for x86 FP registers, but the SDB on x86/svr4 is so completely
498 broken with respect to FP registers that it is hardly worth thinking
499 of it as something to strive for compatibility with.
500 The version of x86/svr4 SDB I have at the moment does (partially)
501 seem to believe that DWARF register number 11 is associated with
502 the x86 register %st(0), but that's about all. Higher DWARF
503 register numbers don't seem to be associated with anything in
504 particular, and even for DWARF regno 11, SDB only seems to under-
505 stand that it should say that a variable lives in %st(0) (when
506 asked via an `=' command) if we said it was in DWARF regno 11,
507 but SDB still prints garbage when asked for the value of the
508 variable in question (via a `/' command).
509 (Also note that the labels SDB prints for various FP stack regs
510 when doing an `x' command are all wrong.)
511 Note that these problems generally don't affect the native SVR4
512 C compiler because it doesn't allow the use of -O with -g and
513 because when it is *not* optimizing, it allocates a memory
514 location for each floating-point variable, and the memory
515 location is what gets described in the DWARF AT_location
516 attribute for the variable in question.
517 Regardless of the severe mental illness of the x86/svr4 SDB, we
518 do something sensible here and we use the following DWARF
519 register numbers. Note that these are all stack-top-relative
521 11 for %st(0) (gcc regno = 8)
522 12 for %st(1) (gcc regno = 9)
523 13 for %st(2) (gcc regno = 10)
524 14 for %st(3) (gcc regno = 11)
525 15 for %st(4) (gcc regno = 12)
526 16 for %st(5) (gcc regno = 13)
527 17 for %st(6) (gcc regno = 14)
528 18 for %st(7) (gcc regno = 15)
530 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
532 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
533 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
534 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
535 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
536 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
537 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
538 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
541 /* Test and compare insns in i386.md store the information needed to
542 generate branch and scc insns here. */
544 rtx ix86_compare_op0 = NULL_RTX;
545 rtx ix86_compare_op1 = NULL_RTX;
547 /* The encoding characters for the four TLS models present in ELF. */
549 static char const tls_model_chars[] = " GLil";
551 #define MAX_386_STACK_LOCALS 3
552 /* Size of the register save area. */
553 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
555 /* Define the structure for the machine field in struct function. */
556 struct machine_function GTY(())
558 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
559 const char *some_ld_name;
560 int save_varrargs_registers;
561 int accesses_prev_frame;
564 #define ix86_stack_locals (cfun->machine->stack_locals)
565 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
567 /* Structure describing stack frame layout.
568 Stack grows downward:
574 saved frame pointer if frame_pointer_needed
575 <- HARD_FRAME_POINTER
581 > to_allocate <- FRAME_POINTER
593 int outgoing_arguments_size;
596 HOST_WIDE_INT to_allocate;
597 /* The offsets relative to ARG_POINTER. */
598 HOST_WIDE_INT frame_pointer_offset;
599 HOST_WIDE_INT hard_frame_pointer_offset;
600 HOST_WIDE_INT stack_pointer_offset;
603 /* Used to enable/disable debugging features. */
604 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
605 /* Code model option as passed by user. */
606 const char *ix86_cmodel_string;
608 enum cmodel ix86_cmodel;
610 const char *ix86_asm_string;
611 enum asm_dialect ix86_asm_dialect = ASM_ATT;
613 const char *ix86_tls_dialect_string;
614 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
616 /* Which unit we are generating floating point math for. */
617 enum fpmath_unit ix86_fpmath;
619 /* Which cpu are we scheduling for. */
620 enum processor_type ix86_cpu;
621 /* Which instruction set architecture to use. */
622 enum processor_type ix86_arch;
624 /* Strings to hold which cpu and instruction set architecture to use. */
625 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
626 const char *ix86_arch_string; /* for -march=<xxx> */
627 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
629 /* # of registers to use to pass arguments. */
630 const char *ix86_regparm_string;
632 /* true if sse prefetch instruction is not NOOP. */
633 int x86_prefetch_sse;
635 /* ix86_regparm_string as a number */
638 /* Alignment to use for loops and jumps: */
640 /* Power of two alignment for loops. */
641 const char *ix86_align_loops_string;
643 /* Power of two alignment for non-loop jumps. */
644 const char *ix86_align_jumps_string;
646 /* Power of two alignment for stack boundary in bytes. */
647 const char *ix86_preferred_stack_boundary_string;
649 /* Preferred alignment for stack boundary in bits. */
650 int ix86_preferred_stack_boundary;
652 /* Values 1-5: see jump.c */
653 int ix86_branch_cost;
654 const char *ix86_branch_cost_string;
656 /* Power of two alignment for functions. */
657 const char *ix86_align_funcs_string;
659 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
660 static char internal_label_prefix[16];
661 static int internal_label_prefix_len;
663 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
664 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
665 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
666 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
668 static const char *get_some_local_dynamic_name PARAMS ((void));
669 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
670 static rtx maybe_get_pool_constant PARAMS ((rtx));
671 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
672 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
674 static rtx get_thread_pointer PARAMS ((void));
675 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
676 static rtx gen_push PARAMS ((rtx));
677 static int memory_address_length PARAMS ((rtx addr));
678 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
679 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
680 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
681 static void ix86_dump_ppro_packet PARAMS ((FILE *));
682 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
683 static struct machine_function * ix86_init_machine_status PARAMS ((void));
684 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
685 static int ix86_nsaved_regs PARAMS ((void));
686 static void ix86_emit_save_regs PARAMS ((void));
687 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
688 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
689 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
690 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
691 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
692 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
693 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
694 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
695 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
696 static int ix86_issue_rate PARAMS ((void));
697 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
698 static void ix86_sched_init PARAMS ((FILE *, int, int));
699 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
700 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
701 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
702 static int ia32_multipass_dfa_lookahead PARAMS ((void));
703 static void ix86_init_mmx_sse_builtins PARAMS ((void));
707 rtx base, index, disp;
711 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
713 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
714 static const char *ix86_strip_name_encoding PARAMS ((const char *))
717 struct builtin_description;
718 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
720 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
722 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
723 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
724 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
725 static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
727 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
728 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
729 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
730 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
734 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
736 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
737 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
738 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
739 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
740 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
741 static int ix86_save_reg PARAMS ((unsigned int, int));
742 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
743 static int ix86_comp_type_attributes PARAMS ((tree, tree));
744 const struct attribute_spec ix86_attribute_table[];
745 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
746 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
747 static int ix86_value_regno PARAMS ((enum machine_mode));
749 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
750 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
753 /* Register class used for passing given 64bit part of the argument.
754 These represent classes as documented by the PS ABI, with the exception
755 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
756 use SF or DFmode move instead of DImode to avoid reformatting penalties.
758 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
759 whenever possible (upper half does contain padding).
761 enum x86_64_reg_class
764 X86_64_INTEGER_CLASS,
765 X86_64_INTEGERSI_CLASS,
774 static const char * const x86_64_reg_class_name[] =
775 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
777 #define MAX_CLASSES 4
778 static int classify_argument PARAMS ((enum machine_mode, tree,
779 enum x86_64_reg_class [MAX_CLASSES],
781 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
783 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
785 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
786 enum x86_64_reg_class));
788 /* Initialize the GCC target structure. */
789 #undef TARGET_ATTRIBUTE_TABLE
790 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
791 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
792 # undef TARGET_MERGE_DECL_ATTRIBUTES
793 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
796 #undef TARGET_COMP_TYPE_ATTRIBUTES
797 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
799 #undef TARGET_INIT_BUILTINS
800 #define TARGET_INIT_BUILTINS ix86_init_builtins
802 #undef TARGET_EXPAND_BUILTIN
803 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
805 #undef TARGET_ASM_FUNCTION_EPILOGUE
806 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
808 #undef TARGET_ASM_OPEN_PAREN
809 #define TARGET_ASM_OPEN_PAREN ""
810 #undef TARGET_ASM_CLOSE_PAREN
811 #define TARGET_ASM_CLOSE_PAREN ""
813 #undef TARGET_ASM_ALIGNED_HI_OP
814 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
815 #undef TARGET_ASM_ALIGNED_SI_OP
816 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
818 #undef TARGET_ASM_ALIGNED_DI_OP
819 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
822 #undef TARGET_ASM_UNALIGNED_HI_OP
823 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
824 #undef TARGET_ASM_UNALIGNED_SI_OP
825 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
826 #undef TARGET_ASM_UNALIGNED_DI_OP
827 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
829 #undef TARGET_SCHED_ADJUST_COST
830 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
831 #undef TARGET_SCHED_ISSUE_RATE
832 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
833 #undef TARGET_SCHED_VARIABLE_ISSUE
834 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
835 #undef TARGET_SCHED_INIT
836 #define TARGET_SCHED_INIT ix86_sched_init
837 #undef TARGET_SCHED_REORDER
838 #define TARGET_SCHED_REORDER ix86_sched_reorder
839 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
840 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
841 ia32_use_dfa_pipeline_interface
842 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
843 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
844 ia32_multipass_dfa_lookahead
847 #undef TARGET_HAVE_TLS
848 #define TARGET_HAVE_TLS true
851 struct gcc_target targetm = TARGET_INITIALIZER;
853 /* Sometimes certain combinations of command options do not make
854 sense on a particular target machine. You can define a macro
855 `OVERRIDE_OPTIONS' to take account of this. This macro, if
856 defined, is executed once just after all the command options have
859 Don't use this macro to turn on various extra optimizations for
860 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
866 /* Comes from final.c -- no real reason to change it. */
867 #define MAX_CODE_ALIGN 16
871 const struct processor_costs *cost; /* Processor costs */
872 const int target_enable; /* Target flags to enable. */
873 const int target_disable; /* Target flags to disable. */
874 const int align_loop; /* Default alignments. */
875 const int align_loop_max_skip;
876 const int align_jump;
877 const int align_jump_max_skip;
878 const int align_func;
879 const int branch_cost;
881 const processor_target_table[PROCESSOR_max] =
883 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
884 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
885 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
886 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
887 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
888 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
889 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
892 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
895 const char *const name; /* processor name or nickname. */
896 const enum processor_type processor;
902 PTA_PREFETCH_SSE = 8,
907 const processor_alias_table[] =
909 {"i386", PROCESSOR_I386, 0},
910 {"i486", PROCESSOR_I486, 0},
911 {"i586", PROCESSOR_PENTIUM, 0},
912 {"pentium", PROCESSOR_PENTIUM, 0},
913 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
914 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
915 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
916 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
917 {"i686", PROCESSOR_PENTIUMPRO, 0},
918 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
919 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
920 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
921 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
922 PTA_MMX | PTA_PREFETCH_SSE},
923 {"k6", PROCESSOR_K6, PTA_MMX},
924 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
925 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
926 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
928 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
929 | PTA_3DNOW | PTA_3DNOW_A},
930 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
931 | PTA_3DNOW_A | PTA_SSE},
932 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
933 | PTA_3DNOW_A | PTA_SSE},
934 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
935 | PTA_3DNOW_A | PTA_SSE},
938 int const pta_size = ARRAY_SIZE (processor_alias_table);
940 #ifdef SUBTARGET_OVERRIDE_OPTIONS
941 SUBTARGET_OVERRIDE_OPTIONS;
944 if (!ix86_cpu_string && ix86_arch_string)
945 ix86_cpu_string = ix86_arch_string;
946 if (!ix86_cpu_string)
947 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
948 if (!ix86_arch_string)
949 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
951 if (ix86_cmodel_string != 0)
953 if (!strcmp (ix86_cmodel_string, "small"))
954 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
956 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
957 else if (!strcmp (ix86_cmodel_string, "32"))
959 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
960 ix86_cmodel = CM_KERNEL;
961 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
962 ix86_cmodel = CM_MEDIUM;
963 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
964 ix86_cmodel = CM_LARGE;
966 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
972 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
974 if (ix86_asm_string != 0)
976 if (!strcmp (ix86_asm_string, "intel"))
977 ix86_asm_dialect = ASM_INTEL;
978 else if (!strcmp (ix86_asm_string, "att"))
979 ix86_asm_dialect = ASM_ATT;
981 error ("bad value (%s) for -masm= switch", ix86_asm_string);
983 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
984 error ("code model `%s' not supported in the %s bit mode",
985 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
986 if (ix86_cmodel == CM_LARGE)
987 sorry ("code model `large' not supported yet");
988 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
989 sorry ("%i-bit mode not compiled in",
990 (target_flags & MASK_64BIT) ? 64 : 32);
992 for (i = 0; i < pta_size; i++)
993 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
995 ix86_arch = processor_alias_table[i].processor;
996 /* Default cpu tuning to the architecture. */
997 ix86_cpu = ix86_arch;
998 if (processor_alias_table[i].flags & PTA_MMX
999 && !(target_flags & MASK_MMX_SET))
1000 target_flags |= MASK_MMX;
1001 if (processor_alias_table[i].flags & PTA_3DNOW
1002 && !(target_flags & MASK_3DNOW_SET))
1003 target_flags |= MASK_3DNOW;
1004 if (processor_alias_table[i].flags & PTA_3DNOW_A
1005 && !(target_flags & MASK_3DNOW_A_SET))
1006 target_flags |= MASK_3DNOW_A;
1007 if (processor_alias_table[i].flags & PTA_SSE
1008 && !(target_flags & MASK_SSE_SET))
1009 target_flags |= MASK_SSE;
1010 if (processor_alias_table[i].flags & PTA_SSE2
1011 && !(target_flags & MASK_SSE2_SET))
1012 target_flags |= MASK_SSE2;
1013 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1014 x86_prefetch_sse = true;
1019 error ("bad value (%s) for -march= switch", ix86_arch_string);
1021 for (i = 0; i < pta_size; i++)
1022 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1024 ix86_cpu = processor_alias_table[i].processor;
1027 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1028 x86_prefetch_sse = true;
1030 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1033 ix86_cost = &size_cost;
1035 ix86_cost = processor_target_table[ix86_cpu].cost;
1036 target_flags |= processor_target_table[ix86_cpu].target_enable;
1037 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1039 /* Arrange to set up i386_stack_locals for all functions. */
1040 init_machine_status = ix86_init_machine_status;
1042 /* Validate -mregparm= value. */
1043 if (ix86_regparm_string)
1045 i = atoi (ix86_regparm_string);
1046 if (i < 0 || i > REGPARM_MAX)
1047 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1053 ix86_regparm = REGPARM_MAX;
/* NOTE(review): fragment of override_options(); several original lines are
   missing from this extraction (closing braces, surrounding context), and
   each line carries a stray leading line number from the extraction.  */
/* Legacy handling of the deprecated -malign-loops/-malign-jumps/
   -malign-functions switches: warn, then honor the value only when the
   corresponding -falign-* variable was not already set.  Each accepted
   value I is an exponent: the alignment used is 1 << I.  */
1055 /* If the user has provided any of the -malign-* options,
1056 warn and use that value only if -falign-* is not set.
1057 Remove this code in GCC 3.2 or later. */
1058 if (ix86_align_loops_string)
1060 warning ("-malign-loops is obsolete, use -falign-loops");
1061 if (align_loops == 0)
1063 i = atoi (ix86_align_loops_string);
1064 if (i < 0 || i > MAX_CODE_ALIGN)
1065 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1067 align_loops = 1 << i;
1071 if (ix86_align_jumps_string)
1073 warning ("-malign-jumps is obsolete, use -falign-jumps");
1074 if (align_jumps == 0)
1076 i = atoi (ix86_align_jumps_string);
1077 if (i < 0 || i > MAX_CODE_ALIGN)
/* BUG(review): copy-paste error — this branch validates -malign-jumps but
   the diagnostic below names "-malign-loops".  Message should read
   "-malign-jumps=%d is not between 0 and %d".  */
1078 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1080 align_jumps = 1 << i;
1084 if (ix86_align_funcs_string)
1086 warning ("-malign-functions is obsolete, use -falign-functions");
1087 if (align_functions == 0)
1089 i = atoi (ix86_align_funcs_string);
1090 if (i < 0 || i > MAX_CODE_ALIGN)
/* BUG(review): same copy-paste error — this validates -malign-functions yet
   the message names "-malign-loops".  Should read
   "-malign-functions=%d is not between 0 and %d".  */
1091 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1093 align_functions = 1 << i;
1097 /* Default align_* from the processor table. */
1098 if (align_loops == 0)
1100 align_loops = processor_target_table[ix86_cpu].align_loop;
1101 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1103 if (align_jumps == 0)
1105 align_jumps = processor_target_table[ix86_cpu].align_jump;
1106 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1108 if (align_functions == 0)
1110 align_functions = processor_target_table[ix86_cpu].align_func;
1113 /* Validate -mpreferred-stack-boundary= value, or provide default.
1114 The default of 128 bits is for Pentium III's SSE __m128, but we
1115 don't want additional code to keep the stack aligned when
1116 optimizing for code size. */
1117 ix86_preferred_stack_boundary = (optimize_size
1118 ? TARGET_64BIT ? 64 : 32
1120 if (ix86_preferred_stack_boundary_string)
1122 i = atoi (ix86_preferred_stack_boundary_string);
1123 if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
1124 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1125 TARGET_64BIT ? 3 : 2);
1127 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1130 /* Validate -mbranch-cost= value, or provide default. */
1131 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1132 if (ix86_branch_cost_string)
1134 i = atoi (ix86_branch_cost_string);
1136 error ("-mbranch-cost=%d is not between 0 and 5", i);
1138 ix86_branch_cost = i;
1141 if (ix86_tls_dialect_string)
1143 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1144 ix86_tls_dialect = TLS_DIALECT_GNU;
1145 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1146 ix86_tls_dialect = TLS_DIALECT_SUN;
1148 error ("bad value (%s) for -mtls-dialect= switch",
1149 ix86_tls_dialect_string);
1153 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1155 /* Keep nonleaf frame pointers. */
1156 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1157 flag_omit_frame_pointer = 1;
1159 /* If we're doing fast math, we don't care about comparison order
1160 wrt NaNs. This lets us use a shorter comparison sequence. */
1161 if (flag_unsafe_math_optimizations)
1162 target_flags &= ~MASK_IEEE_FP;
1164 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1165 since the insns won't need emulation. */
1166 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1167 target_flags &= ~MASK_NO_FANCY_MATH_387;
1171 if (TARGET_ALIGN_DOUBLE)
1172 error ("-malign-double makes no sense in the 64bit mode");
1174 error ("-mrtd calling convention not supported in the 64bit mode");
1175 /* Enable by default the SSE and MMX builtins. */
1176 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1177 ix86_fpmath = FPMATH_SSE;
1180 ix86_fpmath = FPMATH_387;
1182 if (ix86_fpmath_string != 0)
1184 if (! strcmp (ix86_fpmath_string, "387"))
1185 ix86_fpmath = FPMATH_387;
1186 else if (! strcmp (ix86_fpmath_string, "sse"))
1190 warning ("SSE instruction set disabled, using 387 arithmetics");
1191 ix86_fpmath = FPMATH_387;
1194 ix86_fpmath = FPMATH_SSE;
1196 else if (! strcmp (ix86_fpmath_string, "387,sse")
1197 || ! strcmp (ix86_fpmath_string, "sse,387"))
1201 warning ("SSE instruction set disabled, using 387 arithmetics");
1202 ix86_fpmath = FPMATH_387;
1204 else if (!TARGET_80387)
1206 warning ("387 instruction set disabled, using SSE arithmetics");
1207 ix86_fpmath = FPMATH_SSE;
1210 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1213 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1216 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1220 target_flags |= MASK_MMX;
1221 x86_prefetch_sse = true;
1224 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1227 target_flags |= MASK_MMX;
1228 /* If we are targetting the Athlon architecture, enable the 3Dnow/MMX
1229 extensions it adds. */
1230 if (x86_3dnow_a & (1 << ix86_arch))
1231 target_flags |= MASK_3DNOW_A;
1233 if ((x86_accumulate_outgoing_args & CPUMASK)
1234 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
1236 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1238 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1241 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1242 p = strchr (internal_label_prefix, 'X');
1243 internal_label_prefix_len = p - internal_label_prefix;
/* Set default optimization-level-dependent flags for the i386 port
   (this is the body of the OPTIMIZATION_OPTIONS target hook).
   NOTE(review): extraction gaps — the function header, braces and the
   guard conditions for several of the assignments below are missing from
   this view; the grouping of statements cannot be confirmed from here.  */
1249 optimization_options (level, size)
1251 int size ATTRIBUTE_UNUSED;
1253 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1254 make the problem with not enough registers even worse. */
1255 #ifdef INSN_SCHEDULING
1257 flag_schedule_insns = 0;
1259 if (TARGET_64BIT && optimize >= 1)
1260 flag_omit_frame_pointer = 1;
/* Presumably these are inside a TARGET_64BIT block: the 64-bit ABI does
   not use PCC struct return and always has unwind tables -- TODO confirm
   against the missing surrounding conditionals.  */
1263 flag_pcc_struct_return = 0;
1264 flag_asynchronous_unwind_tables = 1;
1267 flag_omit_frame_pointer = 0;
/* Machine-specific attribute table consumed by the attribute framework.
   Each entry: name, min/max argument count, whether a decl/type/function
   type is required, and the handler invoked to validate uses.
   NOTE(review): the opening brace of the initializer and the #endif
   matching the #ifdef below are lost in extraction gaps.  */
1270 /* Table of valid machine attributes. */
1271 const struct attribute_spec ix86_attribute_table[] =
1273 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1274 /* Stdcall attribute says callee is responsible for popping arguments
1275 if they are not variable. */
1276 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1277 /* Cdecl attribute says the callee is a normal C declaration */
1278 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1279 /* Regparm attribute specifies how many integer arguments are to be
1280 passed in registers. */
1281 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1282 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1283 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1284 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1285 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
/* Sentinel terminating the table.  */
1287 { NULL, 0, 0, false, false, false, NULL }
/* Attribute handler for "cdecl" and "stdcall": warn and refuse to attach
   the attribute when applied to something that is not a function type
   (or related decl), and -- in the second warning below, whose guard is
   missing from this view (presumably a TARGET_64BIT check; the 64-bit ABI
   has no stdcall/cdecl distinction -- TODO confirm) -- ignore it outright.
   NOTE(review): extraction gaps — return type, some parameter
   declarations, braces and a return statement are missing.  */
1290 /* Handle a "cdecl" or "stdcall" attribute;
1291 arguments as in struct attribute_spec.handler. */
1293 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1296 tree args ATTRIBUTE_UNUSED;
1297 int flags ATTRIBUTE_UNUSED;
1300 if (TREE_CODE (*node) != FUNCTION_TYPE
1301 && TREE_CODE (*node) != METHOD_TYPE
1302 && TREE_CODE (*node) != FIELD_DECL
1303 && TREE_CODE (*node) != TYPE_DECL)
1305 warning ("`%s' attribute only applies to functions",
1306 IDENTIFIER_POINTER (name));
1307 *no_add_attrs = true;
1312 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1313 *no_add_attrs = true;
/* Attribute handler for "regparm(N)": validate that the attribute is
   applied to a function type and that its single argument is an integer
   constant no larger than REGPARM_MAX.  On any violation, warn and set
   *no_add_attrs so the attribute is dropped.
   NOTE(review): extraction gaps — return type, parameter declarations,
   braces and the final return are missing from this view.  */
1319 /* Handle a "regparm" attribute;
1320 arguments as in struct attribute_spec.handler. */
1322 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1326 int flags ATTRIBUTE_UNUSED;
1329 if (TREE_CODE (*node) != FUNCTION_TYPE
1330 && TREE_CODE (*node) != METHOD_TYPE
1331 && TREE_CODE (*node) != FIELD_DECL
1332 && TREE_CODE (*node) != TYPE_DECL)
1334 warning ("`%s' attribute only applies to functions",
1335 IDENTIFIER_POINTER (name));
1336 *no_add_attrs = true;
/* The attribute argument itself: must be an INTEGER_CST in range.  */
1342 cst = TREE_VALUE (args);
1343 if (TREE_CODE (cst) != INTEGER_CST)
1345 warning ("`%s' attribute requires an integer constant argument",
1346 IDENTIFIER_POINTER (name));
1347 *no_add_attrs = true;
1349 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1351 warning ("argument to `%s' attribute larger than %d",
1352 IDENTIFIER_POINTER (name), REGPARM_MAX);
1353 *no_add_attrs = true;
/* COMP_TYPE_ATTRIBUTES hook: decide whether two function types'
   machine attributes are compatible.  The only check visible here is the
   calling-convention one: with -mrtd the default is stdcall, so the
   attribute that signals "not the default" is "cdecl", and vice versa.
   NOTE(review): extraction gaps — return statements and the function's
   opening brace are missing from this view.  */
1360 /* Return 0 if the attributes for two types are incompatible, 1 if they
1361 are compatible, and 2 if they are nearly compatible (which causes a
1362 warning to be generated). */
1365 ix86_comp_type_attributes (type1, type2)
1369 /* Check for mismatch of non-default calling convention. */
1370 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1372 if (TREE_CODE (type1) != FUNCTION_TYPE)
1375 /* Check for mismatched return types (cdecl vs stdcall). */
/* Both sides are normalized with `!' so presence is compared as booleans.  */
1376 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1377 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
/* NOTE(review): extraction gaps — parameter declarations, several return
   statements and closing braces are missing from this view.  */
1382 /* Value is the number of bytes of arguments automatically
1383 popped when returning from a subroutine call.
1384 FUNDECL is the declaration node of the function (as a tree),
1385 FUNTYPE is the data type of the function (as a tree),
1386 or for a library call it is an identifier node for the subroutine name.
1387 SIZE is the number of bytes of arguments passed on the stack.
1389 On the 80386, the RTD insn may be used to pop them if the number
1390 of args is fixed, but if the number is variable then the caller
1391 must pop them all. RTD can't be used for library calls now
1392 because the library is compiled with the Unix compiler.
1393 Use of RTD is a selectable option, since it is incompatible with
1394 standard Unix calling sequences. If the option is not selected,
1395 the caller must always pop the args.
1397 The attribute stdcall is equivalent to RTD on a per module basis. */
1400 ix86_return_pops_args (fundecl, funtype, size)
/* -mrtd applies only to real function decls, not library-call
   identifier nodes.  */
1405 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1407 /* Cdecl functions override -mrtd, and never pop the stack. */
1408 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1410 /* Stdcall functions will pop the stack if not variable args. */
1411 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
/* A fixed-arg prototype ends in void_type_node; an unprototyped or
   varargs function does not, and then the callee must not pop.  */
1415 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1416 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1417 == void_type_node)))
1421 /* Lose any fake structure return argument if it is passed on the stack. */
1422 if (aggregate_value_p (TREE_TYPE (funtype))
1425 int nregs = ix86_regparm;
/* regparm attribute may override the default register count.  */
1429 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));
1432 nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
/* Pop just the hidden struct-return pointer.  */
1436 return GET_MODE_SIZE (Pmode);
/* NOTE(review): extraction gaps — the function header's return type,
   parameter declaration, braces, the TARGET_64BIT split and some return
   statements are missing from this view.  */
1442 /* Argument support functions. */
1444 /* Return true when register may be used to pass function parameters. */
1446 ix86_function_arg_regno_p (regno)
/* 32-bit case: the low REGPARM_MAX integer regs, plus SSE regs when SSE
   argument passing is enabled and the register is not fixed.  */
1451 return (regno < REGPARM_MAX
1452 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1453 if (SSE_REGNO_P (regno) && TARGET_SSE)
1455 /* RAX is used as hidden argument to va_arg functions. */
/* 64-bit case: scan the ABI-defined integer parameter register list.  */
1458 for (i = 0; i < REGPARM_MAX; i++)
1459 if (regno == x86_64_int_parameter_registers[i])
/* NOTE(review): extraction gaps — braces, the `*cum = zero_cum;' reset and
   several conditional guards are missing from this view.  */
1464 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1465 for a call to a function whose data type is FNTYPE.
1466 For a library call, FNTYPE is 0. */
1469 init_cumulative_args (cum, fntype, libname)
1470 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1471 tree fntype; /* tree ptr for function decl */
1472 rtx libname; /* SYMBOL_REF of library name or 0 */
1474 static CUMULATIVE_ARGS zero_cum;
1475 tree param, next_param;
/* Optional tracing enabled by -mdebug-arg.  */
1477 if (TARGET_DEBUG_ARG)
1479 fprintf (stderr, "\ninit_cumulative_args (");
1481 fprintf (stderr, "fntype code = %s, ret code = %s",
1482 tree_code_name[(int) TREE_CODE (fntype)],
1483 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1485 fprintf (stderr, "no fntype");
1488 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1493 /* Set up the number of registers to use for passing arguments. */
1494 cum->nregs = ix86_regparm;
1495 cum->sse_nregs = SSE_REGPARM_MAX;
/* On 32-bit targets a regparm attribute on the called type overrides
   the global -mregparm default.  */
1496 if (fntype && !TARGET_64BIT)
1498 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1501 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1503 cum->maybe_vaarg = false;
1505 /* Determine if this function has variable arguments. This is
1506 indicated by the last argument being 'void_type_node' if there
1507 are no variable arguments. If there are variable arguments, then
1508 we won't pass anything in registers */
1512 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1513 param != 0; param = next_param)
1515 next_param = TREE_CHAIN (param);
/* Last list entry not being void_type_node marks a varargs prototype.  */
1516 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1520 cum->maybe_vaarg = true;
/* No prototype information at all: assume possibly-varargs.  */
1524 if ((!fntype && !libname)
1525 || (fntype && !TYPE_ARG_TYPES (fntype)))
1526 cum->maybe_vaarg = 1;
1528 if (TARGET_DEBUG_ARG)
1529 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1534 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1535 of this code is to classify each 8bytes of incoming argument by the register
1536 class and assign registers accordingly. */
1538 /* Return the union class of CLASS1 and CLASS2.
1539 See the x86-64 PS ABI for details. */
/* NOTE(review): extraction gaps — the opening brace and the `return'
   statements for rules #1 and #2 are missing from this view; each rule's
   result follows directly from the psABI merge rules in the comments.  */
1541 static enum x86_64_reg_class
1542 merge_classes (class1, class2)
1543 enum x86_64_reg_class class1, class2;
1545 /* Rule #1: If both classes are equal, this is the resulting class. */
1546 if (class1 == class2)
1549 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1551 if (class1 == X86_64_NO_CLASS)
1553 if (class2 == X86_64_NO_CLASS)
1556 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1557 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1558 return X86_64_MEMORY_CLASS;
1560 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI + SSESF stays a 4-byte quantity, hence INTEGERSI rather
   than full INTEGER.  */
1561 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1562 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1563 return X86_64_INTEGERSI_CLASS;
1564 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1565 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1566 return X86_64_INTEGER_CLASS;
1568 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1569 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1570 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1571 return X86_64_MEMORY_CLASS;
1573 /* Rule #6: Otherwise class SSE is used. */
1574 return X86_64_SSE_CLASS;
/* NOTE(review): extraction gaps — the function header's return type, many
   braces, several `return 0;' statements for the pass-in-memory cases, and
   the switch statement dispatching the atomic-type classification near the
   end are all missing from this view.  */
1577 /* Classify the argument of type TYPE and mode MODE.
1578 CLASSES will be filled by the register class used to pass each word
1579 of the operand. The number of words is returned. In case the parameter
1580 should be passed in memory, 0 is returned. As a special case for zero
1581 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1583 BIT_OFFSET is used internally for handling records and specifies offset
1584 of the offset in bits modulo 256 to avoid overflow cases.
1586 See the x86-64 PS ABI for details.
1590 classify_argument (mode, type, classes, bit_offset)
1591 enum machine_mode mode;
1593 enum x86_64_reg_class classes[MAX_CLASSES];
1597 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1598 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1600 /* Variable sized entities are always passed/returned in memory. */
1604 if (type && AGGREGATE_TYPE_P (type))
1608 enum x86_64_reg_class subclasses[MAX_CLASSES];
1610 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
/* Start every word as NO_CLASS; field classification merges into this.  */
1614 for (i = 0; i < words; i++)
1615 classes[i] = X86_64_NO_CLASS;
1617 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1618 signalize memory class, so handle it as special case. */
1621 classes[0] = X86_64_NO_CLASS;
1625 /* Classify each field of record and merge classes. */
1626 if (TREE_CODE (type) == RECORD_TYPE)
1628 /* For classes first merge in the field of the subclasses. */
1629 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1631 tree bases = TYPE_BINFO_BASETYPES (type);
1632 int n_bases = TREE_VEC_LENGTH (bases);
1635 for (i = 0; i < n_bases; ++i)
1637 tree binfo = TREE_VEC_ELT (bases, i);
1639 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1640 tree type = BINFO_TYPE (binfo);
1642 num = classify_argument (TYPE_MODE (type),
1644 (offset + bit_offset) % 256);
1647 for (i = 0; i < num; i++)
1649 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1651 merge_classes (subclasses[i], classes[i + pos]);
1655 /* And now merge the fields of structure. */
1656 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1658 if (TREE_CODE (field) == FIELD_DECL)
1662 /* Bitfields are always classified as integer. Handle them
1663 early, since later code would consider them to be
1664 misaligned integers. */
1665 if (DECL_BIT_FIELD (field))
1667 for (i = int_bit_position (field) / 8 / 8;
1668 i < (int_bit_position (field)
1669 + tree_low_cst (DECL_SIZE (field), 0)
1672 merge_classes (X86_64_INTEGER_CLASS,
1677 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1678 TREE_TYPE (field), subclasses,
1679 (int_bit_position (field)
1680 + bit_offset) % 256);
1683 for (i = 0; i < num; i++)
1686 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1688 merge_classes (subclasses[i], classes[i + pos]);
1694 /* Arrays are handled as small records. */
1695 else if (TREE_CODE (type) == ARRAY_TYPE)
/* Classify the element type once, then replicate across all words.  */
1698 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1699 TREE_TYPE (type), subclasses, bit_offset);
1703 /* The partial classes are now full classes. */
1704 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1705 subclasses[0] = X86_64_SSE_CLASS;
1706 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1707 subclasses[0] = X86_64_INTEGER_CLASS;
1709 for (i = 0; i < words; i++)
1710 classes[i] = subclasses[i % num];
1712 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1713 else if (TREE_CODE (type) == UNION_TYPE
1714 || TREE_CODE (type) == QUAL_UNION_TYPE)
1716 /* For classes first merge in the field of the subclasses. */
1717 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1719 tree bases = TYPE_BINFO_BASETYPES (type);
1720 int n_bases = TREE_VEC_LENGTH (bases);
1723 for (i = 0; i < n_bases; ++i)
1725 tree binfo = TREE_VEC_ELT (bases, i);
1727 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1728 tree type = BINFO_TYPE (binfo);
1730 num = classify_argument (TYPE_MODE (type),
1732 (offset + (bit_offset % 64)) % 256);
1735 for (i = 0; i < num; i++)
1737 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1739 merge_classes (subclasses[i], classes[i + pos]);
/* Union members all start at offset 0, so merge word-for-word.  */
1743 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1745 if (TREE_CODE (field) == FIELD_DECL)
1748 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1749 TREE_TYPE (field), subclasses,
1753 for (i = 0; i < num; i++)
1754 classes[i] = merge_classes (subclasses[i], classes[i]);
1761 /* Final merger cleanup. */
1762 for (i = 0; i < words; i++)
1764 /* If one class is MEMORY, everything should be passed in
1766 if (classes[i] == X86_64_MEMORY_CLASS)
1769 /* The X86_64_SSEUP_CLASS should be always preceded by
1770 X86_64_SSE_CLASS. */
1771 if (classes[i] == X86_64_SSEUP_CLASS
1772 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1773 classes[i] = X86_64_SSE_CLASS;
1775 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1776 if (classes[i] == X86_64_X87UP_CLASS
1777 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1778 classes[i] = X86_64_SSE_CLASS;
1783 /* Compute alignment needed. We align all types to natural boundaries with
1784 exception of XFmode that is aligned to 64bits. */
1785 if (mode != VOIDmode && mode != BLKmode)
1787 int mode_alignment = GET_MODE_BITSIZE (mode);
1790 mode_alignment = 128;
1791 else if (mode == XCmode)
1792 mode_alignment = 256;
1793 /* Misaligned fields are always returned in memory. */
1794 if (bit_offset % mode_alignment)
1798 /* Classification of atomic types. */
/* The following cases classify scalar modes; the switch dispatching on
   `mode' is lost in an extraction gap, so case labels are not visible.  */
1808 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1809 classes[0] = X86_64_INTEGERSI_CLASS;
1811 classes[0] = X86_64_INTEGER_CLASS;
1815 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1818 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1819 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1822 if (!(bit_offset % 64))
1823 classes[0] = X86_64_SSESF_CLASS;
1825 classes[0] = X86_64_SSE_CLASS;
1828 classes[0] = X86_64_SSEDF_CLASS;
1831 classes[0] = X86_64_X87_CLASS;
1832 classes[1] = X86_64_X87UP_CLASS;
1835 classes[0] = X86_64_X87_CLASS;
1836 classes[1] = X86_64_X87UP_CLASS;
1837 classes[2] = X86_64_X87_CLASS;
1838 classes[3] = X86_64_X87UP_CLASS;
1841 classes[0] = X86_64_SSEDF_CLASS;
1842 classes[1] = X86_64_SSEDF_CLASS;
1845 classes[0] = X86_64_SSE_CLASS;
1853 classes[0] = X86_64_SSE_CLASS;
1854 classes[1] = X86_64_SSEUP_CLASS;
1860 classes[0] = X86_64_SSE_CLASS;
/* NOTE(review): extraction gaps — the function header's return type, the
   switch statement around the case labels, the accumulating statements
   (presumably `(*int_nregs)++' / `(*sse_nregs)++' -- TODO confirm) and the
   return statements are missing from this view.  */
1870 /* Examine the argument and return set number of register required in each
1871 class. Return 0 iff parameter should be passed in memory. */
1873 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1874 enum machine_mode mode;
1876 int *int_nregs, *sse_nregs;
1879 enum x86_64_reg_class class[MAX_CLASSES];
1880 int n = classify_argument (mode, type, class, 0);
/* Walk the classified words backwards, tallying how many integer and
   SSE registers each class consumes.  */
1886 for (n--; n >= 0; n--)
1889 case X86_64_INTEGER_CLASS:
1890 case X86_64_INTEGERSI_CLASS:
1893 case X86_64_SSE_CLASS:
1894 case X86_64_SSESF_CLASS:
1895 case X86_64_SSEDF_CLASS:
1898 case X86_64_NO_CLASS:
1899 case X86_64_SSEUP_CLASS:
1901 case X86_64_X87_CLASS:
1902 case X86_64_X87UP_CLASS:
1906 case X86_64_MEMORY_CLASS:
/* NOTE(review): extraction gaps — the return type, several declarations,
   braces, the switch statements and a number of case bodies are missing
   from this view.  */
1911 /* Construct container for the argument used by GCC interface. See
1912 FUNCTION_ARG for the detailed description. */
1914 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1915 enum machine_mode mode;
1918 int nintregs, nsseregs;
1922 enum machine_mode tmpmode;
1924 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1925 enum x86_64_reg_class class[MAX_CLASSES];
1929 int needed_sseregs, needed_intregs;
1930 rtx exp[MAX_CLASSES];
1933 n = classify_argument (mode, type, class, 0);
/* Optional tracing enabled by -mdebug-arg.  */
1934 if (TARGET_DEBUG_ARG)
1937 fprintf (stderr, "Memory class\n");
1940 fprintf (stderr, "Classes:");
1941 for (i = 0; i < n; i++)
1943 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1945 fprintf (stderr, "\n");
/* Pass in memory when classification says so or registers ran out.  */
1950 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1952 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1955 /* First construct simple cases. Avoid SCmode, since we want to use
1956 single register to pass this type. */
1957 if (n == 1 && mode != SCmode)
1960 case X86_64_INTEGER_CLASS:
1961 case X86_64_INTEGERSI_CLASS:
1962 return gen_rtx_REG (mode, intreg[0]);
1963 case X86_64_SSE_CLASS:
1964 case X86_64_SSESF_CLASS:
1965 case X86_64_SSEDF_CLASS:
1966 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1967 case X86_64_X87_CLASS:
1968 return gen_rtx_REG (mode, FIRST_STACK_REG);
1969 case X86_64_NO_CLASS:
1970 /* Zero sized array, struct or class. */
1975 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1976 return gen_rtx_REG (mode, SSE_REGNO (sse_regno))
1978 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1979 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1980 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1981 && class[1] == X86_64_INTEGER_CLASS
1982 && (mode == CDImode || mode == TImode)
1983 && intreg[0] + 1 == intreg[1])
1984 return gen_rtx_REG (mode, intreg[0]);
1986 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1987 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1988 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1990 /* Otherwise figure out the entries of the PARALLEL. */
1991 for (i = 0; i < n; i++)
1995 case X86_64_NO_CLASS:
1997 case X86_64_INTEGER_CLASS:
1998 case X86_64_INTEGERSI_CLASS:
1999 /* Merge TImodes on aligned occasions here too. */
2000 if (i * 8 + 8 > bytes)
2001 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2002 else if (class[i] == X86_64_INTEGERSI_CLASS)
2006 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2007 if (tmpmode == BLKmode)
2009 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2010 gen_rtx_REG (tmpmode, *intreg),
2014 case X86_64_SSESF_CLASS:
2015 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2016 gen_rtx_REG (SFmode,
2017 SSE_REGNO (sse_regno)),
2021 case X86_64_SSEDF_CLASS:
2022 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2023 gen_rtx_REG (DFmode,
2024 SSE_REGNO (sse_regno)),
2028 case X86_64_SSE_CLASS:
/* NOTE(review): when i == n - 1 this reads class[n], one entry past the
   classified count (still inside the MAX_CLASSES array, but potentially
   stale data); the guard should likely be `i < n - 1' — confirm.  */
2029 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
2030 tmpmode = TImode, i++;
2033 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2034 gen_rtx_REG (tmpmode,
2035 SSE_REGNO (sse_regno)),
/* Wrap the collected EXPR_LISTs into the final PARALLEL.  */
2043 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2044 for (i = 0; i < nexps; i++)
2045 XVECEXP (ret, 0, i) = exp [i];
/* NOTE(review): extraction gaps — braces, the TARGET_64BIT/32-bit split
   conditionals and several zeroing statements are missing from this view.  */
2049 /* Update the data in CUM to advance over an argument
2050 of mode MODE and data type TYPE.
2051 (TYPE is null for libcalls where that information may not be available.) */
2054 function_arg_advance (cum, mode, type, named)
2055 CUMULATIVE_ARGS *cum; /* current arg information */
2056 enum machine_mode mode; /* current arg mode */
2057 tree type; /* type of the argument or 0 if lib support */
2058 int named; /* whether or not the argument was named */
2061 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2062 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2064 if (TARGET_DEBUG_ARG)
2066 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2067 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
/* 64-bit path: consume exactly the registers examine_argument says the
   argument needs, or fall back to stack words.  */
2070 int int_nregs, sse_nregs;
2071 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2072 cum->words += words;
2073 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2075 cum->nregs -= int_nregs;
2076 cum->sse_nregs -= sse_nregs;
2077 cum->regno += int_nregs;
2078 cum->sse_regno += sse_nregs;
2081 cum->words += words;
/* 32-bit path: TImode vectors consume one SSE register ...  */
2085 if (TARGET_SSE && mode == TImode)
2087 cum->sse_words += words;
2088 cum->sse_nregs -= 1;
2089 cum->sse_regno += 1;
2090 if (cum->sse_nregs <= 0)
/* ... while integer arguments consume `words' regparm registers.  */
2098 cum->words += words;
2099 cum->nregs -= words;
2100 cum->regno += words;
2102 if (cum->nregs <= 0)
2112 /* Define where to put the arguments to a function.
2113 Value is zero to push the argument on the stack,
2114 or a hard register in which to store the argument.
2116 MODE is the argument's machine mode.
2117 TYPE is the data type of the argument (as a tree).
2118 This is null for libcalls where that information may
2120 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2121 the preceding args and about the function being called.
2122 NAMED is nonzero if this argument is a named parameter
2123 (otherwise it is an extra parameter matching an ellipsis). */
2126 function_arg (cum, mode, type, named)
2127 CUMULATIVE_ARGS *cum; /* current arg information */
2128 enum machine_mode mode; /* current arg mode */
2129 tree type; /* type of the argument or 0 if lib support */
2130 int named; /* != 0 for normal args, == 0 for ... args */
2134 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2135 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2137 /* Handle a hidden AL argument containing number of registers for varargs
2138 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2140 if (mode == VOIDmode)
2143 return GEN_INT (cum->maybe_vaarg
2144 ? (cum->sse_nregs < 0
/* 64-bit: let construct_container build a (possibly PARALLEL) register rtx.  */
2152 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2153 &x86_64_int_parameter_registers [cum->regno],
2158 /* For now, pass fp/complex values on the stack. */
2167 if (words <= cum->nregs)
2168 ret = gen_rtx_REG (mode, cum->regno);
2172 ret = gen_rtx_REG (mode, cum->sse_regno);
2176 if (TARGET_DEBUG_ARG)
2179 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2180 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2183 print_simple_rtl (stderr, ret);
2185 fprintf (stderr, ", stack");
2187 fprintf (stderr, " )\n");
2193 /* Gives the alignment boundary, in bits, of an argument with the specified mode
/* Alignment comes from the tree type when available, else from the mode,
   and is never below PARM_BOUNDARY.  */
2197 ix86_function_arg_boundary (mode, type)
2198 enum machine_mode mode;
2203 return PARM_BOUNDARY;
2205 align = TYPE_ALIGN (type);
2207 align = GET_MODE_ALIGNMENT (mode);
2208 if (align < PARM_BOUNDARY)
2209 align = PARM_BOUNDARY;
2215 /* Return true if N is a possible register number of function value. */
2217 ix86_function_value_regno_p (regno)
/* NOTE(review): two return statements are visible; presumably the first is
   inside an elided target-specific guard (e.g. TARGET_64BIT) -- confirm.  */
2222 return ((regno) == 0
2223 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2224 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2226 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2227 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2228 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2231 /* Define how to find the value returned by a function.
2232 VALTYPE is the data type of the value (as a tree).
2233 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2234 otherwise, FUNC is 0. */
2236 ix86_function_value (valtype)
2241 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2242 REGPARM_MAX, SSE_REGPARM_MAX,
2243 x86_64_int_return_registers, 0);
2244 /* For zero sized structures, construct_container returns NULL, but we need
2245 to keep the rest of the compiler happy by returning a meaningful value. */
2247 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
/* Non-64-bit path: pick the return register from the value's mode.  */
2251 return gen_rtx_REG (TYPE_MODE (valtype),
2252 ix86_value_regno (TYPE_MODE (valtype)));
2255 /* Return false iff type is returned in memory. */
/* NOTE(review): the visible logic suggests a nonzero result means "must be
   returned in memory" (first return is true when the value cannot be passed
   in registers) -- the "false iff" above may be inverted; confirm against
   the RETURN_IN_MEMORY callers.  */
2257 ix86_return_in_memory (type)
2260 int needed_intregs, needed_sseregs;
2263 return !examine_argument (TYPE_MODE (type), type, 1,
2264 &needed_intregs, &needed_sseregs);
2268 if (TYPE_MODE (type) == BLKmode
2269 || (VECTOR_MODE_P (TYPE_MODE (type))
2270 && int_size_in_bytes (type) == 8)
2271 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2272 && TYPE_MODE (type) != TFmode
2273 && !VECTOR_MODE_P (TYPE_MODE (type))))
2279 /* Define how to find the value returned by a library function
2280 assuming the value has mode MODE. */
2282 ix86_libcall_value (mode)
2283 enum machine_mode mode;
/* NOTE(review): the mode dispatch conditions between these returns are
   elided in this listing.  */
2293 return gen_rtx_REG (mode, FIRST_SSE_REG);
2296 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2298 return gen_rtx_REG (mode, 0);
2302 return gen_rtx_REG (mode, ix86_value_regno (mode));
2305 /* Given a mode, return the register to use for a return value. */
2308 ix86_value_regno (mode)
2309 enum machine_mode mode;
2311 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2312 return FIRST_FLOAT_REG;
2313 if (mode == TImode || VECTOR_MODE_P (mode))
2314 return FIRST_SSE_REG;
/* Otherwise falls through to the (elided) default integer return register.  */
2318 /* Create the va_list data type. */
2321 ix86_build_va_list ()
2323 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2325 /* For i386 we use plain pointer to argument area. */
2327 return build_pointer_type (char_type_node);
/* x86-64: build the four-field __va_list_tag record required by the ABI:
   gp_offset, fp_offset, overflow_arg_area, reg_save_area.  */
2329 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2330 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2332 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2333 unsigned_type_node);
2334 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2335 unsigned_type_node);
2336 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2338 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2341 DECL_FIELD_CONTEXT (f_gpr) = record;
2342 DECL_FIELD_CONTEXT (f_fpr) = record;
2343 DECL_FIELD_CONTEXT (f_ovf) = record;
2344 DECL_FIELD_CONTEXT (f_sav) = record;
2346 TREE_CHAIN (record) = type_decl;
2347 TYPE_NAME (record) = type_decl;
2348 TYPE_FIELDS (record) = f_gpr;
2349 TREE_CHAIN (f_gpr) = f_fpr;
2350 TREE_CHAIN (f_fpr) = f_ovf;
2351 TREE_CHAIN (f_ovf) = f_sav;
2353 layout_type (record);
2355 /* The correct type is an array type of one element. */
2356 return build_array_type (record, build_index_type (size_zero_node));
2359 /* Perform any needed actions needed for a function that is receiving a
2360 variable number of arguments.
2364 MODE and TYPE are the mode and type of the current parameter.
2366 PRETEND_SIZE is a variable that should be set to the amount of stack
2367 that must be pushed by the prolog to pretend that our caller pushed
2370 Normally, this macro will push all remaining incoming registers on the
2371 stack and set PRETEND_SIZE to the length of the registers pushed. */
2374 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2375 CUMULATIVE_ARGS *cum;
2376 enum machine_mode mode;
2378 int *pretend_size ATTRIBUTE_UNUSED;
2382 CUMULATIVE_ARGS next_cum;
2383 rtx save_area = NULL_RTX, mem;
2396 /* Indicate to allocate space on the stack for varargs save area. */
2397 ix86_save_varrargs_registers = 1;
2399 fntype = TREE_TYPE (current_function_decl);
2400 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2401 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2402 != void_type_node));
2404 /* For varargs, we do not want to skip the dummy va_dcl argument.
2405 For stdargs, we do want to skip the last named argument. */
2408 function_arg_advance (&next_cum, mode, type, 1);
2411 save_area = frame_pointer_rtx;
2413 set = get_varargs_alias_set ();
/* Spill each remaining integer parameter register to the save area.  */
2415 for (i = next_cum.regno; i < ix86_regparm; i++)
2417 mem = gen_rtx_MEM (Pmode,
2418 plus_constant (save_area, i * UNITS_PER_WORD));
2419 set_mem_alias_set (mem, set);
2420 emit_move_insn (mem, gen_rtx_REG (Pmode,
2421 x86_64_int_parameter_registers[i]));
2424 if (next_cum.sse_nregs)
2426 /* Now emit code to save SSE registers. The AX parameter contains number
2427 of SSE parameter registers used to call this function. We use
2428 sse_prologue_save insn template that produces computed jump across
2429 SSE saves. We need some preparation work to get this working. */
2431 label = gen_label_rtx ();
2432 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2434 /* Compute address to jump to :
2435 label - 5*eax + nnamed_sse_arguments*5 */
2436 tmp_reg = gen_reg_rtx (Pmode);
2437 nsse_reg = gen_reg_rtx (Pmode);
2438 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2439 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2440 gen_rtx_MULT (Pmode, nsse_reg,
2442 if (next_cum.sse_regno)
2445 gen_rtx_CONST (DImode,
2446 gen_rtx_PLUS (DImode,
2448 GEN_INT (next_cum.sse_regno * 4))));
2450 emit_move_insn (nsse_reg, label_ref);
2451 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2453 /* Compute address of memory block we save into. We always use pointer
2454 pointing 127 bytes after first byte to store - this is needed to keep
2455 instruction size limited by 4 bytes. */
2456 tmp_reg = gen_reg_rtx (Pmode);
2457 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2458 plus_constant (save_area,
2459 8 * REGPARM_MAX + 127)));
2460 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2461 set_mem_alias_set (mem, set);
2462 set_mem_align (mem, BITS_PER_WORD);
2464 /* And finally do the dirty job! */
2465 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2466 GEN_INT (next_cum.sse_regno), label));
2471 /* Implement va_start. */
2474 ix86_va_start (valist, nextarg)
2478 HOST_WIDE_INT words, n_gpr, n_fpr;
2479 tree f_gpr, f_fpr, f_ovf, f_sav;
2480 tree gpr, fpr, ovf, sav, t;
2482 /* Only 64bit target needs something special. */
2485 std_expand_builtin_va_start (valist, nextarg);
/* Pull the four __va_list_tag fields out of the va_list record type.  */
2489 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2490 f_fpr = TREE_CHAIN (f_gpr);
2491 f_ovf = TREE_CHAIN (f_fpr);
2492 f_sav = TREE_CHAIN (f_ovf);
2494 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2495 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2496 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2497 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2498 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2500 /* Count number of gp and fp argument registers used. */
2501 words = current_function_args_info.words;
2502 n_gpr = current_function_args_info.regno;
2503 n_fpr = current_function_args_info.sse_regno;
2505 if (TARGET_DEBUG_ARG)
2506 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2507 (int) words, (int) n_gpr, (int) n_fpr);
/* gp_offset = bytes of integer registers already consumed (8 each).  */
2509 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2510 build_int_2 (n_gpr * 8, 0));
2511 TREE_SIDE_EFFECTS (t) = 1;
2512 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset starts past the integer save slots (16 bytes per SSE reg).  */
2514 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2515 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2516 TREE_SIDE_EFFECTS (t) = 1;
2517 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2519 /* Find the overflow area. */
2520 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2522 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2523 build_int_2 (words * UNITS_PER_WORD, 0));
2524 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2525 TREE_SIDE_EFFECTS (t) = 1;
2526 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2528 /* Find the register save area.
2529 Prologue of the function save it right above stack frame. */
2530 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2531 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2532 TREE_SIDE_EFFECTS (t) = 1;
2533 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2536 /* Implement va_arg. */
2538 ix86_va_arg (valist, type)
2541 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2542 tree f_gpr, f_fpr, f_ovf, f_sav;
2543 tree gpr, fpr, ovf, sav, t;
2545 rtx lab_false, lab_over = NULL_RTX;
2549 /* Only 64bit target needs something special. */
2552 return std_expand_builtin_va_arg (valist, type);
/* Extract the __va_list_tag fields (see ix86_build_va_list).  */
2555 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2556 f_fpr = TREE_CHAIN (f_gpr);
2557 f_ovf = TREE_CHAIN (f_fpr);
2558 f_sav = TREE_CHAIN (f_ovf);
2560 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2561 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2562 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2563 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2564 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2566 size = int_size_in_bytes (type);
2567 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2569 container = construct_container (TYPE_MODE (type), type, 0,
2570 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2572 * Pull the value out of the saved registers ...
2575 addr_rtx = gen_reg_rtx (Pmode);
2579 rtx int_addr_rtx, sse_addr_rtx;
2580 int needed_intregs, needed_sseregs;
2583 lab_over = gen_label_rtx ();
2584 lab_false = gen_label_rtx ();
2586 examine_argument (TYPE_MODE (type), type, 0,
2587 &needed_intregs, &needed_sseregs);
2590 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2591 || TYPE_ALIGN (type) > 128);
2593 /* In case we are passing structure, verify that it is consecutive block
2594 on the register save area. If not we need to do moves. */
2595 if (!need_temp && !REG_P (container))
2597 /* Verify that all registers are strictly consecutive */
2598 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2602 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2604 rtx slot = XVECEXP (container, 0, i);
2605 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2606 || INTVAL (XEXP (slot, 1)) != i * 16)
2614 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2616 rtx slot = XVECEXP (container, 0, i);
2617 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2618 || INTVAL (XEXP (slot, 1)) != i * 8)
2625 int_addr_rtx = addr_rtx;
2626 sse_addr_rtx = addr_rtx;
2630 int_addr_rtx = gen_reg_rtx (Pmode);
2631 sse_addr_rtx = gen_reg_rtx (Pmode);
2633 /* First ensure that we fit completely in registers. */
2636 emit_cmp_and_jump_insns (expand_expr
2637 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2638 GEN_INT ((REGPARM_MAX - needed_intregs +
2639 1) * 8), GE, const1_rtx, SImode,
2644 emit_cmp_and_jump_insns (expand_expr
2645 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2646 GEN_INT ((SSE_REGPARM_MAX -
2647 needed_sseregs + 1) * 16 +
2648 REGPARM_MAX * 8), GE, const1_rtx,
2649 SImode, 1, lab_false);
2652 /* Compute index to start of area used for integer regs. */
2655 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2656 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2657 if (r != int_addr_rtx)
2658 emit_move_insn (int_addr_rtx, r);
/* Likewise for the SSE register area.  */
2662 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2663 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2664 if (r != sse_addr_rtx)
2665 emit_move_insn (sse_addr_rtx, r);
2672 /* Never use the memory itself, as it has the alias set. */
2673 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2674 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2675 set_mem_alias_set (mem, get_varargs_alias_set ());
2676 set_mem_align (mem, BITS_PER_UNIT);
/* Copy each register slot of the value into the temporary.  */
2678 for (i = 0; i < XVECLEN (container, 0); i++)
2680 rtx slot = XVECEXP (container, 0, i);
2681 rtx reg = XEXP (slot, 0);
2682 enum machine_mode mode = GET_MODE (reg);
2688 if (SSE_REGNO_P (REGNO (reg)))
2690 src_addr = sse_addr_rtx;
2691 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2695 src_addr = int_addr_rtx;
2696 src_offset = REGNO (reg) * 8;
2698 src_mem = gen_rtx_MEM (mode, src_addr);
2699 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2700 src_mem = adjust_address (src_mem, mode, src_offset);
2701 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2702 emit_move_insn (dest_mem, src_mem);
/* Bump gp_offset / fp_offset past the registers we just consumed.  */
2709 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2710 build_int_2 (needed_intregs * 8, 0));
2711 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2712 TREE_SIDE_EFFECTS (t) = 1;
2713 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2718 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2719 build_int_2 (needed_sseregs * 16, 0));
2720 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2721 TREE_SIDE_EFFECTS (t) = 1;
2722 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2725 emit_jump_insn (gen_jump (lab_over));
2727 emit_label (lab_false);
2730 /* ... otherwise out of the overflow area. */
2732 /* Care for on-stack alignment if needed. */
2733 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2737 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2738 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2739 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2743 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2745 emit_move_insn (addr_rtx, r);
/* Advance overflow_arg_area past this argument.  */
2748 build (PLUS_EXPR, TREE_TYPE (t), t,
2749 build_int_2 (rsize * UNITS_PER_WORD, 0));
2750 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2751 TREE_SIDE_EFFECTS (t) = 1;
2752 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2755 emit_label (lab_over);
2760 /* Return nonzero if OP is either a i387 or SSE fp register. */
2762 any_fp_register_operand (op, mode)
2764 enum machine_mode mode ATTRIBUTE_UNUSED;
2766 return ANY_FP_REG_P (op);
2769 /* Return nonzero if OP is an i387 fp register. */
2771 fp_register_operand (op, mode)
2773 enum machine_mode mode ATTRIBUTE_UNUSED;
2775 return FP_REG_P (op);
2778 /* Return nonzero if OP is a non-fp register_operand. */
2780 register_and_not_any_fp_reg_operand (op, mode)
2782 enum machine_mode mode;
2784 return register_operand (op, mode) && !ANY_FP_REG_P (op);
2787 /* Return nonzero if OP is a register operand other than an
2788 i387 fp register. */
2790 register_and_not_fp_reg_operand (op, mode)
2792 enum machine_mode mode;
2794 return register_operand (op, mode) && !FP_REG_P (op);
2797 /* Return nonzero if OP is general operand representable on x86_64. */
2800 x86_64_general_operand (op, mode)
2802 enum machine_mode mode;
2805 return general_operand (op, mode);
2806 if (nonimmediate_operand (op, mode))
/* Immediates must fit as a sign-extended 32-bit value in 64-bit code.  */
2808 return x86_64_sign_extended_value (op);
2811 /* Return nonzero if OP is general operand representable on x86_64
2812 as either sign extended or zero extended constant. */
2815 x86_64_szext_general_operand (op, mode)
2817 enum machine_mode mode;
2820 return general_operand (op, mode);
2821 if (nonimmediate_operand (op, mode))
2823 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2826 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2829 x86_64_nonmemory_operand (op, mode)
2831 enum machine_mode mode;
2834 return nonmemory_operand (op, mode);
2835 if (register_operand (op, mode))
2837 return x86_64_sign_extended_value (op);
2840 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2843 x86_64_movabs_operand (op, mode)
2845 enum machine_mode mode;
2847 if (!TARGET_64BIT || !flag_pic)
2848 return nonmemory_operand (op, mode);
2849 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2851 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2856 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2859 x86_64_szext_nonmemory_operand (op, mode)
2861 enum machine_mode mode;
2864 return nonmemory_operand (op, mode);
2865 if (register_operand (op, mode))
2867 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2870 /* Return nonzero if OP is immediate operand representable on x86_64. */
2873 x86_64_immediate_operand (op, mode)
2875 enum machine_mode mode;
2878 return immediate_operand (op, mode);
2879 return x86_64_sign_extended_value (op);
2882 /* Return nonzero if OP is immediate operand representable on x86_64
   as a zero-extended 32-bit value. */
2885 x86_64_zext_immediate_operand (op, mode)
2887 enum machine_mode mode ATTRIBUTE_UNUSED;
2889 return x86_64_zero_extended_value (op);
2892 /* Return nonzero if OP is (const_int 1), else return zero. */
2895 const_int_1_operand (op, mode)
2897 enum machine_mode mode ATTRIBUTE_UNUSED;
2899 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2902 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2903 for shift & compare patterns, as shifting by 0 does not change flags),
2904 else return zero. */
2907 const_int_1_31_operand (op, mode)
2909 enum machine_mode mode ATTRIBUTE_UNUSED;
2911 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
2914 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2915 reference and a constant. */
2918 symbolic_operand (op, mode)
2920 enum machine_mode mode ATTRIBUTE_UNUSED;
2922 switch (GET_CODE (op))
/* CONST case: peel the wrapper and accept symbol/label, the pic unspecs,
   or symbol + integer offset (only @GOTOFF may carry an offset).  */
2930 if (GET_CODE (op) == SYMBOL_REF
2931 || GET_CODE (op) == LABEL_REF
2932 || (GET_CODE (op) == UNSPEC
2933 && (XINT (op, 1) == UNSPEC_GOT
2934 || XINT (op, 1) == UNSPEC_GOTOFF
2935 || XINT (op, 1) == UNSPEC_GOTPCREL)))
2937 if (GET_CODE (op) != PLUS
2938 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2942 if (GET_CODE (op) == SYMBOL_REF
2943 || GET_CODE (op) == LABEL_REF)
2945 /* Only @GOTOFF gets offsets. */
2946 if (GET_CODE (op) != UNSPEC
2947 || XINT (op, 1) != UNSPEC_GOTOFF)
2950 op = XVECEXP (op, 0, 0);
2951 if (GET_CODE (op) == SYMBOL_REF
2952 || GET_CODE (op) == LABEL_REF)
2961 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2964 pic_symbolic_operand (op, mode)
2966 enum machine_mode mode ATTRIBUTE_UNUSED;
2968 if (GET_CODE (op) != CONST)
/* NOTE(review): target-specific branching between these tests is elided
   in this listing.  */
2973 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2978 if (GET_CODE (op) == UNSPEC)
2980 if (GET_CODE (op) != PLUS
2981 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2984 if (GET_CODE (op) == UNSPEC)
2990 /* Return true if OP is a symbolic operand that resolves locally. */
2993 local_symbolic_operand (op, mode)
2995 enum machine_mode mode ATTRIBUTE_UNUSED;
2997 if (GET_CODE (op) == LABEL_REF)
/* Strip a CONST (symbol + offset) wrapper down to the symbol itself.  */
3000 if (GET_CODE (op) == CONST
3001 && GET_CODE (XEXP (op, 0)) == PLUS
3002 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3003 op = XEXP (XEXP (op, 0), 0);
3005 if (GET_CODE (op) != SYMBOL_REF)
3008 /* These we've been told are local by varasm and encode_section_info
3010 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3013 /* There is, however, a not insubstantial body of code in the rest of
3014 the compiler that assumes it can just stick the results of
3015 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3016 /* ??? This is a hack. Should update the body of the compiler to
3017 always create a DECL an invoke targetm.encode_section_info. */
3018 if (strncmp (XSTR (op, 0), internal_label_prefix,
3019 internal_label_prefix_len) == 0)
3025 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3028 tls_symbolic_operand (op, mode)
3030 enum machine_mode mode ATTRIBUTE_UNUSED;
3032 const char *symbol_str;
3034 if (GET_CODE (op) != SYMBOL_REF)
3036 symbol_str = XSTR (op, 0);
/* TLS symbols are encoded with a leading '%' plus a model character.  */
3038 if (symbol_str[0] != '%')
3040 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
/* Helper shared by the four model-specific predicates below.  */
3044 tls_symbolic_operand_1 (op, kind)
3046 enum tls_model kind;
3048 const char *symbol_str;
3050 if (GET_CODE (op) != SYMBOL_REF)
3052 symbol_str = XSTR (op, 0);
3054 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3058 global_dynamic_symbolic_operand (op, mode)
3060 enum machine_mode mode ATTRIBUTE_UNUSED;
3062 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3066 local_dynamic_symbolic_operand (op, mode)
3068 enum machine_mode mode ATTRIBUTE_UNUSED;
3070 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3074 initial_exec_symbolic_operand (op, mode)
3076 enum machine_mode mode ATTRIBUTE_UNUSED;
3078 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3082 local_exec_symbolic_operand (op, mode)
3084 enum machine_mode mode ATTRIBUTE_UNUSED;
3086 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3089 /* Test for a valid operand for a call instruction. Don't allow the
3090 arg pointer register or virtual regs since they may decay into
3091 reg + const, which the patterns can't handle. */
3094 call_insn_operand (op, mode)
3096 enum machine_mode mode ATTRIBUTE_UNUSED;
3098 /* Disallow indirect through a virtual register. This leads to
3099 compiler aborts when trying to eliminate them. */
3100 if (GET_CODE (op) == REG
3101 && (op == arg_pointer_rtx
3102 || op == frame_pointer_rtx
3103 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3104 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3107 /* Disallow `call 1234'. Due to varying assembler lameness this
3108 gets either rejected or translated to `call .+1234'. */
3109 if (GET_CODE (op) == CONST_INT)
3112 /* Explicitly allow SYMBOL_REF even if pic. */
3113 if (GET_CODE (op) == SYMBOL_REF)
3116 /* Otherwise we can allow any general_operand in the address. */
3117 return general_operand (op, Pmode);
/* Accept SYMBOL_REF, optionally wrapped in CONST (symbol + offset).  */
3121 constant_call_address_operand (op, mode)
3123 enum machine_mode mode ATTRIBUTE_UNUSED;
3125 if (GET_CODE (op) == CONST
3126 && GET_CODE (XEXP (op, 0)) == PLUS
3127 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3128 op = XEXP (XEXP (op, 0), 0);
3129 return GET_CODE (op) == SYMBOL_REF;
3132 /* Match exactly zero and one. */
3135 const0_operand (op, mode)
3137 enum machine_mode mode;
3139 return op == CONST0_RTX (mode);
3143 const1_operand (op, mode)
3145 enum machine_mode mode ATTRIBUTE_UNUSED;
3147 return op == const1_rtx;
3150 /* Match 2, 4, or 8. Used for leal multiplicands. */
3153 const248_operand (op, mode)
3155 enum machine_mode mode ATTRIBUTE_UNUSED;
3157 return (GET_CODE (op) == CONST_INT
3158 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3161 /* True if this is a constant appropriate for an increment or decrement. */
3164 incdec_operand (op, mode)
3166 enum machine_mode mode ATTRIBUTE_UNUSED;
3168 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3169 registers, since carry flag is not set. */
3170 if (TARGET_PENTIUM4 && !optimize_size)
3172 return op == const1_rtx || op == constm1_rtx;
3175 /* Return nonzero if OP is acceptable as operand of DImode shift
3179 shiftdi_operand (op, mode)
3181 enum machine_mode mode ATTRIBUTE_UNUSED;
3184 return nonimmediate_operand (op, mode);
3186 return register_operand (op, mode);
3189 /* Return false if this is the stack pointer, or any other fake
3190 register eliminable to the stack pointer. Otherwise, this is
3193 This is used to prevent esp from being used as an index reg.
3194 Which would only happen in pathological cases. */
3197 reg_no_sp_operand (op, mode)
3199 enum machine_mode mode;
/* Look through a SUBREG before checking for the eliminable registers.  */
3202 if (GET_CODE (t) == SUBREG)
3204 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3207 return register_operand (op, mode);
/* Match an MMX register.  */
3211 mmx_reg_operand (op, mode)
3213 enum machine_mode mode ATTRIBUTE_UNUSED;
3215 return MMX_REG_P (op);
3218 /* Return false if this is any eliminable register. Otherwise
3222 general_no_elim_operand (op, mode)
3224 enum machine_mode mode;
3227 if (GET_CODE (t) == SUBREG)
3229 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3230 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3231 || t == virtual_stack_dynamic_rtx)
3234 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3235 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3238 return general_operand (op, mode);
3241 /* Return false if this is any eliminable register. Otherwise
3242 register_operand or const_int. */
3245 nonmemory_no_elim_operand (op, mode)
3247 enum machine_mode mode;
3250 if (GET_CODE (t) == SUBREG)
3252 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3253 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3254 || t == virtual_stack_dynamic_rtx)
3257 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3260 /* Return false if this is any eliminable register or stack register,
3261 otherwise work like register_operand. */
3264 index_register_operand (op, mode)
3266 enum machine_mode mode;
3269 if (GET_CODE (t) == SUBREG)
/* Also exclude the stack pointer -- it cannot be used as an index.  */
3273 if (t == arg_pointer_rtx
3274 || t == frame_pointer_rtx
3275 || t == virtual_incoming_args_rtx
3276 || t == virtual_stack_vars_rtx
3277 || t == virtual_stack_dynamic_rtx
3278 || REGNO (t) == STACK_POINTER_REGNUM)
3281 return general_operand (op, mode);
3284 /* Return true if op is a Q_REGS class register. */
3287 q_regs_operand (op, mode)
3289 enum machine_mode mode;
3291 if (mode != VOIDmode && GET_MODE (op) != mode)
3293 if (GET_CODE (op) == SUBREG)
3294 op = SUBREG_REG (op);
3295 return ANY_QI_REG_P (op);
3298 /* Return true if op is a NON_Q_REGS class register. */
3301 non_q_regs_operand (op, mode)
3303 enum machine_mode mode;
3305 if (mode != VOIDmode && GET_MODE (op) != mode)
3307 if (GET_CODE (op) == SUBREG)
3308 op = SUBREG_REG (op);
3309 return NON_QI_REG_P (op);
3312 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3315 sse_comparison_operator (op, mode)
3317 enum machine_mode mode ATTRIBUTE_UNUSED;
3319 enum rtx_code code = GET_CODE (op);
3322 /* Operations supported directly. */
3332 /* These are equivalent to ones above in non-IEEE comparisons. */
3339 return !TARGET_IEEE_FP;
3344 /* Return 1 if OP is a valid comparison operator in valid mode. */
3346 ix86_comparison_operator (op, mode)
3348 enum machine_mode mode;
3350 enum machine_mode inmode;
3351 enum rtx_code code = GET_CODE (op);
3352 if (mode != VOIDmode && GET_MODE (op) != mode)
3354 if (GET_RTX_CLASS (code) != '<')
3356 inmode = GET_MODE (XEXP (op, 0));
/* FP compares are valid only when they need no splitting (no bypass or
   second jump).  */
3358 if (inmode == CCFPmode || inmode == CCFPUmode)
3360 enum rtx_code second_code, bypass_code;
3361 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3362 return (bypass_code == NIL && second_code == NIL);
/* Integer compares: which codes are valid depends on the CC mode.  */
3369 if (inmode == CCmode || inmode == CCGCmode
3370 || inmode == CCGOCmode || inmode == CCNOmode)
3373 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3374 if (inmode == CCmode)
3378 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3386 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3389 fcmov_comparison_operator (op, mode)
3391 enum machine_mode mode;
3393 enum machine_mode inmode;
3394 enum rtx_code code = GET_CODE (op);
3395 if (mode != VOIDmode && GET_MODE (op) != mode)
3397 if (GET_RTX_CLASS (code) != '<')
3399 inmode = GET_MODE (XEXP (op, 0));
3400 if (inmode == CCFPmode || inmode == CCFPUmode)
3402 enum rtx_code second_code, bypass_code;
3403 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3404 if (bypass_code != NIL || second_code != NIL)
3406 code = ix86_fp_compare_code_to_integer (code);
3408 /* i387 supports just limited amount of conditional codes. */
3411 case LTU: case GTU: case LEU: case GEU:
3412 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3415 case ORDERED: case UNORDERED:
3423 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3426 promotable_binary_operator (op, mode)
3428 enum machine_mode mode ATTRIBUTE_UNUSED;
3430 switch (GET_CODE (op))
3433 /* Modern CPUs have same latency for HImode and SImode multiply,
3434 but 386 and 486 do HImode multiply faster. */
3435 return ix86_cpu > PROCESSOR_I486;
3447 /* Nearly general operand, but accept any const_double, since we wish
3448 to be able to drop them into memory rather than have them get pulled
3452 cmp_fp_expander_operand (op, mode)
3454 enum machine_mode mode;
3456 if (mode != VOIDmode && mode != GET_MODE (op))
3458 if (GET_CODE (op) == CONST_DOUBLE)
3460 return general_operand (op, mode);
3463 /* Match an SI or HImode register for a zero_extract. */
3466 ext_register_operand (op, mode)
3468 enum machine_mode mode ATTRIBUTE_UNUSED;
3471 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3472 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3475 if (!register_operand (op, VOIDmode))
3478 /* Be careful to accept only registers having upper parts. */
3479 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3480 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3483 /* Return 1 if this is a valid binary floating-point operation.
3484 OP is the expression matched, and MODE is its mode. */
3487 binary_fp_operator (op, mode)
3489 enum machine_mode mode;
3491 if (mode != VOIDmode && mode != GET_MODE (op))
3494 switch (GET_CODE (op))
3500 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3508 mult_operator (op, mode)
3510 enum machine_mode mode ATTRIBUTE_UNUSED;
3512 return GET_CODE (op) == MULT;
3516 div_operator (op, mode)
3518 enum machine_mode mode ATTRIBUTE_UNUSED;
3520 return GET_CODE (op) == DIV;
3524 arith_or_logical_operator (op, mode)
3526 enum machine_mode mode;
3528 return ((mode == VOIDmode || GET_MODE (op) == mode)
3529 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3530 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3533 /* Returns 1 if OP is memory operand with a displacement. */
/* NOTE(review): sampled extract -- declarations/braces missing; verify
   against the full file before relying on control flow shown here.  */
3536 memory_displacement_operand (op, mode)
3538 enum machine_mode mode;
3540 struct ix86_address parts;
3542 if (! memory_operand (op, mode))
3545 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3548 return parts.disp != NULL_RTX;
3551 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3552 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3554 ??? It seems likely that this will only work because cmpsi is an
3555 expander, and no actual insns use this. */
/* Accepts a nonimmediate operand, or the specific pattern
   (and (zero_extract X 8 8) const_int) in SImode -- i.e. a %ah/%bh/%ch/%dh
   sub-register test that jump may re-emit.  */
3558 cmpsi_operand (op, mode)
3560 enum machine_mode mode;
3562 if (nonimmediate_operand (op, mode))
3565 if (GET_CODE (op) == AND
3566 && GET_MODE (op) == SImode
3567 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3568 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3569 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3570 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3571 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3572 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3578 /* Returns 1 if OP is memory operand that can not be represented by the
/* Nonzero address length means the address needs extra encoding bytes.  */
3582 long_memory_operand (op, mode)
3584 enum machine_mode mode;
3586 if (! memory_operand (op, mode))
3589 return memory_address_length (op) != 0;
3592 /* Return nonzero if the rtx is known aligned. */
3595 aligned_operand (op, mode)
3597 enum machine_mode mode;
3599 struct ix86_address parts;
3601 if (!general_operand (op, mode))
3604 /* Registers and immediate operands are always "aligned". */
3605 if (GET_CODE (op) != MEM)
3608 /* Don't even try to do any aligned optimizations with volatiles. */
3609 if (MEM_VOLATILE_P (op))
3614 /* Pushes and pops are only valid on the stack pointer. */
3615 if (GET_CODE (op) == PRE_DEC
3616 || GET_CODE (op) == POST_INC)
3619 /* Decode the address. */
3620 if (! ix86_decompose_address (op, &parts))
/* Strip paradoxical SUBREGs so REGNO queries below see hard/pseudo regs.  */
3623 if (parts.base && GET_CODE (parts.base) == SUBREG)
3624 parts.base = SUBREG_REG (parts.base);
3625 if (parts.index && GET_CODE (parts.index) == SUBREG)
3626 parts.index = SUBREG_REG (parts.index);
3628 /* Look for some component that isn't known to be aligned. */
3632 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3637 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
/* A displacement must be a multiple of 4 to keep 32-bit alignment.  */
3642 if (GET_CODE (parts.disp) != CONST_INT
3643 || (INTVAL (parts.disp) & 3) != 0)
3647 /* Didn't find one -- this must be an aligned address. */
3651 /* Return true if the constant is something that can be loaded with
3652 a special instruction. Only handle 0.0 and 1.0; others are less
/* 0.0 maps to fldz and 1.0 to fld1 on the 80387.  */
3656 standard_80387_constant_p (x)
3659 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3661 /* Note that on the 80387 there are other constants, such as pi, that we
3662 should support too. On some machines, these are much slower to load as
3663 standard constants than to load from doubles in memory. */
3664 if (x == CONST0_RTX (GET_MODE (x)))
3666 if (x == CONST1_RTX (GET_MODE (x)))
3671 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
/* Only the all-zeros constant qualifies (xorps/xorpd idiom).  */
3674 standard_sse_constant_p (x)
3677 if (GET_CODE (x) != CONST_DOUBLE)
3679 return (x == CONST0_RTX (GET_MODE (x)));
3682 /* Returns 1 if OP contains a symbol reference */
/* Recursive walk over the rtx: direct SYMBOL_REF/LABEL_REF hit, then
   recurse through 'E' (vector) and 'e' (expression) operands.  */
3685 symbolic_reference_mentioned_p (op)
3688 register const char *fmt;
3691 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3694 fmt = GET_RTX_FORMAT (GET_CODE (op));
3695 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3701 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3702 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3706 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3713 /* Return 1 if it is appropriate to emit `ret' instructions in the
3714 body of a function. Do this only if the epilogue is simple, needing a
3715 couple of insns. Prior to reloading, we can't tell how many registers
3716 must be saved, so return 0 then. Return 0 if there is no frame
3717 marker to de-allocate.
3719 If NON_SAVING_SETJMP is defined and true, then it is not possible
3720 for the epilogue to be simple, so return 0. This is a special case
3721 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3722 until final, but jump_optimize may need to know sooner if a
3726 ix86_can_use_return_insn_p ()
3728 struct ix86_frame frame;
3730 #ifdef NON_SAVING_SETJMP
3731 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
/* Before reload register usage is unknown; with a frame pointer the
   epilogue needs more than a bare ret.  */
3735 if (! reload_completed || frame_pointer_needed)
3738 /* Don't allow more than 32 pop, since that's all we can do
3739 with one instruction. */
/* NOTE(review): comment says 32 but the test is 32768 bytes -- the `ret N'
   immediate is 16 bits; confirm wording against the full source.  */
3740 if (current_function_pops_args
3741 && current_function_args_size >= 32768)
/* A bare `ret' works only when nothing remains to deallocate or restore.  */
3744 ix86_compute_frame_layout (&frame);
3745 return frame.to_allocate == 0 && frame.nregs == 0;
3748 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
/* x86-64 allows 32-bit immediates sign-extended to 64 bits; symbolic
   operands depend on the active code model (ix86_cmodel).  */
3750 x86_64_sign_extended_value (value)
3753 switch (GET_CODE (value))
3755 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3756 to be at least 32 and thus all acceptable constants are
3757 represented as CONST_INT. */
3759 if (HOST_BITS_PER_WIDE_INT == 32)
/* Wide host: the value fits iff truncating to SImode round-trips.  */
3763 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3764 return trunc_int_for_mode (val, SImode) == val;
3768 /* For certain code models, the symbolic references are known to fit. */
3770 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3772 /* For certain code models, the code is near as well. */
3774 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3776 /* We also may accept the offsetted memory references in certain special
3779 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3780 && XINT (XEXP (value, 0), 1) == UNSPEC_GOTPCREL)
3782 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3784 rtx op1 = XEXP (XEXP (value, 0), 0);
3785 rtx op2 = XEXP (XEXP (value, 0), 1);
3786 HOST_WIDE_INT offset;
3788 if (ix86_cmodel == CM_LARGE)
3790 if (GET_CODE (op2) != CONST_INT)
3792 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3793 switch (GET_CODE (op1))
3796 /* For CM_SMALL assume that latest object is 1MB before
3797 end of 31bits boundary. We may also accept pretty
3798 large negative constants knowing that all objects are
3799 in the positive half of address space. */
3800 if (ix86_cmodel == CM_SMALL
3801 && offset < 1024*1024*1024
3802 && trunc_int_for_mode (offset, SImode) == offset)
3804 /* For CM_KERNEL we know that all objects reside in the
3805 negative half of 32bits address space. We may not
3806 accept negative offsets, since they may be just off
3807 and we may accept pretty large positive ones. */
3808 if (ix86_cmodel == CM_KERNEL
3810 && trunc_int_for_mode (offset, SImode) == offset)
3814 /* These conditions are similar to SYMBOL_REF ones, just the
3815 constraints for code models differ. */
3816 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3817 && offset < 1024*1024*1024
3818 && trunc_int_for_mode (offset, SImode) == offset)
3820 if (ix86_cmodel == CM_KERNEL
3822 && trunc_int_for_mode (offset, SImode) == offset)
3835 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
/* Counterpart of x86_64_sign_extended_value: immediates that fit in an
   unsigned 32-bit field (e.g. movl to a 64-bit register).  */
3837 x86_64_zero_extended_value (value)
3840 switch (GET_CODE (value))
3843 if (HOST_BITS_PER_WIDE_INT == 32)
3844 return (GET_MODE (value) == VOIDmode
3845 && !CONST_DOUBLE_HIGH (value))
/* NOTE(review): closing of the CONST_DOUBLE arm appears truncated in this
   extract; compare with the complete file.  */;
3849 if (HOST_BITS_PER_WIDE_INT == 32)
3850 return INTVAL (value) >= 0;
/* Wide host: no bits above the low 32 may be set.  */
3852 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3855 /* For certain code models, the symbolic references are known to fit. */
3857 return ix86_cmodel == CM_SMALL;
3859 /* For certain code models, the code is near as well. */
3861 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3863 /* We also may accept the offsetted memory references in certain special
3866 if (GET_CODE (XEXP (value, 0)) == PLUS)
3868 rtx op1 = XEXP (XEXP (value, 0), 0);
3869 rtx op2 = XEXP (XEXP (value, 0), 1);
3871 if (ix86_cmodel == CM_LARGE)
3873 switch (GET_CODE (op1))
3877 /* For small code model we may accept pretty large positive
3878 offsets, since one bit is available for free. Negative
3879 offsets are limited by the size of NULL pointer area
3880 specified by the ABI. */
3881 if (ix86_cmodel == CM_SMALL
3882 && GET_CODE (op2) == CONST_INT
3883 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3884 && (trunc_int_for_mode (INTVAL (op2), SImode)
3887 /* ??? For the kernel, we may accept adjustment of
3888 -0x10000000, since we know that it will just convert
3889 negative address space to positive, but perhaps this
3890 is not worthwhile. */
3893 /* These conditions are similar to SYMBOL_REF ones, just the
3894 constraints for code models differ. */
3895 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3896 && GET_CODE (op2) == CONST_INT
3897 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3898 && (trunc_int_for_mode (INTVAL (op2), SImode)
3912 /* Value should be nonzero if functions must have frame pointers.
3913 Zero means the frame pointer need not be set up (and parms may
3914 be accessed via the stack pointer) in functions that seem suitable. */
3917 ix86_frame_pointer_required ()
3919 /* If we accessed previous frames, then the generated code expects
3920 to be able to access the saved ebp value in our frame. */
3921 if (cfun->machine->accesses_prev_frame)
3924 /* Several x86 os'es need a frame pointer for other reasons,
3925 usually pertaining to setjmp. */
3926 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3929 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3930 the frame pointer by default. Turn it back on now if we've not
3931 got a leaf function. */
3932 if (TARGET_OMIT_LEAF_FRAME_POINTER
3933 && (!current_function_is_leaf || current_function_profile))
3939 /* Record that the current function accesses previous call frames.
   Forces ix86_frame_pointer_required to return nonzero for this function. */
3942 ix86_setup_frame_addresses ()
3944 cfun->machine->accesses_prev_frame = 1;
/* Use a hidden link-once thunk when the assembler/linker support it;
   otherwise fall back to a per-translation-unit internal label.  */
3947 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3948 # define USE_HIDDEN_LINKONCE 1
3950 # define USE_HIDDEN_LINKONCE 0
/* Bitmask: bit N set means a pc thunk for hard register N was emitted.  */
3953 static int pic_labels_used;
3955 /* Fills in the label name that should be used for a pc thunk for
3956 the given register. */
3959 get_pc_thunk_name (name, regno)
3963 if (USE_HIDDEN_LINKONCE)
3964 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3966 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3970 /* This function generates code for -fpic that loads %ebx with
3971 the return address of the caller and then returns. */
/* Emits one "get_pc" thunk per register recorded in pic_labels_used.
   NOTE(review): sampled extract -- local declarations (name, decl, xops)
   are not visible here; confirm against the complete file.  */
3974 ix86_asm_file_end (file)
3980 for (regno = 0; regno < 8; ++regno)
3984 if (! ((pic_labels_used >> regno) & 1))
3987 get_pc_thunk_name (name, regno);
3989 if (USE_HIDDEN_LINKONCE)
/* Build a FUNCTION_DECL so the thunk can live in its own comdat
   section and be merged across translation units.  */
3993 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3995 TREE_PUBLIC (decl) = 1;
3996 TREE_STATIC (decl) = 1;
3997 DECL_ONE_ONLY (decl) = 1;
3999 (*targetm.asm_out.unique_section) (decl, 0);
4000 named_section (decl, NULL, 0);
4002 (*targetm.asm_out.globalize_label) (file, name);
4003 fputs ("\t.hidden\t", file);
4004 assemble_name (file, name);
4006 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4011 ASM_OUTPUT_LABEL (file, name);
/* Thunk body: load the return address (at the top of the stack)
   into the target register, then return.  */
4014 xops[0] = gen_rtx_REG (SImode, regno);
4015 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4016 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4017 output_asm_insn ("ret", xops);
4021 /* Emit code for the SET_GOT patterns. */
4024 output_set_got (dest)
4030 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4032 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
/* Classic call/pop sequence to obtain the current PC.  */
4034 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4037 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4039 output_asm_insn ("call\t%a2", xops);
4042 /* Output the "canonical" label name ("Lxx$pb") here too. This
4043 is what will be referred to by the Mach-O PIC subsystem. */
4044 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4046 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4047 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4050 output_asm_insn ("pop{l}\t%0", xops);
/* Deep-branch-prediction CPUs: call a thunk instead, keeping the
   return-stack predictor balanced.  */
4055 get_pc_thunk_name (name, REGNO (dest));
4056 pic_labels_used |= 1 << REGNO (dest);
4058 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4059 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4060 output_asm_insn ("call\t%X2", xops);
4063 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4064 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4065 else if (!TARGET_MACHO)
4066 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4071 /* Generate an "push" pattern for input ARG. */
/* Builds (set (mem (pre_dec sp)) arg) -- the RTL form of a push insn.  */
4077 return gen_rtx_SET (VOIDmode,
4079 gen_rtx_PRE_DEC (Pmode,
4080 stack_pointer_rtx)),
4084 /* Return >= 0 if there is an unused call-clobbered register available
4085 for the entire function. */
/* Only meaningful for leaf, non-profiled functions; scans eax/edx/ecx
   (hard regs 2..0).  Returns INVALID_REGNUM when none is free.  */
4088 ix86_select_alt_pic_regnum ()
4090 if (current_function_is_leaf && !current_function_profile)
4093 for (i = 2; i >= 0; --i)
4094 if (!regs_ever_live[i])
4098 return INVALID_REGNUM;
4101 /* Return 1 if we need to save REGNO. */
/* MAYBE_EH_RETURN selects whether the EH data registers count as saved;
   the prologue passes true, the normal-return epilogue passes false.  */
4103 ix86_save_reg (regno, maybe_eh_return)
4105 int maybe_eh_return;
4107 if (pic_offset_table_rtx
4108 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4109 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4110 || current_function_profile
4111 || current_function_calls_eh_return))
/* If an alternate PIC register is available we need not save ebx.  */
4113 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4118 if (current_function_calls_eh_return && maybe_eh_return)
4123 unsigned test = EH_RETURN_DATA_REGNO (i);
4124 if (test == INVALID_REGNUM)
/* Default rule: live, callee-saved, not fixed, and not the hard frame
   pointer when a frame pointer is in use (it is saved separately).  */
4131 return (regs_ever_live[regno]
4132 && !call_used_regs[regno]
4133 && !fixed_regs[regno]
4134 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4137 /* Return number of registers to be saved on the stack. */
/* Counts hard registers for which ix86_save_reg (regno, true) holds.  */
4145 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4146 if (ix86_save_reg (regno, true))
4151 /* Return the offset between two registers, one to be eliminated, and the other
4152 its replacement, at the start of a routine. */
4155 ix86_initial_elimination_offset (from, to)
4159 struct ix86_frame frame;
4160 ix86_compute_frame_layout (&frame);
4162 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4163 return frame.hard_frame_pointer_offset;
4164 else if (from == FRAME_POINTER_REGNUM
4165 && to == HARD_FRAME_POINTER_REGNUM)
4166 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* Remaining eliminations must target the stack pointer.  */
4169 if (to != STACK_POINTER_REGNUM)
4171 else if (from == ARG_POINTER_REGNUM)
4172 return frame.stack_pointer_offset;
4173 else if (from != FRAME_POINTER_REGNUM)
4176 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4180 /* Fill structure ix86_frame about frame of currently computed function. */
/* Frame layout, from high to low addresses: return address, saved ebp
   (if used), saved registers, vararg save area, padding1, locals,
   outgoing args, padding2.  NOTE(review): sampled extract -- some
   statements (e.g. the abort calls after the sanity checks) are not
   visible; confirm against the complete file.  */
4183 ix86_compute_frame_layout (frame)
4184 struct ix86_frame *frame;
4186 HOST_WIDE_INT total_size;
4187 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4189 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4190 HOST_WIDE_INT size = get_frame_size ();
4192 frame->nregs = ix86_nsaved_regs ();
4195 /* Skip return address and saved base pointer. */
4196 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4198 frame->hard_frame_pointer_offset = offset;
4200 /* Do some sanity checking of stack_alignment_needed and
4201 preferred_alignment, since i386 port is the only one using these
4202 features and they may break easily. */
4204 if (size && !stack_alignment_needed)
4206 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4208 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4210 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4213 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4214 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4216 /* Register save area */
4217 offset += frame->nregs * UNITS_PER_WORD;
/* x86-64 varargs: reserve space for the register save area.  */
4220 if (ix86_save_varrargs_registers)
4222 offset += X86_64_VARARGS_SIZE;
4223 frame->va_arg_size = X86_64_VARARGS_SIZE;
4226 frame->va_arg_size = 0;
4228 /* Align start of frame for local function. */
4229 frame->padding1 = ((offset + stack_alignment_needed - 1)
4230 & -stack_alignment_needed) - offset;
4232 offset += frame->padding1;
4234 /* Frame pointer points here. */
4235 frame->frame_pointer_offset = offset;
4239 /* Add outgoing arguments area. Can be skipped if we eliminated
4240 all the function calls as dead code. */
4241 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4243 offset += current_function_outgoing_args_size;
4244 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4247 frame->outgoing_arguments_size = 0;
4249 /* Align stack boundary. Only needed if we're calling another function
4251 if (!current_function_is_leaf || current_function_calls_alloca)
4252 frame->padding2 = ((offset + preferred_alignment - 1)
4253 & -preferred_alignment) - offset;
4255 frame->padding2 = 0;
4257 offset += frame->padding2;
4259 /* We've reached end of stack frame. */
4260 frame->stack_pointer_offset = offset;
4262 /* Size prologue needs to allocate. */
4263 frame->to_allocate =
4264 (size + frame->padding1 + frame->padding2
4265 + frame->outgoing_arguments_size + frame->va_arg_size);
/* x86-64 red zone: a leaf function may use up to RED_ZONE_SIZE bytes
   below the stack pointer without adjusting it.  */
4267 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4268 && current_function_is_leaf)
4270 frame->red_zone_size = frame->to_allocate;
4271 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4272 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4275 frame->red_zone_size = 0;
4276 frame->to_allocate -= frame->red_zone_size;
4277 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout (presumably under an #if 0 or debug
   flag in the full source -- confirm; unconditional stderr output would
   be surprising here).  */
4279 fprintf (stderr, "nregs: %i\n", frame->nregs);
4280 fprintf (stderr, "size: %i\n", size);
4281 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4282 fprintf (stderr, "padding1: %i\n", frame->padding1);
4283 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4284 fprintf (stderr, "padding2: %i\n", frame->padding2);
4285 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4286 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4287 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4288 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4289 frame->hard_frame_pointer_offset);
4290 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4294 /* Emit code to save registers in the prologue. */
/* Emits one push per register to save, highest regno first; each insn is
   marked frame-related for DWARF CFI generation.  */
4297 ix86_emit_save_regs ()
4302 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4303 if (ix86_save_reg (regno, true))
4305 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4306 RTX_FRAME_RELATED_P (insn) = 1;
4310 /* Emit code to save registers using MOV insns. First register
4311 is restored from POINTER + OFFSET. */
/* Alternative to pushes: store each saved register at successive word
   offsets from POINTER.  */
4313 ix86_emit_save_regs_using_mov (pointer, offset)
4315 HOST_WIDE_INT offset;
4320 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4321 if (ix86_save_reg (regno, true))
4323 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4325 gen_rtx_REG (Pmode, regno));
4326 RTX_FRAME_RELATED_P (insn) = 1;
4327 offset += UNITS_PER_WORD;
4331 /* Expand the prologue into a bunch of separate insns. */
/* Sequence: save ebp / set up frame pointer, save registers (push or mov),
   allocate the frame (sub or _alloca call for large probed frames), then
   load the PIC register if needed.  NOTE(review): sampled extract -- some
   declarations and branch bodies are not visible here.  */
4334 ix86_expand_prologue ()
4338 struct ix86_frame frame;
4340 HOST_WIDE_INT allocate;
4344 use_fast_prologue_epilogue
4345 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4346 if (TARGET_PROLOGUE_USING_MOVE)
4347 use_mov = use_fast_prologue_epilogue;
4349 ix86_compute_frame_layout (&frame);
4351 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4352 slower on all targets. Also sdb doesn't like it. */
4354 if (frame_pointer_needed)
4356 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4357 RTX_FRAME_RELATED_P (insn) = 1;
4359 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4360 RTX_FRAME_RELATED_P (insn) = 1;
4363 allocate = frame.to_allocate;
4364 /* In case we are dealing only with single register and empty frame,
4365 push is equivalent of the mov+add sequence. */
4366 if (allocate == 0 && frame.nregs <= 1)
4370 ix86_emit_save_regs ();
/* When saving via mov, the register area is part of the allocation.  */
4372 allocate += frame.nregs * UNITS_PER_WORD;
4376 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4378 insn = emit_insn (gen_pro_epilogue_adjust_stack
4379 (stack_pointer_rtx, stack_pointer_rtx,
4380 GEN_INT (-allocate)));
4381 RTX_FRAME_RELATED_P (insn) = 1;
4385 /* ??? Is this only valid for Win32? */
/* Large frame with stack probing: call _alloca with the size in eax.  */
4392 arg0 = gen_rtx_REG (SImode, 0);
4393 emit_move_insn (arg0, GEN_INT (allocate));
4395 sym = gen_rtx_MEM (FUNCTION_MODE,
4396 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4397 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4399 CALL_INSN_FUNCTION_USAGE (insn)
4400 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4401 CALL_INSN_FUNCTION_USAGE (insn));
/* mov-style save: address relative to esp when no frame pointer or no
   locals, otherwise relative to ebp (below the saved registers).  */
4405 if (!frame_pointer_needed || !frame.to_allocate)
4406 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4408 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4409 -frame.nregs * UNITS_PER_WORD);
4412 #ifdef SUBTARGET_PROLOGUE
/* Load the GOT address into the PIC register if this function needs it.  */
4416 pic_reg_used = false;
4417 if (pic_offset_table_rtx
4418 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4419 || current_function_profile))
4421 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4423 if (alt_pic_reg_used != INVALID_REGNUM)
4424 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4426 pic_reg_used = true;
4431 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4433 /* Even with accurate pre-reload life analysis, we can wind up
4434 deleting all references to the pic register after reload.
4435 Consider if cross-jumping unifies two sides of a branch
4436 controlled by a comparison vs the only read from a global.
4437 In which case, allow the set_got to be deleted, though we're
4438 too late to do anything about the ebx save in the prologue. */
4439 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4442 /* Prevent function calls from being scheduled before the call to mcount.
4443 In the pic_reg_used case, make sure that the got load isn't deleted. */
4444 if (current_function_profile)
4445 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4448 /* Emit code to restore saved registers using MOV insns. First register
4449 is restored from POINTER + OFFSET. */
/* Mirror of ix86_emit_save_regs_using_mov; MAYBE_EH_RETURN is forwarded
   to ix86_save_reg to decide whether EH data registers are included.  */
4451 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4454 int maybe_eh_return;
4458 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4459 if (ix86_save_reg (regno, maybe_eh_return))
4461 emit_move_insn (gen_rtx_REG (Pmode, regno),
4462 adjust_address (gen_rtx_MEM (Pmode, pointer),
4464 offset += UNITS_PER_WORD;
4468 /* Restore function stack, frame, and registers. */
/* STYLE appears to distinguish epilogue flavors (2 = eh_return path,
   sibcall epilogues emit no return insn) -- confirm exact encoding against
   the complete file.  NOTE(review): sampled extract; several branch bodies
   and declarations are not visible here.  */
4471 ix86_expand_epilogue (style)
4475 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4476 struct ix86_frame frame;
4477 HOST_WIDE_INT offset;
4479 ix86_compute_frame_layout (&frame);
4481 /* Calculate start of saved registers relative to ebp. Special care
4482 must be taken for the normal return case of a function using
4483 eh_return: the eax and edx registers are marked as saved, but not
4484 restored along this path. */
4485 offset = frame.nregs;
4486 if (current_function_calls_eh_return && style != 2)
4488 offset *= -UNITS_PER_WORD;
4490 /* If we're only restoring one register and sp is not valid then
4491 using a move instruction to restore the register since it's
4492 less work than reloading sp and popping the register.
4494 The default code result in stack adjustment using add/lea instruction,
4495 while this code results in LEAVE instruction (or discrete equivalent),
4496 so it is profitable in some other cases as well. Especially when there
4497 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4498 and there is exactly one register to pop. This heuristic may need some
4499 tuning in future. */
4500 if ((!sp_valid && frame.nregs <= 1)
4501 || (TARGET_EPILOGUE_USING_MOVE
4502 && use_fast_prologue_epilogue
4503 && (frame.nregs > 1 || frame.to_allocate))
4504 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4505 || (frame_pointer_needed && TARGET_USE_LEAVE
4506 && use_fast_prologue_epilogue && frame.nregs == 1)
4507 || current_function_calls_eh_return)
4509 /* Restore registers. We can use ebp or esp to address the memory
4510 locations. If both are available, default to ebp, since offsets
4511 are known to be small. Only exception is esp pointing directly to the
4512 end of block of saved registers, where we may simplify addressing
4515 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4516 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4517 frame.to_allocate, style == 2);
4519 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4520 offset, style == 2);
4522 /* eh_return epilogues need %ecx added to the stack pointer. */
4525 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4527 if (frame_pointer_needed)
/* With a frame pointer: compute new sp from ebp + adjustment,
   restore ebp from memory, then switch sp.  */
4529 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4530 tmp = plus_constant (tmp, UNITS_PER_WORD);
4531 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4533 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4534 emit_move_insn (hard_frame_pointer_rtx, tmp);
4536 emit_insn (gen_pro_epilogue_adjust_stack
4537 (stack_pointer_rtx, sa, const0_rtx));
/* No frame pointer: add adjustment plus frame size to sp.  */
4541 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4542 tmp = plus_constant (tmp, (frame.to_allocate
4543 + frame.nregs * UNITS_PER_WORD));
4544 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4547 else if (!frame_pointer_needed)
4548 emit_insn (gen_pro_epilogue_adjust_stack
4549 (stack_pointer_rtx, stack_pointer_rtx,
4550 GEN_INT (frame.to_allocate
4551 + frame.nregs * UNITS_PER_WORD)));
4552 /* If not an i386, mov & pop is faster than "leave". */
4553 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4554 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4557 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4558 hard_frame_pointer_rtx,
4561 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4563 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4568 /* First step is to deallocate the stack frame so that we can
4569 pop the registers. */
4572 if (!frame_pointer_needed)
4574 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4575 hard_frame_pointer_rtx,
4578 else if (frame.to_allocate)
4579 emit_insn (gen_pro_epilogue_adjust_stack
4580 (stack_pointer_rtx, stack_pointer_rtx,
4581 GEN_INT (frame.to_allocate)));
/* Pop saved registers in ascending regno order (reverse of the pushes).  */
4583 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4584 if (ix86_save_reg (regno, false))
4587 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4589 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4591 if (frame_pointer_needed)
4593 /* Leave results in shorter dependency chains on CPUs that are
4594 able to grok it fast. */
4595 if (TARGET_USE_LEAVE)
4596 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4597 else if (TARGET_64BIT)
4598 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4600 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4604 /* Sibcall epilogues don't want a return instruction. */
4608 if (current_function_pops_args && current_function_args_size)
4610 rtx popc = GEN_INT (current_function_pops_args);
4612 /* i386 can only pop 64K bytes. If asked to pop more, pop
4613 return address, do explicit add, and jump indirectly to the
4616 if (current_function_pops_args >= 65536)
4618 rtx ecx = gen_rtx_REG (SImode, 2);
4620 /* There is no "pascal" calling convention in 64bit ABI. */
4624 emit_insn (gen_popsi1 (ecx));
4625 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4626 emit_jump_insn (gen_return_indirect_internal (ecx));
4629 emit_jump_insn (gen_return_pop_internal (popc));
4632 emit_jump_insn (gen_return_internal ());
4635 /* Reset from the function's potential modifications.
   Undoes any alternate-PIC-register renaming done by the prologue so the
   next function starts with the canonical PIC register number.  */
4638 ix86_output_function_epilogue (file, size)
4639 FILE *file ATTRIBUTE_UNUSED;
4640 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
4642 if (pic_offset_table_rtx)
4643 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4646 /* Extract the parts of an RTL expression that is a valid memory address
4647 for an instruction. Return 0 if the structure of the address is
4648 grossly off. Return -1 if the address contains ASHIFT, so it is not
4649 strictly valid, but still used for computing length of lea instruction.
/* Result is written into OUT as base + index*scale + disp.
   NOTE(review): sampled extract -- several returns and the final stores
   into OUT are not visible; confirm against the complete file.  */
4653 ix86_decompose_address (addr, out)
4655 struct ix86_address *out;
4657 rtx base = NULL_RTX;
4658 rtx index = NULL_RTX;
4659 rtx disp = NULL_RTX;
4660 HOST_WIDE_INT scale = 1;
4661 rtx scale_rtx = NULL_RTX;
4664 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4666 else if (GET_CODE (addr) == PLUS)
4668 rtx op0 = XEXP (addr, 0);
4669 rtx op1 = XEXP (addr, 1);
4670 enum rtx_code code0 = GET_CODE (op0);
4671 enum rtx_code code1 = GET_CODE (op1);
4673 if (code0 == REG || code0 == SUBREG)
4675 if (code1 == REG || code1 == SUBREG)
4676 index = op0, base = op1; /* index + base */
4678 base = op0, disp = op1; /* base + displacement */
4680 else if (code0 == MULT)
4682 index = XEXP (op0, 0);
4683 scale_rtx = XEXP (op0, 1);
4684 if (code1 == REG || code1 == SUBREG)
4685 base = op1; /* index*scale + base */
4687 disp = op1; /* index*scale + disp */
4689 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4691 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4692 scale_rtx = XEXP (XEXP (op0, 0), 1);
4693 base = XEXP (op0, 1);
4696 else if (code0 == PLUS)
4698 index = XEXP (op0, 0); /* index + base + disp */
4699 base = XEXP (op0, 1);
4705 else if (GET_CODE (addr) == MULT)
4707 index = XEXP (addr, 0); /* index*scale */
4708 scale_rtx = XEXP (addr, 1);
4710 else if (GET_CODE (addr) == ASHIFT)
4714 /* We're called for lea too, which implements ashift on occasion. */
4715 index = XEXP (addr, 0);
4716 tmp = XEXP (addr, 1);
4717 if (GET_CODE (tmp) != CONST_INT)
4719 scale = INTVAL (tmp);
/* Shift count must be 0..3 (scale 1, 2, 4 or 8).  */
4720 if ((unsigned HOST_WIDE_INT) scale > 3)
4726 disp = addr; /* displacement */
4728 /* Extract the integral value of scale. */
4731 if (GET_CODE (scale_rtx) != CONST_INT)
4733 scale = INTVAL (scale_rtx);
4736 /* Allow arg pointer and stack pointer as index if there is not scaling */
/* esp (and its virtual equivalents) cannot be an index in the SIB byte;
   swap it into the base position when the scale is 1.  */
4737 if (base && index && scale == 1
4738 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4739 || index == stack_pointer_rtx))
4746 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4747 if ((base == hard_frame_pointer_rtx
4748 || base == frame_pointer_rtx
4749 || base == arg_pointer_rtx) && !disp)
4752 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4753 Avoid this by transforming to [%esi+0]. */
4754 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4755 && base && !index && !disp
4757 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4760 /* Special case: encode reg+reg instead of reg*2. */
4761 if (!base && index && scale && scale == 2)
4762 base = index, scale = 1;
4764 /* Special case: scaling cannot be encoded without base or displacement. */
4765 if (!base && !disp && index && scale != 1)
4776 /* Return cost of the memory address x.
4777 For i386, it is better to use a complex address than let gcc copy
4778 the address into a reg and make a new pseudo. But not if the address
4779 requires to two regs - that would mean more pseudos with longer
4782 ix86_address_cost (x)
4785 struct ix86_address parts;
4788 if (!ix86_decompose_address (x, &parts))
4791 if (parts.base && GET_CODE (parts.base) == SUBREG)
4792 parts.base = SUBREG_REG (parts.base);
4793 if (parts.index && GET_CODE (parts.index) == SUBREG)
4794 parts.index = SUBREG_REG (parts.index);
4796 /* More complex memory references are better. */
4797 if (parts.disp && parts.disp != const0_rtx)
4800 /* Attempt to minimize number of registers in the address. */
4802 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4804 && (!REG_P (parts.index)
4805 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4809 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4811 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4812 && parts.base != parts.index)
4815 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
4816 since it's predecode logic can't detect the length of instructions
4817 and it degenerates to vector decoded. Increase cost of such
4818 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4819 to split such addresses or even refuse such addresses at all.
4821 Following addressing modes are affected:
4826 The first and last case may be avoidable by explicitly coding the zero in
4827 memory address, but I don't have an AMD-K6 machine handy to check this
4831 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4832 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4833 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4839 /* If X is a machine specific address (i.e. a symbol or label being
4840 referenced as a displacement from the GOT implemented using an
4841 UNSPEC), then return the base term. Otherwise return X. */
4844 ix86_find_base_term (x)
4851 if (GET_CODE (x) != CONST)
4854 if (GET_CODE (term) == PLUS
4855 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4856 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4857 term = XEXP (term, 0);
4858 if (GET_CODE (term) != UNSPEC
4859 || XINT (term, 1) != UNSPEC_GOTPCREL)
4862 term = XVECEXP (term, 0, 0);
4864 if (GET_CODE (term) != SYMBOL_REF
4865 && GET_CODE (term) != LABEL_REF)
4871 if (GET_CODE (x) != PLUS
4872 || XEXP (x, 0) != pic_offset_table_rtx
4873 || GET_CODE (XEXP (x, 1)) != CONST)
4876 term = XEXP (XEXP (x, 1), 0);
4878 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4879 term = XEXP (term, 0);
4881 if (GET_CODE (term) != UNSPEC
4882 || XINT (term, 1) != UNSPEC_GOTOFF)
4885 term = XVECEXP (term, 0, 0);
4887 if (GET_CODE (term) != SYMBOL_REF
4888 && GET_CODE (term) != LABEL_REF)
4894 /* Determine if a given RTX is a valid constant. We already know this
4895 satisfies CONSTANT_P. */
4898 legitimate_constant_p (x)
4903 switch (GET_CODE (x))
4906 /* TLS symbols are not constant. */
4907 if (tls_symbolic_operand (x, Pmode))
4912 inner = XEXP (x, 0);
4914 /* Offsets of TLS symbols are never valid.
4915 Discourage CSE from creating them. */
4916 if (GET_CODE (inner) == PLUS
4917 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
4920 /* Only some unspecs are valid as "constants". */
4921 if (GET_CODE (inner) == UNSPEC)
4922 switch (XINT (inner, 1))
4925 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4935 /* Otherwise we handle everything else in the move patterns. */
4939 /* Determine if a given RTX is a valid constant address. */
4942 constant_address_p (x)
4945 switch (GET_CODE (x))
4952 return TARGET_64BIT;
4955 /* For Mach-O, really believe the CONST. */
4958 /* Otherwise fall through. */
4960 return !flag_pic && legitimate_constant_p (x);
4967 /* Nonzero if the constant value X is a legitimate general operand
4968 when generating PIC code. It is given that flag_pic is on and
4969 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4972 legitimate_pic_operand_p (x)
4977 switch (GET_CODE (x))
4980 inner = XEXP (x, 0);
4982 /* Only some unspecs are valid as "constants". */
4983 if (GET_CODE (inner) == UNSPEC)
4984 switch (XINT (inner, 1))
4987 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4995 return legitimate_pic_address_disp_p (x);
5002 /* Determine if a given CONST RTX is a valid memory displacement
5006 legitimate_pic_address_disp_p (disp)
5011 /* In 64bit mode we can allow direct addresses of symbols and labels
5012 when they are not dynamic symbols. */
5016 if (GET_CODE (disp) == CONST)
5018 /* ??? Handle PIC code models */
5019 if (GET_CODE (x) == PLUS
5020 && (GET_CODE (XEXP (x, 1)) == CONST_INT
5021 && ix86_cmodel == CM_SMALL_PIC
5022 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
5023 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
5025 if (local_symbolic_operand (x, Pmode))
5028 if (GET_CODE (disp) != CONST)
5030 disp = XEXP (disp, 0);
5034 /* It is unsafe to allow PLUS expressions. This limits the allowed distance
5035 of GOT tables. We should not need these anyway. */
5036 if (GET_CODE (disp) != UNSPEC
5037 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5040 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5041 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5047 if (GET_CODE (disp) == PLUS)
5049 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5051 disp = XEXP (disp, 0);
5055 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5056 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5058 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5059 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5060 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5062 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5063 if (strstr (sym_name, "$pb") != 0)
5068 if (GET_CODE (disp) != UNSPEC)
5071 switch (XINT (disp, 1))
5076 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5078 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5079 case UNSPEC_GOTTPOFF:
5082 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5084 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5086 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5092 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5093 memory address for an instruction. The MODE argument is the machine mode
5094 for the MEM expression that wants to use this address.
5096 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5097 convert common non-canonical forms to canonical form so that they will
5101 legitimate_address_p (mode, addr, strict)
5102 enum machine_mode mode;
5106 struct ix86_address parts;
5107 rtx base, index, disp;
5108 HOST_WIDE_INT scale;
5109 const char *reason = NULL;
5110 rtx reason_rtx = NULL_RTX;
5112 if (TARGET_DEBUG_ADDR)
5115 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5116 GET_MODE_NAME (mode), strict);
5120 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5122 if (TARGET_DEBUG_ADDR)
5123 fprintf (stderr, "Success.\n");
5127 if (ix86_decompose_address (addr, &parts) <= 0)
5129 reason = "decomposition failed";
5134 index = parts.index;
5136 scale = parts.scale;
5138 /* Validate base register.
5140 Don't allow SUBREG's here, it can lead to spill failures when the base
5141 is one word out of a two word structure, which is represented internally
5149 if (GET_CODE (base) == SUBREG)
5150 reg = SUBREG_REG (base);
5154 if (GET_CODE (reg) != REG)
5156 reason = "base is not a register";
5160 if (GET_MODE (base) != Pmode)
5162 reason = "base is not in Pmode";
5166 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5167 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5169 reason = "base is not valid";
5174 /* Validate index register.
5176 Don't allow SUBREG's here, it can lead to spill failures when the index
5177 is one word out of a two word structure, which is represented internally
5185 if (GET_CODE (index) == SUBREG)
5186 reg = SUBREG_REG (index);
5190 if (GET_CODE (reg) != REG)
5192 reason = "index is not a register";
5196 if (GET_MODE (index) != Pmode)
5198 reason = "index is not in Pmode";
5202 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5203 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5205 reason = "index is not valid";
5210 /* Validate scale factor. */
5213 reason_rtx = GEN_INT (scale);
5216 reason = "scale without index";
5220 if (scale != 2 && scale != 4 && scale != 8)
5222 reason = "scale is not a valid multiplier";
5227 /* Validate displacement. */
5234 if (!x86_64_sign_extended_value (disp))
5236 reason = "displacement is out of range";
5242 if (GET_CODE (disp) == CONST_DOUBLE)
5244 reason = "displacement is a const_double";
5249 if (GET_CODE (disp) == CONST
5250 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5251 switch (XINT (XEXP (disp, 0), 1))
5255 case UNSPEC_GOTPCREL:
5258 goto is_legitimate_pic;
5260 case UNSPEC_GOTTPOFF:
5266 reason = "invalid address unspec";
5270 else if (flag_pic && (SYMBOLIC_CONST (disp)
5272 && !machopic_operand_p (disp)
5277 if (TARGET_64BIT && (index || base))
5279 reason = "non-constant pic memory reference";
5282 if (! legitimate_pic_address_disp_p (disp))
5284 reason = "displacement is an invalid pic construct";
5288 /* This code used to verify that a symbolic pic displacement
5289 includes the pic_offset_table_rtx register.
5291 While this is a good idea, unfortunately these constructs may
5292 be created by "adds using lea" optimization for incorrect
5301 This code is nonsensical, but results in addressing
5302 GOT table with pic_offset_table_rtx base. We can't
5303 just refuse it easily, since it gets matched by
5304 "addsi3" pattern, that later gets split to lea in the
5305 case output register differs from input. While this
5306 can be handled by separate addsi pattern for this case
5307 that never results in lea, this seems to be easier and
5308 correct fix for crash to disable this test. */
5310 else if (!CONSTANT_ADDRESS_P (disp))
5312 reason = "displacement is not constant";
5317 /* Everything looks valid. */
5318 if (TARGET_DEBUG_ADDR)
5319 fprintf (stderr, "Success.\n");
5323 if (TARGET_DEBUG_ADDR)
5325 fprintf (stderr, "Error: %s\n", reason);
5326 debug_rtx (reason_rtx);
5331 /* Return an unique alias set for the GOT. */
5333 static HOST_WIDE_INT
5334 ix86_GOT_alias_set ()
5336 static HOST_WIDE_INT set = -1;
5338 set = new_alias_set ();
5342 /* Return a legitimate reference for ORIG (an address) using the
5343 register REG. If REG is 0, a new pseudo is generated.
5345 There are two types of references that must be handled:
5347 1. Global data references must load the address from the GOT, via
5348 the PIC reg. An insn is emitted to do this load, and the reg is
5351 2. Static data references, constant pool addresses, and code labels
5352 compute the address as an offset from the GOT, whose base is in
5353 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5354 differentiate them from global data objects. The returned
5355 address is the PIC reg + an unspec constant.
5357 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5358 reg also appears in the address. */
5361 legitimize_pic_address (orig, reg)
5371 reg = gen_reg_rtx (Pmode);
5372 /* Use the generic Mach-O PIC machinery. */
5373 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5376 if (local_symbolic_operand (addr, Pmode))
5378 /* In 64bit mode we can address such objects directly. */
5383 /* This symbol may be referenced via a displacement from the PIC
5384 base address (@GOTOFF). */
5386 if (reload_in_progress)
5387 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5388 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5389 new = gen_rtx_CONST (Pmode, new);
5390 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5394 emit_move_insn (reg, new);
5399 else if (GET_CODE (addr) == SYMBOL_REF)
5403 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5404 new = gen_rtx_CONST (Pmode, new);
5405 new = gen_rtx_MEM (Pmode, new);
5406 RTX_UNCHANGING_P (new) = 1;
5407 set_mem_alias_set (new, ix86_GOT_alias_set ());
5410 reg = gen_reg_rtx (Pmode);
5411 /* Use directly gen_movsi, otherwise the address is loaded
5412 into a register for CSE. We don't want to CSE these addresses,
5413 instead we CSE addresses from the GOT table, so skip this. */
5414 emit_insn (gen_movsi (reg, new));
5419 /* This symbol must be referenced via a load from the
5420 Global Offset Table (@GOT). */
5422 if (reload_in_progress)
5423 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5424 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5425 new = gen_rtx_CONST (Pmode, new);
5426 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5427 new = gen_rtx_MEM (Pmode, new);
5428 RTX_UNCHANGING_P (new) = 1;
5429 set_mem_alias_set (new, ix86_GOT_alias_set ());
5432 reg = gen_reg_rtx (Pmode);
5433 emit_move_insn (reg, new);
5439 if (GET_CODE (addr) == CONST)
5441 addr = XEXP (addr, 0);
5443 /* We must match stuff we generate before. Assume the only
5444 unspecs that can get here are ours. Not that we could do
5445 anything with them anyway... */
5446 if (GET_CODE (addr) == UNSPEC
5447 || (GET_CODE (addr) == PLUS
5448 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5450 if (GET_CODE (addr) != PLUS)
5453 if (GET_CODE (addr) == PLUS)
5455 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5457 /* Check first to see if this is a constant offset from a @GOTOFF
5458 symbol reference. */
5459 if (local_symbolic_operand (op0, Pmode)
5460 && GET_CODE (op1) == CONST_INT)
5464 if (reload_in_progress)
5465 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5466 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5468 new = gen_rtx_PLUS (Pmode, new, op1);
5469 new = gen_rtx_CONST (Pmode, new);
5470 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5474 emit_move_insn (reg, new);
5480 /* ??? We need to limit offsets here. */
5485 base = legitimize_pic_address (XEXP (addr, 0), reg);
5486 new = legitimize_pic_address (XEXP (addr, 1),
5487 base == reg ? NULL_RTX : reg);
5489 if (GET_CODE (new) == CONST_INT)
5490 new = plus_constant (base, INTVAL (new));
5493 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5495 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5496 new = XEXP (new, 1);
5498 new = gen_rtx_PLUS (Pmode, base, new);
5507 ix86_encode_section_info (decl, first)
5509 int first ATTRIBUTE_UNUSED;
5511 bool local_p = (*targetm.binds_local_p) (decl);
5514 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5515 if (GET_CODE (rtl) != MEM)
5517 symbol = XEXP (rtl, 0);
5518 if (GET_CODE (symbol) != SYMBOL_REF)
5521 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5522 symbol so that we may access it directly in the GOT. */
5525 SYMBOL_REF_FLAG (symbol) = local_p;
5527 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5528 "local dynamic", "initial exec" or "local exec" TLS models
5531 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5533 const char *symbol_str;
5536 enum tls_model kind;
5541 kind = TLS_MODEL_LOCAL_EXEC;
5543 kind = TLS_MODEL_INITIAL_EXEC;
5545 /* Local dynamic is inefficient when we're not combining the
5546 parts of the address. */
5547 else if (optimize && local_p)
5548 kind = TLS_MODEL_LOCAL_DYNAMIC;
5550 kind = TLS_MODEL_GLOBAL_DYNAMIC;
5551 if (kind < flag_tls_default)
5552 kind = flag_tls_default;
5554 symbol_str = XSTR (symbol, 0);
5556 if (symbol_str[0] == '%')
5558 if (symbol_str[1] == tls_model_chars[kind])
5562 len = strlen (symbol_str) + 1;
5563 newstr = alloca (len + 2);
5566 newstr[1] = tls_model_chars[kind];
5567 memcpy (newstr + 2, symbol_str, len);
5569 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
5573 /* Undo the above when printing symbol names. */
5576 ix86_strip_name_encoding (str)
5586 /* Load the thread pointer into a register. */
5589 get_thread_pointer ()
5593 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5594 tp = gen_rtx_MEM (Pmode, tp);
5595 RTX_UNCHANGING_P (tp) = 1;
5596 set_mem_alias_set (tp, ix86_GOT_alias_set ());
5597 tp = force_reg (Pmode, tp);
5602 /* Try machine-dependent ways of modifying an illegitimate address
5603 to be legitimate. If we find one, return the new, valid address.
5604 This macro is used in only one place: `memory_address' in explow.c.
5606 OLDX is the address as it was before break_out_memory_refs was called.
5607 In some cases it is useful to look at this to decide what needs to be done.
5609 MODE and WIN are passed so that this macro can use
5610 GO_IF_LEGITIMATE_ADDRESS.
5612 It is always safe for this macro to do nothing. It exists to recognize
5613 opportunities to optimize the output.
5615 For the 80386, we handle X+REG by loading X into a register R and
5616 using R+REG. R will go in a general reg and indexing will be used.
5617 However, if REG is a broken-out memory address or multiplication,
5618 nothing needs to be done because REG can certainly go in a general reg.
5620 When -fpic is used, special handling is needed for symbolic references.
5621 See comments by legitimize_pic_address in i386.c for details. */
5624 legitimize_address (x, oldx, mode)
5626 register rtx oldx ATTRIBUTE_UNUSED;
5627 enum machine_mode mode;
5632 if (TARGET_DEBUG_ADDR)
5634 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5635 GET_MODE_NAME (mode));
5639 log = tls_symbolic_operand (x, mode);
5642 rtx dest, base, off, pic;
5646 case TLS_MODEL_GLOBAL_DYNAMIC:
5647 dest = gen_reg_rtx (Pmode);
5648 emit_insn (gen_tls_global_dynamic (dest, x));
5651 case TLS_MODEL_LOCAL_DYNAMIC:
5652 base = gen_reg_rtx (Pmode);
5653 emit_insn (gen_tls_local_dynamic_base (base));
5655 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5656 off = gen_rtx_CONST (Pmode, off);
5658 return gen_rtx_PLUS (Pmode, base, off);
5660 case TLS_MODEL_INITIAL_EXEC:
5663 if (reload_in_progress)
5664 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5665 pic = pic_offset_table_rtx;
5669 pic = gen_reg_rtx (Pmode);
5670 emit_insn (gen_set_got (pic));
5673 base = get_thread_pointer ();
5675 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_GOTTPOFF);
5676 off = gen_rtx_CONST (Pmode, off);
5677 off = gen_rtx_PLUS (Pmode, pic, off);
5678 off = gen_rtx_MEM (Pmode, off);
5679 RTX_UNCHANGING_P (off) = 1;
5680 set_mem_alias_set (off, ix86_GOT_alias_set ());
5682 /* Damn Sun for specifying a set of dynamic relocations without
5683 considering the two-operand nature of the architecture!
5684 We'd be much better off with a "GOTNTPOFF" relocation that
5685 already contained the negated constant. */
5686 /* ??? Using negl and reg+reg addressing appears to be a lose
5687 size-wise. The negl is two bytes, just like the extra movl
5688 incurred by the two-operand subl, but reg+reg addressing
5689 uses the two-byte modrm form, unlike plain reg. */
5691 dest = gen_reg_rtx (Pmode);
5692 emit_insn (gen_subsi3 (dest, base, off));
5695 case TLS_MODEL_LOCAL_EXEC:
5696 base = get_thread_pointer ();
5698 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5699 TARGET_GNU_TLS ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5700 off = gen_rtx_CONST (Pmode, off);
5703 return gen_rtx_PLUS (Pmode, base, off);
5706 dest = gen_reg_rtx (Pmode);
5707 emit_insn (gen_subsi3 (dest, base, off));
5718 if (flag_pic && SYMBOLIC_CONST (x))
5719 return legitimize_pic_address (x, 0);
5721 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5722 if (GET_CODE (x) == ASHIFT
5723 && GET_CODE (XEXP (x, 1)) == CONST_INT
5724 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5727 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5728 GEN_INT (1 << log));
5731 if (GET_CODE (x) == PLUS)
5733 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5735 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5736 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5737 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5740 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5741 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5742 GEN_INT (1 << log));
5745 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5746 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5747 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5750 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5751 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5752 GEN_INT (1 << log));
5755 /* Put multiply first if it isn't already. */
5756 if (GET_CODE (XEXP (x, 1)) == MULT)
5758 rtx tmp = XEXP (x, 0);
5759 XEXP (x, 0) = XEXP (x, 1);
5764 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5765 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5766 created by virtual register instantiation, register elimination, and
5767 similar optimizations. */
5768 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5771 x = gen_rtx_PLUS (Pmode,
5772 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5773 XEXP (XEXP (x, 1), 0)),
5774 XEXP (XEXP (x, 1), 1));
5778 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5779 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5780 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5781 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5782 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5783 && CONSTANT_P (XEXP (x, 1)))
5786 rtx other = NULL_RTX;
5788 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5790 constant = XEXP (x, 1);
5791 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5793 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5795 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5796 other = XEXP (x, 1);
5804 x = gen_rtx_PLUS (Pmode,
5805 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5806 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5807 plus_constant (other, INTVAL (constant)));
5811 if (changed && legitimate_address_p (mode, x, FALSE))
5814 if (GET_CODE (XEXP (x, 0)) == MULT)
5817 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5820 if (GET_CODE (XEXP (x, 1)) == MULT)
5823 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5827 && GET_CODE (XEXP (x, 1)) == REG
5828 && GET_CODE (XEXP (x, 0)) == REG)
5831 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5834 x = legitimize_pic_address (x, 0);
5837 if (changed && legitimate_address_p (mode, x, FALSE))
5840 if (GET_CODE (XEXP (x, 0)) == REG)
5842 register rtx temp = gen_reg_rtx (Pmode);
5843 register rtx val = force_operand (XEXP (x, 1), temp);
5845 emit_move_insn (temp, val);
5851 else if (GET_CODE (XEXP (x, 1)) == REG)
5853 register rtx temp = gen_reg_rtx (Pmode);
5854 register rtx val = force_operand (XEXP (x, 0), temp);
5856 emit_move_insn (temp, val);
5866 /* Print an integer constant expression in assembler syntax. Addition
5867 and subtraction are the only arithmetic that may appear in these
5868 expressions. FILE is the stdio stream to write to, X is the rtx, and
5869 CODE is the operand print code from the output string. */
5872 output_pic_addr_const (file, x, code)
5879 switch (GET_CODE (x))
5889 assemble_name (file, XSTR (x, 0));
5890 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
5891 fputs ("@PLT", file);
5898 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5899 assemble_name (asm_out_file, buf);
5903 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5907 /* This used to output parentheses around the expression,
5908 but that does not work on the 386 (either ATT or BSD assembler). */
5909 output_pic_addr_const (file, XEXP (x, 0), code);
5913 if (GET_MODE (x) == VOIDmode)
5915 /* We can use %d if the number is <32 bits and positive. */
5916 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5917 fprintf (file, "0x%lx%08lx",
5918 (unsigned long) CONST_DOUBLE_HIGH (x),
5919 (unsigned long) CONST_DOUBLE_LOW (x));
5921 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5924 /* We can't handle floating point constants;
5925 PRINT_OPERAND must handle them. */
5926 output_operand_lossage ("floating constant misused");
5930 /* Some assemblers need integer constants to appear first. */
5931 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5933 output_pic_addr_const (file, XEXP (x, 0), code);
5935 output_pic_addr_const (file, XEXP (x, 1), code);
5937 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5939 output_pic_addr_const (file, XEXP (x, 1), code);
5941 output_pic_addr_const (file, XEXP (x, 0), code);
5949 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5950 output_pic_addr_const (file, XEXP (x, 0), code);
5952 output_pic_addr_const (file, XEXP (x, 1), code);
5954 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5958 if (XVECLEN (x, 0) != 1)
5960 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5961 switch (XINT (x, 1))
5964 fputs ("@GOT", file);
5967 fputs ("@GOTOFF", file);
5969 case UNSPEC_GOTPCREL:
5970 fputs ("@GOTPCREL(%rip)", file);
5972 case UNSPEC_GOTTPOFF:
5973 fputs ("@GOTTPOFF", file);
5976 fputs ("@TPOFF", file);
5979 fputs ("@NTPOFF", file);
5982 fputs ("@DTPOFF", file);
5985 output_operand_lossage ("invalid UNSPEC as operand");
5991 output_operand_lossage ("invalid expression as operand");
5995 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5996 We need to handle our special PIC relocations. */
5999 i386_dwarf_output_addr_const (file, x)
6004 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6008 fprintf (file, "%s", ASM_LONG);
6011 output_pic_addr_const (file, x, '\0');
6013 output_addr_const (file, x);
6017 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6018 We need to emit DTP-relative relocations. */
6021 i386_output_dwarf_dtprel (file, size, x)
6029 fputs (ASM_LONG, file);
6033 fputs (ASM_QUAD, file);
6040 output_addr_const (file, x);
6041 fputs ("@DTPOFF", file);
6044 /* In the name of slightly smaller debug output, and to cater to
6045 general assembler losage, recognize PIC+GOTOFF and turn it back
6046 into a direct symbol reference. */
6049 i386_simplify_dwarf_addr (orig_x)
6054 if (GET_CODE (x) == MEM)
6059 if (GET_CODE (x) != CONST
6060 || GET_CODE (XEXP (x, 0)) != UNSPEC
6061 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6062 || GET_CODE (orig_x) != MEM)
6064 return XVECEXP (XEXP (x, 0), 0, 0);
6067 if (GET_CODE (x) != PLUS
6068 || GET_CODE (XEXP (x, 1)) != CONST)
6071 if (GET_CODE (XEXP (x, 0)) == REG
6072 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6073 /* %ebx + GOT/GOTOFF */
6075 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6077 /* %ebx + %reg * scale + GOT/GOTOFF */
6079 if (GET_CODE (XEXP (y, 0)) == REG
6080 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6082 else if (GET_CODE (XEXP (y, 1)) == REG
6083 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6087 if (GET_CODE (y) != REG
6088 && GET_CODE (y) != MULT
6089 && GET_CODE (y) != ASHIFT)
6095 x = XEXP (XEXP (x, 1), 0);
6096 if (GET_CODE (x) == UNSPEC
6097 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6098 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6101 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6102 return XVECEXP (x, 0, 0);
6105 if (GET_CODE (x) == PLUS
6106 && GET_CODE (XEXP (x, 0)) == UNSPEC
6107 && GET_CODE (XEXP (x, 1)) == CONST_INT
6108 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6109 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6110 && GET_CODE (orig_x) != MEM)))
6112 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6114 return gen_rtx_PLUS (Pmode, y, x);
6122 put_condition_code (code, mode, reverse, fp, file)
6124 enum machine_mode mode;
6130 if (mode == CCFPmode || mode == CCFPUmode)
6132 enum rtx_code second_code, bypass_code;
6133 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6134 if (bypass_code != NIL || second_code != NIL)
6136 code = ix86_fp_compare_code_to_integer (code);
6140 code = reverse_condition (code);
6151 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6156 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6157 Those same assemblers have the same but opposite losage on cmov. */
6160 suffix = fp ? "nbe" : "a";
6163 if (mode == CCNOmode || mode == CCGOCmode)
6165 else if (mode == CCmode || mode == CCGCmode)
6176 if (mode == CCNOmode || mode == CCGOCmode)
6178 else if (mode == CCmode || mode == CCGCmode)
6187 suffix = fp ? "nb" : "ae";
6190 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6200 suffix = fp ? "u" : "p";
6203 suffix = fp ? "nu" : "np";
6208 fputs (suffix, file);
6212 print_reg (x, code, file)
6217 if (REGNO (x) == ARG_POINTER_REGNUM
6218 || REGNO (x) == FRAME_POINTER_REGNUM
6219 || REGNO (x) == FLAGS_REG
6220 || REGNO (x) == FPSR_REG)
6223 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6226 if (code == 'w' || MMX_REG_P (x))
6228 else if (code == 'b')
6230 else if (code == 'k')
6232 else if (code == 'q')
6234 else if (code == 'y')
6236 else if (code == 'h')
6239 code = GET_MODE_SIZE (GET_MODE (x));
6241 /* Irritatingly, AMD extended registers use different naming convention
6242 from the normal registers. */
6243 if (REX_INT_REG_P (x))
6250 error ("extended registers have no high halves");
6253 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6256 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6259 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6262 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6265 error ("unsupported operand size for extended register");
6273 if (STACK_TOP_P (x))
6275 fputs ("st(0)", file);
6282 if (! ANY_FP_REG_P (x))
6283 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6287 fputs (hi_reg_name[REGNO (x)], file);
6290 fputs (qi_reg_name[REGNO (x)], file);
6293 fputs (qi_high_reg_name[REGNO (x)], file);
6300 /* Locate some local-dynamic symbol still in use by this function
6301 so that we can print its name in some tls_local_dynamic_base
6305 get_some_local_dynamic_name ()
6309 if (cfun->machine->some_ld_name)
6310 return cfun->machine->some_ld_name;
6312 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6314 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6315 return cfun->machine->some_ld_name;
6321 get_some_local_dynamic_name_1 (px, data)
6323 void *data ATTRIBUTE_UNUSED;
6327 if (GET_CODE (x) == SYMBOL_REF
6328 && local_dynamic_symbolic_operand (x, Pmode))
6330 cfun->machine->some_ld_name = XSTR (x, 0);
6338 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6339 C -- print opcode suffix for set/cmov insn.
6340 c -- like C, but print reversed condition
6341 F,f -- likewise, but for floating-point.
6342 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6344 R -- print the prefix for register names.
6345 z -- print the opcode suffix for the size of the current operand.
6346 * -- print a star (in certain assembler syntax)
6347 A -- print an absolute memory reference.
6348 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6349 s -- print a shift double count, followed by the assemblers argument
6351 b -- print the QImode name of the register for the indicated operand.
6352 %b0 would print %al if operands[0] is reg 0.
6353 w -- likewise, print the HImode name of the register.
6354 k -- likewise, print the SImode name of the register.
6355 q -- likewise, print the DImode name of the register.
6356 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6357 y -- print "st(0)" instead of "st" as a register.
6358 D -- print condition for SSE cmp instruction.
6359 P -- if PIC, print an @PLT suffix.
6360 X -- don't print any sort of PIC '@' suffix for a symbol.
6361 & -- print some in-use local-dynamic symbol name.
/* NOTE(review): this listing is non-contiguous -- the embedded original line
   numbers jump, so most of the function body (case labels, braces, locals)
   is elided here.  Comments below describe only what the visible lines show.

   print_operand: emit assembler text for operand X to FILE, honoring the
   single-letter operand modifier CODE (the codes are documented in the
   comment block just above this function).  Output differs between AT&T
   and Intel dialects via ASSEMBLER_DIALECT.  */
6365 print_operand (file, x, code)
6375 if (ASSEMBLER_DIALECT == ASM_ATT)
6380 assemble_name (file, get_some_local_dynamic_name ());
6384 if (ASSEMBLER_DIALECT == ASM_ATT)
6386 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6388 /* Intel syntax. For absolute addresses, registers should not
6389 be surrounded by braces. */
6390 if (GET_CODE (x) != REG)
6393 PRINT_OPERAND (file, x, 0);
6401 PRINT_OPERAND (file, x, 0);
/* The following run of dialect checks belongs to the per-code suffix
   handling; the putc calls between them are elided in this listing.  */
6406 if (ASSEMBLER_DIALECT == ASM_ATT)
6411 if (ASSEMBLER_DIALECT == ASM_ATT)
6416 if (ASSEMBLER_DIALECT == ASM_ATT)
6421 if (ASSEMBLER_DIALECT == ASM_ATT)
6426 if (ASSEMBLER_DIALECT == ASM_ATT)
6431 if (ASSEMBLER_DIALECT == ASM_ATT)
6436 /* 387 opcodes don't get size suffixes if the operands are
6438 if (STACK_REG_P (x))
6441 /* Likewise if using Intel opcodes. */
6442 if (ASSEMBLER_DIALECT == ASM_INTEL)
6445 /* This is the size of op from size of operand. */
6446 switch (GET_MODE_SIZE (GET_MODE (x)))
6449 #ifdef HAVE_GAS_FILDS_FISTS
6455 if (GET_MODE (x) == SFmode)
6470 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6472 #ifdef GAS_MNEMONICS
6498 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6500 PRINT_OPERAND (file, x, 0);
/* Code 'D': SSE compare predicates use their own mnemonic names
   (unord/neq/nlt/nle/ord), distinct from fp cmov condition names.  */
6506 /* Little bit of braindamage here. The SSE compare instructions
6507 does use completely different names for the comparisons that the
6508 fp conditional moves. */
6509 switch (GET_CODE (x))
6524 fputs ("unord", file);
6528 fputs ("neq", file);
6532 fputs ("nlt", file);
6536 fputs ("nle", file);
6539 fputs ("ord", file);
6547 #ifdef CMOV_SUN_AS_SYNTAX
6548 if (ASSEMBLER_DIALECT == ASM_ATT)
6550 switch (GET_MODE (x))
6552 case HImode: putc ('w', file); break;
6554 case SFmode: putc ('l', file); break;
6556 case DFmode: putc ('q', file); break;
/* put_condition_code's last two int args appear to be (reverse, fp)
   flags -- TODO confirm against the full source.  */
6564 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6567 #ifdef CMOV_SUN_AS_SYNTAX
6568 if (ASSEMBLER_DIALECT == ASM_ATT)
6571 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6574 /* Like above, but reverse condition */
6576 /* Check to see if argument to %c is really a constant
6577 and not a condition code which needs to be reversed. */
6578 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
6580 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6583 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6586 #ifdef CMOV_SUN_AS_SYNTAX
6587 if (ASSEMBLER_DIALECT == ASM_ATT)
6590 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* Branch-prediction hint emission: only when optimizing for speed and
   the prediction is strong (outside the 45%..55% band) and disagrees
   with the CPU's static forward-not-taken heuristic.  */
6596 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6599 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6602 int pred_val = INTVAL (XEXP (x, 0));
6604 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6605 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6607 int taken = pred_val > REG_BR_PROB_BASE / 2;
6608 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6610 /* Emit hints only in the case default branch prediction
6611 heruistics would fail. */
6612 if (taken != cputaken)
6614 /* We use 3e (DS) prefix for taken branches and
6615 2e (CS) prefix for not taken branches. */
6617 fputs ("ds ; ", file);
6619 fputs ("cs ; ", file);
6626 output_operand_lossage ("invalid operand code `%c'", code);
/* After the modifier switch: print the operand itself by rtx class.  */
6630 if (GET_CODE (x) == REG)
6632 PRINT_REG (x, code, file);
6635 else if (GET_CODE (x) == MEM)
6637 /* No `byte ptr' prefix for call instructions. */
6638 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6641 switch (GET_MODE_SIZE (GET_MODE (x)))
6643 case 1: size = "BYTE"; break;
6644 case 2: size = "WORD"; break;
6645 case 4: size = "DWORD"; break;
6646 case 8: size = "QWORD"; break;
6647 case 12: size = "XWORD"; break;
6648 case 16: size = "XMMWORD"; break;
6653 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6656 else if (code == 'w')
6658 else if (code == 'k')
6662 fputs (" PTR ", file);
6666 if (flag_pic && CONSTANT_ADDRESS_P (x))
6667 output_pic_addr_const (file, x, code)
6668 /* Avoid (%rip) for call operands. */
6669 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
6670 && GET_CODE (x) != CONST_INT)
6671 output_addr_const (file, x);
6672 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6673 output_operand_lossage ("invalid constraints for operand");
/* Float immediates: SFmode is emitted as its 32-bit bit pattern;
   DFmode/XFmode/TFmode are printed in decimal (stated below not to
   occur as real immediate operands).  */
6678 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6683 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6684 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6686 if (ASSEMBLER_DIALECT == ASM_ATT)
6688 fprintf (file, "0x%lx", l);
6691 /* These float cases don't actually occur as immediate operands. */
6692 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6697 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6698 REAL_VALUE_TO_DECIMAL (r, dstr, -1);
6699 fprintf (file, "%s", dstr);
6702 else if (GET_CODE (x) == CONST_DOUBLE
6703 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
6708 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6709 REAL_VALUE_TO_DECIMAL (r, dstr, -1);
6710 fprintf (file, "%s", dstr);
6717 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6719 if (ASSEMBLER_DIALECT == ASM_ATT)
6722 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6723 || GET_CODE (x) == LABEL_REF)
6725 if (ASSEMBLER_DIALECT == ASM_ATT)
6728 fputs ("OFFSET FLAT:", file);
6731 if (GET_CODE (x) == CONST_INT)
6732 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6734 output_pic_addr_const (file, x, code);
6736 output_addr_const (file, x);
/* NOTE(review): non-contiguous listing -- interior lines are elided.
   print_operand_address: print the memory address ADDR to FILE, after
   decomposing it into base/index/displacement/scale via
   ix86_decompose_address.  Handles the TLS thread-pointer UNSPEC,
   displacement-only addresses, and full base+index*scale+disp forms in
   both AT&T and Intel dialects.  */
6740 /* Print a memory operand whose address is ADDR. */
6743 print_operand_address (file, addr)
6747 struct ix86_address parts;
6748 rtx base, index, disp;
/* Special-case the thread-pointer reference: printed as a %gs-relative
   zero offset.  */
6751 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
6753 if (ASSEMBLER_DIALECT == ASM_INTEL)
6754 fputs ("DWORD PTR ", file);
6755 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6757 fputs ("gs:0", file);
6761 if (! ix86_decompose_address (addr, &parts))
6765 index = parts.index;
6767 scale = parts.scale;
6769 if (!base && !index)
6771 /* Displacement only requires special attention. */
6773 if (GET_CODE (disp) == CONST_INT)
6775 if (ASSEMBLER_DIALECT == ASM_INTEL)
6777 if (USER_LABEL_PREFIX[0] == 0)
6779 fputs ("ds:", file);
6781 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6784 output_pic_addr_const (file, addr, 0);
6786 output_addr_const (file, addr);
6788 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6790 && (GET_CODE (addr) == SYMBOL_REF
6791 || GET_CODE (addr) == LABEL_REF
6792 || (GET_CODE (addr) == CONST
6793 && GET_CODE (XEXP (addr, 0)) == PLUS
6794 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
6795 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
6796 fputs ("(%rip)", file);
/* AT&T syntax: disp(base,index,scale).  */
6800 if (ASSEMBLER_DIALECT == ASM_ATT)
6805 output_pic_addr_const (file, disp, 0);
6806 else if (GET_CODE (disp) == LABEL_REF)
6807 output_asm_label (disp);
6809 output_addr_const (file, disp);
6814 PRINT_REG (base, 0, file);
6818 PRINT_REG (index, 0, file);
6820 fprintf (file, ",%d", scale);
/* Intel syntax: symbol[base+index*scale+offset]; the symbol and its
   integer offset are printed separately.  */
6826 rtx offset = NULL_RTX;
6830 /* Pull out the offset of a symbol; print any symbol itself. */
6831 if (GET_CODE (disp) == CONST
6832 && GET_CODE (XEXP (disp, 0)) == PLUS
6833 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6835 offset = XEXP (XEXP (disp, 0), 1);
6836 disp = gen_rtx_CONST (VOIDmode,
6837 XEXP (XEXP (disp, 0), 0));
6841 output_pic_addr_const (file, disp, 0);
6842 else if (GET_CODE (disp) == LABEL_REF)
6843 output_asm_label (disp);
6844 else if (GET_CODE (disp) == CONST_INT)
6847 output_addr_const (file, disp);
6853 PRINT_REG (base, 0, file);
6856 if (INTVAL (offset) >= 0)
6858 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6862 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6869 PRINT_REG (index, 0, file);
6871 fprintf (file, "*%d", scale);
/* NOTE(review): interior lines elided.  output_addr_const_extra: print
   target-specific UNSPEC address constants.  Each visible case prints
   the wrapped operand followed by a TLS relocation suffix
   (@GOTTPOFF/@TPOFF/@NTPOFF/@DTPOFF); non-UNSPEC rtx are rejected.  */
6879 output_addr_const_extra (file, x)
6885 if (GET_CODE (x) != UNSPEC)
6888 op = XVECEXP (x, 0, 0);
6889 switch (XINT (x, 1))
6891 case UNSPEC_GOTTPOFF:
6892 output_addr_const (file, op);
6893 fputs ("@GOTTPOFF", file);
6896 output_addr_const (file, op);
6897 fputs ("@TPOFF", file);
6900 output_addr_const (file, op);
6901 fputs ("@NTPOFF", file);
6904 output_addr_const (file, op);
6905 fputs ("@DTPOFF", file);
/* NOTE(review): interior lines elided (the loop header over NUM entries
   is not visible here).  */
6915 /* Split one or more DImode RTL references into pairs of SImode
6916 references. The RTL can be REG, offsettable MEM, integer constant, or
6917 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6918 split and "num" is its length. lo_half and hi_half are output arrays
6919 that parallel "operands". */
6922 split_di (operands, num, lo_half, hi_half)
6925 rtx lo_half[], hi_half[];
6929 rtx op = operands[num];
6931 /* simplify_subreg refuse to split volatile memory addresses,
6932 but we still have to handle it. */
6933 if (GET_CODE (op) == MEM)
/* MEM: low word at offset 0, high word at byte offset 4.  */
6935 lo_half[num] = adjust_address (op, SImode, 0);
6936 hi_half[num] = adjust_address (op, SImode, 4);
/* Non-MEM: use subregs; VOIDmode constants are treated as DImode.  */
6940 lo_half[num] = simplify_gen_subreg (SImode, op,
6941 GET_MODE (op) == VOIDmode
6942 ? DImode : GET_MODE (op), 0);
6943 hi_half[num] = simplify_gen_subreg (SImode, op,
6944 GET_MODE (op) == VOIDmode
6945 ? DImode : GET_MODE (op), 4);
/* NOTE(review): interior lines elided.  The comment below says "pairs of
   SImode" but the code splits TImode into DImode halves (byte offset 8)
   -- the comment looks copy-pasted from split_di; the code is
   authoritative.  */
6949 /* Split one or more TImode RTL references into pairs of SImode
6950 references. The RTL can be REG, offsettable MEM, integer constant, or
6951 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6952 split and "num" is its length. lo_half and hi_half are output arrays
6953 that parallel "operands". */
6956 split_ti (operands, num, lo_half, hi_half)
6959 rtx lo_half[], hi_half[];
6963 rtx op = operands[num];
6965 /* simplify_subreg refuse to split volatile memory addresses, but we
6966 still have to handle it. */
6967 if (GET_CODE (op) == MEM)
/* MEM: low DImode word at offset 0, high at byte offset 8.  */
6969 lo_half[num] = adjust_address (op, DImode, 0);
6970 hi_half[num] = adjust_address (op, DImode, 8);
6974 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6975 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
/* NOTE(review): non-contiguous listing -- opcode-name selection, the
   `p` concatenation into `buf`, and the final return are elided.  The
   visible logic chooses an assembler template string `p` based on which
   operand is at st(0), which operand dies, and SYSV386_COMPAT.  */
6980 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6981 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6982 is the expression of the binary operation. The output may either be
6983 emitted here, or returned to the caller, like all output_* functions.
6985 There is no guarantee that the operands are the same mode, as they
6986 might be within FLOAT or FLOAT_EXTEND expressions. */
6988 #ifndef SYSV386_COMPAT
6989 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6990 wants to fix the assemblers because that causes incompatibility
6991 with gcc. No-one wants to fix gcc because that causes
6992 incompatibility with assemblers... You can use the option of
6993 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6994 #define SYSV386_COMPAT 1
6998 output_387_binary_op (insn, operands)
7002 static char buf[30];
7005 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7007 #ifdef ENABLE_CHECKING
7008 /* Even if we do not want to check the inputs, this documents input
7009 constraints. Which helps in understanding the following code. */
7010 if (STACK_REG_P (operands[0])
7011 && ((REG_P (operands[1])
7012 && REGNO (operands[0]) == REGNO (operands[1])
7013 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7014 || (REG_P (operands[2])
7015 && REGNO (operands[0]) == REGNO (operands[2])
7016 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7017 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
/* First switch: pick the base mnemonic; integer-mode operands select
   the fi* variants (bodies elided).  */
7023 switch (GET_CODE (operands[3]))
7026 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7027 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7035 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7036 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7044 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7045 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7053 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7054 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
/* SSE path: scalar single/double suffix chosen by destination mode.  */
7068 if (GET_MODE (operands[0]) == SFmode)
7069 strcat (buf, "ss\t{%2, %0|%0, %2}");
7071 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* Second switch: pick the operand template for the x87 stack forms.  */
7076 switch (GET_CODE (operands[3]))
/* Commutative ops: canonicalize so operands[0] == operands[1].  */
7080 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7082 rtx temp = operands[2];
7083 operands[2] = operands[1];
7087 /* know operands[0] == operands[1]. */
7089 if (GET_CODE (operands[2]) == MEM)
7095 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7097 if (STACK_TOP_P (operands[0]))
7098 /* How is it that we are storing to a dead operand[2]?
7099 Well, presumably operands[1] is dead too. We can't
7100 store the result to st(0) as st(0) gets popped on this
7101 instruction. Instead store to operands[2] (which I
7102 think has to be st(1)). st(1) will be popped later.
7103 gcc <= 2.8.1 didn't have this check and generated
7104 assembly code that the Unixware assembler rejected. */
7105 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7107 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7111 if (STACK_TOP_P (operands[0]))
7112 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7114 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (MINUS, DIV): memory-operand and dying-operand
   cases, with SYSV386_COMPAT operand-direction workarounds.  */
7119 if (GET_CODE (operands[1]) == MEM)
7125 if (GET_CODE (operands[2]) == MEM)
7131 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7134 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7135 derived assemblers, confusingly reverse the direction of
7136 the operation for fsub{r} and fdiv{r} when the
7137 destination register is not st(0). The Intel assembler
7138 doesn't have this brain damage. Read !SYSV386_COMPAT to
7139 figure out what the hardware really does. */
7140 if (STACK_TOP_P (operands[0]))
7141 p = "{p\t%0, %2|rp\t%2, %0}";
7143 p = "{rp\t%2, %0|p\t%0, %2}";
7145 if (STACK_TOP_P (operands[0]))
7146 /* As above for fmul/fadd, we can't store to st(0). */
7147 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7149 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7154 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7157 if (STACK_TOP_P (operands[0]))
7158 p = "{rp\t%0, %1|p\t%1, %0}";
7160 p = "{p\t%1, %0|rp\t%0, %1}";
7162 if (STACK_TOP_P (operands[0]))
7163 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7165 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7170 if (STACK_TOP_P (operands[0]))
7172 if (STACK_TOP_P (operands[1]))
7173 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7175 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7178 else if (STACK_TOP_P (operands[1]))
7181 p = "{\t%1, %0|r\t%0, %1}";
7183 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7189 p = "{r\t%2, %0|\t%0, %2}";
7191 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* NOTE(review): a few interior lines elided.  Reads the current x87
   control word via fnstcw into NORMAL, then sets the rounding-control
   bits (0xc00 = round-toward-zero/truncate field) either with an insv
   on a wider register (when partial-register stalls are not a concern)
   or with a plain 16-bit OR, and stores the result to ROUND_DOWN.  */
7204 /* Output code to initialize control word copies used by
7205 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7206 is set to control word rounding downwards. */
7208 emit_i387_cw_initialization (normal, round_down)
7209 rtx normal, round_down;
7211 rtx reg = gen_reg_rtx (HImode);
7213 emit_insn (gen_x86_fnstcw_1 (normal));
7214 emit_move_insn (reg, normal);
7215 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7217 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7219 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7220 emit_move_insn (round_down, reg);
/* NOTE(review): interior lines (abort calls, return) elided.  Emits an
   fist/fistp sequence bracketed by fldcw's that swap in the truncating
   control word (operand 3) and restore the original (operand 2).  For
   DImode, only the popping fistp exists, so the value is duplicated
   with fld when st(0) must survive.  */
7223 /* Output code for INSN to convert a float to a signed int. OPERANDS
7224 are the insn operands. The output may be [HSD]Imode and the input
7225 operand may be [SDX]Fmode. */
7228 output_fix_trunc (insn, operands)
7232 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7233 int dimode_p = GET_MODE (operands[0]) == DImode;
7235 /* Jump through a hoop or two for DImode, since the hardware has no
7236 non-popping instruction. We used to do this a different way, but
7237 that was somewhat fragile and broke with post-reload splitters. */
7238 if (dimode_p && !stack_top_dies)
7239 output_asm_insn ("fld\t%y1", operands);
7241 if (!STACK_TOP_P (operands[1]))
7244 if (GET_CODE (operands[0]) != MEM)
7247 output_asm_insn ("fldcw\t%3", operands);
7248 if (stack_top_dies || dimode_p)
7249 output_asm_insn ("fistp%z0\t%0", operands);
7251 output_asm_insn ("fist%z0\t%0", operands);
7252 output_asm_insn ("fldcw\t%2", operands);
/* NOTE(review): interior lines elided (including the final table lookup
   and return).  Selects among SSE comis/ucomis, x87 fcomi/fcomip,
   fcompp, and fnstsw-based compare sequences; the alt[] table is
   indexed by a 5-bit mask built from eflags_p / integer-mode /
   unordered_p / stack_top_dies.  */
7257 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7258 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7259 when fucom should be used. */
7262 output_fp_compare (insn, operands, eflags_p, unordered_p)
7265 int eflags_p, unordered_p;
7268 rtx cmp_op0 = operands[0];
7269 rtx cmp_op1 = operands[1];
7270 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7275 cmp_op1 = operands[2];
/* SSE scalar compares set EFLAGS directly.  */
7279 if (GET_MODE (operands[0]) == SFmode)
7281 return "ucomiss\t{%1, %0|%0, %1}";
7283 return "comiss\t{%1, %0|%0, %y}";
7286 return "ucomisd\t{%1, %0|%0, %1}";
7288 return "comisd\t{%1, %0|%0, %y}";
7291 if (! STACK_TOP_P (cmp_op0))
7294 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7296 if (STACK_REG_P (cmp_op1)
7298 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7299 && REGNO (cmp_op1) != FIRST_STACK_REG)
7301 /* If both the top of the 387 stack dies, and the other operand
7302 is also a stack register that dies, then this must be a
7303 `fcompp' float compare */
7307 /* There is no double popping fcomi variant. Fortunately,
7308 eflags is immune from the fstp's cc clobbering. */
7310 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7312 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7320 return "fucompp\n\tfnstsw\t%0";
7322 return "fcompp\n\tfnstsw\t%0";
7335 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7337 static const char * const alt[24] =
7349 "fcomi\t{%y1, %0|%0, %y1}",
7350 "fcomip\t{%y1, %0|%0, %y1}",
7351 "fucomi\t{%y1, %0|%0, %y1}",
7352 "fucomip\t{%y1, %0|%0, %y1}",
7359 "fcom%z2\t%y2\n\tfnstsw\t%0",
7360 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7361 "fucom%z2\t%y2\n\tfnstsw\t%0",
7362 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7364 "ficom%z2\t%y2\n\tfnstsw\t%0",
7365 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7373 mask = eflags_p << 3;
7374 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7375 mask |= unordered_p << 1;
7376 mask |= stack_top_dies;
/* NOTE(review): interior lines elided.  Emits one jump-table entry:
   a .long (or .quad, presumably for 64-bit -- the guard is elided)
   referencing local label LPREFIX<value>.  */
7389 ix86_output_addr_vec_elt (file, value)
7393 const char *directive = ASM_LONG;
7398 directive = ASM_QUAD;
7404 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* NOTE(review): interior lines (the branch conditions) elided.  Emits a
   relative jump-table entry as either a label difference, a @GOTOFF
   reference, a Mach-O pic-base difference, or a GOT-relative
   expression, depending on configuration.  */
7408 ix86_output_addr_diff_elt (file, value, rel)
7413 fprintf (file, "%s%s%d-%s%d\n",
7414 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7415 else if (HAVE_AS_GOTOFF_IN_DATA)
7416 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7418 else if (TARGET_MACHO)
7419 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7420 machopic_function_base_name () + 1);
7423 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7424 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
/* NOTE(review): interior lines (abort, emit_insn, closing) elided.
   Emits a zeroing of DEST; after reload, when xor is preferable, the
   set is wrapped in a PARALLEL with a clobber of the flags register
   (hard reg 17) to match the movsi_xor/movdi_xor_rex64 patterns.  */
7427 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7431 ix86_expand_clear (dest)
7436 /* We play register width games, which are only valid after reload. */
7437 if (!reload_completed)
7440 /* Avoid HImode and its attendant prefix byte. */
7441 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7442 dest = gen_rtx_REG (SImode, REGNO (dest));
7444 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7446 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7447 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7449 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7450 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
/* NOTE(review): interior lines (early returns, the initial address
   extraction) elided.  Peels a pic_offset_table_rtx + CONST(UNSPEC
   GOTOFF(...)) wrapper off the MEM address and, if what remains is a
   constant-pool SYMBOL_REF, returns the pool constant.  */
7456 /* X is an unchanging MEM. If it is a constant pool reference, return
7457 the constant pool rtx, else NULL. */
7460 maybe_get_pool_constant (x)
7467 if (GET_CODE (x) != PLUS)
7469 if (XEXP (x, 0) != pic_offset_table_rtx)
7472 if (GET_CODE (x) != CONST)
7475 if (GET_CODE (x) != UNSPEC)
7477 if (XINT (x, 1) != UNSPEC_GOTOFF)
7479 x = XVECEXP (x, 0, 0);
7482 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7483 return get_pool_constant (x);
/* NOTE(review): non-contiguous listing -- operand setup, several else
   branches and the final emit are elided.  Expander for scalar moves:
   handles TLS symbol legitimization, PIC symbolic operands (including
   the Mach-O path), push operands, large 64-bit immediates, and FP
   constants that are better loaded from memory.  */
7489 ix86_expand_move (mode, operands)
7490 enum machine_mode mode;
7493 int strict = (reload_in_progress || reload_completed);
7494 rtx insn, op0, op1, tmp;
7499 /* ??? We have a slight problem. We need to say that tls symbols are
7500 not legitimate constants so that reload does not helpfully reload
7501 these constants from a REG_EQUIV, which we cannot handle. (Recall
7502 that general- and local-dynamic address resolution requires a
7505 However, if we say that tls symbols are not legitimate constants,
7506 then emit_move_insn helpfully drop them into the constant pool.
7508 It is far easier to work around emit_move_insn than reload. Recognize
7509 the MEM that we would have created and extract the symbol_ref. */
7512 && GET_CODE (op1) == MEM
7513 && RTX_UNCHANGING_P (op1)
7515 tmp = maybe_get_pool_constant (op1);
7516 /* Note that we only care about symbolic constants here, which
7517 unlike CONST_INT will always have a proper mode. */
7518 if (tmp && GET_MODE (tmp) == Pmode)
7522 if (tls_symbolic_operand (op1, Pmode))
7524 op1 = legitimize_address (op1, op1, VOIDmode);
7525 if (GET_CODE (op0) == MEM)
7527 tmp = gen_reg_rtx (mode);
7528 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7532 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
/* Mach-O (Darwin) PIC path: indirect data references and machopic
   legitimization.  */
7537 rtx temp = ((reload_in_progress
7538 || ((op0 && GET_CODE (op0) == REG)
7540 ? op0 : gen_reg_rtx (Pmode));
7541 op1 = machopic_indirect_data_reference (op1, temp);
7542 op1 = machopic_legitimize_pic_address (op1, mode,
7543 temp == op1 ? 0 : temp);
7547 if (MACHOPIC_INDIRECT)
7548 op1 = machopic_indirect_data_reference (op1, 0);
7552 insn = gen_rtx_SET (VOIDmode, op0, op1);
7556 #endif /* TARGET_MACHO */
7557 if (GET_CODE (op0) == MEM)
7558 op1 = force_reg (Pmode, op1);
7562 if (GET_CODE (temp) != REG)
7563 temp = gen_reg_rtx (Pmode);
7564 temp = legitimize_pic_address (op1, temp);
/* Non-PIC path: avoid mem->mem moves (except simple pushes).  */
7572 if (GET_CODE (op0) == MEM
7573 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7574 || !push_operand (op0, mode))
7575 && GET_CODE (op1) == MEM)
7576 op1 = force_reg (mode, op1);
7578 if (push_operand (op0, mode)
7579 && ! general_no_elim_operand (op1, mode))
7580 op1 = copy_to_mode_reg (mode, op1);
7582 /* Force large constants in 64bit compilation into register
7583 to get them CSEed. */
7584 if (TARGET_64BIT && mode == DImode
7585 && immediate_operand (op1, mode)
7586 && !x86_64_zero_extended_value (op1)
7587 && !register_operand (op0, mode)
7588 && optimize && !reload_completed && !reload_in_progress)
7589 op1 = copy_to_mode_reg (mode, op1);
7591 if (FLOAT_MODE_P (mode))
7593 /* If we are loading a floating point constant to a register,
7594 force the value to memory now, since we'll get better code
7595 out the back end. */
7599 else if (GET_CODE (op1) == CONST_DOUBLE
7600 && register_operand (op0, mode))
7601 op1 = validize_mem (force_const_mem (mode, op1));
7605 insn = gen_rtx_SET (VOIDmode, op0, op1);
/* NOTE(review): a few interior lines (returns/braces) elided.
   Expander for vector moves: before reload, nonzero constants are
   forced into the constant pool and loaded through a register address,
   and mem->mem moves are broken up through a register.  */
7611 ix86_expand_vector_move (mode, operands)
7612 enum machine_mode mode;
7615 /* Force constants other than zero into memory. We do not know how
7616 the instructions used to build constants modify the upper 64 bits
7617 of the register, once we have that information we may be able
7618 to handle some of them more efficiently. */
7619 if ((reload_in_progress | reload_completed) == 0
7620 && register_operand (operands[0], mode)
7621 && CONSTANT_P (operands[1]))
7623 rtx addr = gen_reg_rtx (Pmode);
7624 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
7625 operands[1] = gen_rtx_MEM (mode, addr);
7628 /* Make operand1 a register if it isn't already. */
7629 if ((reload_in_progress | reload_completed) == 0
7630 && !register_operand (operands[0], mode)
7631 && !register_operand (operands[1], mode)
7632 && operands[1] != CONST0_RTX (mode))
7634 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
7635 emit_move_insn (operands[0], temp);
7639 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
/* NOTE(review): interior lines (operand unpacking, the PLUS-only
   reload branch) elided.  Massages src/dst so at most one memory
   operand survives, emits the SET (with a flags clobber except in the
   reload/PLUS case), and copies back to operands[0] if a temporary
   destination was used.  */
7642 /* Attempt to expand a binary operator. Make the expansion closer to the
7643 actual machine, then just general_operand, which will allow 3 separate
7644 memory references (one output, two input) in a single insn. */
7647 ix86_expand_binary_operator (code, mode, operands)
7649 enum machine_mode mode;
7652 int matching_memory;
7653 rtx src1, src2, dst, op, clob;
7659 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7660 if (GET_RTX_CLASS (code) == 'c'
7661 && (rtx_equal_p (dst, src2)
7662 || immediate_operand (src1, mode)))
7669 /* If the destination is memory, and we do not have matching source
7670 operands, do things in registers. */
7671 matching_memory = 0;
7672 if (GET_CODE (dst) == MEM)
7674 if (rtx_equal_p (dst, src1))
7675 matching_memory = 1;
7676 else if (GET_RTX_CLASS (code) == 'c'
7677 && rtx_equal_p (dst, src2))
7678 matching_memory = 2;
7680 dst = gen_reg_rtx (mode);
7683 /* Both source operands cannot be in memory. */
7684 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7686 if (matching_memory != 2)
7687 src2 = force_reg (mode, src2);
7689 src1 = force_reg (mode, src1);
7692 /* If the operation is not commutable, source 1 cannot be a constant
7693 or non-matching memory. */
7694 if ((CONSTANT_P (src1)
7695 || (!matching_memory && GET_CODE (src1) == MEM))
7696 && GET_RTX_CLASS (code) != 'c')
7697 src1 = force_reg (mode, src1);
7699 /* If optimizing, copy to regs to improve CSE */
7700 if (optimize && ! no_new_pseudos)
7702 if (GET_CODE (dst) == MEM)
7703 dst = gen_reg_rtx (mode);
7704 if (GET_CODE (src1) == MEM)
7705 src1 = force_reg (mode, src1);
7706 if (GET_CODE (src2) == MEM)
7707 src2 = force_reg (mode, src2);
7710 /* Emit the instruction. */
7712 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7713 if (reload_in_progress)
7715 /* Reload doesn't know about the flags register, and doesn't know that
7716 it doesn't want to clobber it. We can only do this with PLUS. */
7723 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7724 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7727 /* Fix up the destination if needed. */
7728 if (dst != operands[0])
7729 emit_move_insn (operands[0], dst);
/* NOTE(review): the FALSE/TRUE return statements between the checks are
   elided in this listing.  Predicate mirroring the constraints enforced
   by ix86_expand_binary_operator above.  */
7732 /* Return TRUE or FALSE depending on whether the binary operator meets the
7733 appropriate constraints. */
7736 ix86_binary_operator_ok (code, mode, operands)
7738 enum machine_mode mode ATTRIBUTE_UNUSED;
7741 /* Both source operands cannot be in memory. */
7742 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7744 /* If the operation is not commutable, source 1 cannot be a constant. */
7745 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
7747 /* If the destination is memory, we must have a matching source operand. */
7748 if (GET_CODE (operands[0]) == MEM
7749 && ! (rtx_equal_p (operands[0], operands[1])
7750 || (GET_RTX_CLASS (code) == 'c'
7751 && rtx_equal_p (operands[0], operands[2]))))
7753 /* If the operation is not commutable and the source 1 is memory, we must
7754 have a matching destination. */
7755 if (GET_CODE (operands[1]) == MEM
7756 && GET_RTX_CLASS (code) != 'c'
7757 && ! rtx_equal_p (operands[0], operands[1]))
/* NOTE(review): interior lines (operand unpacking, the NEG/NOT special
   cases) elided.  Unary analogue of ix86_expand_binary_operator:
   same memory-operand massaging, then a SET with a flags clobber
   except during reload or for NOT (which doesn't touch flags).  */
7762 /* Attempt to expand a unary operator. Make the expansion closer to the
7763 actual machine, then just general_operand, which will allow 2 separate
7764 memory references (one output, one input) in a single insn. */
7767 ix86_expand_unary_operator (code, mode, operands)
7769 enum machine_mode mode;
7772 int matching_memory;
7773 rtx src, dst, op, clob;
7778 /* If the destination is memory, and we do not have matching source
7779 operands, do things in registers. */
7780 matching_memory = 0;
7781 if (GET_CODE (dst) == MEM)
7783 if (rtx_equal_p (dst, src))
7784 matching_memory = 1;
7786 dst = gen_reg_rtx (mode);
7789 /* When source operand is memory, destination must match. */
7790 if (!matching_memory && GET_CODE (src) == MEM)
7791 src = force_reg (mode, src);
7793 /* If optimizing, copy to regs to improve CSE */
7794 if (optimize && ! no_new_pseudos)
7796 if (GET_CODE (dst) == MEM)
7797 dst = gen_reg_rtx (mode);
7798 if (GET_CODE (src) == MEM)
7799 src = force_reg (mode, src);
7802 /* Emit the instruction. */
7804 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7805 if (reload_in_progress || code == NOT)
7807 /* Reload doesn't know about the flags register, and doesn't know that
7808 it doesn't want to clobber it. */
7815 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7816 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7819 /* Fix up the destination if needed. */
7820 if (dst != operands[0])
7821 emit_move_insn (operands[0], dst);
/* NOTE(review): the return statements are elided in this listing.
   Predicate: a unary op with a memory operand requires src == dst.  */
7824 /* Return TRUE or FALSE depending on whether the unary operator meets the
7825 appropriate constraints. */
7828 ix86_unary_operator_ok (code, mode, operands)
7829 enum rtx_code code ATTRIBUTE_UNUSED;
7830 enum machine_mode mode ATTRIBUTE_UNUSED;
7831 rtx operands[2] ATTRIBUTE_UNUSED;
7833 /* If one of operands is memory, source and destination must match. */
7834 if ((GET_CODE (operands[0]) == MEM
7835 || GET_CODE (operands[1]) == MEM)
7836 && ! rtx_equal_p (operands[0], operands[1]))
/* NOTE(review): the per-mode return/abort lines between the checks are
   elided.  Validates that INSN's first SET is a COMPARE whose CC mode
   is compatible with (at least as constrained as) REQ_MODE, using the
   CCNO/CCGC/CCGOC/CCZ hierarchy of x86 condition-code modes.  */
7841 /* Return TRUE or FALSE depending on whether the first SET in INSN
7842 has source and destination with matching CC modes, and that the
7843 CC mode is at least as constrained as REQ_MODE. */
7846 ix86_match_ccmode (insn, req_mode)
7848 enum machine_mode req_mode;
7851 enum machine_mode set_mode;
7853 set = PATTERN (insn);
7854 if (GET_CODE (set) == PARALLEL)
7855 set = XVECEXP (set, 0, 0);
7856 if (GET_CODE (set) != SET)
7858 if (GET_CODE (SET_SRC (set)) != COMPARE)
7861 set_mode = GET_MODE (SET_DEST (set));
7865 if (req_mode != CCNOmode
7866 && (req_mode != CCmode
7867 || XEXP (SET_SRC (set), 1) != const0_rtx))
7871 if (req_mode == CCGCmode)
7875 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7879 if (req_mode == CCZmode)
7889 return (GET_MODE (SET_SRC (set)) == set_mode);
/* NOTE(review): a few declaration lines elided.  Emits a
   flags = COMPARE (op0, op1) set in the CC mode chosen by
   SELECT_CC_MODE, and returns the (code flags 0) rtx for the flags
   consumer.  */
7892 /* Generate insn patterns to do an integer compare of OPERANDS. */
7895 ix86_expand_int_compare (code, op0, op1)
7899 enum machine_mode cmpmode;
7902 cmpmode = SELECT_CC_MODE (code, op0, op1);
7903 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7905 /* This is very simple, but making the interface the same as in the
7906 FP case makes the rest of the code easier. */
7907 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7908 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7910 /* Return the test that should be put into the flags user, i.e.
7911 the bcc, scc, or cmov instruction. */
7912 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
/* Chooses the CC mode for fp compares: unordered (CCFPUmode) under
   TARGET_IEEE_FP so all comparisons stay non-trapping and reversible,
   else ordered CCFPmode.  */
7915 /* Figure out whether to use ordered or unordered fp comparisons.
7916 Return the appropriate mode to use. */
7919 ix86_fp_compare_mode (code)
7920 enum rtx_code code ATTRIBUTE_UNUSED;
7922 /* ??? In order to make all comparisons reversible, we do all comparisons
7923 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7924 all forms trapping and nontrapping comparisons, we can make inequality
7925 comparisons trapping again, since it results in better code when using
7926 FCOM based compares. */
7927 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* NOTE(review): the return-value lines for each case group are elided
   in this listing; only the case labels and their flag-usage comments
   remain.  Selects the least-constrained CC mode that still captures
   the flags the comparison CODE needs.  */
7931 ix86_cc_mode (code, op0, op1)
7935 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7936 return ix86_fp_compare_mode (code);
7939 /* Only zero flag is needed. */
7941 case NE: /* ZF!=0 */
7943 /* Codes needing carry flag. */
7944 case GEU: /* CF=0 */
7945 case GTU: /* CF=0 & ZF=0 */
7946 case LTU: /* CF=1 */
7947 case LEU: /* CF=1 | ZF=1 */
7949 /* Codes possibly doable only with sign flag when
7950 comparing against zero. */
7951 case GE: /* SF=OF or SF=0 */
7952 case LT: /* SF<>OF or SF=1 */
7953 if (op1 == const0_rtx)
7956 /* For other cases Carry flag is not required. */
7958 /* Codes doable only with sign flag when comparing
7959 against zero, but we miss jump instruction for it
7960 so we need to use relational tests agains overflow
7961 that thus needs to be zero. */
7962 case GT: /* ZF=0 & SF=OF */
7963 case LE: /* ZF=1 | SF<>OF */
7964 if (op1 == const0_rtx)
7968 /* strcmp pattern do (use flags) and combine may ask us for proper
/* Returns nonzero when the cheapest way to do this fp comparison (in
   either operand order) is the fcomi-based sequence, per the
   ix86_fp_comparison_*_cost helpers.  */
7977 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7980 ix86_use_fcomi_compare (code)
7981 enum rtx_code code ATTRIBUTE_UNUSED;
7983 enum rtx_code swapped_code = swap_condition (code);
7984 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7985 || (ix86_fp_comparison_cost (swapped_code)
7986 == ix86_fp_comparison_fcomi_cost (swapped_code)));
/* NOTE(review): interior lines (the final stores back through
   pop0/pop1 and the return of CODE) elided.  Canonicalizes fp compare
   operands: forces both into registers for unordered/XF/TF/fcomi
   compares, otherwise arranges for op1 (not op0) to be the memory or
   constant operand, swapping the condition code as needed, and
   optionally re-swaps for cheaper comparison cost.  */
7989 /* Swap, force into registers, or otherwise massage the two operands
7990 to a fp comparison. The operands are updated in place; the new
7991 comparsion code is returned. */
7993 static enum rtx_code
7994 ix86_prepare_fp_compare_args (code, pop0, pop1)
7998 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7999 rtx op0 = *pop0, op1 = *pop1;
8000 enum machine_mode op_mode = GET_MODE (op0);
8001 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8003 /* All of the unordered compare instructions only work on registers.
8004 The same is true of the XFmode compare instructions. The same is
8005 true of the fcomi compare instructions. */
8008 && (fpcmp_mode == CCFPUmode
8009 || op_mode == XFmode
8010 || op_mode == TFmode
8011 || ix86_use_fcomi_compare (code)))
8013 op0 = force_reg (op_mode, op0);
8014 op1 = force_reg (op_mode, op1);
8018 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8019 things around if they appear profitable, otherwise force op0
8022 if (standard_80387_constant_p (op0) == 0
8023 || (GET_CODE (op0) == MEM
8024 && ! (standard_80387_constant_p (op1) == 0
8025 || GET_CODE (op1) == MEM)))
8028 tmp = op0, op0 = op1, op1 = tmp;
8029 code = swap_condition (code);
8032 if (GET_CODE (op0) != REG)
8033 op0 = force_reg (op_mode, op0);
8035 if (CONSTANT_P (op1))
/* Constants the 387 can load directly stay in a register; others go
   to the constant pool.  */
8037 if (standard_80387_constant_p (op1))
8038 op1 = force_reg (op_mode, op1);
8040 op1 = validize_mem (force_const_mem (op_mode, op1));
8044 /* Try to rearrange the comparison to make it cheaper. */
8045 if (ix86_fp_comparison_cost (code)
8046 > ix86_fp_comparison_cost (swap_condition (code))
8047 && (GET_CODE (op1) == REG || !no_new_pseudos))
8050 tmp = op0, op0 = op1, op1 = tmp;
8051 code = swap_condition (code);
8052 if (GET_CODE (op0) != REG)
8053 op0 = force_reg (op_mode, op0);
8061 /* Convert comparison codes we use to represent FP comparison to integer
8062    code that will result in proper branch.  Return UNKNOWN if no such code
/* NOTE(review): the body of this function (original lines 8066-8094) is
   elided from this excerpt; only the declaration survives here.  */
8064 static enum rtx_code
8065 ix86_fp_compare_code_to_integer (code)
8095 /* Split comparison code CODE into comparisons we can do using branch
8096    instructions.  BYPASS_CODE is comparison code for branch that will
8097    branch around FIRST_CODE and SECOND_CODE.  If some of branches
8098    is not required, set value to NIL.
8099    We never require more than two branches.  */
/* NOTE(review): the switch scaffolding, *first_code assignments and break
   statements are elided from this excerpt (numbering gaps 8102 -> 8108 etc.);
   the surviving case labels document which fcomi flag patterns map to which
   integer condition, with UNORDERED split out as bypass/second branch for
   the IEEE cases that fail on NaNs.  */
8101 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8102 enum rtx_code code, *bypass_code, *first_code, *second_code;
8108 /* The fcomi comparison sets flags as follows:
8118 case GT:   /* GTU - CF=0 & ZF=0 */
8119 case GE:   /* GEU - CF=0 */
8120 case ORDERED:  /* PF=0 */
8121 case UNORDERED:  /* PF=1 */
8122 case UNEQ:   /* EQ - ZF=1 */
8123 case UNLT:   /* LTU - CF=1 */
8124 case UNLE:   /* LEU - CF=1 | ZF=1 */
8125 case LTGT:   /* EQ - ZF=0 */
8127 case LT:   /* LTU - CF=1 - fails on unordered */
8129 *bypass_code = UNORDERED;
8131 case LE:   /* LEU - CF=1 | ZF=1 - fails on unordered */
8133 *bypass_code = UNORDERED;
8135 case EQ:   /* EQ - ZF=1 - fails on unordered */
8137 *bypass_code = UNORDERED;
8139 case NE:   /* NE - ZF=0 - fails on unordered */
8141 *second_code = UNORDERED;
8143 case UNGE:  /* GEU - CF=0 - fails on unordered */
8145 *second_code = UNORDERED;
8147 case UNGT:  /* GTU - CF=0 & ZF=0 - fails on unordered */
8149 *second_code = UNORDERED;
8154 if (!TARGET_IEEE_FP)
8161 /* Return cost of comparison done fcom + arithmetics operations on AX.
8162    All following functions use the number of instructions as the cost metric.
8163    In future this should be tweaked to compute bytes for optimize_size and
8164    take into account performance of various instructions on various CPUs.  */
/* NOTE(review): the return type, braces and the switch over CODE (original
   lines 8172-8197) are elided from this excerpt.  */
8166 ix86_fp_comparison_arithmetics_cost (code)
8169 if (!TARGET_IEEE_FP)
8171 /* The cost of code output by ix86_expand_fp_compare.  */
8199 /* Return cost of comparison done using fcomi operation.
8200    See ix86_fp_comparison_arithmetics_cost for the metrics.  */
8202 ix86_fp_comparison_fcomi_cost (code)
8205 enum rtx_code bypass_code, first_code, second_code;
8206 /* Return arbitrarily high cost when instruction is not supported - this
8207    prevents gcc from using it.  */
/* Cost model: fcomi + jump = 2 insns, plus one more when a bypass or
   second branch is required by the comparison code split.  */
8210 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8211 return (bypass_code != NIL || second_code != NIL) + 2;
8214 /* Return cost of comparison done using sahf operation.
8215    See ix86_fp_comparison_arithmetics_cost for the metrics.  */
8217 ix86_fp_comparison_sahf_cost (code)
8220 enum rtx_code bypass_code, first_code, second_code;
8221 /* Return arbitrarily high cost when instruction is not preferred - this
8222    prevents gcc from using it.  */
8223 if (!TARGET_USE_SAHF && !optimize_size)
/* Cost model: fnstsw + sahf + jump = 3 insns, plus one more when a bypass
   or second branch is required.  */
8225 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8226 return (bypass_code != NIL || second_code != NIL) + 3;
8229 /* Compute cost of the comparison done using any method.
8230    See ix86_fp_comparison_arithmetics_cost for the metrics.  */
/* Returns the minimum of the fcomi, sahf and arithmetic (fnstsw+test)
   strategies; the update of `min' for the fcomi/sahf cases is elided
   from this excerpt.  */
8232 ix86_fp_comparison_cost (code)
8235 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8238 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8239 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8241 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8242 if (min > sahf_cost)
8244 if (min > fcomi_cost)
8249 /* Generate insn patterns to do a floating point compare of OPERANDS.  */
/* NOTE(review): many lines of this function (branch arms, braces, some
   arguments) are elided from this excerpt; the embedded numbering gaps
   mark where.  Returns the flags-user test rtx; *SECOND_TEST and
   *BYPASS_TEST receive extra tests when the code split requires them.  */
8252 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8254 rtx op0, op1, scratch;
8258 enum machine_mode fpcmp_mode, intcmp_mode;
8260 int cost = ix86_fp_comparison_cost (code);
8261 enum rtx_code bypass_code, first_code, second_code;
8263 fpcmp_mode = ix86_fp_compare_mode (code);
8264 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8267 *second_test = NULL_RTX;
8269 *bypass_test = NULL_RTX;
8271 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8273 /* Do fcomi/sahf based test when profitable.  */
8274 if ((bypass_code == NIL || bypass_test)
8275 && (second_code == NIL || second_test)
8276 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8280 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8281 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8287 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8288 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8290 scratch = gen_reg_rtx (HImode);
8291 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8292 emit_insn (gen_x86_sahf_1 (scratch));
8295 /* The FP codes work out to act like unsigned.  */
8296 intcmp_mode = fpcmp_mode;
8298 if (bypass_code != NIL)
8299 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8300 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8302 if (second_code != NIL)
8303 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8304 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8309 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
8310 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8311 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8313 scratch = gen_reg_rtx (HImode);
8314 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8316 /* In the unordered case, we have to check C2 for NaN's, which
8317    doesn't happen to work out to anything nice combination-wise.
8318    So do some bit twiddling on the value we've got in AH to come
8319    up with an appropriate set of condition codes.  */
/* The masks below select x87 status bits as they land in AH after
   fnstsw: presumably C0=0x01, C2=0x04, C3=0x40, so 0x45 = C0|C2|C3
   -- matches the C2/AH discussion above; confirm against the Intel
   FPU status word layout.  */
8321 intcmp_mode = CCNOmode;
8326 if (code == GT || !TARGET_IEEE_FP)
8328 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8333 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8334 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8335 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8336 intcmp_mode = CCmode;
8342 if (code == LT && TARGET_IEEE_FP)
8344 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8345 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8346 intcmp_mode = CCmode;
8351 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8357 if (code == GE || !TARGET_IEEE_FP)
8359 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8364 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8365 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8372 if (code == LE && TARGET_IEEE_FP)
8374 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8375 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8376 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8377 intcmp_mode = CCmode;
8382 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8388 if (code == EQ && TARGET_IEEE_FP)
8390 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8391 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8392 intcmp_mode = CCmode;
8397 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8404 if (code == NE && TARGET_IEEE_FP)
8406 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8407 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8413 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8419 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8423 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8432 /* Return the test that should be put into the flags user, i.e.
8433    the bcc, scc, or cmov instruction.  */
8434 return gen_rtx_fmt_ee (code, VOIDmode,
8435 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand a comparison of ix86_compare_op0/op1, dispatching to the fp or
   integer expander by the mode class of op0.  *SECOND_TEST and
   *BYPASS_TEST receive extra tests the fp expander may require.
   NOTE(review): the return type line, braces and declarations of op0/op1/ret
   are elided from this excerpt.  */
8440 ix86_expand_compare (code, second_test, bypass_test)
8442 rtx *second_test, *bypass_test;
8445 op0 = ix86_compare_op0;
8446 op1 = ix86_compare_op1;
8449 *second_test = NULL_RTX;
8451 *bypass_test = NULL_RTX;
8453 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8454 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8455 second_test, bypass_test);
8457 ret = ix86_expand_int_compare (code, op0, op1);
8462 /* Return true if the CODE will result in nontrivial jump sequence.  */
/* Nontrivial means the code split needs a bypass or second branch in
   addition to the primary one.  */
8464 ix86_fp_jump_nontrivial_p (code)
8467 enum rtx_code bypass_code, first_code, second_code;
8470 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8471 return bypass_code != NIL || second_code != NIL;
/* Expand a conditional branch on ix86_compare_op0/op1 to LABEL.  Handles
   integer modes directly, fp modes via the fp compare machinery, and
   DImode (on 32-bit) by splitting into word compares.
   NOTE(review): case labels, braces and several statements are elided
   from this excerpt (see embedded numbering gaps).  */
8475 ix86_expand_branch (code, label)
8481 switch (GET_MODE (ix86_compare_op0))
8487 tmp = ix86_expand_compare (code, NULL, NULL);
8488 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8489 gen_rtx_LABEL_REF (VOIDmode, label),
8491 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8501 enum rtx_code bypass_code, first_code, second_code;
8503 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8506 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8508 /* Check whether we will use the natural sequence with one jump.  If
8509    so, we can expand jump early.  Otherwise delay expansion by
8510    creating compound insn to not confuse optimizers.  */
8511 if (bypass_code == NIL && second_code == NIL
8514 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8515 gen_rtx_LABEL_REF (VOIDmode, label),
8520 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8521 ix86_compare_op0, ix86_compare_op1);
8522 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8523 gen_rtx_LABEL_REF (VOIDmode, label),
8525 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8527 use_fcomi = ix86_use_fcomi_compare (code);
8528 vec = rtvec_alloc (3 + !use_fcomi);
8529 RTVEC_ELT (vec, 0) = tmp;
/* Hard regs 17/18 here are FLAGS/FPSR clobbers; an extra HImode scratch
   is clobbered only in the non-fcomi (fnstsw) case.  */
8531 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8533 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8536 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8538 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8546 /* Expand DImode branch into multiple compare+branch.  */
8548 rtx lo[2], hi[2], label2;
8549 enum rtx_code code1, code2, code3;
/* Canonicalize so that any constant is op1; swapping requires swapping
   the condition too.  */
8551 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8553 tmp = ix86_compare_op0;
8554 ix86_compare_op0 = ix86_compare_op1;
8555 ix86_compare_op1 = tmp;
8556 code = swap_condition (code);
8558 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8559 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8561 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8562    avoid two branches.  This costs one extra insn, so disable when
8563    optimizing for size.  */
8565 if ((code == EQ || code == NE)
8567 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8572 if (hi[1] != const0_rtx)
8573 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8574 NULL_RTX, 0, OPTAB_WIDEN);
8577 if (lo[1] != const0_rtx)
8578 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8579 NULL_RTX, 0, OPTAB_WIDEN);
8581 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8582 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse: the 64-bit equality reduced to a 32-bit compare with zero.  */
8584 ix86_compare_op0 = tmp;
8585 ix86_compare_op1 = const0_rtx;
8586 ix86_expand_branch (code, label);
8590 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8591    op1 is a constant and the low word is zero, then we can just
8592    examine the high word.  */
8594 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8597 case LT: case LTU: case GE: case GEU:
8598 ix86_compare_op0 = hi[0];
8599 ix86_compare_op1 = hi[1];
8600 ix86_expand_branch (code, label);
8606 /* Otherwise, we need two or three jumps.  */
8608 label2 = gen_label_rtx ();
8611 code2 = swap_condition (code);
8612 code3 = unsigned_condition (code);
8616 case LT: case GT: case LTU: case GTU:
8619 case LE: code1 = LT; code2 = GT; break;
8620 case GE: code1 = GT; code2 = LT; break;
8621 case LEU: code1 = LTU; code2 = GTU; break;
8622 case GEU: code1 = GTU; code2 = LTU; break;
8624 case EQ: code1 = NIL; code2 = NE; break;
8625 case NE: code2 = NIL; break;
8633  * if (hi(a) < hi(b)) goto true;
8634  * if (hi(a) > hi(b)) goto false;
8635  * if (lo(a) < lo(b)) goto true;
8639 ix86_compare_op0 = hi[0];
8640 ix86_compare_op1 = hi[1];
8643 ix86_expand_branch (code1, label);
8645 ix86_expand_branch (code2, label2);
/* The low words are compared unsigned (code3) regardless of the
   original signedness -- only the high word carries the sign.  */
8647 ix86_compare_op0 = lo[0];
8648 ix86_compare_op1 = lo[1];
8649 ix86_expand_branch (code3, label);
8652 emit_label (label2);
8661 /* Split branch based on floating point condition.  */
/* Emits up to three jumps (bypass for unordered, main, second) and
   attaches REG_BR_PROB notes when split_branch_probability is known.
   NOTE(review): several lines (declarations, braces, some arguments)
   are elided from this excerpt.  */
8663 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
8665 rtx op1, op2, target1, target2, tmp;
8668 rtx label = NULL_RTX;
8670 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so that target1 is the taken (pc-relative) target by
   reversing the condition when needed.  */
8673 if (target2 != pc_rtx)
8676 code = reverse_condition_maybe_unordered (code);
8681 condition = ix86_expand_fp_compare (code, op1, op2,
8682 tmp, &second, &bypass);
8684 if (split_branch_probability >= 0)
8686 /* Distribute the probabilities across the jumps.
8687    Assume the BYPASS and SECOND to be always test
8689 probability = split_branch_probability;
8691 /* Value of 1 is low enough to make no need for probability
8692    to be updated.  Later we may run some experiments and see
8693    if unordered values are more frequent in practice.  */
8695 bypass_probability = 1;
8697 second_probability = 1;
8699 if (bypass != NULL_RTX)
8701 label = gen_label_rtx ();
8702 i = emit_jump_insn (gen_rtx_SET
8704 gen_rtx_IF_THEN_ELSE (VOIDmode,
8706 gen_rtx_LABEL_REF (VOIDmode,
8709 if (bypass_probability >= 0)
8711 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8712 GEN_INT (bypass_probability),
8715 i = emit_jump_insn (gen_rtx_SET
8717 gen_rtx_IF_THEN_ELSE (VOIDmode,
8718 condition, target1, target2)));
8719 if (probability >= 0)
8721 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8722 GEN_INT (probability),
8724 if (second != NULL_RTX)
8726 i = emit_jump_insn (gen_rtx_SET
8728 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8730 if (second_probability >= 0)
8732 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8733 GEN_INT (second_probability),
8736 if (label != NULL_RTX)
/* Expand a setcc of comparison CODE into QImode register DEST.
   Returns 1 on success ("DONE"), 0 on failure ("FAIL" -- caller falls
   back).  When the fp compare produced a second or bypass test, the two
   QImode results are combined with and/or.
   NOTE(review): return type, braces and some statements are elided from
   this excerpt.  */
8741 ix86_expand_setcc (code, dest)
8745 rtx ret, tmp, tmpreg;
8746 rtx second_test, bypass_test;
/* DImode setcc is not handled on 32-bit -- bail out.  */
8748 if (GET_MODE (ix86_compare_op0) == DImode
8750 return 0; /* FAIL */
8752 if (GET_MODE (dest) != QImode)
8755 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8756 PUT_MODE (ret, QImode);
8761 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
8762 if (bypass_test || second_test)
8764 rtx test = second_test;
8766 rtx tmp2 = gen_reg_rtx (QImode);
/* A bypass test is combined with OR of the *reversed* condition;
   a second test is ANDed in directly.  */
8773 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8775 PUT_MODE (test, QImode);
8776 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8779 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8781 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8784 return 1; /* DONE */
/* Expand an integer conditional move:
   operands[0] = (operands[1] comparison) ? operands[2] : operands[3].
   Returns 1 ("DONE") when insns were emitted, 0 ("FAIL") when the caller
   must fall back.  Tries, in order: sbb-based constant tricks, lea-based
   setcc arithmetic, the general setcc/and/add sequence, constant masking
   via recursion, and finally a real cmov.
   NOTE(review): many lines (braces, else arms, some operands) are elided
   from this excerpt; see the embedded numbering gaps.  */
8788 ix86_expand_int_movcc (operands)
8791 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8792 rtx compare_seq, compare_op;
8793 rtx second_test, bypass_test;
8794 enum machine_mode mode = GET_MODE (operands[0]);
8796 /* When the compare code is not LTU or GEU, we can not use sbbl case.
8797    In case comparison is done with immediate, we can convert it to LTU or
8798    GEU by altering the integer.  */
8800 if ((code == LEU || code == GTU)
8801 && GET_CODE (ix86_compare_op1) == CONST_INT
8803 && INTVAL (ix86_compare_op1) != -1
8804 /* For x86-64, the immediate field in the instruction is 32-bit
8805    signed, so we can't increment a DImode value above 0x7fffffff.  */
8807 || GET_MODE (ix86_compare_op0) != DImode
8808 || INTVAL (ix86_compare_op1) != 0x7fffffff)
8809 && GET_CODE (operands[2]) == CONST_INT
8810 && GET_CODE (operands[3]) == CONST_INT)
/* x <= C becomes x < C+1; x > C becomes x >= C+1.  */
8816 ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8817 GET_MODE (ix86_compare_op0));
8821 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8822 compare_seq = get_insns ();
8825 compare_code = GET_CODE (compare_op);
8827 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8828    HImode insns, we'd be swallowed in word prefix ops.  */
8831 && (mode != DImode || TARGET_64BIT)
8832 && GET_CODE (operands[2]) == CONST_INT
8833 && GET_CODE (operands[3]) == CONST_INT)
8835 rtx out = operands[0];
8836 HOST_WIDE_INT ct = INTVAL (operands[2]);
8837 HOST_WIDE_INT cf = INTVAL (operands[3]);
/* sbb trick: carry-based compares materialize 0/-1 directly.  */
8840 if ((compare_code == LTU || compare_code == GEU)
8841 && !second_test && !bypass_test)
8843 /* Detect overlap between destination and compare sources.  */
8846 /* To simplify rest of code, restrict to the GEU case.  */
8847 if (compare_code == LTU)
8852 compare_code = reverse_condition (compare_code);
8853 code = reverse_condition (code);
8857 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8858 || reg_overlap_mentioned_p (out, ix86_compare_op1))
8859 tmp = gen_reg_rtx (mode);
8861 emit_insn (compare_seq);
8863 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8865 emit_insn (gen_x86_movsicc_0_m1 (tmp));
8877 tmp = expand_simple_binop (mode, PLUS,
8879 tmp, 1, OPTAB_DIRECT);
8890 tmp = expand_simple_binop (mode, IOR,
8892 tmp, 1, OPTAB_DIRECT);
8894 else if (diff == -1 && ct)
8904 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8906 tmp = expand_simple_binop (mode, PLUS,
8908 tmp, 1, OPTAB_DIRECT);
8916  *   andl cf - ct, dest
8926 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8929 tmp = expand_simple_binop (mode, AND,
8931 gen_int_mode (cf - ct, mode),
8932 tmp, 1, OPTAB_DIRECT);
8934 tmp = expand_simple_binop (mode, PLUS,
8936 tmp, 1, OPTAB_DIRECT);
8940 emit_move_insn (out, tmp);
8942 return 1; /* DONE */
8949 tmp = ct, ct = cf, cf = tmp;
8951 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8953 /* We may be reversing unordered compare to normal compare, that
8954    is not valid in general (we may convert non-trapping condition
8955    to trapping one), however on i386 we currently emit all
8956    comparisons unordered.  */
8957 compare_code = reverse_condition_maybe_unordered (compare_code);
8958 code = reverse_condition_maybe_unordered (code);
8962 compare_code = reverse_condition (compare_code);
8963 code = reverse_condition (code);
8968 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
8969 && GET_CODE (ix86_compare_op1) == CONST_INT)
8971 if (ix86_compare_op1 == const0_rtx
8972 && (code == LT || code == GE))
8973 compare_code = code;
8974 else if (ix86_compare_op1 == constm1_rtx)
8978 else if (code == GT)
8983 /* Optimize dest = (op0 < 0) ? -1 : cf.  */
8984 if (compare_code != NIL
8985 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
8986 && (cf == -1 || ct == -1))
8988 /* If lea code below could be used, only optimize
8989    if it results in a 2 insn sequence.  */
8991 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
8992 || diff == 3 || diff == 5 || diff == 9)
8993 || (compare_code == LT && ct == -1)
8994 || (compare_code == GE && cf == -1))
8997  * notl op1 (if necessary)
9005 code = reverse_condition (code);
9008 out = emit_store_flag (out, code, ix86_compare_op0,
9009 ix86_compare_op1, VOIDmode, 0, -1);
9011 out = expand_simple_binop (mode, IOR,
9013 out, 1, OPTAB_DIRECT);
9014 if (out != operands[0])
9015 emit_move_insn (operands[0], out);
9017 return 1; /* DONE */
/* lea trick: diff in {1,2,3,4,5,8,9} fits an x86 scale/index encoding.  */
9021 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9022 || diff == 3 || diff == 5 || diff == 9)
9023 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9029  *   lea cf(dest*(ct-cf)),dest
9033  * This also catches the degenerate setcc-only case.
9039 out = emit_store_flag (out, code, ix86_compare_op0,
9040 ix86_compare_op1, VOIDmode, 0, 1);
9043 /* On x86_64 the lea instruction operates on Pmode, so we need
9044    to get arithmetics done in proper mode to match.  */
9051 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9055 tmp = gen_rtx_PLUS (mode, tmp, out1);
9061 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9065 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
9071 clob = gen_rtx_REG (CCmode, FLAGS_REG);
9072 clob = gen_rtx_CLOBBER (VOIDmode, clob);
9074 tmp = gen_rtx_SET (VOIDmode, out, tmp);
9075 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9079 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
9081 if (out != operands[0])
9082 emit_move_insn (operands[0], copy_rtx (out));
9084 return 1; /* DONE */
9088  * General case:			Jumpful:
9089  *   xorl dest,dest		cmpl op1, op2
9090  *   cmpl op1, op2		movl ct, dest
9092  *   decl dest			movl cf, dest
9093  *   andl (cf-ct),dest		1:
9098  * This is reasonably steep, but branch mispredict costs are
9099  * high on modern cpus, so consider failing only if optimizing
9102  * %%% Parameterize branch_cost on the tuning architecture, then
9103  * use that.  The 80386 couldn't care less about mispredicts.
9106 if (!optimize_size && !TARGET_CMOVE)
9112 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9113 /* We may be reversing unordered compare to normal compare,
9114    that is not valid in general (we may convert non-trapping
9115    condition to trapping one), however on i386 we currently
9116    emit all comparisons unordered.  */
9117 code = reverse_condition_maybe_unordered (code);
9120 code = reverse_condition (code);
9121 if (compare_code != NIL)
9122 compare_code = reverse_condition (compare_code);
9126 if (compare_code != NIL)
9128 /* notl op1 (if needed)
9133    For x < 0 (resp. x <= -1) there will be no notl,
9134    so if possible swap the constants to get rid of the
9136    True/false will be -1/0 while code below (store flag
9137    followed by decrement) is 0/-1, so the constants need
9138    to be exchanged once more.  */
9140 if (compare_code == GE || !cf)
9142 code = reverse_condition (code);
9147 HOST_WIDE_INT tmp = cf;
9152 out = emit_store_flag (out, code, ix86_compare_op0,
9153 ix86_compare_op1, VOIDmode, 0, -1);
9157 out = emit_store_flag (out, code, ix86_compare_op0,
9158 ix86_compare_op1, VOIDmode, 0, 1);
9160 out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
9161 out, 1, OPTAB_DIRECT);
9164 out = expand_simple_binop (mode, AND, out,
9165 gen_int_mode (cf - ct, mode),
9166 out, 1, OPTAB_DIRECT);
9168 out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
9169 out, 1, OPTAB_DIRECT);
9170 if (out != operands[0])
9171 emit_move_insn (operands[0], out);
9173 return 1; /* DONE */
9179 /* Try a few things more with specific constants and a variable.  */
9182 rtx var, orig_out, out, tmp;
9185 return 0; /* FAIL */
9187 /* If one of the two operands is an interesting constant, load a
9188    constant with the above and mask it in with a logical operation.  */
9190 if (GET_CODE (operands[2]) == CONST_INT)
9193 if (INTVAL (operands[2]) == 0)
9194 operands[3] = constm1_rtx, op = and_optab;
9195 else if (INTVAL (operands[2]) == -1)
9196 operands[3] = const0_rtx, op = ior_optab;
9198 return 0; /* FAIL */
9200 else if (GET_CODE (operands[3]) == CONST_INT)
9203 if (INTVAL (operands[3]) == 0)
9204 operands[2] = constm1_rtx, op = and_optab;
9205 else if (INTVAL (operands[3]) == -1)
9206 operands[2] = const0_rtx, op = ior_optab;
9208 return 0; /* FAIL */
9211 return 0; /* FAIL */
9213 orig_out = operands[0];
9214 tmp = gen_reg_rtx (mode);
9217 /* Recurse to get the constant loaded.  */
9218 if (ix86_expand_int_movcc (operands) == 0)
9219 return 0; /* FAIL */
9221 /* Mask in the interesting variable.  */
9222 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9224 if (out != orig_out)
9225 emit_move_insn (orig_out, out);
9227 return 1; /* DONE */
9231  * For comparison with above,
/* Final strategy: emit a real cmov.  At most one operand may stay in
   memory; copy operands that overlap the destination when extra tests
   will read them after the first conditional move.  */
9241 if (! nonimmediate_operand (operands[2], mode))
9242 operands[2] = force_reg (mode, operands[2]);
9243 if (! nonimmediate_operand (operands[3], mode))
9244 operands[3] = force_reg (mode, operands[3]);
9246 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9248 rtx tmp = gen_reg_rtx (mode);
9249 emit_move_insn (tmp, operands[3]);
9252 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9254 rtx tmp = gen_reg_rtx (mode);
9255 emit_move_insn (tmp, operands[2]);
9258 if (! register_operand (operands[2], VOIDmode)
9259 && ! register_operand (operands[3], VOIDmode))
9260 operands[2] = force_reg (mode, operands[2]);
9262 emit_insn (compare_seq);
9263 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9264 gen_rtx_IF_THEN_ELSE (mode,
9265 compare_op, operands[2],
9268 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9269 gen_rtx_IF_THEN_ELSE (mode,
9274 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9275 gen_rtx_IF_THEN_ELSE (mode,
9280 return 1; /* DONE */
/* Expand a floating point conditional move.  Tries SSE min/max or SSE
   masked cmov first when profitable, then falls back to fcmov, turning
   conditions fcmov cannot express into an integer setcc + compare with
   zero.  Returns 1 ("DONE") or 0 ("FAIL").
   NOTE(review): braces, some declarations and else arms are elided from
   this excerpt (see embedded numbering gaps).  */
9284 ix86_expand_fp_movcc (operands)
9289 rtx compare_op, second_test, bypass_test;
9291 /* For SF/DFmode conditional moves based on comparisons
9292    in same mode, we may want to use SSE min/max instructions.  */
9293 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9294 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9295 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9296 /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
9298 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9299 /* We may be called from the post-reload splitter.  */
9300 && (!REG_P (operands[0])
9301 || SSE_REG_P (operands[0])
9302 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9304 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9305 code = GET_CODE (operands[1]);
9307 /* See if we have (cross) match between comparison operands and
9308    conditional move operands.  */
9309 if (rtx_equal_p (operands[2], op1))
9314 code = reverse_condition_maybe_unordered (code);
9316 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9318 /* Check for min operation.  */
9321 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9322 if (memory_operand (op0, VOIDmode))
9323 op0 = force_reg (GET_MODE (operands[0]), op0);
9324 if (GET_MODE (operands[0]) == SFmode)
9325 emit_insn (gen_minsf3 (operands[0], op0, op1));
9327 emit_insn (gen_mindf3 (operands[0], op0, op1));
9330 /* Check for max operation.  */
9333 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9334 if (memory_operand (op0, VOIDmode))
9335 op0 = force_reg (GET_MODE (operands[0]), op0);
9336 if (GET_MODE (operands[0]) == SFmode)
9337 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9339 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9343 /* Manage condition to be sse_comparison_operator.  In case we are
9344    in non-ieee mode, try to canonicalize the destination operand
9345    to be first in the comparison - this helps reload to avoid extra
9347 if (!sse_comparison_operator (operands[1], VOIDmode)
9348 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9350 rtx tmp = ix86_compare_op0;
9351 ix86_compare_op0 = ix86_compare_op1;
9352 ix86_compare_op1 = tmp;
9353 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9354 VOIDmode, ix86_compare_op0,
9357 /* Similarly try to manage result to be first operand of conditional
9358    move. We also don't support the NE comparison on SSE, so try to
9360 if ((rtx_equal_p (operands[0], operands[3])
9361 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9362 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9364 rtx tmp = operands[2];
9365 operands[2] = operands[3];
9367 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9368 (GET_CODE (operands[1])),
9369 VOIDmode, ix86_compare_op0,
9372 if (GET_MODE (operands[0]) == SFmode)
9373 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9374 operands[2], operands[3],
9375 ix86_compare_op0, ix86_compare_op1));
9377 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9378 operands[2], operands[3],
9379 ix86_compare_op0, ix86_compare_op1));
9383 /* The floating point conditional move instructions don't directly
9384    support conditions resulting from a signed integer comparison.  */
9386 code = GET_CODE (operands[1]);
9387 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9389 /* The floating point conditional move instructions don't directly
9390    support signed integer comparisons.  */
9392 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9394 if (second_test != NULL || bypass_test != NULL)
/* Materialize the condition into a QImode flag and re-compare it
   against zero, which fcmov can handle.  */
9396 tmp = gen_reg_rtx (QImode);
9397 ix86_expand_setcc (code, tmp);
9399 ix86_compare_op0 = tmp;
9400 ix86_compare_op1 = const0_rtx;
9401 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9403 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9405 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9406 emit_move_insn (tmp, operands[3]);
9409 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9411 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9412 emit_move_insn (tmp, operands[2]);
9416 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9417 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9422 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9423 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9428 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9429 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9437 /* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
9438    works for floating point parameters and nonoffsetable memories.
9439    For pushes, it returns just stack offsets; the values will be saved
9440    in the right order.  Maximally three parts are generated.  */
/* NOTE(review): the return type, braces and several statements are elided
   from this excerpt; on 64-bit targets the parts are DImode-sized (the
   size is computed in 8-byte units below).  */
9443 ix86_split_to_parts (operand, parts, mode)
9446 enum machine_mode mode;
/* 32-bit: number of SImode words (TFmode long double = 3).  */
9451 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
/* 64-bit: number of DImode words, rounding up.  */
9453 size = (GET_MODE_SIZE (mode) + 4) / 8;
9455 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9457 if (size < 2 || size > 3)
9460 /* Optimize constant pool reference to immediates.  This is used by fp
9461    moves, that force all constants to memory to allow combining.  */
9462 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9464 rtx tmp = maybe_get_pool_constant (operand);
9469 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9471 /* The only non-offsetable memories we handle are pushes.  */
9472 if (! push_operand (operand, VOIDmode))
9475 operand = copy_rtx (operand);
9476 PUT_MODE (operand, Pmode);
9477 parts[0] = parts[1] = parts[2] = operand;
9479 else if (!TARGET_64BIT)
9482 split_di (&operand, 1, &parts[0], &parts[1]);
9485 if (REG_P (operand))
/* Splitting hard registers by REGNO arithmetic is only valid before
   reload has assigned them.  */
9487 if (!reload_completed)
9489 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9490 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9492 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9494 else if (offsettable_memref_p (operand))
9496 operand = adjust_address (operand, SImode, 0);
9498 parts[1] = adjust_address (operand, SImode, 4);
9500 parts[2] = adjust_address (operand, SImode, 8);
9502 else if (GET_CODE (operand) == CONST_DOUBLE)
9507 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9512 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9513 parts[2] = gen_int_mode (l[2], SImode);
9516 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9521 parts[1] = gen_int_mode (l[1], SImode);
9522 parts[0] = gen_int_mode (l[0], SImode);
9531 split_ti (&operand, 1, &parts[0], &parts[1]);
9532 if (mode == XFmode || mode == TFmode)
9534 if (REG_P (operand))
9536 if (!reload_completed)
9538 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9539 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9541 else if (offsettable_memref_p (operand))
9543 operand = adjust_address (operand, DImode, 0);
9545 parts[1] = adjust_address (operand, SImode, 8);
9547 else if (GET_CODE (operand) == CONST_DOUBLE)
9552 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9553 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9554 /* Do not use shift by 32 to avoid warning on 32bit systems.  */
9555 if (HOST_BITS_PER_WIDE_INT >= 64)
9558 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9559 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9562 parts[0] = immed_double_const (l[0], l[1], DImode);
9563 parts[1] = gen_int_mode (l[2], SImode);
9573 /* Emit insns to perform a move or push of DI, DF, and XF values.
9574 Return false when normal moves are needed; true when all required
9575 insns have been emitted. Operands 2-4 contain the input values
9576 in the correct order; operands 5-7 contain the output values. */
/* NOTE(review): this listing is a sampled extract of the original file;
   the jumps in the embedded source line numbers show that intervening
   lines (braces, declarations, else-arms) are elided.  Comments added
   below are hedged accordingly and describe only what is visible.  */
9579 ix86_split_long_move (operands)
9586 enum machine_mode mode = GET_MODE (operands[0]);
9588 /* The DFmode expanders may ask us to move double.
9589 For 64bit target this is single move. By hiding the fact
9590 here we simplify i386.md splitters. */
9591 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9593 /* Optimize constant pool reference to immediates. This is used by
9594 fp moves, that force all constants to memory to allow combining. */
9596 if (GET_CODE (operands[1]) == MEM
9597 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9598 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9599 operands[1] = get_pool_constant (XEXP (operands[1], 0))
/* A push destination is retyped to Pmode on a copy so the original
   RTL is not mutated in place.  */
9600 if (push_operand (operands[0], VOIDmode))
9602 operands[0] = copy_rtx (operands[0]);
9603 PUT_MODE (operands[0], Pmode);
9606 operands[0] = gen_lowpart (DImode, operands[0]);
9607 operands[1] = gen_lowpart (DImode, operands[1]);
9608 emit_move_insn (operands[0], operands[1]);
9612 /* The only non-offsettable memory we handle is push. */
9613 if (push_operand (operands[0], VOIDmode))
9615 else if (GET_CODE (operands[0]) == MEM
9616 && ! offsettable_memref_p (operands[0]))
/* Split source and destination into word-sized parts; nparts is the
   number of parts the value occupies (2 or 3 from the uses below).  */
9619 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9620 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9622 /* When emitting push, take care for source operands on the stack. */
9623 if (push && GET_CODE (operands[1]) == MEM
9624 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9627 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9628 XEXP (part[1][2], 0));
9629 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9630 XEXP (part[1][1], 0));
9633 /* We need to do copy in the right order in case an address register
9634 of the source overlaps the destination. */
9635 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9637 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9639 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9642 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9645 /* Collision in the middle part can be handled by reordering. */
9646 if (collisions == 1 && nparts == 3
9647 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9650 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9651 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9654 /* If there are more collisions, we can't handle it by reordering.
9655 Do an lea to the last part and use only one colliding move. */
9656 else if (collisions > 1)
9659 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
9660 XEXP (part[1][0], 0)));
9661 part[1][0] = change_address (part[1][0],
9662 TARGET_64BIT ? DImode : SImode,
9663 part[0][nparts - 1]);
9664 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
9666 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
9676 /* We use only first 12 bytes of TFmode value, but for pushing we
9677 are required to adjust stack as if we were pushing real 16byte
9679 if (mode == TFmode && !TARGET_64BIT)
9680 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
9682 emit_move_insn (part[0][2], part[1][2]);
9687 /* In 64bit mode we don't have 32bit push available. In case this is
9688 register, it is OK - we will just use larger counterpart. We also
9689 retype memory - these comes from attempt to avoid REX prefix on
9690 moving of second half of TFmode value. */
9691 if (GET_MODE (part[1][1]) == SImode)
9693 if (GET_CODE (part[1][1]) == MEM)
9694 part[1][1] = adjust_address (part[1][1], DImode, 0);
9695 else if (REG_P (part[1][1]))
9696 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9699 if (GET_MODE (part[1][0]) == SImode)
9700 part[1][0] = part[1][1];
9703 emit_move_insn (part[0][1], part[1][1]);
9704 emit_move_insn (part[0][0], part[1][0]);
9708 /* Choose correct order to not overwrite the source before it is copied. */
9709 if ((REG_P (part[0][0])
9710 && REG_P (part[1][1])
9711 && (REGNO (part[0][0]) == REGNO (part[1][1])
9713 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9715 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Reversed order: emit highest part first (operands 2-4 are the
   destinations, 5-7 the sources for the moves emitted at the end).  */
9719 operands[2] = part[0][2];
9720 operands[3] = part[0][1];
9721 operands[4] = part[0][0];
9722 operands[5] = part[1][2];
9723 operands[6] = part[1][1];
9724 operands[7] = part[1][0];
9728 operands[2] = part[0][1];
9729 operands[3] = part[0][0];
9730 operands[5] = part[1][1];
9731 operands[6] = part[1][0];
/* Natural order: lowest part first.  */
9738 operands[2] = part[0][0];
9739 operands[3] = part[0][1];
9740 operands[4] = part[0][2];
9741 operands[5] = part[1][0];
9742 operands[6] = part[1][1];
9743 operands[7] = part[1][2];
9747 operands[2] = part[0][0];
9748 operands[3] = part[0][1];
9749 operands[5] = part[1][0];
9750 operands[6] = part[1][1];
9753 emit_move_insn (operands[2], operands[5]);
9754 emit_move_insn (operands[3], operands[6]);
9756 emit_move_insn (operands[4], operands[7]);
/* Split a DImode arithmetic left shift into SImode operations for a
   32-bit target.  For a constant shift count the high/low halves are
   combined with shld/ashl directly; for a variable count the
   x86_shift_adj patterns fix up counts >= 32 afterwards.  SCRATCH,
   when supplied, lets the TARGET_CMOVE path avoid creating a new
   pseudo after reload.
   NOTE(review): sampled extract -- lines are elided between the
   numbered statements shown here.  */
9762 ix86_split_ashldi (operands, scratch)
9763 rtx *operands, scratch;
9765 rtx low[2], high[2];
9768 if (GET_CODE (operands[2]) == CONST_INT)
9770 split_di (operands, 2, low, high);
/* Only the low 6 bits of the count are significant for a 64-bit shift.  */
9771 count = INTVAL (operands[2]) & 63;
/* Count >= 32 (elided guard presumed): low word moves to high, low
   becomes zero, then any remaining shift is applied to high.  */
9775 emit_move_insn (high[0], low[1]);
9776 emit_move_insn (low[0], const0_rtx);
9779 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
9783 if (!rtx_equal_p (operands[0], operands[1]))
9784 emit_move_insn (operands[0], operands[1]);
9785 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9786 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
/* Variable count case.  */
9791 if (!rtx_equal_p (operands[0], operands[1]))
9792 emit_move_insn (operands[0], operands[1]);
9794 split_di (operands, 1, low, high);
9796 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9797 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
9799 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9801 if (! no_new_pseudos)
9802 scratch = force_reg (SImode, const0_rtx);
9804 emit_move_insn (scratch, const0_rtx);
9806 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9810 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a DImode arithmetic right shift into SImode operations for a
   32-bit target.  Sign extension of the high word uses an ashrsi by 31.
   SCRATCH, when supplied, lets the TARGET_CMOVE path avoid creating a
   new pseudo after reload.
   NOTE(review): sampled extract -- lines are elided between the
   numbered statements shown here.  */
9815 ix86_split_ashrdi (operands, scratch)
9816 rtx *operands, scratch;
9818 rtx low[2], high[2];
9821 if (GET_CODE (operands[2]) == CONST_INT)
9823 split_di (operands, 2, low, high);
9824 count = INTVAL (operands[2]) & 63;
/* Count >= 32 (elided guard presumed): high word moves down to low,
   high is refilled with the sign via an arithmetic shift by 31.  */
9828 emit_move_insn (low[0], high[1]);
9830 if (! reload_completed)
9831 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
9834 emit_move_insn (high[0], low[0]);
9835 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9839 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
9843 if (!rtx_equal_p (operands[0], operands[1]))
9844 emit_move_insn (operands[0], operands[1]);
9845 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9846 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count case.  */
9851 if (!rtx_equal_p (operands[0], operands[1]))
9852 emit_move_insn (operands[0], operands[1]);
9854 split_di (operands, 1, low, high);
9856 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9857 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9859 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9861 if (! no_new_pseudos)
9862 scratch = gen_reg_rtx (SImode);
/* Scratch holds the sign word used by the >=32 adjustment.  */
9863 emit_move_insn (scratch, high[0]);
9864 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9865 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9869 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a DImode logical right shift into SImode operations for a
   32-bit target.  Mirrors ix86_split_ashldi with the argument roles
   reversed (see the comment at the shift_adj reuse below).
   NOTE(review): sampled extract -- lines are elided between the
   numbered statements shown here.  */
9874 ix86_split_lshrdi (operands, scratch)
9875 rtx *operands, scratch;
9877 rtx low[2], high[2];
9880 if (GET_CODE (operands[2]) == CONST_INT)
9882 split_di (operands, 2, low, high);
9883 count = INTVAL (operands[2]) & 63;
/* Count >= 32 (elided guard presumed): high moves to low, high is
   zeroed, remaining shift applied to low.  */
9887 emit_move_insn (low[0], high[1]);
9888 emit_move_insn (high[0], const0_rtx);
9891 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
9895 if (!rtx_equal_p (operands[0], operands[1]))
9896 emit_move_insn (operands[0], operands[1]);
9897 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9898 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count case.  */
9903 if (!rtx_equal_p (operands[0], operands[1]))
9904 emit_move_insn (operands[0], operands[1]);
9906 split_di (operands, 1, low, high);
9908 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9909 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
9911 /* Heh. By reversing the arguments, we can reuse this pattern. */
9912 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9914 if (! no_new_pseudos)
9915 scratch = force_reg (SImode, const0_rtx);
9917 emit_move_insn (scratch, const0_rtx);
9919 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9923 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
9927 /* Helper function for the string operations below. Tests whether VARIABLE
9928 has the bits of mask VALUE clear (i.e. is aligned with respect to
that bit); the emitted compare-and-jump branches to the returned
label when aligned, so callers place the fix-up code on fallthrough. */
9930 ix86_expand_aligntest (variable, value)
9934 rtx label = gen_label_rtx ();
9935 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
/* AND out the tested bit(s) in a temporary; DImode vs SImode chosen
   to match VARIABLE's mode.  */
9936 if (GET_MODE (variable) == DImode)
9937 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
9939 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
9940 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
9945 /* Adjust COUNTER by the VALUE. */
/* Subtracts VALUE from COUNTREG in place, using the add pattern that
   matches the register's mode (DImode on 64-bit, SImode otherwise).  */
9947 ix86_adjust_counter (countreg, value)
9949 HOST_WIDE_INT value;
9951 if (GET_MODE (countreg) == DImode)
9952 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
9954 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
9957 /* Zero extend possibly SImode EXP to Pmode register. */
/* Returns a fresh Pmode register holding EXP: VOIDmode constants are
   simply forced into a register, values already in Pmode are copied,
   and anything else is zero-extended SImode -> DImode.  */
9959 ix86_zero_extend_to_Pmode (exp)
9963 if (GET_MODE (exp) == VOIDmode)
9964 return force_reg (Pmode, exp);
9965 if (GET_MODE (exp) == Pmode)
9966 return copy_to_mode_reg (Pmode, exp);
9967 r = gen_reg_rtx (Pmode);
9968 emit_insn (gen_zero_extendsidi2 (r, exp));
9972 /* Expand string move (memcpy) operation. Use i386 string operations when
9973 profitable. expand_clrstr contains similar code. */
/* NOTE(review): sampled extract -- lines are elided between the
   numbered statements; comments added below are hedged accordingly.  */
9975 ix86_expand_movstr (dst, src, count_exp, align_exp)
9976 rtx dst, src, count_exp, align_exp;
9978 rtx srcreg, destreg, countreg;
9979 enum machine_mode counter_mode;
9980 HOST_WIDE_INT align = 0;
9981 unsigned HOST_WIDE_INT count = 0;
9986 if (GET_CODE (align_exp) == CONST_INT)
9987 align = INTVAL (align_exp);
9989 /* This simple hack avoids all inlining code and simplifies code below. */
9990 if (!TARGET_ALIGN_STRINGOPS)
9993 if (GET_CODE (count_exp) == CONST_INT)
9994 count = INTVAL (count_exp);
9996 /* Figure out proper mode for counter. For 32bits it is always SImode,
9997 for 64bits use SImode when possible, otherwise DImode.
9998 Set count to number of bytes copied when known at compile time. */
9999 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10000 || x86_64_zero_extended_value (count_exp))
10001 counter_mode = SImode;
10003 counter_mode = DImode;
10005 if (counter_mode != SImode && counter_mode != DImode)
10008 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10009 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* cld: string ops below require the direction flag cleared.  */
10011 emit_insn (gen_cld ());
10013 /* When optimizing for size emit simple rep ; movsb instruction for
10014 counts not divisible by 4. */
10016 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10018 countreg = ix86_zero_extend_to_Pmode (count_exp);
10020 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10021 destreg, srcreg, countreg));
10023 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10024 destreg, srcreg, countreg));
10027 /* For constant aligned (or small unaligned) copies use rep movsl
10028 followed by code copying the rest. For PentiumPro ensure 8 byte
10029 alignment to allow rep movsl acceleration. */
10031 else if (count != 0
10033 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10034 || optimize_size || count < (unsigned int) 64)
10036 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10037 if (count & ~(size - 1))
10039 countreg = copy_to_mode_reg (counter_mode,
10040 GEN_INT ((count >> (size == 4 ? 2 : 3))
10041 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10042 countreg = ix86_zero_extend_to_Pmode (countreg);
10046 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10047 destreg, srcreg, countreg));
10049 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10050 destreg, srcreg, countreg));
10053 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10054 destreg, srcreg, countreg));
/* Copy the sub-word tail with single string moves.  */
10056 if (size == 8 && (count & 0x04))
10057 emit_insn (gen_strmovsi (destreg, srcreg));
10059 emit_insn (gen_strmovhi (destreg, srcreg));
10061 emit_insn (gen_strmovqi (destreg, srcreg));
10063 /* The generic code based on the glibc implementation:
10064 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10065 allowing accelerated copying there)
10066 - copy the data using rep movsl
10067 - copy the rest. */
10072 int desired_alignment = (TARGET_PENTIUMPRO
10073 && (count == 0 || count >= (unsigned int) 260)
10074 ? 8 : UNITS_PER_WORD);
10076 /* In case we don't know anything about the alignment, default to
10077 library version, since it is usually equally fast and result in
10079 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10085 if (TARGET_SINGLE_STRINGOP)
10086 emit_insn (gen_cld ());
10088 countreg2 = gen_reg_rtx (Pmode);
10089 countreg = copy_to_mode_reg (counter_mode, count_exp);
10091 /* We don't use loops to align destination and to copy parts smaller
10092 than 4 bytes, because gcc is able to optimize such code better (in
10093 the case the destination or the count really is aligned, gcc is often
10094 able to predict the branches) and also it is friendlier to the
10095 hardware branch prediction.
10097 Using loops is beneficial for generic case, because we can
10098 handle small counts using the loops. Many CPUs (such as Athlon)
10099 have large REP prefix setup costs.
10101 This is quite costly. Maybe we can revisit this decision later or
10102 add some customizability to this code. */
10104 if (count == 0 && align < desired_alignment)
10106 label = gen_label_rtx ();
10107 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10108 LEU, 0, counter_mode, 1, label);
/* Byte/halfword/word alignment fix-ups; each tests one address bit
   via ix86_expand_aligntest and copies one unit when unaligned.  */
10112 rtx label = ix86_expand_aligntest (destreg, 1);
10113 emit_insn (gen_strmovqi (destreg, srcreg));
10114 ix86_adjust_counter (countreg, 1);
10115 emit_label (label);
10116 LABEL_NUSES (label) = 1;
10120 rtx label = ix86_expand_aligntest (destreg, 2);
10121 emit_insn (gen_strmovhi (destreg, srcreg));
10122 ix86_adjust_counter (countreg, 2);
10123 emit_label (label);
10124 LABEL_NUSES (label) = 1;
10126 if (align <= 4 && desired_alignment > 4)
10128 rtx label = ix86_expand_aligntest (destreg, 4);
10129 emit_insn (gen_strmovsi (destreg, srcreg));
10130 ix86_adjust_counter (countreg, 4);
10131 emit_label (label);
10132 LABEL_NUSES (label) = 1;
10135 if (label && desired_alignment > 4 && !TARGET_64BIT)
10137 emit_label (label);
10138 LABEL_NUSES (label) = 1;
10141 if (!TARGET_SINGLE_STRINGOP)
10142 emit_insn (gen_cld ());
/* Main bulk copy: shift byte count down to a word count, then rep movs.  */
10145 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10147 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10148 destreg, srcreg, countreg2));
10152 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10153 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10154 destreg, srcreg, countreg2));
10159 emit_label (label);
10160 LABEL_NUSES (label) = 1;
/* Copy the remaining 0-7 tail bytes, guided by the count bits when
   COUNT is known at compile time, by runtime tests otherwise.  */
10162 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10163 emit_insn (gen_strmovsi (destreg, srcreg));
10164 if ((align <= 4 || count == 0) && TARGET_64BIT)
10166 rtx label = ix86_expand_aligntest (countreg, 4);
10167 emit_insn (gen_strmovsi (destreg, srcreg));
10168 emit_label (label);
10169 LABEL_NUSES (label) = 1;
10171 if (align > 2 && count != 0 && (count & 2))
10172 emit_insn (gen_strmovhi (destreg, srcreg));
10173 if (align <= 2 || count == 0)
10175 rtx label = ix86_expand_aligntest (countreg, 2);
10176 emit_insn (gen_strmovhi (destreg, srcreg));
10177 emit_label (label);
10178 LABEL_NUSES (label) = 1;
10180 if (align > 1 && count != 0 && (count & 1))
10181 emit_insn (gen_strmovqi (destreg, srcreg));
10182 if (align <= 1 || count == 0)
10184 rtx label = ix86_expand_aligntest (countreg, 1);
10185 emit_insn (gen_strmovqi (destreg, srcreg));
10186 emit_label (label);
10187 LABEL_NUSES (label) = 1;
10191 insns = get_insns ();
10194 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10199 /* Expand string clear operation (bzero). Use i386 string operations when
10200 profitable. expand_movstr contains similar code. */
/* NOTE(review): sampled extract -- lines are elided between the
   numbered statements; comments added below are hedged accordingly.
   Structure parallels ix86_expand_movstr with stores of zero
   (rep stos / strset) in place of copies.  */
10202 ix86_expand_clrstr (src, count_exp, align_exp)
10203 rtx src, count_exp, align_exp;
10205 rtx destreg, zeroreg, countreg;
10206 enum machine_mode counter_mode;
10207 HOST_WIDE_INT align = 0;
10208 unsigned HOST_WIDE_INT count = 0;
10210 if (GET_CODE (align_exp) == CONST_INT)
10211 align = INTVAL (align_exp);
10213 /* This simple hack avoids all inlining code and simplifies code below. */
10214 if (!TARGET_ALIGN_STRINGOPS)
10217 if (GET_CODE (count_exp) == CONST_INT)
10218 count = INTVAL (count_exp);
10219 /* Figure out proper mode for counter. For 32bits it is always SImode,
10220 for 64bits use SImode when possible, otherwise DImode.
10221 Set count to number of bytes copied when known at compile time. */
10222 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10223 || x86_64_zero_extended_value (count_exp))
10224 counter_mode = SImode;
10226 counter_mode = DImode;
10228 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10230 emit_insn (gen_cld ());
10232 /* When optimizing for size emit simple rep ; movsb instruction for
10233 counts not divisible by 4. */
10235 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10237 countreg = ix86_zero_extend_to_Pmode (count_exp);
10238 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10240 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10241 destreg, countreg));
10243 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10244 destreg, countreg));
10246 else if (count != 0
10248 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10249 || optimize_size || count < (unsigned int) 64)
10251 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10252 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10253 if (count & ~(size - 1))
10255 countreg = copy_to_mode_reg (counter_mode,
10256 GEN_INT ((count >> (size == 4 ? 2 : 3))
10257 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10258 countreg = ix86_zero_extend_to_Pmode (countreg);
10262 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10263 destreg, countreg));
10265 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10266 destreg, countreg));
10269 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10270 destreg, countreg));
/* Clear the sub-word tail with single stores of SUBREGs of zeroreg.  */
10272 if (size == 8 && (count & 0x04))
10273 emit_insn (gen_strsetsi (destreg,
10274 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10276 emit_insn (gen_strsethi (destreg,
10277 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10279 emit_insn (gen_strsetqi (destreg,
10280 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10286 /* Compute desired alignment of the string operation. */
10287 int desired_alignment = (TARGET_PENTIUMPRO
10288 && (count == 0 || count >= (unsigned int) 260)
10289 ? 8 : UNITS_PER_WORD);
10291 /* In case we don't know anything about the alignment, default to
10292 library version, since it is usually equally fast and result in
10294 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10297 if (TARGET_SINGLE_STRINGOP)
10298 emit_insn (gen_cld ());
10300 countreg2 = gen_reg_rtx (Pmode);
10301 countreg = copy_to_mode_reg (counter_mode, count_exp);
10302 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10304 if (count == 0 && align < desired_alignment)
10306 label = gen_label_rtx ();
10307 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10308 LEU, 0, counter_mode, 1, label);
/* Alignment fix-ups mirroring ix86_expand_movstr.  */
10312 rtx label = ix86_expand_aligntest (destreg, 1);
10313 emit_insn (gen_strsetqi (destreg,
10314 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10315 ix86_adjust_counter (countreg, 1);
10316 emit_label (label);
10317 LABEL_NUSES (label) = 1;
10321 rtx label = ix86_expand_aligntest (destreg, 2);
10322 emit_insn (gen_strsethi (destreg,
10323 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10324 ix86_adjust_counter (countreg, 2);
10325 emit_label (label);
10326 LABEL_NUSES (label) = 1;
10328 if (align <= 4 && desired_alignment > 4)
10330 rtx label = ix86_expand_aligntest (destreg, 4);
10331 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10332 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10334 ix86_adjust_counter (countreg, 4);
10335 emit_label (label);
10336 LABEL_NUSES (label) = 1;
10339 if (label && desired_alignment > 4 && !TARGET_64BIT)
10341 emit_label (label);
10342 LABEL_NUSES (label) = 1;
10346 if (!TARGET_SINGLE_STRINGOP)
10347 emit_insn (gen_cld ());
/* Main bulk clear: shift byte count down to a word count, then rep stos.  */
10350 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10352 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10353 destreg, countreg2));
10357 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10358 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10359 destreg, countreg2));
10363 emit_label (label);
10364 LABEL_NUSES (label) = 1;
/* Clear the remaining 0-7 tail bytes, by count bits when COUNT is
   known at compile time, by runtime tests otherwise.  */
10367 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10368 emit_insn (gen_strsetsi (destreg,
10369 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10370 if (TARGET_64BIT && (align <= 4 || count == 0))
10372 rtx label = ix86_expand_aligntest (countreg, 4);
10373 emit_insn (gen_strsetsi (destreg,
10374 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10375 emit_label (label);
10376 LABEL_NUSES (label) = 1;
10378 if (align > 2 && count != 0 && (count & 2))
10379 emit_insn (gen_strsethi (destreg,
10380 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10381 if (align <= 2 || count == 0)
10383 rtx label = ix86_expand_aligntest (countreg, 2);
10384 emit_insn (gen_strsethi (destreg,
10385 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10386 emit_label (label);
10387 LABEL_NUSES (label) = 1;
10389 if (align > 1 && count != 0 && (count & 1))
10390 emit_insn (gen_strsetqi (destreg,
10391 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10392 if (align <= 1 || count == 0)
10394 rtx label = ix86_expand_aligntest (countreg, 1);
10395 emit_insn (gen_strsetqi (destreg,
10396 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10397 emit_label (label);
10398 LABEL_NUSES (label) = 1;
10403 /* Expand strlen. */
/* Two strategies: an unrolled word-at-a-time scan via
   ix86_expand_strlensi_unroll_1 (when TARGET_UNROLL_STRLEN, the
   terminator is NUL, and optimizing), or a repnz scasb-style sequence
   built from the strlenqi patterns.  OUT receives the length.
   NOTE(review): sampled extract -- lines are elided between the
   numbered statements shown here.  */
10405 ix86_expand_strlen (out, src, eoschar, align)
10406 rtx out, src, eoschar, align;
10408 rtx addr, scratch1, scratch2, scratch3, scratch4;
10410 /* The generic case of strlen expander is long. Avoid its
10411 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
10413 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10414 && !TARGET_INLINE_ALL_STRINGOPS
10416 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10419 addr = force_reg (Pmode, XEXP (src, 0));
10420 scratch1 = gen_reg_rtx (Pmode);
10422 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10425 /* Well it seems that some optimizer does not combine a call like
10426 foo(strlen(bar), strlen(bar));
10427 when the move and the subtraction is done here. It does calculate
10428 the length just once when these instructions are done inside of
10429 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10430 often used and I use one fewer register for the lifetime of
10431 output_strlen_unroll() this is better. */
10433 emit_move_insn (out, addr);
10435 ix86_expand_strlensi_unroll_1 (out, align);
10437 /* strlensi_unroll_1 returns the address of the zero at the end of
10438 the string, like memchr(), so compute the length by subtracting
10439 the start address. */
10441 emit_insn (gen_subdi3 (out, out, addr));
10443 emit_insn (gen_subsi3 (out, out, addr));
/* scasb-style path: scratch4 = -1 is the max-count operand.  */
10447 scratch2 = gen_reg_rtx (Pmode);
10448 scratch3 = gen_reg_rtx (Pmode);
10449 scratch4 = force_reg (Pmode, constm1_rtx);
10451 emit_move_insn (scratch3, addr);
10452 eoschar = force_reg (QImode, eoschar);
10454 emit_insn (gen_cld ());
10457 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10458 align, scratch4, scratch3));
/* Length = ~remaining_count - 1.  */
10459 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10460 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10464 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10465 align, scratch4, scratch3));
10466 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10467 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10473 /* Expand the appropriate insns for doing strlen if not just doing
10476 out = result, initialized with the start address
10477 align_rtx = alignment of the address.
10478 scratch = scratch register, initialized with the start address when
10479 not aligned, otherwise undefined
10481 This is just the body. It needs the initialisations mentioned above and
10482 some address computing at the end. These things are done in i386.md. */
/* NOTE(review): sampled extract -- lines are elided between the
   numbered statements shown here.  On return OUT holds the address of
   the terminating zero byte (per the caller's comment), not the
   length itself.  */
10485 ix86_expand_strlensi_unroll_1 (out, align_rtx)
10486 rtx out, align_rtx;
10490 rtx align_2_label = NULL_RTX;
10491 rtx align_3_label = NULL_RTX;
10492 rtx align_4_label = gen_label_rtx ();
10493 rtx end_0_label = gen_label_rtx ();
10495 rtx tmpreg = gen_reg_rtx (SImode);
10496 rtx scratch = gen_reg_rtx (SImode);
10499 if (GET_CODE (align_rtx) == CONST_INT)
10500 align = INTVAL (align_rtx);
10502 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10504 /* Is there a known alignment and is it less than 4? */
10507 rtx scratch1 = gen_reg_rtx (Pmode);
10508 emit_move_insn (scratch1, out);
10509 /* Is there a known alignment and is it not 2? */
10512 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10513 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10515 /* Leave just the 3 lower bits. */
10516 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10517 NULL_RTX, 0, OPTAB_WIDEN);
10519 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10520 Pmode, 1, align_4_label);
10521 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
10522 Pmode, 1, align_2_label);
10523 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
10524 Pmode, 1, align_3_label);
10528 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10529 check if is aligned to 4 - byte. */
10531 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
10532 NULL_RTX, 0, OPTAB_WIDEN);
10534 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10535 Pmode, 1, align_4_label);
10538 mem = gen_rtx_MEM (QImode, out);
10540 /* Now compare the bytes. */
10542 /* Compare the first n unaligned byte on a byte per byte basis. */
10543 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
10544 QImode, 1, end_0_label);
10546 /* Increment the address. */
10548 emit_insn (gen_adddi3 (out, out, const1_rtx));
10550 emit_insn (gen_addsi3 (out, out, const1_rtx));
10552 /* Not needed with an alignment of 2 */
10555 emit_label (align_2_label);
10557 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10561 emit_insn (gen_adddi3 (out, out, const1_rtx));
10563 emit_insn (gen_addsi3 (out, out, const1_rtx));
10565 emit_label (align_3_label);
10568 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10572 emit_insn (gen_adddi3 (out, out, const1_rtx));
10574 emit_insn (gen_addsi3 (out, out, const1_rtx));
10577 /* Generate loop to check 4 bytes at a time. It is not a good idea to
10578 align this loop. It gives only huge programs, but does not help to
10580 emit_label (align_4_label);
10582 mem = gen_rtx_MEM (SImode, out);
10583 emit_move_insn (scratch, mem);
10585 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10587 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
10589 /* This formula yields a nonzero result iff one of the bytes is zero.
10590 This saves three branches inside loop and many cycles. */
10592 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10593 emit_insn (gen_one_cmplsi2 (scratch, scratch));
10594 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
10595 emit_insn (gen_andsi3 (tmpreg, tmpreg,
10596 gen_int_mode (0x80808080, SImode)));
10597 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* Zero byte located: narrow down which of the four bytes it was.
   This branch uses conditional moves (cmov) to stay branch-free.  */
10602 rtx reg = gen_reg_rtx (SImode);
10603 rtx reg2 = gen_reg_rtx (Pmode);
10604 emit_move_insn (reg, tmpreg);
10605 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10607 /* If zero is not in the first two bytes, move two bytes forward. */
10608 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10609 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10610 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10611 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10612 gen_rtx_IF_THEN_ELSE (SImode, tmp,
10615 /* Emit lea manually to avoid clobbering of flags. */
10616 emit_insn (gen_rtx_SET (SImode, reg2,
10617 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
10619 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10620 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10621 emit_insn (gen_rtx_SET (VOIDmode, out,
10622 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* Fallback path without cmov: branch around the two-byte advance.  */
10629 rtx end_2_label = gen_label_rtx ();
10630 /* Is zero in the first two bytes? */
10632 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10633 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10634 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10635 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10636 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10638 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10639 JUMP_LABEL (tmp) = end_2_label;
10641 /* Not in the first two. Move two bytes forward. */
10642 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
10644 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
10646 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
10648 emit_label (end_2_label);
10652 /* Avoid branch in fixing the byte. */
10653 tmpreg = gen_lowpart (QImode, tmpreg);
10654 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
10656 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
10658 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
10660 emit_label (end_0_label);
/* Emit a CALL_INSN for calling FNADDR with argument-size CALLARG1.
   RETVAL, when non-NULL, receives the call's value; POP, when
   non-NULL, is the callee-popped stack adjustment attached to the
   call as a parallel SET of the stack pointer.  CALLARG2 appears to
   carry the SSE register count passed in %al on 64-bit targets.
   NOTE(review): sampled extract -- lines (including the Mach-O
   #if counterpart of the #endif below) are elided.  */
10664 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
10665 rtx retval, fnaddr, callarg1, callarg2, pop;
10667 rtx use = NULL, call;
10669 if (pop == const0_rtx)
10671 if (TARGET_64BIT && pop)
10675 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
10676 fnaddr = machopic_indirect_call_target (fnaddr);
10678 /* Static functions and indirect calls don't need the pic register. */
10679 if (! TARGET_64BIT && flag_pic
10680 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
10681 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
10682 use_reg (&use, pic_offset_table_rtx);
10684 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
/* %al (QImode reg 0) tells a 64-bit varargs callee how many vector
   registers were used -- TODO confirm against the x86-64 psABI.  */
10686 rtx al = gen_rtx_REG (QImode, 0);
10687 emit_move_insn (al, callarg2);
10688 use_reg (&use, al);
10690 #endif /* TARGET_MACHO */
10692 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
10694 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10695 fnaddr = gen_rtx_MEM (QImode, fnaddr);
10698 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
10700 call = gen_rtx_SET (VOIDmode, retval, call);
10703 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
10704 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
10705 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
10708 call = emit_call_insn (call);
10710 CALL_INSN_FUNCTION_USAGE (call) = use;
10714 /* Clear stack slot assignments remembered from previous functions.
10715 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocates a zero-initialized, garbage-collected machine_function
   for the function about to be expanded.  */
10718 static struct machine_function *
10719 ix86_init_machine_status ()
10721 return ggc_alloc_cleared (sizeof (struct machine_function));
10724 /* Return a MEM corresponding to a stack slot with mode MODE.
10725 Allocate a new slot if necessary.
10727 The RTL for a function can have several slots available: N is
10728 which slot to use. */
10731 assign_386_stack_local (mode, n)
10732 enum machine_mode mode;
/* N must index within the per-mode cache; out-of-range is a bug
   (handling on the elided line presumed to abort).  */
10735 if (n < 0 || n >= MAX_386_STACK_LOCALS)
/* Lazily allocate and cache the slot so repeated requests for the
   same (mode, n) pair share one stack location.  */
10738 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
10739 ix86_stack_locals[(int) mode][n]
10740 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10742 return ix86_stack_locals[(int) mode][n];
10745 /* Construct the SYMBOL_REF for the tls_get_addr function. */
/* Cached across calls; GTY(()) marks it as a GC root.  */
10747 static GTY(()) rtx ix86_tls_symbol;
/* Build (once) and return the SYMBOL_REF used to call the TLS resolver.
   The GNU-TLS variant uses the triple-underscore name.  */
10749 ix86_tls_get_addr ()
10752 if (!ix86_tls_symbol)
10754 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, (TARGET_GNU_TLS
10755 ? "___tls_get_addr"
10756 : "__tls_get_addr"));
10759 return ix86_tls_symbol;
10762 /* Calculate the length of the memory address in the instruction
10763 encoding. Does not include the one-byte modrm, opcode, or prefix. */
/* Returns the byte count of displacement/SIB needed by ADDR.
   NOTE(review): the return statements and enclosing braces are elided
   from this numbered listing.  */
10766 memory_address_length (addr)
10769 struct ix86_address parts;
10770 rtx base, index, disp;
/* Auto-modify addresses encode no extra bytes (push/pop forms).  */
10773 if (GET_CODE (addr) == PRE_DEC
10774 || GET_CODE (addr) == POST_INC
10775 || GET_CODE (addr) == PRE_MODIFY
10776 || GET_CODE (addr) == POST_MODIFY)
/* Presumably aborts on an undecomposable address -- TODO confirm.  */
10779 if (! ix86_decompose_address (addr, &parts))
10783 index = parts.index;
10787 /* Register Indirect. */
10788 if (base && !index && !disp)
10790 /* Special cases: ebp and esp need the two-byte modrm form. */
10791 if (addr == stack_pointer_rtx
10792 || addr == arg_pointer_rtx
10793 || addr == frame_pointer_rtx
10794 || addr == hard_frame_pointer_rtx)
10798 /* Direct Addressing. */
10799 else if (disp && !base && !index)
10804 /* Find the length of the displacement constant. */
/* 'K' constraint = signed 8-bit constant, so a 1-byte displacement;
   otherwise a full 4-byte displacement.  */
10807 if (GET_CODE (disp) == CONST_INT
10808 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
10814 /* An index requires the two-byte modrm form. */
10822 /* Compute default value for "length_immediate" attribute. When SHORTFORM
10823 is set, expect that insn have 8bit immediate alternative. */
10825 ix86_attr_length_immediate_default (insn, shortform)
/* Scan all operands for the (single) constant immediate.  */
10831 extract_insn_cached (insn);
10832 for (i = recog_data.n_operands - 1; i >= 0; --i)
10833 if (CONSTANT_P (recog_data.operand[i]))
/* 'K' constraint = signed 8-bit: the short-form encoding applies.  */
10838 && GET_CODE (recog_data.operand[i]) == CONST_INT
10839 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
/* Otherwise the immediate width follows the insn's mode attribute.
   NOTE(review): the case labels/returns of this switch are elided from
   this numbered listing.  */
10843 switch (get_attr_mode (insn))
10854 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
10859 fatal_insn ("unknown insn mode", insn);
10865 /* Compute default value for "length_address" attribute. */
/* Returns the encoded length of the first MEM operand's address,
   via memory_address_length; zero-return path elided from listing.  */
10867 ix86_attr_length_address_default (insn)
10871 extract_insn_cached (insn);
10872 for (i = recog_data.n_operands - 1; i >= 0; --i)
10873 if (GET_CODE (recog_data.operand[i]) == MEM)
10875 return memory_address_length (XEXP (recog_data.operand[i], 0));
10881 /* Return the maximum number of instructions a cpu can issue. */
/* NOTE(review): this is the interior of ix86_issue_rate's switch on
   ix86_cpu; the function signature, the per-case return values and the
   default case are all elided from this numbered listing.  */
10888 case PROCESSOR_PENTIUM:
10892 case PROCESSOR_PENTIUMPRO:
10893 case PROCESSOR_PENTIUM4:
10894 case PROCESSOR_ATHLON:
10902 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
10903 by DEP_INSN and nothing set by DEP_INSN. */
10906 ix86_flags_dependant (insn, dep_insn, insn_type)
10907 rtx insn, dep_insn;
10908 enum attr_type insn_type;
10912 /* Simplify the test for uninteresting insns. */
10913 if (insn_type != TYPE_SETCC
10914 && insn_type != TYPE_ICMOV
10915 && insn_type != TYPE_FCMOV
10916 && insn_type != TYPE_IBR)
10919 if ((set = single_set (dep_insn)) != 0)
10921 set = SET_DEST (set);
10924 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
10925 && XVECLEN (PATTERN (dep_insn), 0) == 2
10926 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
10927 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
10929 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10930 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10935 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
10938 /* This test is true if the dependent insn reads the flags but
10939 not any other potentially set register. */
10940 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
10943 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
10949 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
10950 address with operands set by DEP_INSN. */
10953 ix86_agi_dependant (insn, dep_insn, insn_type)
10954 rtx insn, dep_insn;
10955 enum attr_type insn_type;
/* LEA computes through the address path, so treat its source address
   expression as the "address".  NOTE(review): the opening brace, local
   declarations and several branch bodies are elided from this listing.  */
10959 if (insn_type == TYPE_LEA
10962 addr = PATTERN (insn);
10963 if (GET_CODE (addr) == SET)
10965 else if (GET_CODE (addr) == PARALLEL
10966 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
10967 addr = XVECEXP (addr, 0, 0);
10970 addr = SET_SRC (addr);
/* Non-LEA: find the first MEM operand and take its address.  */
10975 extract_insn_cached (insn);
10976 for (i = recog_data.n_operands - 1; i >= 0; --i)
10977 if (GET_CODE (recog_data.operand[i]) == MEM)
10979 addr = XEXP (recog_data.operand[i], 0);
/* AGI exists iff DEP_INSN writes something the address reads.  */
10986 return modified_in_p (addr, dep_insn);
/* Scheduler hook: adjust COST (latency) of the dependency LINK between
   DEP_INSN (producer) and INSN (consumer) for the selected ix86_cpu.
   NOTE(review): this numbered listing elides the braces, the switch
   header on ix86_cpu, the K6 case label, cost assignments and the final
   return -- do not compile as-is.  */
10990 ix86_adjust_cost (insn, link, dep_insn, cost)
10991 rtx insn, link, dep_insn;
10994 enum attr_type insn_type, dep_insn_type;
10995 enum attr_memory memory, dep_memory;
10997 int dep_insn_code_number;
10999 /* Anti and output depenancies have zero cost on all CPUs. */
11000 if (REG_NOTE_KIND (link) != 0)
11003 dep_insn_code_number = recog_memoized (dep_insn);
11005 /* If we can't recognize the insns, we can't really do anything. */
11006 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11009 insn_type = get_attr_type (insn);
11010 dep_insn_type = get_attr_type (dep_insn);
11014 case PROCESSOR_PENTIUM:
11015 /* Address Generation Interlock adds a cycle of latency. */
11016 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11019 /* ??? Compares pair with jump/setcc. */
11020 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11023 /* Floating point stores require value to be ready one cycle ealier. */
11024 if (insn_type == TYPE_FMOV
11025 && get_attr_memory (insn) == MEMORY_STORE
11026 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11030 case PROCESSOR_PENTIUMPRO:
11031 memory = get_attr_memory (insn);
11032 dep_memory = get_attr_memory (dep_insn);
11034 /* Since we can't represent delayed latencies of load+operation,
11035 increase the cost here for non-imov insns. */
11036 if (dep_insn_type != TYPE_IMOV
11037 && dep_insn_type != TYPE_FMOV
11038 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11041 /* INT->FP conversion is expensive. */
11042 if (get_attr_fp_int_src (dep_insn))
11045 /* There is one cycle extra latency between an FP op and a store. */
11046 if (insn_type == TYPE_FMOV
11047 && (set = single_set (dep_insn)) != NULL_RTX
11048 && (set2 = single_set (insn)) != NULL_RTX
11049 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11050 && GET_CODE (SET_DEST (set2)) == MEM)
11053 /* Show ability of reorder buffer to hide latency of load by executing
11054 in parallel with previous instruction in case
11055 previous instruction is not needed to compute the address. */
11056 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11057 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11059 /* Claim moves to take one cycle, as core can issue one load
11060 at time and the next load can start cycle later. */
11061 if (dep_insn_type == TYPE_IMOV
11062 || dep_insn_type == TYPE_FMOV)
/* NOTE(review): the case label between original lines 11062 and 11070
   (presumably PROCESSOR_K6) is elided from this listing.  */
11070 memory = get_attr_memory (insn);
11071 dep_memory = get_attr_memory (dep_insn);
11072 /* The esp dependency is resolved before the instruction is really
11074 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11075 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11078 /* Since we can't represent delayed latencies of load+operation,
11079 increase the cost here for non-imov insns. */
11080 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11081 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11083 /* INT->FP conversion is expensive. */
11084 if (get_attr_fp_int_src (dep_insn))
11087 /* Show ability of reorder buffer to hide latency of load by executing
11088 in parallel with previous instruction in case
11089 previous instruction is not needed to compute the address. */
11090 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11091 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11093 /* Claim moves to take one cycle, as core can issue one load
11094 at time and the next load can start cycle later. */
11095 if (dep_insn_type == TYPE_IMOV
11096 || dep_insn_type == TYPE_FMOV)
11105 case PROCESSOR_ATHLON:
11106 memory = get_attr_memory (insn);
11107 dep_memory = get_attr_memory (dep_insn);
11109 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11111 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
11116 /* Show ability of reorder buffer to hide latency of load by executing
11117 in parallel with previous instruction in case
11118 previous instruction is not needed to compute the address. */
11119 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11120 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11122 /* Claim moves to take one cycle, as core can issue one load
11123 at time and the next load can start cycle later. */
11124 if (dep_insn_type == TYPE_IMOV
11125 || dep_insn_type == TYPE_FMOV)
11127 else if (cost >= 3)
/* Per-cycle PPro decoder-model state used by the scheduling hooks below.
   NOTE(review): this listing elides at least one member -- the code below
   also references a decode[3] array of insns (decode[0..2]).  */
11142 struct ppro_sched_data
11145 int issued_this_cycle;
/* Return INSN's ppro_uops attribute, or PPRO_UOPS_MANY (the conservative
   worst case) when the insn is not recognizable.  */
11149 static enum attr_ppro_uops
11150 ix86_safe_ppro_uops (insn)
11153 if (recog_memoized (insn) >= 0)
11154 return get_attr_ppro_uops (insn);
11156 return PPRO_UOPS_MANY;
/* Print the UIDs of the insns currently occupying the three PPro decode
   slots to the scheduler dump file DUMP (nothing if slot 0 is empty).  */
11160 ix86_dump_ppro_packet (dump)
11163 if (ix86_sched_data.ppro.decode[0])
11165 fprintf (dump, "PPRO packet: %d",
11166 INSN_UID (ix86_sched_data.ppro.decode[0]))
11167 if (ix86_sched_data.ppro.decode[1])
11168 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11169 if (ix86_sched_data.ppro.decode[2])
11170 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11171 fputc ('\n', dump);
11175 /* We're beginning a new block. Initialize data structures as necessary. */
/* TARGET_SCHED_INIT hook: all three parameters are unused; simply zero
   the per-block scheduling state.  */
11178 ix86_sched_init (dump, sched_verbose, veclen)
11179 FILE *dump ATTRIBUTE_UNUSED;
11180 int sched_verbose ATTRIBUTE_UNUSED;
11181 int veclen ATTRIBUTE_UNUSED;
11183 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11186 /* Shift INSN to SLOT, and shift everything else down. */
/* Rotates *INSNP up to SLOT within the ready array.  NOTE(review): the
   locals, the do-loop header and the final store are elided from this
   numbered listing.  */
11189 ix86_reorder_insn (insnp, slot)
11196 insnp[0] = insnp[1];
11197 while (++insnp != slot);
/* Reorder the ready queue (READY..E_READY, E_READY = highest priority) to
   model the PPro 4-1-1 decoder template: one multi-uop insn plus up to two
   single-uop insns per cycle.  Records the issue count in
   ix86_sched_data.ppro.issued_this_cycle.
   NOTE(review): braces, some locals (decode[], insnp, i) and loop headers
   are elided from this numbered listing.  */
11203 ix86_sched_reorder_ppro (ready, e_ready)
11208 enum attr_ppro_uops cur_uops;
11209 int issued_this_cycle;
11213 /* At this point .ppro.decode contains the state of the three
11214 decoders from last "cycle". That is, those insns that were
11215 actually independent. But here we're scheduling for the
11216 decoder, and we may find things that are decodable in the
11219 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11220 issued_this_cycle = 0;
11223 cur_uops = ix86_safe_ppro_uops (*insnp);
11225 /* If the decoders are empty, and we've a complex insn at the
11226 head of the priority queue, let it issue without complaint. */
11227 if (decode[0] == NULL)
11229 if (cur_uops == PPRO_UOPS_MANY)
11231 decode[0] = *insnp;
11235 /* Otherwise, search for a 2-4 uop unsn to issue. */
11236 while (cur_uops != PPRO_UOPS_FEW)
11238 if (insnp == ready)
11240 cur_uops = ix86_safe_ppro_uops (*--insnp);
11243 /* If so, move it to the head of the line. */
11244 if (cur_uops == PPRO_UOPS_FEW)
11245 ix86_reorder_insn (insnp, e_ready);
11247 /* Issue the head of the queue. */
11248 issued_this_cycle = 1;
11249 decode[0] = *e_ready--;
11252 /* Look for simple insns to fill in the other two slots. */
11253 for (i = 1; i < 3; ++i)
11254 if (decode[i] == NULL)
11256 if (ready > e_ready)
11260 cur_uops = ix86_safe_ppro_uops (*insnp);
11261 while (cur_uops != PPRO_UOPS_ONE)
11263 if (insnp == ready)
11265 cur_uops = ix86_safe_ppro_uops (*--insnp);
11268 /* Found one. Move it to the head of the queue and issue it. */
11269 if (cur_uops == PPRO_UOPS_ONE)
11271 ix86_reorder_insn (insnp, e_ready);
11272 decode[i] = *e_ready--;
11273 issued_this_cycle++;
11277 /* ??? Didn't find one. Ideally, here we would do a lazy split
11278 of 2-uop insns, issue one and queue the other. */
/* Always report at least one issued insn so the scheduler makes
   forward progress.  */
11282 if (issued_this_cycle == 0)
11283 issued_this_cycle = 1;
11284 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11287 /* We are about to being issuing insns for this clock cycle.
11288 Override the default sort algorithm to better slot instructions. */
/* TARGET_SCHED_REORDER hook; returns the number of insns that may issue
   this cycle.  NOTE(review): the parameter declarations for READY and
   N_READYP, the empty-queue early-out and the switch header on ix86_cpu
   are elided from this numbered listing.  */
11290 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11291 FILE *dump ATTRIBUTE_UNUSED;
11292 int sched_verbose ATTRIBUTE_UNUSED;
11295 int clock_var ATTRIBUTE_UNUSED;
11297 int n_ready = *n_readyp;
11298 rtx *e_ready = ready + n_ready - 1;
11300 /* Make sure to go ahead and initialize key items in
11301 ix86_sched_data if we are not going to bother trying to
11302 reorder the ready queue. */
11305 ix86_sched_data.ppro.issued_this_cycle = 1;
/* Only the PPro model actually reorders; other CPUs fall through to the
   default issue rate.  */
11314 case PROCESSOR_PENTIUMPRO:
11315 ix86_sched_reorder_ppro (ready, e_ready);
11320 return ix86_issue_rate ();
11323 /* We are about to issue INSN. Return the number of insns left on the
11324 ready queue that can be issued this cycle. */
/* TARGET_SCHED_VARIABLE_ISSUE hook.  For PPro this also maintains the
   three-slot decode[] packet and dumps it when a packet is flushed.
   NOTE(review): the switch header, several braces and the default case
   are elided from this numbered listing.  */
11327 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11331 int can_issue_more;
11337 return can_issue_more - 1;
11339 case PROCESSOR_PENTIUMPRO:
11341 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
/* A many-uop insn occupies the whole decode packet by itself.  */
11343 if (uops == PPRO_UOPS_MANY)
11346 ix86_dump_ppro_packet (dump);
11347 ix86_sched_data.ppro.decode[0] = insn;
11348 ix86_sched_data.ppro.decode[1] = NULL;
11349 ix86_sched_data.ppro.decode[2] = NULL;
11351 ix86_dump_ppro_packet (dump);
11352 ix86_sched_data.ppro.decode[0] = NULL;
/* A few-uop insn must start a fresh packet in slot 0.  */
11354 else if (uops == PPRO_UOPS_FEW)
11357 ix86_dump_ppro_packet (dump);
11358 ix86_sched_data.ppro.decode[0] = insn;
11359 ix86_sched_data.ppro.decode[1] = NULL;
11360 ix86_sched_data.ppro.decode[2] = NULL;
/* Single-uop insns fill the first free slot of the current packet.  */
11364 for (i = 0; i < 3; ++i)
11365 if (ix86_sched_data.ppro.decode[i] == NULL)
11367 ix86_sched_data.ppro.decode[i] = insn;
/* Packet full: dump it and start over.  */
11375 ix86_dump_ppro_packet (dump);
11376 ix86_sched_data.ppro.decode[0] = NULL;
11377 ix86_sched_data.ppro.decode[1] = NULL;
11378 ix86_sched_data.ppro.decode[2] = NULL;
11382 return --ix86_sched_data.ppro.issued_this_cycle;
/* Hook telling the scheduler whether to use the DFA pipeline model.
   NOTE(review): the return statements are elided from this numbered
   listing; only the Pentium test is visible.  */
11387 ia32_use_dfa_pipeline_interface ()
11389 if (ix86_cpu == PROCESSOR_PENTIUM)
11394 /* How many alternative schedules to try. This should be as wide as the
11395 scheduling freedom in the DFA, but no wider. Making this value too
11396 large results extra work for the scheduler. */
/* NOTE(review): the return values are elided from this numbered listing;
   only the Pentium test is visible.  */
11399 ia32_multipass_dfa_lookahead ()
11401 if (ix86_cpu == PROCESSOR_PENTIUM)
11408 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11409 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
/* Driver: apply ix86_set_move_mem_attrs_1 to the pattern of every insn in
   the chain starting at INSNS.  */
11413 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11415 rtx dstref, srcref, dstreg, srcreg;
11419 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11421 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11425 /* Subroutine of above to actually do the updating by recursively walking
/* Recursively walks the rtx X; any MEM whose address is exactly DSTREG
   (resp. SRCREG) gets the memory attributes copied from DSTREF (SRCREF).
   Note the pointer-equality test on XEXP (x, 0) -- only direct register
   addresses match, not equivalent expressions.  */
11429 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11431 rtx dstref, srcref, dstreg, srcreg;
11433 enum rtx_code code = GET_CODE (x);
11434 const char *format_ptr = GET_RTX_FORMAT (code);
11437 if (code == MEM && XEXP (x, 0) == dstreg)
11438 MEM_COPY_ATTRIBUTES (x, dstref);
11439 else if (code == MEM && XEXP (x, 0) == srcreg)
11440 MEM_COPY_ATTRIBUTES (x, srcref);
/* Recurse into every rtx ('e') and rtx-vector ('E') slot of X.  */
11442 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11444 if (*format_ptr == 'e')
11445 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11447 else if (*format_ptr == 'E')
11448 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11449 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
11454 /* Compute the alignment given to a constant that is being placed in memory.
11455 EXP is the constant and ALIGN is the alignment that the object would
11457 The value of this function is used instead of that alignment to align
/* Boosts DFmode reals to 64-bit and 128-bit modes / long strings to wider
   alignment.  NOTE(review): the returned alignment values and the final
   return of ALIGN are elided from this numbered listing.  */
11461 ix86_constant_alignment (exp, align)
11465 if (TREE_CODE (exp) == REAL_CST)
11467 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11469 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11472 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
11479 /* Compute the alignment for a static variable.
11480 TYPE is the data type, and ALIGN is the alignment that
11481 the object would ordinarily have. The value of this function is used
11482 instead of that alignment to align the object. */
/* NOTE(review): the returned alignment constants and some guards
   (e.g. the TARGET_64BIT test implied by the x86-64 comment below) are
   elided from this numbered listing.  */
11485 ix86_data_alignment (type, align)
/* Large aggregates (>= 256 bits) get at least 256-bit alignment.  */
11489 if (AGGREGATE_TYPE_P (type)
11490 && TYPE_SIZE (type)
11491 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11492 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11493 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11496 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11497 to 16byte boundary. */
11500 if (AGGREGATE_TYPE_P (type)
11501 && TYPE_SIZE (type)
11502 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11503 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11504 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Mode-based boosts: DFmode elements want 64-bit, 128-bit modes 128.  */
11508 if (TREE_CODE (type) == ARRAY_TYPE)
11510 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11512 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11515 else if (TREE_CODE (type) == COMPLEX_TYPE)
11518 if (TYPE_MODE (type) == DCmode && align < 64)
11520 if (TYPE_MODE (type) == XCmode && align < 128)
11523 else if ((TREE_CODE (type) == RECORD_TYPE
11524 || TREE_CODE (type) == UNION_TYPE
11525 || TREE_CODE (type) == QUAL_UNION_TYPE)
11526 && TYPE_FIELDS (type))
/* For records/unions, key off the mode of the first field.  */
11528 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11530 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11533 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11534 || TREE_CODE (type) == INTEGER_TYPE)
11536 if (TYPE_MODE (type) == DFmode && align < 64)
11538 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11545 /* Compute the alignment for a local variable.
11546 TYPE is the data type, and ALIGN is the alignment that
11547 the object would ordinarily have. The value of this macro is used
11548 instead of that alignment to align the object. */
/* Same mode-based boosts as ix86_data_alignment, minus the 256-bit rule
   for large static aggregates.  NOTE(review): returned constants and the
   TARGET_64BIT guard implied below are elided from this listing.  */
11551 ix86_local_alignment (type, align)
11555 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11556 to 16byte boundary. */
11559 if (AGGREGATE_TYPE_P (type)
11560 && TYPE_SIZE (type)
11561 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11562 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11563 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11566 if (TREE_CODE (type) == ARRAY_TYPE)
11568 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11570 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11573 else if (TREE_CODE (type) == COMPLEX_TYPE)
11575 if (TYPE_MODE (type) == DCmode && align < 64)
11577 if (TYPE_MODE (type) == XCmode && align < 128)
11580 else if ((TREE_CODE (type) == RECORD_TYPE
11581 || TREE_CODE (type) == UNION_TYPE
11582 || TREE_CODE (type) == QUAL_UNION_TYPE)
11583 && TYPE_FIELDS (type))
/* For records/unions, key off the mode of the first field.  */
11585 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11587 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11590 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11591 || TREE_CODE (type) == INTEGER_TYPE)
11594 if (TYPE_MODE (type) == DFmode && align < 64)
11596 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11602 /* Emit RTL insns to initialize the variable parts of a trampoline.
11603 FNADDR is an RTX for the address of the function's pure code.
11604 CXT is an RTX for the static chain value for the function. */
/* NOTE(review): the TARGET_64BIT branch selector, the OFFSET local and
   its updates between stores are elided from this numbered listing.  */
11606 x86_initialize_trampoline (tramp, fnaddr, cxt)
11607 rtx tramp, fnaddr, cxt;
/* 32-bit trampoline: "movl $cxt,%ecx; jmp fnaddr" (opcodes 0xb9, 0xe9).  */
11611 /* Compute offset from the end of the jmp to the target function. */
11612 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11613 plus_constant (tramp, 10),
11614 NULL_RTX, 1, OPTAB_DIRECT);
11615 emit_move_insn (gen_rtx_MEM (QImode, tramp),
11616 gen_int_mode (0xb9, QImode));
11617 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11618 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
11619 gen_int_mode (0xe9, QImode));
11620 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11625 /* Try to load address using shorter movl instead of movabs.
11626 We may want to support movq for kernel mode, but kernel does not use
11627 trampolines at the moment. */
/* 64-bit trampoline: load fnaddr into %r11, cxt into %r10, jmp *%r11.
   0xbb41 = REX.B + movl imm32 into r11d (zero-extends).  */
11628 if (x86_64_zero_extended_value (fnaddr))
11630 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11631 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11632 gen_int_mode (0xbb41, HImode));
11633 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11634 gen_lowpart (SImode, fnaddr));
/* Otherwise a full movabs: 0xbb49 = REX.W+B movq imm64 into r11.  */
11639 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11640 gen_int_mode (0xbb49, HImode));
11641 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11645 /* Load static chain using movabs to r10. */
11646 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11647 gen_int_mode (0xba49, HImode));
11648 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11651 /* Jump to the r11 */
11652 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11653 gen_int_mode (0xff49, HImode));
11654 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
11655 gen_int_mode (0xe3, QImode));
/* Sanity check: emitted bytes must fit in TRAMPOLINE_SIZE; presumably
   aborts (abort () elided from this listing) -- TODO confirm.  */
11657 if (offset > TRAMPOLINE_SIZE)
/* Register the md builtin NAME with signature TYPE and code CODE, but only
   when the target-flag bits in MASK are enabled (so e.g. SSE builtins
   disappear without -msse).  Multi-line macro: no comments may be placed
   between the continuation lines below.  */
11662 #define def_builtin(MASK, NAME, TYPE, CODE) \
11664 if ((MASK) & target_flags) \
11665 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11666 NULL, NULL_TREE); \
/* One table row describing an ia32 builtin: the target_flags MASK gating
   it, the insn pattern ICODE to expand to, the user-visible NAME, the
   IX86_BUILTIN_* CODE, and for comparison builtins the rtx COMPARISON
   code plus a FLAG (used below to mark swapped-operand variants).  */
11669 struct builtin_description
11671 const unsigned int mask;
11672 const enum insn_code icode;
11673 const char *const name;
11674 const enum ix86_builtins code;
11675 const enum rtx_code comparison;
11676 const unsigned int flag;
11679 /* Used for builtins that are enabled both by -msse and -msse2. */
11680 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
/* (u)comiss / (u)comisd scalar-compare builtins.  Note GT/GE are encoded
   as LT/LE with flag == 1: the expander uses the flag to swap the
   operands, since the hardware only provides the lt/le orderings.  */
11682 static const struct builtin_description bdesc_comi[] =
11684 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
11685 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
11686 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
11687 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
11688 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
11689 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
11690 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
11691 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
11692 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
11693 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
11694 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
11695 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
11696 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
11697 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
11698 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
11699 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
11700 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
11701 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
11702 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
11703 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
11704 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
11705 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
11706 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
11707 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
11710 static const struct builtin_description bdesc_2arg[] =
11713 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11714 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11715 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11716 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11717 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11718 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11719 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11720 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11722 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11723 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11724 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11725 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11726 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11727 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11728 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11729 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11730 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11731 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11732 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11733 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11734 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11735 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11736 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11737 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
11738 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
11739 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11740 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11741 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11742 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11743 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
11744 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
11745 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11747 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11748 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11749 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11750 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11752 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11753 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11754 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11755 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11756 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
11759 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11760 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11761 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11762 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11763 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11764 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11766 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11767 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11768 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11769 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11770 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11771 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11772 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11773 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11775 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11776 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
11777 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
/* Tail of the bdesc_2arg table: two-operand MMX/SSE/SSE2 builtins.
   Each row is { enabling target mask, insn-pattern code, builtin name
   (0 when the name is registered separately in ix86_init_mmx_sse_builtins),
   IX86_BUILTIN_* code, comparison rtx code, swap-operands flag }.  */
/* MMX bitwise logic on DImode MMX registers.  */
11779 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11780 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11781 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11782 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
/* Unsigned averages: SSE instructions that also exist on Athlon 3DNow!.  */
11784 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11785 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
/* MMX element-wise compares (produce all-ones / all-zeros masks).  */
11787 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11788 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11789 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11790 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11791 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11792 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
/* Min/max: SSE instructions also present on 3DNow! Athlon.  */
11794 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11795 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11796 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11797 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
/* MMX interleaves.  */
11799 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11800 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11801 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11802 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11803 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11804 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
/* Name field is 0: these builtins are registered by hand with their
   (narrowing) function types in ix86_init_mmx_sse_builtins.  */
11807 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11808 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11809 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11811 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11812 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
/* MMX shifts: the register-count (PSLLW) and immediate (PSLLWI) forms
   both expand through the same pattern.  */
11814 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11815 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11816 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11817 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11818 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
11819 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
11821 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
11822 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
11823 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
11824 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
11825 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
11826 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
11828 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
11829 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
11830 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
11831 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
11833 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
11834 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
/* SSE2 packed (PD) and scalar (SD) double-precision arithmetic.  */
11837 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
11838 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
11839 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
11840 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
11841 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
11842 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
11843 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
11844 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
/* SSE2 compares: field 5 is the comparison code, field 6 is the
   swap-operands flag (GT/GE are LT/LE with operands swapped).  */
11846 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
11847 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
11848 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
11849 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
11850 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
11851 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
11852 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
11853 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
11854 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
11855 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
11856 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
11857 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
11858 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
11859 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
11860 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
11861 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 },
11862 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 },
11863 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
11864 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
11865 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
11866 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
11867 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 },
11868 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 },
11869 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
11871 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
11872 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
11873 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
11874 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
11876 { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
11877 { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
11878 { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
11879 { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
11881 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
11882 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
11883 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
/* SSE2 128-bit integer arithmetic.
   NOTE(review): paddq128/psubq128 are mapped to addv4si3/subv4si3 (V4SImode
   adds), not V2DImode patterns; PADDQ/PSUBQ operate on 64-bit elements, so
   this looks wrong for inputs whose 32-bit lanes carry/borrow -- confirm
   against the .md patterns and the intended V2DI expanders.  */
11886 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
11887 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
11888 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
11889 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
11890 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
11891 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
11892 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
11893 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
/* NOTE(review): these 128-bit saturating ops are gated on MASK_MMX, but
   every surrounding 128-bit builtin uses MASK_SSE2 -- presumably a typo;
   verify and switch to MASK_SSE2.  */
11895 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
11896 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
11897 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
11898 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
11899 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
11900 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
11901 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
11902 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
11904 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
11905 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
11906 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
11907 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
11909 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
11910 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
11911 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
11912 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
11914 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
11915 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
11917 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
11918 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
11919 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
11920 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
11921 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
11922 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
11924 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
11925 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
11926 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
11927 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
11929 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
11930 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
11931 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
11932 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
11933 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
11934 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
11936 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
11937 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
11938 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
11940 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
11941 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
/* SSE2 shifts: _ti variants take the count in a vector register
   (TImode operand), the others take an immediate count.  */
11943 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
11944 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
11945 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
11946 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
11947 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
11948 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
11950 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
11951 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
11952 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
11953 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
11954 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
11955 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
11957 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
11958 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
11959 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
11960 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
11962 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
11964 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
11965 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
11966 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
/* One-operand MMX/SSE/SSE2 builtins.  Same row layout as bdesc_2arg;
   all names here are 0 because these builtins are registered with
   hand-picked function types in ix86_init_mmx_sse_builtins.  */
11969 static const struct builtin_description bdesc_1arg[] =
/* Move-mask insns extract the per-element sign bits into an integer.  */
11971 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
11972 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
11974 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
11975 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
11976 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
/* SSE1 conversions; the "tt" variants truncate instead of rounding.  */
11978 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
11979 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
11980 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
11981 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
11983 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
11984 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
11985 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
11987 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
/* SSE2 conversions between packed int/float/double forms.  */
11989 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
11990 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
11992 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
11993 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
11994 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
11995 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
11996 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
11998 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12000 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12001 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12003 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12004 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12005 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
/* Target hook: register all ix86 builtins.  Currently this only
   delegates to the MMX/SSE registration routine.  NOTE(review): the
   return type, braces, and any TARGET_MMX guard are not visible in
   this excerpt -- confirm against the full file before editing.  */
12009 ix86_init_builtins ()
12012 ix86_init_mmx_sse_builtins ();
12015 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
12016 is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
12019 ix86_init_mmx_sse_builtins ()
12021 const struct builtin_description * d;
/* Pointer types used by the load/store builtin signatures below.  */
12024 tree pchar_type_node = build_pointer_type (char_type_node);
12025 tree pfloat_type_node = build_pointer_type (float_type_node);
12026 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12027 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12028 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
/* Function-type nodes, named RET_ftype_ARGS.  Each is built once here
   and shared by every builtin with that signature.  */
12031 tree int_ftype_v4sf_v4sf
12032 = build_function_type_list (integer_type_node,
12033 V4SF_type_node, V4SF_type_node, NULL_TREE);
12034 tree v4si_ftype_v4sf_v4sf
12035 = build_function_type_list (V4SI_type_node,
12036 V4SF_type_node, V4SF_type_node, NULL_TREE);
12037 /* MMX/SSE/integer conversions.  */
12038 tree int_ftype_v4sf
12039 = build_function_type_list (integer_type_node,
12040 V4SF_type_node, NULL_TREE);
12041 tree int_ftype_v8qi
12042 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12043 tree v4sf_ftype_v4sf_int
12044 = build_function_type_list (V4SF_type_node,
12045 V4SF_type_node, integer_type_node, NULL_TREE);
12046 tree v4sf_ftype_v4sf_v2si
12047 = build_function_type_list (V4SF_type_node,
12048 V4SF_type_node, V2SI_type_node, NULL_TREE);
12049 tree int_ftype_v4hi_int
12050 = build_function_type_list (integer_type_node,
12051 V4HI_type_node, integer_type_node, NULL_TREE);
12052 tree v4hi_ftype_v4hi_int_int
12053 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12054 integer_type_node, integer_type_node,
12056 /* Miscellaneous.  */
12057 tree v8qi_ftype_v4hi_v4hi
12058 = build_function_type_list (V8QI_type_node,
12059 V4HI_type_node, V4HI_type_node, NULL_TREE);
12060 tree v4hi_ftype_v2si_v2si
12061 = build_function_type_list (V4HI_type_node,
12062 V2SI_type_node, V2SI_type_node, NULL_TREE);
12063 tree v4sf_ftype_v4sf_v4sf_int
12064 = build_function_type_list (V4SF_type_node,
12065 V4SF_type_node, V4SF_type_node,
12066 integer_type_node, NULL_TREE);
12067 tree v2si_ftype_v4hi_v4hi
12068 = build_function_type_list (V2SI_type_node,
12069 V4HI_type_node, V4HI_type_node, NULL_TREE);
12070 tree v4hi_ftype_v4hi_int
12071 = build_function_type_list (V4HI_type_node,
12072 V4HI_type_node, integer_type_node, NULL_TREE);
12073 tree v4hi_ftype_v4hi_di
12074 = build_function_type_list (V4HI_type_node,
12075 V4HI_type_node, long_long_unsigned_type_node,
12077 tree v2si_ftype_v2si_di
12078 = build_function_type_list (V2SI_type_node,
12079 V2SI_type_node, long_long_unsigned_type_node,
12081 tree void_ftype_void
12082 = build_function_type (void_type_node, void_list_node);
12083 tree void_ftype_unsigned
12084 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12085 tree unsigned_ftype_void
12086 = build_function_type (unsigned_type_node, void_list_node);
12088 = build_function_type (long_long_unsigned_type_node, void_list_node);
12089 tree v4sf_ftype_void
12090 = build_function_type (V4SF_type_node, void_list_node);
12091 tree v2si_ftype_v4sf
12092 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12093 /* Loads/stores.  */
12094 tree void_ftype_v8qi_v8qi_pchar
12095 = build_function_type_list (void_type_node,
12096 V8QI_type_node, V8QI_type_node,
12097 pchar_type_node, NULL_TREE);
12098 tree v4sf_ftype_pfloat
12099 = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
12100 /* @@@ the type is bogus */
/* NOTE(review): the two types below say "pv2si" but are built with
   pv2di_type_node -- matches the @@@ remark above; confirm whether the
   pointer really should be to V2SI.  */
12101 tree v4sf_ftype_v4sf_pv2si
12102 = build_function_type_list (V4SF_type_node,
12103 V4SF_type_node, pv2di_type_node, NULL_TREE);
12104 tree void_ftype_pv2si_v4sf
12105 = build_function_type_list (void_type_node,
12106 pv2di_type_node, V4SF_type_node, NULL_TREE);
12107 tree void_ftype_pfloat_v4sf
12108 = build_function_type_list (void_type_node,
12109 pfloat_type_node, V4SF_type_node, NULL_TREE);
12110 tree void_ftype_pdi_di
12111 = build_function_type_list (void_type_node,
12112 pdi_type_node, long_long_unsigned_type_node,
12114 tree void_ftype_pv2di_v2di
12115 = build_function_type_list (void_type_node,
12116 pv2di_type_node, V2DI_type_node, NULL_TREE);
12117 /* Normal vector unops.  */
12118 tree v4sf_ftype_v4sf
12119 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12121 /* Normal vector binops.  */
12122 tree v4sf_ftype_v4sf_v4sf
12123 = build_function_type_list (V4SF_type_node,
12124 V4SF_type_node, V4SF_type_node, NULL_TREE);
12125 tree v8qi_ftype_v8qi_v8qi
12126 = build_function_type_list (V8QI_type_node,
12127 V8QI_type_node, V8QI_type_node, NULL_TREE);
12128 tree v4hi_ftype_v4hi_v4hi
12129 = build_function_type_list (V4HI_type_node,
12130 V4HI_type_node, V4HI_type_node, NULL_TREE);
12131 tree v2si_ftype_v2si_v2si
12132 = build_function_type_list (V2SI_type_node,
12133 V2SI_type_node, V2SI_type_node, NULL_TREE);
12134 tree di_ftype_di_di
12135 = build_function_type_list (long_long_unsigned_type_node,
12136 long_long_unsigned_type_node,
12137 long_long_unsigned_type_node, NULL_TREE);
/* 3DNow! signatures (V2SF = two packed floats in an MMX register).  */
12139 tree v2si_ftype_v2sf
12140 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12141 tree v2sf_ftype_v2si
12142 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12143 tree v2si_ftype_v2si
12144 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12145 tree v2sf_ftype_v2sf
12146 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12147 tree v2sf_ftype_v2sf_v2sf
12148 = build_function_type_list (V2SF_type_node,
12149 V2SF_type_node, V2SF_type_node, NULL_TREE);
12150 tree v2si_ftype_v2sf_v2sf
12151 = build_function_type_list (V2SI_type_node,
12152 V2SF_type_node, V2SF_type_node, NULL_TREE);
/* SSE2 signatures.  */
12153 tree pint_type_node = build_pointer_type (integer_type_node);
12154 tree pdouble_type_node = build_pointer_type (double_type_node);
12155 tree int_ftype_v2df_v2df
12156 = build_function_type_list (integer_type_node,
12157 V2DF_type_node, V2DF_type_node, NULL_TREE);
12160 = build_function_type (intTI_type_node, void_list_node);
12161 tree ti_ftype_ti_ti
12162 = build_function_type_list (intTI_type_node,
12163 intTI_type_node, intTI_type_node, NULL_TREE);
12164 tree void_ftype_pvoid
12165 = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
12167 = build_function_type_list (V2DI_type_node,
12168 long_long_unsigned_type_node, NULL_TREE);
12169 tree v4sf_ftype_v4si
12170 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12171 tree v4si_ftype_v4sf
12172 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12173 tree v2df_ftype_v4si
12174 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12175 tree v4si_ftype_v2df
12176 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12177 tree v2si_ftype_v2df
12178 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12179 tree v4sf_ftype_v2df
12180 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12181 tree v2df_ftype_v2si
12182 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12183 tree v2df_ftype_v4sf
12184 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12185 tree int_ftype_v2df
12186 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12187 tree v2df_ftype_v2df_int
12188 = build_function_type_list (V2DF_type_node,
12189 V2DF_type_node, integer_type_node, NULL_TREE);
12190 tree v4sf_ftype_v4sf_v2df
12191 = build_function_type_list (V4SF_type_node,
12192 V4SF_type_node, V2DF_type_node, NULL_TREE);
12193 tree v2df_ftype_v2df_v4sf
12194 = build_function_type_list (V2DF_type_node,
12195 V2DF_type_node, V4SF_type_node, NULL_TREE);
12196 tree v2df_ftype_v2df_v2df_int
12197 = build_function_type_list (V2DF_type_node,
12198 V2DF_type_node, V2DF_type_node,
12201 tree v2df_ftype_v2df_pv2si
12202 = build_function_type_list (V2DF_type_node,
12203 V2DF_type_node, pv2si_type_node, NULL_TREE);
12204 tree void_ftype_pv2si_v2df
12205 = build_function_type_list (void_type_node,
12206 pv2si_type_node, V2DF_type_node, NULL_TREE);
12207 tree void_ftype_pdouble_v2df
12208 = build_function_type_list (void_type_node,
12209 pdouble_type_node, V2DF_type_node, NULL_TREE);
12210 tree void_ftype_pint_int
12211 = build_function_type_list (void_type_node,
12212 pint_type_node, integer_type_node, NULL_TREE);
12213 tree void_ftype_v16qi_v16qi_pchar
12214 = build_function_type_list (void_type_node,
12215 V16QI_type_node, V16QI_type_node,
12216 pchar_type_node, NULL_TREE);
12217 tree v2df_ftype_pdouble
12218 = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
12219 tree v2df_ftype_v2df_v2df
12220 = build_function_type_list (V2DF_type_node,
12221 V2DF_type_node, V2DF_type_node, NULL_TREE);
12222 tree v16qi_ftype_v16qi_v16qi
12223 = build_function_type_list (V16QI_type_node,
12224 V16QI_type_node, V16QI_type_node, NULL_TREE);
12225 tree v8hi_ftype_v8hi_v8hi
12226 = build_function_type_list (V8HI_type_node,
12227 V8HI_type_node, V8HI_type_node, NULL_TREE);
12228 tree v4si_ftype_v4si_v4si
12229 = build_function_type_list (V4SI_type_node,
12230 V4SI_type_node, V4SI_type_node, NULL_TREE);
12231 tree v2di_ftype_v2di_v2di
12232 = build_function_type_list (V2DI_type_node,
12233 V2DI_type_node, V2DI_type_node, NULL_TREE);
12234 tree v2di_ftype_v2df_v2df
12235 = build_function_type_list (V2DI_type_node,
12236 V2DF_type_node, V2DF_type_node, NULL_TREE);
12237 tree v2df_ftype_v2df
12238 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12239 tree v2df_ftype_double
12240 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12241 tree v2df_ftype_double_double
12242 = build_function_type_list (V2DF_type_node,
12243 double_type_node, double_type_node, NULL_TREE);
12244 tree int_ftype_v8hi_int
12245 = build_function_type_list (integer_type_node,
12246 V8HI_type_node, integer_type_node, NULL_TREE);
12247 tree v8hi_ftype_v8hi_int_int
12248 = build_function_type_list (V8HI_type_node,
12249 V8HI_type_node, integer_type_node,
12250 integer_type_node, NULL_TREE);
12251 tree v2di_ftype_v2di_int
12252 = build_function_type_list (V2DI_type_node,
12253 V2DI_type_node, integer_type_node, NULL_TREE);
12254 tree v4si_ftype_v4si_int
12255 = build_function_type_list (V4SI_type_node,
12256 V4SI_type_node, integer_type_node, NULL_TREE);
12257 tree v8hi_ftype_v8hi_int
12258 = build_function_type_list (V8HI_type_node,
12259 V8HI_type_node, integer_type_node, NULL_TREE);
12260 tree v8hi_ftype_v8hi_v2di
12261 = build_function_type_list (V8HI_type_node,
12262 V8HI_type_node, V2DI_type_node, NULL_TREE);
12263 tree v4si_ftype_v4si_v2di
12264 = build_function_type_list (V4SI_type_node,
12265 V4SI_type_node, V2DI_type_node, NULL_TREE);
12266 tree v4si_ftype_v8hi_v8hi
12267 = build_function_type_list (V4SI_type_node,
12268 V8HI_type_node, V8HI_type_node, NULL_TREE);
12269 tree di_ftype_v8qi_v8qi
12270 = build_function_type_list (long_long_unsigned_type_node,
12271 V8QI_type_node, V8QI_type_node, NULL_TREE);
12272 tree v2di_ftype_v16qi_v16qi
12273 = build_function_type_list (V2DI_type_node,
12274 V16QI_type_node, V16QI_type_node, NULL_TREE);
12275 tree int_ftype_v16qi
12276 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12278 /* Add all builtins that are more or less simple operations on two
/* Pick the function type from the insn pattern's operand mode.  */
12280 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12282 /* Use one of the operands; the target can have a different mode for
12283 mask-generating compares. */
12284 enum machine_mode mode;
12289 mode = insn_data[d->icode].operand[1].mode;
12294 type = v16qi_ftype_v16qi_v16qi;
12297 type = v8hi_ftype_v8hi_v8hi;
12300 type = v4si_ftype_v4si_v4si;
12303 type = v2di_ftype_v2di_v2di;
12306 type = v2df_ftype_v2df_v2df;
12309 type = ti_ftype_ti_ti;
12312 type = v4sf_ftype_v4sf_v4sf;
12315 type = v8qi_ftype_v8qi_v8qi;
12318 type = v4hi_ftype_v4hi_v4hi;
12321 type = v2si_ftype_v2si_v2si;
12324 type = di_ftype_di_di;
12331 /* Override for comparisons.  */
/* Mask-generating compares return an integer vector regardless of the
   float operand mode selected above.  */
12332 if (d->icode == CODE_FOR_maskcmpv4sf3
12333 || d->icode == CODE_FOR_maskncmpv4sf3
12334 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12335 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12336 type = v4si_ftype_v4sf_v4sf;
12338 if (d->icode == CODE_FOR_maskcmpv2df3
12339 || d->icode == CODE_FOR_maskncmpv2df3
12340 || d->icode == CODE_FOR_vmmaskcmpv2df3
12341 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12342 type = v2di_ftype_v2df_v2df;
12344 def_builtin (d->mask, d->name, type, d->code);
12347 /* Add the remaining MMX insns with somewhat more complicated types.  */
12348 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12349 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
/* NOTE(review): ldmxcsr/stmxcsr are SSE control-register accesses but are
   registered under MASK_MMX here -- presumably they should be MASK_SSE1;
   confirm against later revisions of this file.  */
12350 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12351 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12352 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12353 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12354 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12356 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12357 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12358 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12360 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12361 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12363 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12364 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12366 /* comi/ucomi insns.  */
/* NOTE(review): the equality test below assumes a comi entry's mask is
   exactly MASK_SSE2, never MASK_SSE2 OR'd with other bits -- holds for
   the current table, but a bitwise test (d->mask & MASK_SSE2) would be
   more robust.  */
12367 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12368 if (d->mask == MASK_SSE2)
12369 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12371 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12373 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12374 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12375 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12377 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12378 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12379 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12380 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12381 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12382 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12384 def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
12385 def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
12386 def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
12387 def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
12389 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12390 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12392 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12394 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12395 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12396 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12397 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12398 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12399 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12401 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12402 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12403 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12404 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12406 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12407 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12408 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12409 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12411 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12413 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12415 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12416 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12417 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12418 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12419 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12420 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12422 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12424 /* Original 3DNow!  */
12425 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12426 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12427 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12428 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12429 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12430 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12431 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12432 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12433 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12434 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12435 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12436 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12437 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12438 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12439 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12440 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12441 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12442 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12443 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12444 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12446 /* 3DNow! extension as used in the Athlon CPU. */
12447 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12448 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12449 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12450 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12451 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12452 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12454 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12457 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12458 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12460 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12461 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12463 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
12464 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
12465 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
12466 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12467 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12468 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12470 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12471 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12472 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12473 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12475 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12476 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12477 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12478 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12479 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12481 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12482 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12483 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12484 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12486 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12487 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12489 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12491 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12492 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12494 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12495 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12496 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12497 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12498 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12500 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12502 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12503 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12505 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12506 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12507 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12509 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12510 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12511 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12513 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12514 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12515 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12516 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
12517 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
12518 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12519 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12521 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
12522 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12523 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
12525 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12526 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12527 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12529 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12530 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12531 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12533 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12534 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12536 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12537 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12538 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12540 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12541 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12542 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12544 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12545 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12547 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
12550 /* Errors in the source file can cause expand_expr to return const0_rtx
12551    where we expect a vector.  To avoid crashing, use one of the vector
12552    clear instructions.  */
/* Return X unless it is const0_rtx, in which case substitute a freshly
   allocated, zero-cleared vector register of MODE so downstream expanders
   never see a scalar zero where a vector operand was expected.
   NOTE(review): the return type, opening/closing braces, the `rtx x;'
   parameter declaration, the `else' before the SSE clear and the trailing
   `return x;' are elided in this excerpt -- confirm against the full file.  */
12554 safe_vector_operand (x, mode)
12556      enum machine_mode mode;
12558   if (x != const0_rtx)
/* X was const0_rtx: materialize a cleared register instead.  */
12560   x = gen_reg_rtx (mode);
/* MMX/3DNow! modes are cleared via the DImode MMX clear pattern...  */
12562   if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
12563     emit_insn (gen_mmx_clrdi (mode == DImode ? x
12564 				  : gen_rtx_SUBREG (DImode, x, 0)));
/* ...anything else is cleared as a V4SF SSE register, using a SUBREG when
   MODE is not V4SFmode itself.  */
12566     emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12567 				  : gen_rtx_SUBREG (V4SFmode, x, 0)));
12571 /* Subroutine of ix86_expand_builtin to take care of binop insns.  */
/* Expand a two-operand builtin: pull ARG0/ARG1 off ARGLIST, expand them to
   RTL, coerce each operand into the mode/predicate required by ICODE's
   operand slots, and emit the insn with TARGET (or a new register) as the
   destination.  NOTE(review): the return type, braces, the `rtx pat;' and
   `tree arglist; rtx target;' declarations, and the trailing
   `if (! pat) return 0; emit_insn (pat); return target;' are elided in
   this excerpt -- confirm against the full file.  */
12574 ix86_expand_binop_builtin (icode, arglist, target)
12575      enum insn_code icode;
12580   tree arg0 = TREE_VALUE (arglist);
12581   tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12582   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12583   rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12584   enum machine_mode tmode = insn_data[icode].operand[0].mode;
12585   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12586   enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx standing in for a vector (see
   safe_vector_operand).  */
12588   if (VECTOR_MODE_P (mode0))
12589     op0 = safe_vector_operand (op0, mode0);
12590   if (VECTOR_MODE_P (mode1))
12591     op1 = safe_vector_operand (op1, mode1);
/* Reuse TARGET only if it has the right mode and satisfies the output
   predicate; otherwise allocate a fresh register.  */
12594       || GET_MODE (target) != tmode
12595       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12596     target = gen_reg_rtx (tmode);
12598   /* In case the insn wants input operands in modes different from
12599      the result, abort.  */
12600   if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
/* Force each input into a register of the expected mode if it does not
   already satisfy the insn's operand predicate.  */
12603   if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12604     op0 = copy_to_mode_reg (mode0, op0);
12605   if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12606     op1 = copy_to_mode_reg (mode1, op1);
12608   /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12609      yet one of the two must not be a memory.  This is normally enforced
12610      by expanders, but we didn't bother to create one here.  */
12611   if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12612     op0 = copy_to_mode_reg (mode0, op0);
12614   pat = GEN_FCN (icode) (target, op0, op1);
12621 /* In type_for_mode we restrict the ability to create TImode types
12622    to hosts with 64-bit H_W_I.  So we've defined the SSE logicals
12623    to have a V4SFmode signature.  Convert them in-place to TImode.  */
/* Like ix86_expand_binop_builtin, but for SSE logical ops (and/andn/or/xor)
   whose builtin signature is V4SFmode while the insn pattern is TImode:
   view both operands and the result as TImode, emit the insn, then hand
   back a V4SFmode view of the result.  NOTE(review): the return type,
   braces, `rtx pat;' declaration and the `if (! pat) ... emit_insn (pat);'
   tail are elided in this excerpt -- confirm against the full file.  */
12626 ix86_expand_timode_binop_builtin (icode, arglist, target)
12627      enum insn_code icode;
12632   tree arg0 = TREE_VALUE (arglist);
12633   tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12634   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12635   rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
/* Reinterpret the V4SF operands as TImode without moving any data.  */
12637   op0 = gen_lowpart (TImode, op0);
12638   op1 = gen_lowpart (TImode, op1);
/* Incoming TARGET is ignored; the insn always writes a fresh TImode reg.  */
12639   target = gen_reg_rtx (TImode);
12641   if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
12642     op0 = copy_to_mode_reg (TImode, op0);
12643   if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
12644     op1 = copy_to_mode_reg (TImode, op1);
12646   /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12647      yet one of the two must not be a memory.  This is normally enforced
12648      by expanders, but we didn't bother to create one here.  */
12649   if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12650     op0 = copy_to_mode_reg (TImode, op0);
12652   pat = GEN_FCN (icode) (target, op0, op1);
/* Callers expect the builtin's declared V4SFmode, so convert back.  */
12657   return gen_lowpart (V4SFmode, target);
12660 /* Subroutine of ix86_expand_builtin to take care of stores.  */
/* Expand a store builtin: ARG0 is the destination pointer, ARG1 the value.
   The pointer is forced into a register and wrapped in a MEM of the insn's
   destination mode; the value is coerced to satisfy the source operand's
   predicate.  NOTE(review): the return type, braces, the `rtx pat;'
   declaration and the `if (pat) emit_insn (pat); return 0;' tail are
   elided in this excerpt -- confirm against the full file.  */
12663 ix86_expand_store_builtin (icode, arglist)
12664      enum insn_code icode;
12668   tree arg0 = TREE_VALUE (arglist);
12669   tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12670   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12671   rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12672   enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12673   enum machine_mode mode1 = insn_data[icode].operand[1].mode;
/* Replace a const0_rtx stand-in with a cleared vector register.  */
12675   if (VECTOR_MODE_P (mode1))
12676     op1 = safe_vector_operand (op1, mode1);
/* Turn the pointer value into a MEM destination of the insn's mode.  */
12678   op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12680   if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12681     op1 = copy_to_mode_reg (mode1, op1);
12683   pat = GEN_FCN (icode) (op0, op1);
12689 /* Subroutine of ix86_expand_builtin to take care of unop insns.  */
/* Expand a one-operand builtin.  When DO_LOAD is nonzero, ARG0 is a
   pointer and the operand becomes a MEM dereferencing it (used for the
   load builtins); otherwise ARG0 is the value itself.  NOTE(review): the
   return type, braces, the `rtx pat;' declaration, the `int do_load;'
   parameter declaration, the `if (do_load) ... else' structure around the
   MEM creation, and the `if (! pat) return 0; emit_insn (pat);
   return target;' tail are elided in this excerpt -- confirm against the
   full file.  */
12692 ix86_expand_unop_builtin (icode, arglist, target, do_load)
12693      enum insn_code icode;
12699   tree arg0 = TREE_VALUE (arglist);
12700   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12701   enum machine_mode tmode = insn_data[icode].operand[0].mode;
12702   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Reuse TARGET only if its mode and predicate fit the output operand.  */
12705       || GET_MODE (target) != tmode
12706       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12707     target = gen_reg_rtx (tmode);
/* do_load case: dereference the pointer argument.  */
12709     op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12712       if (VECTOR_MODE_P (mode0))
12713 	op0 = safe_vector_operand (op0, mode0);
12715       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12716 	op0 = copy_to_mode_reg (mode0, op0);
12719   pat = GEN_FCN (icode) (target, op0);
12726 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12727    sqrtss, rsqrtss, rcpss.  */
/* These scalar SSE patterns take two vector inputs (the operation source
   and the vector supplying the untouched upper elements), so the single
   builtin argument is used for both operand slots.  NOTE(review): the
   return type, braces, the `rtx pat;' declaration, the `op1 = op0;'
   assignment before the operand-2 predicate check, and the
   `if (! pat) return 0; emit_insn (pat); return target;' tail are elided
   in this excerpt -- confirm against the full file.  */
12730 ix86_expand_unop1_builtin (icode, arglist, target)
12731      enum insn_code icode;
12736   tree arg0 = TREE_VALUE (arglist);
12737   rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12738   enum machine_mode tmode = insn_data[icode].operand[0].mode;
12739   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12742       || GET_MODE (target) != tmode
12743       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12744     target = gen_reg_rtx (tmode);
12746   if (VECTOR_MODE_P (mode0))
12747     op0 = safe_vector_operand (op0, mode0);
12749   if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12750     op0 = copy_to_mode_reg (mode0, op0);
/* Second operand mirrors the first (presumably op1 = op0 just above,
   elided here); both share mode0.  */
12753   if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12754     op1 = copy_to_mode_reg (mode0, op1);
12756   pat = GEN_FCN (icode) (target, op0, op1);
12763 /* Subroutine of ix86_expand_builtin to take care of comparison insns.  */
/* Expand an SSE compare builtin described by D (icode, rtx comparison
   code, swap flag): build the comparison rtx as a third operand and emit
   the insn writing a mask into TARGET.  NOTE(review): the return type,
   braces, the `rtx pat, op2;' declarations, the swap condition and the
   completion of the operand-swap branch, and the `if (! pat) return 0;
   emit_insn (pat); return target;' tail are elided in this excerpt --
   confirm against the full file.  */
12766 ix86_expand_sse_compare (d, arglist, target)
12767      const struct builtin_description *d;
12772   tree arg0 = TREE_VALUE (arglist);
12773   tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12774   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12775   rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12777   enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12778   enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12779   enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12780   enum rtx_code comparison = d->comparison;
12782   if (VECTOR_MODE_P (mode0))
12783     op0 = safe_vector_operand (op0, mode0);
12784   if (VECTOR_MODE_P (mode1))
12785     op1 = safe_vector_operand (op1, mode1);
12787   /* Swap operands if we have a comparison that isn't available in
/* (swap branch: op1 is copied so the exchange cannot clobber a value
   still referenced through op0; remainder elided in this excerpt.)  */
12791       rtx tmp = gen_reg_rtx (mode1);
12792       emit_move_insn (tmp, op1);
12798       || GET_MODE (target) != tmode
12799       || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
12800     target = gen_reg_rtx (tmode);
12802   if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12803     op0 = copy_to_mode_reg (mode0, op0);
12804   if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12805     op1 = copy_to_mode_reg (mode1, op1);
/* The comparison itself is passed to the pattern as an rtx operand.  */
12807   op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12808   pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12815 /* Subroutine of ix86_expand_builtin to take care of comi insns.  */
/* Expand a comiss/ucomiss-style builtin: emit the flag-setting compare
   insn, then materialize the boolean result by setting the low byte of a
   zeroed SImode register from the flags and returning that SImode reg.
   NOTE(review): the return type, braces, the `rtx pat, op2;'
   declarations, the operand-swap branch body, the `if (! pat) return 0;
   emit_insn (pat);' around the compare, and the closing of the SET rtx
   are elided in this excerpt -- confirm against the full file.  */
12818 ix86_expand_sse_comi (d, arglist, target)
12819      const struct builtin_description *d;
12824   tree arg0 = TREE_VALUE (arglist);
12825   tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12826   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12827   rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12829   enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
12830   enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
12831   enum rtx_code comparison = d->comparison;
12833   if (VECTOR_MODE_P (mode0))
12834     op0 = safe_vector_operand (op0, mode0);
12835   if (VECTOR_MODE_P (mode1))
12836     op1 = safe_vector_operand (op1, mode1);
12838   /* Swap operands if we have a comparison that isn't available in
/* Result register: zero a full SImode reg first so the strict_low_part
   byte store below yields a clean 0/1 int.  */
12847   target = gen_reg_rtx (SImode);
12848   emit_move_insn (target, const0_rtx);
12849   target = gen_rtx_SUBREG (QImode, target, 0);
12851   if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12852     op0 = copy_to_mode_reg (mode0, op0);
12853   if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12854     op1 = copy_to_mode_reg (mode1, op1);
12856   op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12857   pat = GEN_FCN (d->icode) (op0, op1, op2);
/* Convert the flags produced by the compare into a byte in TARGET via a
   strict_low_part set conditioned on the flags register.  */
12861   emit_insn (gen_rtx_SET (VOIDmode,
12862 			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
12863 			  gen_rtx_fmt_ee (comparison, QImode,
12864 					  gen_rtx_REG (CCmode, FLAGS_REG),
/* Return the enclosing SImode register, not the QImode SUBREG view.  */
12867   return SUBREG_REG (target);
12870 /* Expand an expression EXP that calls a built-in function,
12871 with result going to TARGET if that's convenient
12872 (and in mode MODE if that's convenient).
12873 SUBTARGET may be used as the target for computing one of EXP's operands.
12874 IGNORE is nonzero if the value is to be ignored. */
12877 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
12880 rtx subtarget ATTRIBUTE_UNUSED;
12881 enum machine_mode mode ATTRIBUTE_UNUSED;
12882 int ignore ATTRIBUTE_UNUSED;
12884 const struct builtin_description *d;
12886 enum insn_code icode;
12887 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12888 tree arglist = TREE_OPERAND (exp, 1);
12889 tree arg0, arg1, arg2;
12890 rtx op0, op1, op2, pat;
12891 enum machine_mode tmode, mode0, mode1, mode2;
12892 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12896 case IX86_BUILTIN_EMMS:
12897 emit_insn (gen_emms ());
12900 case IX86_BUILTIN_SFENCE:
12901 emit_insn (gen_sfence ());
12904 case IX86_BUILTIN_PEXTRW:
12905 case IX86_BUILTIN_PEXTRW128:
12906 icode = (fcode == IX86_BUILTIN_PEXTRW
12907 ? CODE_FOR_mmx_pextrw
12908 : CODE_FOR_sse2_pextrw);
12909 arg0 = TREE_VALUE (arglist);
12910 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12911 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12912 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12913 tmode = insn_data[icode].operand[0].mode;
12914 mode0 = insn_data[icode].operand[1].mode;
12915 mode1 = insn_data[icode].operand[2].mode;
12917 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12918 op0 = copy_to_mode_reg (mode0, op0);
12919 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12921 /* @@@ better error message */
12922 error ("selector must be an immediate");
12923 return gen_reg_rtx (tmode);
12926 || GET_MODE (target) != tmode
12927 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12928 target = gen_reg_rtx (tmode);
12929 pat = GEN_FCN (icode) (target, op0, op1);
12935 case IX86_BUILTIN_PINSRW:
12936 case IX86_BUILTIN_PINSRW128:
12937 icode = (fcode == IX86_BUILTIN_PINSRW
12938 ? CODE_FOR_mmx_pinsrw
12939 : CODE_FOR_sse2_pinsrw);
12940 arg0 = TREE_VALUE (arglist);
12941 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12942 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12943 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12944 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12945 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12946 tmode = insn_data[icode].operand[0].mode;
12947 mode0 = insn_data[icode].operand[1].mode;
12948 mode1 = insn_data[icode].operand[2].mode;
12949 mode2 = insn_data[icode].operand[3].mode;
12951 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12952 op0 = copy_to_mode_reg (mode0, op0);
12953 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12954 op1 = copy_to_mode_reg (mode1, op1);
12955 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12957 /* @@@ better error message */
12958 error ("selector must be an immediate");
12962 || GET_MODE (target) != tmode
12963 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12964 target = gen_reg_rtx (tmode);
12965 pat = GEN_FCN (icode) (target, op0, op1, op2);
12971 case IX86_BUILTIN_MASKMOVQ:
12972 icode = (fcode == IX86_BUILTIN_MASKMOVQ
12973 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
12974 : CODE_FOR_sse2_maskmovdqu);
12975 /* Note the arg order is different from the operand order. */
12976 arg1 = TREE_VALUE (arglist);
12977 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
12978 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12979 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12980 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12981 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12982 mode0 = insn_data[icode].operand[0].mode;
12983 mode1 = insn_data[icode].operand[1].mode;
12984 mode2 = insn_data[icode].operand[2].mode;
12986 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12987 op0 = copy_to_mode_reg (mode0, op0);
12988 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12989 op1 = copy_to_mode_reg (mode1, op1);
12990 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
12991 op2 = copy_to_mode_reg (mode2, op2);
12992 pat = GEN_FCN (icode) (op0, op1, op2);
12998 case IX86_BUILTIN_SQRTSS:
12999 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13000 case IX86_BUILTIN_RSQRTSS:
13001 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13002 case IX86_BUILTIN_RCPSS:
13003 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13005 case IX86_BUILTIN_ANDPS:
13006 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
13008 case IX86_BUILTIN_ANDNPS:
13009 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
13011 case IX86_BUILTIN_ORPS:
13012 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
13014 case IX86_BUILTIN_XORPS:
13015 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
13018 case IX86_BUILTIN_LOADAPS:
13019 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13021 case IX86_BUILTIN_LOADUPS:
13022 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13024 case IX86_BUILTIN_STOREAPS:
13025 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13026 case IX86_BUILTIN_STOREUPS:
13027 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13029 case IX86_BUILTIN_LOADSS:
13030 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13032 case IX86_BUILTIN_STORESS:
13033 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13035 case IX86_BUILTIN_LOADHPS:
13036 case IX86_BUILTIN_LOADLPS:
13037 case IX86_BUILTIN_LOADHPD:
13038 case IX86_BUILTIN_LOADLPD:
13039 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13040 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13041 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13042 : CODE_FOR_sse2_movlpd);
13043 arg0 = TREE_VALUE (arglist);
13044 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13045 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13046 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13047 tmode = insn_data[icode].operand[0].mode;
13048 mode0 = insn_data[icode].operand[1].mode;
13049 mode1 = insn_data[icode].operand[2].mode;
13051 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13052 op0 = copy_to_mode_reg (mode0, op0);
13053 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13055 || GET_MODE (target) != tmode
13056 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13057 target = gen_reg_rtx (tmode);
13058 pat = GEN_FCN (icode) (target, op0, op1);
13064 case IX86_BUILTIN_STOREHPS:
13065 case IX86_BUILTIN_STORELPS:
13066 case IX86_BUILTIN_STOREHPD:
13067 case IX86_BUILTIN_STORELPD:
13068 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13069 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13070 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13071 : CODE_FOR_sse2_movlpd);
13072 arg0 = TREE_VALUE (arglist);
13073 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13074 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13075 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13076 mode0 = insn_data[icode].operand[1].mode;
13077 mode1 = insn_data[icode].operand[2].mode;
13079 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13080 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13081 op1 = copy_to_mode_reg (mode1, op1);
13083 pat = GEN_FCN (icode) (op0, op0, op1);
13089 case IX86_BUILTIN_MOVNTPS:
13090 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13091 case IX86_BUILTIN_MOVNTQ:
13092 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13094 case IX86_BUILTIN_LDMXCSR:
13095 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13096 target = assign_386_stack_local (SImode, 0);
13097 emit_move_insn (target, op0);
13098 emit_insn (gen_ldmxcsr (target));
13101 case IX86_BUILTIN_STMXCSR:
13102 target = assign_386_stack_local (SImode, 0);
13103 emit_insn (gen_stmxcsr (target));
13104 return copy_to_mode_reg (SImode, target);
13106 case IX86_BUILTIN_SHUFPS:
13107 case IX86_BUILTIN_SHUFPD:
13108 icode = (fcode == IX86_BUILTIN_SHUFPS
13109 ? CODE_FOR_sse_shufps
13110 : CODE_FOR_sse2_shufpd);
13111 arg0 = TREE_VALUE (arglist);
13112 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13113 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13114 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13115 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13116 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13117 tmode = insn_data[icode].operand[0].mode;
13118 mode0 = insn_data[icode].operand[1].mode;
13119 mode1 = insn_data[icode].operand[2].mode;
13120 mode2 = insn_data[icode].operand[3].mode;
13122 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13123 op0 = copy_to_mode_reg (mode0, op0);
13124 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13125 op1 = copy_to_mode_reg (mode1, op1);
13126 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13128 /* @@@ better error message */
13129 error ("mask must be an immediate");
13130 return gen_reg_rtx (tmode);
13133 || GET_MODE (target) != tmode
13134 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13135 target = gen_reg_rtx (tmode);
13136 pat = GEN_FCN (icode) (target, op0, op1, op2);
13142 case IX86_BUILTIN_PSHUFW:
13143 case IX86_BUILTIN_PSHUFD:
13144 case IX86_BUILTIN_PSHUFHW:
13145 case IX86_BUILTIN_PSHUFLW:
13146 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13147 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13148 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13149 : CODE_FOR_mmx_pshufw);
13150 arg0 = TREE_VALUE (arglist);
13151 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13152 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13153 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13154 tmode = insn_data[icode].operand[0].mode;
13155 mode1 = insn_data[icode].operand[1].mode;
13156 mode2 = insn_data[icode].operand[2].mode;
13158 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13159 op0 = copy_to_mode_reg (mode1, op0);
13160 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13162 /* @@@ better error message */
13163 error ("mask must be an immediate");
13167 || GET_MODE (target) != tmode
13168 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13169 target = gen_reg_rtx (tmode);
13170 pat = GEN_FCN (icode) (target, op0, op1);
13176 case IX86_BUILTIN_FEMMS:
13177 emit_insn (gen_femms ());
13180 case IX86_BUILTIN_PAVGUSB:
13181 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13183 case IX86_BUILTIN_PF2ID:
13184 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13186 case IX86_BUILTIN_PFACC:
13187 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13189 case IX86_BUILTIN_PFADD:
13190 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13192 case IX86_BUILTIN_PFCMPEQ:
13193 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13195 case IX86_BUILTIN_PFCMPGE:
13196 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13198 case IX86_BUILTIN_PFCMPGT:
13199 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13201 case IX86_BUILTIN_PFMAX:
13202 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13204 case IX86_BUILTIN_PFMIN:
13205 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13207 case IX86_BUILTIN_PFMUL:
13208 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13210 case IX86_BUILTIN_PFRCP:
13211 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13213 case IX86_BUILTIN_PFRCPIT1:
13214 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13216 case IX86_BUILTIN_PFRCPIT2:
13217 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13219 case IX86_BUILTIN_PFRSQIT1:
13220 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13222 case IX86_BUILTIN_PFRSQRT:
13223 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13225 case IX86_BUILTIN_PFSUB:
13226 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13228 case IX86_BUILTIN_PFSUBR:
13229 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13231 case IX86_BUILTIN_PI2FD:
13232 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13234 case IX86_BUILTIN_PMULHRW:
13235 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13237 case IX86_BUILTIN_PF2IW:
13238 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13240 case IX86_BUILTIN_PFNACC:
13241 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13243 case IX86_BUILTIN_PFPNACC:
13244 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13246 case IX86_BUILTIN_PI2FW:
13247 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13249 case IX86_BUILTIN_PSWAPDSI:
13250 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13252 case IX86_BUILTIN_PSWAPDSF:
13253 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13255 case IX86_BUILTIN_SSE_ZERO:
13256 target = gen_reg_rtx (V4SFmode);
13257 emit_insn (gen_sse_clrv4sf (target));
13260 case IX86_BUILTIN_MMX_ZERO:
13261 target = gen_reg_rtx (DImode);
13262 emit_insn (gen_mmx_clrdi (target));
13265 case IX86_BUILTIN_SQRTSD:
13266 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13267 case IX86_BUILTIN_LOADAPD:
13268 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13269 case IX86_BUILTIN_LOADUPD:
13270 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13272 case IX86_BUILTIN_STOREAPD:
13273 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13274 case IX86_BUILTIN_STOREUPD:
13275 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13277 case IX86_BUILTIN_LOADSD:
13278 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13280 case IX86_BUILTIN_STORESD:
13281 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13283 case IX86_BUILTIN_SETPD1:
13284 target = assign_386_stack_local (DFmode, 0);
13285 arg0 = TREE_VALUE (arglist);
13286 emit_move_insn (adjust_address (target, DFmode, 0),
13287 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13288 op0 = gen_reg_rtx (V2DFmode);
13289 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13290 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13293 case IX86_BUILTIN_SETPD:
13294 target = assign_386_stack_local (V2DFmode, 0);
13295 arg0 = TREE_VALUE (arglist);
13296 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13297 emit_move_insn (adjust_address (target, DFmode, 0),
13298 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13299 emit_move_insn (adjust_address (target, DFmode, 8),
13300 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13301 op0 = gen_reg_rtx (V2DFmode);
13302 emit_insn (gen_sse2_movapd (op0, target));
13305 case IX86_BUILTIN_LOADRPD:
13306 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13307 gen_reg_rtx (V2DFmode), 1);
13308 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13311 case IX86_BUILTIN_LOADPD1:
13312 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13313 gen_reg_rtx (V2DFmode), 1);
13314 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13317 case IX86_BUILTIN_STOREPD1:
13318 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13319 case IX86_BUILTIN_STORERPD:
13320 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13322 case IX86_BUILTIN_MFENCE:
13323 emit_insn (gen_sse2_mfence ());
13325 case IX86_BUILTIN_LFENCE:
13326 emit_insn (gen_sse2_lfence ());
13329 case IX86_BUILTIN_CLFLUSH:
13330 arg0 = TREE_VALUE (arglist);
13331 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13332 icode = CODE_FOR_sse2_clflush;
13333 mode0 = insn_data[icode].operand[0].mode;
13334 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13335 op0 = copy_to_mode_reg (mode0, op0);
13337 emit_insn (gen_sse2_clflush (op0));
13340 case IX86_BUILTIN_MOVNTPD:
13341 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13342 case IX86_BUILTIN_MOVNTDQ:
13343 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13344 case IX86_BUILTIN_MOVNTI:
13345 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13351 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13352 if (d->code == fcode)
13354 /* Compares are treated specially. */
13355 if (d->icode == CODE_FOR_maskcmpv4sf3
13356 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13357 || d->icode == CODE_FOR_maskncmpv4sf3
13358 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13359 || d->icode == CODE_FOR_maskcmpv2df3
13360 || d->icode == CODE_FOR_vmmaskcmpv2df3
13361 || d->icode == CODE_FOR_maskncmpv2df3
13362 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13363 return ix86_expand_sse_compare (d, arglist, target);
13365 return ix86_expand_binop_builtin (d->icode, arglist, target);
13368 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13369 if (d->code == fcode)
13370 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13372 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13373 if (d->code == fcode)
13374 return ix86_expand_sse_comi (d, arglist, target);
13376 /* @@@ Should really do something sensible here. */
13380 /* Store OPERAND to the memory after reload is completed.  This means
13381    that we can't easily use assign_stack_local.  */
/* NOTE(review): this extract is elided -- the return type, several braces
   and statements between the numbered lines are not visible here.  The
   function appears to return an rtx MEM describing where OPERAND was
   stored -- confirm against the full source.  */
13383 ix86_force_to_memory (mode, operand)
13384      enum machine_mode mode;
/* Only valid after reload; before that, assign_stack_local would be used.  */
13388   if (!reload_completed)
/* 64-bit with a red zone: store below the stack pointer without
   adjusting it (the red zone is safe scratch space).  */
13390   if (TARGET_64BIT && TARGET_RED_ZONE)
13392       result = gen_rtx_MEM (mode,
13393 			    gen_rtx_PLUS (Pmode,
13395 					  GEN_INT (-RED_ZONE_SIZE)));
13396       emit_move_insn (result, operand);
/* 64-bit without a red zone: push the value instead.  */
13398 else if (TARGET_64BIT && !TARGET_RED_ZONE)
/* Widen to DImode so one 64-bit push covers the whole value.  */
13404 	  operand = gen_lowpart (DImode, operand);
/* Push via a store to a pre-decremented stack-pointer MEM.  */
13408 		      gen_rtx_SET (VOIDmode,
13409 				   gen_rtx_MEM (DImode,
13410 					gen_rtx_PRE_DEC (DImode,
13411 							 stack_pointer_rtx)),
13417       result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit DImode value: split into two SImode words and push each
   separately (order of the two pushes is in elided lines).  */
13426 	    split_di (&operand, 1, operands, operands + 1);
13428 			gen_rtx_SET (VOIDmode,
13429 				     gen_rtx_MEM (SImode,
13430 					  gen_rtx_PRE_DEC (Pmode,
13431 							   stack_pointer_rtx)),
13434 			gen_rtx_SET (VOIDmode,
13435 				     gen_rtx_MEM (SImode,
13436 					  gen_rtx_PRE_DEC (Pmode,
13437 							   stack_pointer_rtx)),
13442 	    /* It is better to store HImodes as SImodes.  */
13443 	    if (!TARGET_PARTIAL_REG_STALL)
13444 	      operand = gen_lowpart (SImode, operand);
13448 			gen_rtx_SET (VOIDmode,
13449 				     gen_rtx_MEM (GET_MODE (operand),
13450 					  gen_rtx_PRE_DEC (SImode,
13451 							   stack_pointer_rtx)),
13457       result = gen_rtx_MEM (mode, stack_pointer_rtx);
13462 /* Free operand from the memory.  */
/* Companion to ix86_force_to_memory: releases the stack space that
   routine allocated.  With a red zone nothing was pushed, hence the
   early bail-out below.  (Some lines are elided in this extract.)  */
13464 ix86_free_from_memory (mode)
13465 enum machine_mode mode;
/* Only deallocate when something was actually pushed (no red zone).  */
13467 if (!TARGET_64BIT || !TARGET_RED_ZONE)
/* Size selection: 8 bytes for DImode or any 64-bit push, otherwise a
   mode-dependent size (the assignments themselves are elided here).  */
13471 if (mode == DImode || TARGET_64BIT)
13473 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13477 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13478 to pop or add instruction if registers are available. */
13479 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13480 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13485 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13486 QImode must go into class Q_REGS.
13487 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
13488 movdf to do mem-to-mem moves through integer regs. */
/* Implements PREFERRED_RELOAD_CLASS: given a value X that must be
   reloaded into a register of CLASS, return the class actually to use.
   (Return statements for several branches are elided in this extract.)  */
13490 ix86_preferred_reload_class (x, class)
13492 enum reg_class class;
/* Non-integral CONST_DOUBLEs need special treatment.  */
13494 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13496 /* SSE can't load any constant directly yet. */
13497 if (SSE_CLASS_P (class))
13499 /* Floats can load 0 and 1. */
13500 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13502 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13503 if (MAYBE_SSE_CLASS_P (class))
13504 return (reg_class_subset_p (class, GENERAL_REGS)
13505 ? GENERAL_REGS : FLOAT_REGS);
13509 /* General regs can load everything. */
13510 if (reg_class_subset_p (class, GENERAL_REGS))
13511 return GENERAL_REGS;
13512 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13513 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
/* MMX registers cannot hold arbitrary constants directly.  */
13516 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
/* QImode values must land in a byte-addressable register class.  */
13518 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13523 /* If we are copying between general and FP registers, we need a memory
13524 location. The same is true for SSE and MMX registers.
13526 The macro can't work reliably when one of the CLASSES is class containing
13527 registers from multiple units (SSE, MMX, integer). We avoid this by never
13528 combining those units in single alternative in the machine description.
13529 Ensure that this constraint holds to avoid unexpected surprises.
13531 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13532 enforce these sanity checks. */
13534 ix86_secondary_memory_needed (class1, class2, mode, strict)
13535 enum reg_class class1, class2;
13536 enum machine_mode mode;
/* Sanity check: each class must be unambiguously FP, SSE or MMX (or
   none of them); mixed-unit classes would make the answer unreliable.
   The action taken on failure is in elided lines -- presumably an
   abort when STRICT, confirm against the full source.  */
13539 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13540 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13541 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13542 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13543 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13544 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
/* A memory intermediary is needed whenever the move crosses the
   x87/integer boundary, or the SSE/MMX boundary for non-SImode values
   (SImode can move directly via movd).  */
13551 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13552 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13553 && (mode) != SImode)
13554 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13555 && (mode) != SImode));
13557 /* Return the cost of moving data from a register in class CLASS1 to
13558 one in class CLASS2.
13560 It is not required that the cost always equal 2 when FROM is the same as TO;
13561 on some machines it is expensive to move between registers if they are not
13562 general registers. */
13564 ix86_register_move_cost (mode, class1, class2)
13565 enum machine_mode mode;
13566 enum reg_class class1, class2;
13568 /* In case we require secondary memory, compute cost of the store followed
13569 by load. In case of copying from general_purpose_register we may emit
13570 multiple stores followed by single load causing memory size mismatch
13571 stall. Count this as arbitrarily high cost of 20. */
13572 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
/* The mismatch-stall penalty branch (cost 20) sits in elided lines;
   add_cost presumably carries it -- confirm against the full source.  */
13575 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
13577 return (MEMORY_MOVE_COST (mode, class1, 0)
13578 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
13580 /* Moves between SSE/MMX and integer unit are expensive. */
13581 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13582 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
13583 return ix86_cost->mmxsse_to_integer;
/* Within a single unit, use the per-processor tuned move costs.  */
13584 if (MAYBE_FLOAT_CLASS_P (class1))
13585 return ix86_cost->fp_move;
13586 if (MAYBE_SSE_CLASS_P (class1))
13587 return ix86_cost->sse_move;
13588 if (MAYBE_MMX_CLASS_P (class1))
13589 return ix86_cost->mmx_move;
13593 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13595 ix86_hard_regno_mode_ok (regno, mode)
13597 enum machine_mode mode;
13599 /* Flags and only flags can only hold CCmode values. */
13600 if (CC_REGNO_P (regno))
13601 return GET_MODE_CLASS (mode) == MODE_CC;
/* Conversely, no other register may hold CC (or the odd RANDOM /
   PARTIAL_INT classes).  The rejecting return is in an elided line.  */
13602 if (GET_MODE_CLASS (mode) == MODE_CC
13603 || GET_MODE_CLASS (mode) == MODE_RANDOM
13604 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
/* Each special-purpose register file accepts only its own modes.  */
13606 if (FP_REGNO_P (regno))
13607 return VALID_FP_MODE_P (mode);
13608 if (SSE_REGNO_P (regno))
13609 return VALID_SSE_REG_MODE (mode);
13610 if (MMX_REGNO_P (regno))
13611 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
13612 /* We handle both integer and floats in the general purpose registers.
13613 In future we should be able to handle vector modes as well. */
13614 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13616 /* Take care for QImode values - they can be in non-QI regs, but then
13617 they do cause partial register stalls. */
13618 if (regno < 4 || mode != QImode || TARGET_64BIT)
/* QImode in a non-byte-addressable reg: allow it during/after reload,
   or when the target doesn't suffer partial-register stalls.  */
13620 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13623 /* Return the cost of moving data of mode M between a
13624 register and memory. A value of 2 is the default; this cost is
13625 relative to those in `REGISTER_MOVE_COST'.
13627 If moving between registers and memory is more expensive than
13628 between two registers, you should define this macro to express the
13631 Model also increased moving costs of QImode registers in non
/* IN is nonzero for a load (memory -> register), zero for a store.
   Several index-selection lines are elided in this extract; `index'
   presumably maps GET_MODE_SIZE to a cost-table slot -- confirm
   against the full source.  */
13635 ix86_memory_move_cost (mode, class, in)
13636 enum machine_mode mode;
13637 enum reg_class class;
/* x87 registers: per-size fp load/store cost table.  */
13640 if (FLOAT_CLASS_P (class))
13658 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE registers: per-size sse load/store cost table.  */
13660 if (SSE_CLASS_P (class))
13663 switch (GET_MODE_SIZE (mode))
13677 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
/* MMX registers: per-size mmx load/store cost table.  */
13679 if (MMX_CLASS_P (class))
13682 switch (GET_MODE_SIZE (mode))
13693 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* General-purpose registers: costs keyed on operand size.  */
13695 switch (GET_MODE_SIZE (mode))
/* Byte loads into non-Q regs go through movzbl, which costs extra.  */
13699 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13700 : ix86_cost->movzbl_load);
/* Byte stores from non-Q regs incur an extra penalty of 4.  */
13702 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13703 : ix86_cost->int_store[0] + 4);
13706 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13708 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
13709 if (mode == TFmode)
13711 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
13712 * (int) GET_MODE_SIZE (mode) / 4);
13716 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
/* SVR3 constructor hook: emit "pushl $<symbol>" into the init section
   so DO_GLOBAL_CTORS_BODY can pop and call each constructor.  PRIORITY
   is ignored (SVR3 init sections have no priority ordering).  */
13718 ix86_svr3_asm_out_constructor (symbol, priority)
13720 int priority ATTRIBUTE_UNUSED;
13723 fputs ("\tpushl $", asm_out_file);
13724 assemble_name (asm_out_file, XSTR (symbol, 0));
13725 fputc ('\n', asm_out_file);
/* Counter used to generate unique local labels (LPC$n / Ln$lz) for
   each Mach-O symbol stub emitted.  */
13731 static int current_machopic_label_num;
13733 /* Given a symbol name and its associated stub, write out the
13734 definition of the stub. */
13737 machopic_output_stub (file, symb, stub)
13739 const char *symb, *stub;
13741 unsigned int length;
13742 char *binder_name, *symbol_name, lazy_ptr_name[32];
13743 int label = ++current_machopic_label_num;
13745 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
13746 symb = (*targetm.strip_name_encoding) (symb);
/* Derive the binder and symbol names; +32 leaves room for the
   decoration the GEN_* macros append.  */
13748 length = strlen (stub);
13749 binder_name = alloca (length + 32);
13750 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
13752 length = strlen (symb);
13753 symbol_name = alloca (length + 32);
13754 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
/* Lazy pointer slot label, e.g. "L42$lz".  */
13756 sprintf (lazy_ptr_name, "L%d$lz", label);
/* Pick the PIC or non-PIC stub section (the selecting condition is in
   an elided line -- presumably MACHOPIC_PURE, confirm).  */
13759 machopic_picsymbol_stub_section ();
13761 machopic_symbol_stub_section ();
13763 fprintf (file, "%s:\n", stub);
13764 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC stub: materialize PC in %eax via call/pop, then jump through
   the lazy pointer relative to it.  */
13768 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
13769 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
13770 fprintf (file, "\tjmp %%edx\n");
/* Non-PIC stub: indirect jump straight through the lazy pointer.  */
13773 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
/* Binder: first-call path that pushes the lazy pointer address and
   enters dyld's lazy binding helper.  */
13775 fprintf (file, "%s:\n", binder_name);
13779 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
13780 fprintf (file, "\tpushl %%eax\n");
13783 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
13785 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
/* Emit the lazy pointer itself, initialized to the binder so the
   first call triggers binding.  */
13787 machopic_lazy_symbol_ptr_section ();
13788 fprintf (file, "%s:\n", lazy_ptr_name);
13789 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
13790 fprintf (file, "\t.long %s\n", binder_name);
13792 #endif /* TARGET_MACHO */
13794 /* Order the registers for register allocator. */
/* Fills reg_alloc_order[]: caller-saved GPRs first, then call-saved
   GPRs, then x87/SSE/MMX in an order depending on TARGET_SSE_MATH.  */
13797 x86_order_regs_for_local_alloc ()
13802 /* First allocate the local general purpose registers. */
13803 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13804 if (GENERAL_REGNO_P (i) && call_used_regs[i])
13805 reg_alloc_order [pos++] = i;
13807 /* Global general purpose registers. */
13808 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13809 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
13810 reg_alloc_order [pos++] = i;
13812 /* x87 registers come first in case we are doing FP math
/* ... i.e. when SSE math is disabled, prefer the x87 stack regs.  */
13814 if (!TARGET_SSE_MATH)
13815 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13816 reg_alloc_order [pos++] = i;
13818 /* SSE registers. */
13819 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
13820 reg_alloc_order [pos++] = i;
13821 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
13822 reg_alloc_order [pos++] = i;
13824 /* x87 registers. */
/* When SSE math is on, x87 regs are deprioritized to after SSE.  */
13825 if (TARGET_SSE_MATH)
13826 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13827 reg_alloc_order [pos++] = i;
13829 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
13830 reg_alloc_order [pos++] = i;
13832 /* Initialize the rest of array as we do not allocate some registers
/* Pad the remainder with register 0 so every slot is defined.  */
13834 while (pos < FIRST_PSEUDO_REGISTER)
13835 reg_alloc_order [pos++] = 0;
/* Emit the assembly for a "this"-adjusting thunk: add DELTA to the
   incoming `this' pointer, then tail-jump to FUNCTION.  Handles 64-bit
   (register args), plain 32-bit (stack args) and 32-bit PIC paths.
   (Several declarations and braces are elided in this extract.)  */
13839 x86_output_mi_thunk (file, delta, function)
/* Under -mregparm, walk the prototype to locate `this' -- the loop
   body deciding which register it landed in is elided.  */
13847 if (ix86_regparm > 0)
13848 parm = TYPE_ARG_TYPES (TREE_TYPE (function));
13851 for (; parm; parm = TREE_CHAIN (parm))
13852 if (TREE_VALUE (parm) == void_type_node)
13855 xops[0] = GEN_INT (delta);
/* 64-bit: `this' is in the first int arg register, or the second when
   the return value is passed via a hidden aggregate pointer.  */
13858 int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
13859 xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
13860 output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
/* 64-bit PIC tail call goes through the GOT.  */
13863 fprintf (file, "\tjmp *");
13864 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13865 fprintf (file, "@GOTPCREL(%%rip)\n");
13869 fprintf (file, "\tjmp ");
13870 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13871 fprintf (file, "\n");
/* 32-bit: `this' is either in %eax (regparm case) or on the stack --
   at +8 past the hidden return slot for aggregate returns, else +4.  */
13877 xops[1] = gen_rtx_REG (SImode, 0);
13878 else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
13879 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
13881 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
13882 output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);
/* 32-bit PIC: rebuild the GOT pointer with the call/pop idiom, fetch
   FUNCTION's GOT entry into %ecx, then jump through it.  */
13886 xops[0] = pic_offset_table_rtx;
13887 xops[1] = gen_label_rtx ();
13888 xops[2] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
/* With regparm > 2, %ebx may carry an argument; the elided branch
   presumably handles that case -- confirm against the full source.  */
13890 if (ix86_regparm > 2)
13892 output_asm_insn ("push{l}\t%0", xops);
13893 output_asm_insn ("call\t%P1", xops);
13894 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
13895 output_asm_insn ("pop{l}\t%0", xops);
13897 ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
13898 xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
13900 ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
13901 asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
13902 asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
/* Non-PIC 32-bit: direct tail jump.  */
13906 fprintf (file, "\tjmp ");
13907 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13908 fprintf (file, "\n");
/* ADJUST_FIELD_ALIGN hook: cap the alignment of FIELD at 32 bits for
   scalar integer/double-ish types, matching the traditional ix86 ABI
   (unless 64-bit or -malign-double, where COMPUTED stands).  The
   return for the early-out branch is in an elided line.  */
13914 x86_field_alignment (field, computed)
13918 enum machine_mode mode;
13919 tree type = TREE_TYPE (field);
/* 64-bit and -malign-double keep the natural (computed) alignment.  */
13921 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
/* For arrays, the element type's mode decides the cap.  */
13923 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
13924 ? get_inner_array_type (type) : type);
13925 if (mode == DFmode || mode == DCmode
13926 || GET_MODE_CLASS (mode) == MODE_INT
13927 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
13928 return MIN (32, computed);
13932 /* Implement machine specific optimizations.
13933 At the moment we implement single transformation: AMD Athlon works faster
13934 when RET is not destination of conditional jump or directly preceded
13935 by other jump instruction. We avoid the penalty by inserting NOP just
13936 before the RET instructions in such cases. */
13938 x86_machine_dependent_reorg (first)
13939 rtx first ATTRIBUTE_UNUSED;
/* Only worthwhile on Athlon, and never under -Os.  */
13943 if (!TARGET_ATHLON || !optimize || optimize_size)
/* Inspect every block that reaches the exit block, i.e. every
   candidate containing a return.  */
13945 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
13947 basic_block bb = e->src;
13950 bool insert = false;
/* Skip non-return exits and cold blocks.  */
13952 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
/* Case 1: the RET is a jump target (label right before it, reached
   by at least one non-fallthru edge from real code).  */
13954 prev = prev_nonnote_insn (ret);
13955 if (prev && GET_CODE (prev) == CODE_LABEL)
13958 for (e = bb->pred; e; e = e->pred_next)
13959 if (EDGE_FREQUENCY (e) && e->src->index > 0
13960 && !(e->flags & EDGE_FALLTHRU))
/* Case 2: the RET directly follows a conditional jump.  */
13965 prev = prev_real_insn (ret);
13966 if (prev && GET_CODE (prev) == JUMP_INSN
13967 && any_condjump_p (prev))
/* Pad with a NOP so the RET is not the branch destination /
   immediate successor of the jump.  */
13971 emit_insn_before (gen_nop (), ret);
13975 #include "gt-i386.h"