1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
28 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "basic-block.h"
44 #include "target-def.h"
45 #include "langhooks.h"
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
51 /* Processor costs (relative to an add) */
53 struct processor_costs size_cost = { /* costs for tunning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of loading integer registers */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
88 /* Processor costs (relative to an add) */
90 struct processor_costs i386_cost = { /* 386 specific costs */
91 1, /* cost of an add instruction */
92 1, /* cost of a lea instruction */
93 3, /* variable shift costs */
94 2, /* constant shift costs */
95 6, /* cost of starting a multiply */
96 1, /* cost of multiply per each bit set */
97 23, /* cost of a divide/mod */
98 3, /* cost of movsx */
99 2, /* cost of movzx */
100 15, /* "large" insn */
102 4, /* cost for loading QImode using movzbl */
103 {2, 4, 2}, /* cost of loading integer registers
104 in QImode, HImode and SImode.
105 Relative to reg-reg move (2). */
106 {2, 4, 2}, /* cost of storing integer registers */
107 2, /* cost of reg,reg fld/fst */
108 {8, 8, 8}, /* cost of loading fp registers
109 in SFmode, DFmode and XFmode */
110 {8, 8, 8}, /* cost of loading integer registers */
111 2, /* cost of moving MMX register */
112 {4, 8}, /* cost of loading MMX registers
113 in SImode and DImode */
114 {4, 8}, /* cost of storing MMX registers
115 in SImode and DImode */
116 2, /* cost of moving SSE register */
117 {4, 8, 16}, /* cost of loading SSE registers
118 in SImode, DImode and TImode */
119 {4, 8, 16}, /* cost of storing SSE registers
120 in SImode, DImode and TImode */
121 3, /* MMX or SSE register to integer */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
127 struct processor_costs i486_cost = { /* 486 specific costs */
128 1, /* cost of an add instruction */
129 1, /* cost of a lea instruction */
130 3, /* variable shift costs */
131 2, /* constant shift costs */
132 12, /* cost of starting a multiply */
133 1, /* cost of multiply per each bit set */
134 40, /* cost of a divide/mod */
135 3, /* cost of movsx */
136 2, /* cost of movzx */
137 15, /* "large" insn */
139 4, /* cost for loading QImode using movzbl */
140 {2, 4, 2}, /* cost of loading integer registers
141 in QImode, HImode and SImode.
142 Relative to reg-reg move (2). */
143 {2, 4, 2}, /* cost of storing integer registers */
144 2, /* cost of reg,reg fld/fst */
145 {8, 8, 8}, /* cost of loading fp registers
146 in SFmode, DFmode and XFmode */
147 {8, 8, 8}, /* cost of loading integer registers */
148 2, /* cost of moving MMX register */
149 {4, 8}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {4, 8}, /* cost of storing MMX registers
152 in SImode and DImode */
153 2, /* cost of moving SSE register */
154 {4, 8, 16}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {4, 8, 16}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of prefetch block */
160 0, /* number of parallel prefetches */
164 struct processor_costs pentium_cost = {
165 1, /* cost of an add instruction */
166 1, /* cost of a lea instruction */
167 4, /* variable shift costs */
168 1, /* constant shift costs */
169 11, /* cost of starting a multiply */
170 0, /* cost of multiply per each bit set */
171 25, /* cost of a divide/mod */
172 3, /* cost of movsx */
173 2, /* cost of movzx */
174 8, /* "large" insn */
176 6, /* cost for loading QImode using movzbl */
177 {2, 4, 2}, /* cost of loading integer registers
178 in QImode, HImode and SImode.
179 Relative to reg-reg move (2). */
180 {2, 4, 2}, /* cost of storing integer registers */
181 2, /* cost of reg,reg fld/fst */
182 {2, 2, 6}, /* cost of loading fp registers
183 in SFmode, DFmode and XFmode */
184 {4, 4, 6}, /* cost of loading integer registers */
185 8, /* cost of moving MMX register */
186 {8, 8}, /* cost of loading MMX registers
187 in SImode and DImode */
188 {8, 8}, /* cost of storing MMX registers
189 in SImode and DImode */
190 2, /* cost of moving SSE register */
191 {4, 8, 16}, /* cost of loading SSE registers
192 in SImode, DImode and TImode */
193 {4, 8, 16}, /* cost of storing SSE registers
194 in SImode, DImode and TImode */
195 3, /* MMX or SSE register to integer */
196 0, /* size of prefetch block */
197 0, /* number of parallel prefetches */
201 struct processor_costs pentiumpro_cost = {
202 1, /* cost of an add instruction */
203 1, /* cost of a lea instruction */
204 1, /* variable shift costs */
205 1, /* constant shift costs */
206 4, /* cost of starting a multiply */
207 0, /* cost of multiply per each bit set */
208 17, /* cost of a divide/mod */
209 1, /* cost of movsx */
210 1, /* cost of movzx */
211 8, /* "large" insn */
213 2, /* cost for loading QImode using movzbl */
214 {4, 4, 4}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 2, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {2, 2, 6}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
221 {4, 4, 6}, /* cost of loading integer registers */
222 2, /* cost of moving MMX register */
223 {2, 2}, /* cost of loading MMX registers
224 in SImode and DImode */
225 {2, 2}, /* cost of storing MMX registers
226 in SImode and DImode */
227 2, /* cost of moving SSE register */
228 {2, 2, 8}, /* cost of loading SSE registers
229 in SImode, DImode and TImode */
230 {2, 2, 8}, /* cost of storing SSE registers
231 in SImode, DImode and TImode */
232 3, /* MMX or SSE register to integer */
233 32, /* size of prefetch block */
234 6, /* number of parallel prefetches */
238 struct processor_costs k6_cost = {
239 1, /* cost of an add instruction */
240 2, /* cost of a lea instruction */
241 1, /* variable shift costs */
242 1, /* constant shift costs */
243 3, /* cost of starting a multiply */
244 0, /* cost of multiply per each bit set */
245 18, /* cost of a divide/mod */
246 2, /* cost of movsx */
247 2, /* cost of movzx */
248 8, /* "large" insn */
250 3, /* cost for loading QImode using movzbl */
251 {4, 5, 4}, /* cost of loading integer registers
252 in QImode, HImode and SImode.
253 Relative to reg-reg move (2). */
254 {2, 3, 2}, /* cost of storing integer registers */
255 4, /* cost of reg,reg fld/fst */
256 {6, 6, 6}, /* cost of loading fp registers
257 in SFmode, DFmode and XFmode */
258 {4, 4, 4}, /* cost of loading integer registers */
259 2, /* cost of moving MMX register */
260 {2, 2}, /* cost of loading MMX registers
261 in SImode and DImode */
262 {2, 2}, /* cost of storing MMX registers
263 in SImode and DImode */
264 2, /* cost of moving SSE register */
265 {2, 2, 8}, /* cost of loading SSE registers
266 in SImode, DImode and TImode */
267 {2, 2, 8}, /* cost of storing SSE registers
268 in SImode, DImode and TImode */
269 6, /* MMX or SSE register to integer */
270 32, /* size of prefetch block */
271 1, /* number of parallel prefetches */
275 struct processor_costs athlon_cost = {
276 1, /* cost of an add instruction */
277 2, /* cost of a lea instruction */
278 1, /* variable shift costs */
279 1, /* constant shift costs */
280 5, /* cost of starting a multiply */
281 0, /* cost of multiply per each bit set */
282 42, /* cost of a divide/mod */
283 1, /* cost of movsx */
284 1, /* cost of movzx */
285 8, /* "large" insn */
287 4, /* cost for loading QImode using movzbl */
288 {4, 5, 4}, /* cost of loading integer registers
289 in QImode, HImode and SImode.
290 Relative to reg-reg move (2). */
291 {2, 3, 2}, /* cost of storing integer registers */
292 4, /* cost of reg,reg fld/fst */
293 {6, 6, 20}, /* cost of loading fp registers
294 in SFmode, DFmode and XFmode */
295 {4, 4, 16}, /* cost of loading integer registers */
296 2, /* cost of moving MMX register */
297 {2, 2}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {2, 2}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {2, 2, 8}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {2, 2, 8}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
306 6, /* MMX or SSE register to integer */
307 64, /* size of prefetch block */
308 6, /* number of parallel prefetches */
312 struct processor_costs pentium4_cost = {
313 1, /* cost of an add instruction */
314 1, /* cost of a lea instruction */
315 8, /* variable shift costs */
316 8, /* constant shift costs */
317 30, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 112, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 16, /* "large" insn */
324 2, /* cost for loading QImode using movzbl */
325 {4, 5, 4}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {2, 3, 2}, /* cost of storing integer registers */
329 2, /* cost of reg,reg fld/fst */
330 {2, 2, 6}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {4, 4, 6}, /* cost of loading integer registers */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 12, /* cost of moving SSE register */
339 {12, 12, 12}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 10, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
348 const struct processor_costs *ix86_cost = &pentium_cost;
350 /* Processor feature/optimization bitmasks. */
351 #define m_386 (1<<PROCESSOR_I386)
352 #define m_486 (1<<PROCESSOR_I486)
353 #define m_PENT (1<<PROCESSOR_PENTIUM)
354 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
355 #define m_K6 (1<<PROCESSOR_K6)
356 #define m_ATHLON (1<<PROCESSOR_ATHLON)
357 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
359 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
360 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
361 const int x86_zero_extend_with_and = m_486 | m_PENT;
362 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
363 const int x86_double_with_add = ~m_386;
364 const int x86_use_bit_test = m_386;
365 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
366 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
367 const int x86_3dnow_a = m_ATHLON;
368 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
369 const int x86_branch_hints = m_PENT4;
370 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
371 const int x86_partial_reg_stall = m_PPRO;
372 const int x86_use_loop = m_K6;
373 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
374 const int x86_use_mov0 = m_K6;
375 const int x86_use_cltd = ~(m_PENT | m_K6);
376 const int x86_read_modify_write = ~m_PENT;
377 const int x86_read_modify = ~(m_PENT | m_PPRO);
378 const int x86_split_long_moves = m_PPRO;
379 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
380 const int x86_single_stringop = m_386 | m_PENT4;
381 const int x86_qimode_math = ~(0);
382 const int x86_promote_qi_regs = 0;
383 const int x86_himode_math = ~(m_PPRO);
384 const int x86_promote_hi_regs = m_PPRO;
385 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
386 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
387 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
388 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
389 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
390 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
391 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
392 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
393 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
394 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
395 const int x86_decompose_lea = m_PENT4;
396 const int x86_arch_always_fancy_math_387 = m_PENT|m_PPRO|m_ATHLON|m_PENT4;
398 /* In case the avreage insn count for single function invocation is
399 lower than this constant, emit fast (but longer) prologue and
401 #define FAST_PROLOGUE_INSN_COUNT 30
402 /* Set by prologue expander and used by epilogue expander to determine
404 static int use_fast_prologue_epilogue;
406 #define AT_BP(MODE) (gen_rtx_MEM ((MODE), hard_frame_pointer_rtx))
408 static const char *const hi_reg_name[] = HI_REGISTER_NAMES; /* names for 16 bit regs */
409 static const char *const qi_reg_name[] = QI_REGISTER_NAMES; /* names for 8 bit regs (low) */
410 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; /* names for 8 bit regs (high) */
412 /* Array of the smallest class containing reg number REGNO, indexed by
413 REGNO. Used by REGNO_REG_CLASS in i386.h. */
415 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
418 AREG, DREG, CREG, BREG,
420 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
422 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
423 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
426 /* flags, fpsr, dirflag, frame */
427 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
428 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
430 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
432 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
433 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
434 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
438 /* The "default" register map used in 32bit mode. */
440 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
442 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
443 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
444 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
445 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
446 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
447 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
448 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
451 static int const x86_64_int_parameter_registers[6] = {5 /*RDI*/, 4 /*RSI*/,
452 1 /*RDX*/, 2 /*RCX*/,
453 FIRST_REX_INT_REG /*R8 */,
454 FIRST_REX_INT_REG + 1 /*R9 */};
455 static int const x86_64_int_return_registers[4] = {0 /*RAX*/, 1 /*RDI*/, 5, 4};
457 /* The "default" register map used in 64bit mode. */
458 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
460 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
461 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
462 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
463 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
464 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
465 8,9,10,11,12,13,14,15, /* extended integer registers */
466 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
469 /* Define the register numbers to be used in Dwarf debugging information.
470 The SVR4 reference port C compiler uses the following register numbers
471 in its Dwarf output code:
472 0 for %eax (gcc regno = 0)
473 1 for %ecx (gcc regno = 2)
474 2 for %edx (gcc regno = 1)
475 3 for %ebx (gcc regno = 3)
476 4 for %esp (gcc regno = 7)
477 5 for %ebp (gcc regno = 6)
478 6 for %esi (gcc regno = 4)
479 7 for %edi (gcc regno = 5)
480 The following three DWARF register numbers are never generated by
481 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
482 believes these numbers have these meanings.
483 8 for %eip (no gcc equivalent)
484 9 for %eflags (gcc regno = 17)
485 10 for %trapno (no gcc equivalent)
486 It is not at all clear how we should number the FP stack registers
487 for the x86 architecture. If the version of SDB on x86/svr4 were
488 a bit less brain dead with respect to floating-point then we would
489 have a precedent to follow with respect to DWARF register numbers
490 for x86 FP registers, but the SDB on x86/svr4 is so completely
491 broken with respect to FP registers that it is hardly worth thinking
492 of it as something to strive for compatibility with.
493 The version of x86/svr4 SDB I have at the moment does (partially)
494 seem to believe that DWARF register number 11 is associated with
495 the x86 register %st(0), but that's about all. Higher DWARF
496 register numbers don't seem to be associated with anything in
497 particular, and even for DWARF regno 11, SDB only seems to under-
498 stand that it should say that a variable lives in %st(0) (when
499 asked via an `=' command) if we said it was in DWARF regno 11,
500 but SDB still prints garbage when asked for the value of the
501 variable in question (via a `/' command).
502 (Also note that the labels SDB prints for various FP stack regs
503 when doing an `x' command are all wrong.)
504 Note that these problems generally don't affect the native SVR4
505 C compiler because it doesn't allow the use of -O with -g and
506 because when it is *not* optimizing, it allocates a memory
507 location for each floating-point variable, and the memory
508 location is what gets described in the DWARF AT_location
509 attribute for the variable in question.
510 Regardless of the severe mental illness of the x86/svr4 SDB, we
511 do something sensible here and we use the following DWARF
512 register numbers. Note that these are all stack-top-relative
514 11 for %st(0) (gcc regno = 8)
515 12 for %st(1) (gcc regno = 9)
516 13 for %st(2) (gcc regno = 10)
517 14 for %st(3) (gcc regno = 11)
518 15 for %st(4) (gcc regno = 12)
519 16 for %st(5) (gcc regno = 13)
520 17 for %st(6) (gcc regno = 14)
521 18 for %st(7) (gcc regno = 15)
523 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
525 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
526 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
527 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
528 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
529 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
530 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded integer registers */
531 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded SSE registers */
534 /* Test and compare insns in i386.md store the information needed to
535 generate branch and scc insns here. */
537 rtx ix86_compare_op0 = NULL_RTX;
538 rtx ix86_compare_op1 = NULL_RTX;
540 #define MAX_386_STACK_LOCALS 3
541 /* Size of the register save area. */
542 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
544 /* Define the structure for the machine field in struct function. */
545 struct machine_function
547 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
548 int save_varrargs_registers;
549 int accesses_prev_frame;
552 #define ix86_stack_locals (cfun->machine->stack_locals)
553 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
555 /* Structure describing stack frame layout.
556 Stack grows downward:
562 saved frame pointer if frame_pointer_needed
563 <- HARD_FRAME_POINTER
569 > to_allocate <- FRAME_POINTER
581 int outgoing_arguments_size;
584 HOST_WIDE_INT to_allocate;
585 /* The offsets relative to ARG_POINTER. */
586 HOST_WIDE_INT frame_pointer_offset;
587 HOST_WIDE_INT hard_frame_pointer_offset;
588 HOST_WIDE_INT stack_pointer_offset;
591 /* Used to enable/disable debugging features. */
592 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
593 /* Code model option as passed by user. */
594 const char *ix86_cmodel_string;
596 enum cmodel ix86_cmodel;
598 const char *ix86_asm_string;
599 enum asm_dialect ix86_asm_dialect = ASM_ATT;
601 /* which cpu are we scheduling for */
602 enum processor_type ix86_cpu;
604 /* which unit we are generating floating point math for */
605 enum fpmath_unit ix86_fpmath;
607 /* which instruction set architecture to use. */
610 /* Strings to hold which cpu and instruction set architecture to use. */
611 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
612 const char *ix86_arch_string; /* for -march=<xxx> */
613 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
615 /* # of registers to use to pass arguments. */
616 const char *ix86_regparm_string;
618 /* true if sse prefetch instruction is not NOOP. */
619 int x86_prefetch_sse;
621 /* ix86_regparm_string as a number */
624 /* Alignment to use for loops and jumps: */
626 /* Power of two alignment for loops. */
627 const char *ix86_align_loops_string;
629 /* Power of two alignment for non-loop jumps. */
630 const char *ix86_align_jumps_string;
632 /* Power of two alignment for stack boundary in bytes. */
633 const char *ix86_preferred_stack_boundary_string;
635 /* Preferred alignment for stack boundary in bits. */
636 int ix86_preferred_stack_boundary;
638 /* Values 1-5: see jump.c */
639 int ix86_branch_cost;
640 const char *ix86_branch_cost_string;
642 /* Power of two alignment for functions. */
643 const char *ix86_align_funcs_string;
645 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
646 static char internal_label_prefix[16];
647 static int internal_label_prefix_len;
649 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
650 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
651 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
653 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
654 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
656 static rtx gen_push PARAMS ((rtx));
657 static int memory_address_length PARAMS ((rtx addr));
658 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
659 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
660 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
661 static void ix86_dump_ppro_packet PARAMS ((FILE *));
662 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
663 static void ix86_init_machine_status PARAMS ((struct function *));
664 static void ix86_mark_machine_status PARAMS ((struct function *));
665 static void ix86_free_machine_status PARAMS ((struct function *));
666 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
667 static int ix86_nsaved_regs PARAMS ((void));
668 static void ix86_emit_save_regs PARAMS ((void));
669 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
670 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
671 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
672 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
673 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
674 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
675 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
676 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
677 static int ix86_issue_rate PARAMS ((void));
678 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
679 static void ix86_sched_init PARAMS ((FILE *, int, int));
680 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
681 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
682 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
683 static int ia32_multipass_dfa_lookahead PARAMS ((void));
684 static void ix86_init_mmx_sse_builtins PARAMS ((void));
688 rtx base, index, disp;
692 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
694 struct builtin_description;
695 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
697 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
699 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
700 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
701 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
702 static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
704 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
705 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
706 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
707 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
711 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
713 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
714 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
715 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
716 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
717 static int ix86_save_reg PARAMS ((unsigned int, int));
718 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
719 static int ix86_comp_type_attributes PARAMS ((tree, tree));
720 const struct attribute_spec ix86_attribute_table[];
721 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
722 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
724 #ifdef DO_GLOBAL_CTORS_BODY
725 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
728 /* Register class used for passing given 64bit part of the argument.
729 These represent classes as documented by the PS ABI, with the exception
730 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
731 use SF or DFmode move instead of DImode to avoid reformating penalties.
733 Similary we play games with INTEGERSI_CLASS to use cheaper SImode moves
734 whenever possible (upper half does contain padding).
736 enum x86_64_reg_class
739 X86_64_INTEGER_CLASS,
740 X86_64_INTEGERSI_CLASS,
749 static const char * const x86_64_reg_class_name[] =
750 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
752 #define MAX_CLASSES 4
753 static int classify_argument PARAMS ((enum machine_mode, tree,
754 enum x86_64_reg_class [MAX_CLASSES],
756 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
758 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
760 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
761 enum x86_64_reg_class));
763 /* Initialize the GCC target structure. */
764 #undef TARGET_ATTRIBUTE_TABLE
765 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
766 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
767 # undef TARGET_MERGE_DECL_ATTRIBUTES
768 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
771 #undef TARGET_COMP_TYPE_ATTRIBUTES
772 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
774 #undef TARGET_INIT_BUILTINS
775 #define TARGET_INIT_BUILTINS ix86_init_builtins
777 #undef TARGET_EXPAND_BUILTIN
778 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
780 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
781 static void ix86_osf_output_function_prologue PARAMS ((FILE *,
783 # undef TARGET_ASM_FUNCTION_PROLOGUE
784 # define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
787 #undef TARGET_ASM_OPEN_PAREN
788 #define TARGET_ASM_OPEN_PAREN ""
789 #undef TARGET_ASM_CLOSE_PAREN
790 #define TARGET_ASM_CLOSE_PAREN ""
792 #undef TARGET_ASM_ALIGNED_HI_OP
793 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
794 #undef TARGET_ASM_ALIGNED_SI_OP
795 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
797 #undef TARGET_ASM_ALIGNED_DI_OP
798 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
801 #undef TARGET_ASM_UNALIGNED_HI_OP
802 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
803 #undef TARGET_ASM_UNALIGNED_SI_OP
804 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
805 #undef TARGET_ASM_UNALIGNED_DI_OP
806 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
808 #undef TARGET_SCHED_ADJUST_COST
809 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
810 #undef TARGET_SCHED_ISSUE_RATE
811 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
812 #undef TARGET_SCHED_VARIABLE_ISSUE
813 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
814 #undef TARGET_SCHED_INIT
815 #define TARGET_SCHED_INIT ix86_sched_init
816 #undef TARGET_SCHED_REORDER
817 #define TARGET_SCHED_REORDER ix86_sched_reorder
818 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
819 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
820 ia32_use_dfa_pipeline_interface
821 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
822 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
823 ia32_multipass_dfa_lookahead
825 struct gcc_target targetm = TARGET_INITIALIZER;
827 /* Sometimes certain combinations of command options do not make
828 sense on a particular target machine. You can define a macro
829 `OVERRIDE_OPTIONS' to take account of this. This macro, if
830 defined, is executed once just after all the command options have
833 Don't use this macro to turn on various extra optimizations for
834 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
840 /* Comes from final.c -- no real reason to change it. */
841 #define MAX_CODE_ALIGN 16
845 const struct processor_costs *cost; /* Processor costs */
846 const int target_enable; /* Target flags to enable. */
847 const int target_disable; /* Target flags to disable. */
848 const int align_loop; /* Default alignments. */
849 const int align_loop_max_skip;
850 const int align_jump;
851 const int align_jump_max_skip;
852 const int align_func;
853 const int branch_cost;
855 const processor_target_table[PROCESSOR_max] =
857 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
858 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
859 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
860 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
861 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
862 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
863 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
866 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
869 const char *const name; /* processor name or nickname. */
870 const enum processor_type processor;
876 PTA_PREFETCH_SSE = 8,
881 const processor_alias_table[] =
883 {"i386", PROCESSOR_I386, 0},
884 {"i486", PROCESSOR_I486, 0},
885 {"i586", PROCESSOR_PENTIUM, 0},
886 {"pentium", PROCESSOR_PENTIUM, 0},
887 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
888 {"i686", PROCESSOR_PENTIUMPRO, 0},
889 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
890 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
891 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
892 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
893 PTA_MMX | PTA_PREFETCH_SSE},
894 {"k6", PROCESSOR_K6, PTA_MMX},
895 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
896 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
897 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
899 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
900 | PTA_3DNOW | PTA_3DNOW_A},
901 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
902 | PTA_3DNOW_A | PTA_SSE},
903 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
904 | PTA_3DNOW_A | PTA_SSE},
905 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
906 | PTA_3DNOW_A | PTA_SSE},
909 int const pta_size = ARRAY_SIZE (processor_alias_table);
911 #ifdef SUBTARGET_OVERRIDE_OPTIONS
912 SUBTARGET_OVERRIDE_OPTIONS;
915 if (!ix86_cpu_string && ix86_arch_string)
916 ix86_cpu_string = ix86_arch_string;
917 if (!ix86_cpu_string)
918 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
919 if (!ix86_arch_string)
920 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
922 if (ix86_cmodel_string != 0)
924 if (!strcmp (ix86_cmodel_string, "small"))
925 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
927 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
928 else if (!strcmp (ix86_cmodel_string, "32"))
930 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
931 ix86_cmodel = CM_KERNEL;
932 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
933 ix86_cmodel = CM_MEDIUM;
934 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
935 ix86_cmodel = CM_LARGE;
937 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
943 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
945 if (ix86_asm_string != 0)
947 if (!strcmp (ix86_asm_string, "intel"))
948 ix86_asm_dialect = ASM_INTEL;
949 else if (!strcmp (ix86_asm_string, "att"))
950 ix86_asm_dialect = ASM_ATT;
952 error ("bad value (%s) for -masm= switch", ix86_asm_string);
954 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
955 error ("code model `%s' not supported in the %s bit mode",
956 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
957 if (ix86_cmodel == CM_LARGE)
958 sorry ("code model `large' not supported yet");
959 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
960 sorry ("%i-bit mode not compiled in",
961 (target_flags & MASK_64BIT) ? 64 : 32);
963 for (i = 0; i < pta_size; i++)
964 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
966 ix86_arch = processor_alias_table[i].processor;
967 /* Default cpu tuning to the architecture. */
968 ix86_cpu = ix86_arch;
969 if (processor_alias_table[i].flags & PTA_MMX
970 && !(target_flags & MASK_MMX_SET))
971 target_flags |= MASK_MMX;
972 if (processor_alias_table[i].flags & PTA_3DNOW
973 && !(target_flags & MASK_3DNOW_SET))
974 target_flags |= MASK_3DNOW;
975 if (processor_alias_table[i].flags & PTA_3DNOW_A
976 && !(target_flags & MASK_3DNOW_A_SET))
977 target_flags |= MASK_3DNOW_A;
978 if (processor_alias_table[i].flags & PTA_SSE
979 && !(target_flags & MASK_SSE_SET))
980 target_flags |= MASK_SSE;
981 if (processor_alias_table[i].flags & PTA_SSE2
982 && !(target_flags & MASK_SSE2_SET))
983 target_flags |= MASK_SSE2;
984 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
985 x86_prefetch_sse = true;
990 error ("bad value (%s) for -march= switch", ix86_arch_string);
992 for (i = 0; i < pta_size; i++)
993 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
995 ix86_cpu = processor_alias_table[i].processor;
998 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
999 x86_prefetch_sse = true;
1001 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1004 ix86_cost = &size_cost;
1006 ix86_cost = processor_target_table[ix86_cpu].cost;
1007 target_flags |= processor_target_table[ix86_cpu].target_enable;
1008 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1010 /* Arrange to set up i386_stack_locals for all functions. */
1011 init_machine_status = ix86_init_machine_status;
1012 mark_machine_status = ix86_mark_machine_status;
1013 free_machine_status = ix86_free_machine_status;
1015 /* Validate -mregparm= value. */
1016 if (ix86_regparm_string)
1018 i = atoi (ix86_regparm_string);
1019 if (i < 0 || i > REGPARM_MAX)
1020 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1026 ix86_regparm = REGPARM_MAX;
1028 /* If the user has provided any of the -malign-* options,
1029 warn and use that value only if -falign-* is not set.
1030 Remove this code in GCC 3.2 or later. */
1031 if (ix86_align_loops_string)
1033 warning ("-malign-loops is obsolete, use -falign-loops");
1034 if (align_loops == 0)
1036 i = atoi (ix86_align_loops_string);
1037 if (i < 0 || i > MAX_CODE_ALIGN)
1038 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1040 align_loops = 1 << i;
1044 if (ix86_align_jumps_string)
1046 warning ("-malign-jumps is obsolete, use -falign-jumps");
1047 if (align_jumps == 0)
1049 i = atoi (ix86_align_jumps_string);
1050 if (i < 0 || i > MAX_CODE_ALIGN)
1051 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1053 align_jumps = 1 << i;
1057 if (ix86_align_funcs_string)
1059 warning ("-malign-functions is obsolete, use -falign-functions");
1060 if (align_functions == 0)
1062 i = atoi (ix86_align_funcs_string);
1063 if (i < 0 || i > MAX_CODE_ALIGN)
1064 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1066 align_functions = 1 << i;
1070 /* Default align_* from the processor table. */
1071 if (align_loops == 0)
1073 align_loops = processor_target_table[ix86_cpu].align_loop;
1074 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1076 if (align_jumps == 0)
1078 align_jumps = processor_target_table[ix86_cpu].align_jump;
1079 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1081 if (align_functions == 0)
1083 align_functions = processor_target_table[ix86_cpu].align_func;
1086 /* Validate -mpreferred-stack-boundary= value, or provide default.
1087 The default of 128 bits is for Pentium III's SSE __m128, but we
1088 don't want additional code to keep the stack aligned when
1089 optimizing for code size. */
1090 ix86_preferred_stack_boundary = (optimize_size
1091 ? TARGET_64BIT ? 64 : 32
1093 if (ix86_preferred_stack_boundary_string)
1095 i = atoi (ix86_preferred_stack_boundary_string);
1096 if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
1097 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1098 TARGET_64BIT ? 3 : 2);
1100 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1103 /* Validate -mbranch-cost= value, or provide default. */
1104 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1105 if (ix86_branch_cost_string)
1107 i = atoi (ix86_branch_cost_string);
1109 error ("-mbranch-cost=%d is not between 0 and 5", i);
1111 ix86_branch_cost = i;
1114 /* Keep nonleaf frame pointers. */
1115 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1116 flag_omit_frame_pointer = 1;
1118 /* If we're doing fast math, we don't care about comparison order
1119 wrt NaNs. This lets us use a shorter comparison sequence. */
1120 if (flag_unsafe_math_optimizations)
1121 target_flags &= ~MASK_IEEE_FP;
1123 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1124 since the insns won't need emulation. */
1125 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1126 target_flags &= ~MASK_NO_FANCY_MATH_387;
1130 if (TARGET_ALIGN_DOUBLE)
1131 error ("-malign-double makes no sense in the 64bit mode");
1133 error ("-mrtd calling convention not supported in the 64bit mode");
1134 /* Enable by default the SSE and MMX builtins. */
1135 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1136 ix86_fpmath = FPMATH_SSE;
1139 ix86_fpmath = FPMATH_387;
1141 if (ix86_fpmath_string != 0)
1143 if (! strcmp (ix86_fpmath_string, "387"))
1144 ix86_fpmath = FPMATH_387;
1145 else if (! strcmp (ix86_fpmath_string, "sse"))
1149 warning ("SSE instruction set disabled, using 387 arithmetics");
1150 ix86_fpmath = FPMATH_387;
1153 ix86_fpmath = FPMATH_SSE;
1155 else if (! strcmp (ix86_fpmath_string, "387,sse")
1156 || ! strcmp (ix86_fpmath_string, "sse,387"))
1160 warning ("SSE instruction set disabled, using 387 arithmetics");
1161 ix86_fpmath = FPMATH_387;
1163 else if (!TARGET_80387)
1165 warning ("387 instruction set disabled, using SSE arithmetics");
1166 ix86_fpmath = FPMATH_SSE;
1169 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1172 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1175 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1179 target_flags |= MASK_MMX;
1180 x86_prefetch_sse = true;
1183 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1186 target_flags |= MASK_MMX;
1187 /* If we are targetting the Athlon architecture, enable the 3Dnow/MMX
1188 extensions it adds. */
1189 if (x86_3dnow_a & (1 << ix86_arch))
1190 target_flags |= MASK_3DNOW_A;
1192 if ((x86_accumulate_outgoing_args & CPUMASK)
1193 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
1195 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1197 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1200 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1201 p = strchr (internal_label_prefix, 'X');
1202 internal_label_prefix_len = p - internal_label_prefix;
1208 optimization_options (level, size)
1210 int size ATTRIBUTE_UNUSED;
1212 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1213 make the problem with not enough registers even worse. */
1214 #ifdef INSN_SCHEDULING
1216 flag_schedule_insns = 0;
1218 if (TARGET_64BIT && optimize >= 1)
1219 flag_omit_frame_pointer = 1;
1222 flag_pcc_struct_return = 0;
1223 flag_asynchronous_unwind_tables = 1;
1227 /* Table of valid machine attributes. */
1228 const struct attribute_spec ix86_attribute_table[] =
1230 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1231 /* Stdcall attribute says callee is responsible for popping arguments
1232 if they are not variable. */
1233 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1234 /* Cdecl attribute says the callee is a normal C declaration */
1235 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1236 /* Regparm attribute specifies how many integer arguments are to be
1237 passed in registers. */
1238 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1239 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1240 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1241 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1242 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1244 { NULL, 0, 0, false, false, false, NULL }
1247 /* Handle a "cdecl" or "stdcall" attribute;
1248 arguments as in struct attribute_spec.handler. */
1250 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1253 tree args ATTRIBUTE_UNUSED;
1254 int flags ATTRIBUTE_UNUSED;
1257 if (TREE_CODE (*node) != FUNCTION_TYPE
1258 && TREE_CODE (*node) != METHOD_TYPE
1259 && TREE_CODE (*node) != FIELD_DECL
1260 && TREE_CODE (*node) != TYPE_DECL)
1262 warning ("`%s' attribute only applies to functions",
1263 IDENTIFIER_POINTER (name));
1264 *no_add_attrs = true;
1269 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1270 *no_add_attrs = true;
1276 /* Handle a "regparm" attribute;
1277 arguments as in struct attribute_spec.handler. */
1279 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1283 int flags ATTRIBUTE_UNUSED;
1286 if (TREE_CODE (*node) != FUNCTION_TYPE
1287 && TREE_CODE (*node) != METHOD_TYPE
1288 && TREE_CODE (*node) != FIELD_DECL
1289 && TREE_CODE (*node) != TYPE_DECL)
1291 warning ("`%s' attribute only applies to functions",
1292 IDENTIFIER_POINTER (name));
1293 *no_add_attrs = true;
1299 cst = TREE_VALUE (args);
1300 if (TREE_CODE (cst) != INTEGER_CST)
1302 warning ("`%s' attribute requires an integer constant argument",
1303 IDENTIFIER_POINTER (name));
1304 *no_add_attrs = true;
1306 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1308 warning ("argument to `%s' attribute larger than %d",
1309 IDENTIFIER_POINTER (name), REGPARM_MAX);
1310 *no_add_attrs = true;
1317 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
1319 /* Generate the assembly code for function entry. FILE is a stdio
1320 stream to output the code to. SIZE is an int: how many units of
1321 temporary storage to allocate.
1323 Refer to the array `regs_ever_live' to determine which registers to
1324 save; `regs_ever_live[I]' is nonzero if register number I is ever
1325 used in the function. This function is responsible for knowing
1326 which registers should not be saved even if used.
1328 We override it here to allow for the new profiling code to go before
1329 the prologue and the old mcount code to go after the prologue (and
1330 after %ebx has been set up for ELF shared library support). */
1333 ix86_osf_output_function_prologue (file, size)
1337 const char *prefix = "";
1338 const char *const lprefix = LPREFIX;
1339 int labelno = current_function_profile_label_no;
1343 if (TARGET_UNDERSCORES)
1346 if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
1348 if (!flag_pic && !HALF_PIC_P ())
1350 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1351 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
1354 else if (HALF_PIC_P ())
1358 HALF_PIC_EXTERNAL ("_mcount_ptr");
1359 symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode,
1362 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1363 fprintf (file, "\tmovl %s%s,%%eax\n", prefix,
1365 fprintf (file, "\tcall *(%%eax)\n");
1370 static int call_no = 0;
1372 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1373 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1374 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1375 lprefix, call_no++);
1376 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1378 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1380 fprintf (file, "\tcall *(%%eax)\n");
1386 if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
1390 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1391 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
1396 static int call_no = 0;
1398 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1399 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1400 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1401 lprefix, call_no++);
1402 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1404 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1406 fprintf (file, "\tcall *(%%eax)\n");
1409 #endif /* !OSF_OS */
1411 function_prologue (file, size);
1414 #endif /* OSF_OS || TARGET_OSF1ELF */
1416 /* Return 0 if the attributes for two types are incompatible, 1 if they
1417 are compatible, and 2 if they are nearly compatible (which causes a
1418 warning to be generated). */
1421 ix86_comp_type_attributes (type1, type2)
1425 /* Check for mismatch of non-default calling convention. */
1426 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1428 if (TREE_CODE (type1) != FUNCTION_TYPE)
1431 /* Check for mismatched return types (cdecl vs stdcall). */
1432 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1433 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1438 /* Value is the number of bytes of arguments automatically
1439 popped when returning from a subroutine call.
1440 FUNDECL is the declaration node of the function (as a tree),
1441 FUNTYPE is the data type of the function (as a tree),
1442 or for a library call it is an identifier node for the subroutine name.
1443 SIZE is the number of bytes of arguments passed on the stack.
1445 On the 80386, the RTD insn may be used to pop them if the number
1446 of args is fixed, but if the number is variable then the caller
1447 must pop them all. RTD can't be used for library calls now
1448 because the library is compiled with the Unix compiler.
1449 Use of RTD is a selectable option, since it is incompatible with
1450 standard Unix calling sequences. If the option is not selected,
1451 the caller must always pop the args.
1453 The attribute stdcall is equivalent to RTD on a per module basis. */
1456 ix86_return_pops_args (fundecl, funtype, size)
1461 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1463 /* Cdecl functions override -mrtd, and never pop the stack. */
1464 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1466 /* Stdcall functions will pop the stack if not variable args. */
1467 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1471 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1472 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1473 == void_type_node)))
1477 /* Lose any fake structure return argument if it is passed on the stack. */
1478 if (aggregate_value_p (TREE_TYPE (funtype))
1481 int nregs = ix86_regparm;
1485 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));
1488 nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1492 return GET_MODE_SIZE (Pmode);
1498 /* Argument support functions. */
1500 /* Return true when register may be used to pass function parameters. */
1502 ix86_function_arg_regno_p (regno)
1507 return (regno < REGPARM_MAX
1508 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1509 if (SSE_REGNO_P (regno) && TARGET_SSE)
1511 /* RAX is used as hidden argument to va_arg functions. */
1514 for (i = 0; i < REGPARM_MAX; i++)
1515 if (regno == x86_64_int_parameter_registers[i])
1520 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1521 for a call to a function whose data type is FNTYPE.
1522 For a library call, FNTYPE is 0. */
1525 init_cumulative_args (cum, fntype, libname)
1526 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1527 tree fntype; /* tree ptr for function decl */
1528 rtx libname; /* SYMBOL_REF of library name or 0 */
1530 static CUMULATIVE_ARGS zero_cum;
1531 tree param, next_param;
1533 if (TARGET_DEBUG_ARG)
1535 fprintf (stderr, "\ninit_cumulative_args (");
1537 fprintf (stderr, "fntype code = %s, ret code = %s",
1538 tree_code_name[(int) TREE_CODE (fntype)],
1539 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1541 fprintf (stderr, "no fntype");
1544 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1549 /* Set up the number of registers to use for passing arguments. */
1550 cum->nregs = ix86_regparm;
1551 cum->sse_nregs = SSE_REGPARM_MAX;
1552 if (fntype && !TARGET_64BIT)
1554 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1557 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1559 cum->maybe_vaarg = false;
1561 /* Determine if this function has variable arguments. This is
1562 indicated by the last argument being 'void_type_mode' if there
1563 are no variable arguments. If there are variable arguments, then
1564 we won't pass anything in registers */
1568 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1569 param != 0; param = next_param)
1571 next_param = TREE_CHAIN (param);
1572 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1576 cum->maybe_vaarg = true;
1580 if ((!fntype && !libname)
1581 || (fntype && !TYPE_ARG_TYPES (fntype)))
1582 cum->maybe_vaarg = 1;
1584 if (TARGET_DEBUG_ARG)
1585 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1590 /* x86-64 register passing impleemntation. See x86-64 ABI for details. Goal
1591 of this code is to classify each 8bytes of incoming argument by the register
1592 class and assign registers accordingly. */
1594 /* Return the union class of CLASS1 and CLASS2.
1595 See the x86-64 PS ABI for details. */
1597 static enum x86_64_reg_class
1598 merge_classes (class1, class2)
1599 enum x86_64_reg_class class1, class2;
1601 /* Rule #1: If both classes are equal, this is the resulting class. */
1602 if (class1 == class2)
1605 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1607 if (class1 == X86_64_NO_CLASS)
1609 if (class2 == X86_64_NO_CLASS)
1612 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1613 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1614 return X86_64_MEMORY_CLASS;
1616 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1617 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1618 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1619 return X86_64_INTEGERSI_CLASS;
1620 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1621 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1622 return X86_64_INTEGER_CLASS;
1624 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1625 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1626 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1627 return X86_64_MEMORY_CLASS;
1629 /* Rule #6: Otherwise class SSE is used. */
1630 return X86_64_SSE_CLASS;
1633 /* Classify the argument of type TYPE and mode MODE.
1634 CLASSES will be filled by the register class used to pass each word
1635 of the operand. The number of words is returned. In case the parameter
1636 should be passed in memory, 0 is returned. As a special case for zero
1637 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1639 BIT_OFFSET is used internally for handling records and specifies offset
1640 of the offset in bits modulo 256 to avoid overflow cases.
1642 See the x86-64 PS ABI for details.
1646 classify_argument (mode, type, classes, bit_offset)
1647 enum machine_mode mode;
1649 enum x86_64_reg_class classes[MAX_CLASSES];
1653 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1654 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1656 if (type && AGGREGATE_TYPE_P (type))
1660 enum x86_64_reg_class subclasses[MAX_CLASSES];
1662 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1666 for (i = 0; i < words; i++)
1667 classes[i] = X86_64_NO_CLASS;
1669 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1670 signalize memory class, so handle it as special case. */
1673 classes[0] = X86_64_NO_CLASS;
1677 /* Classify each field of record and merge classes. */
1678 if (TREE_CODE (type) == RECORD_TYPE)
1680 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1682 if (TREE_CODE (field) == FIELD_DECL)
1686 /* Bitfields are always classified as integer. Handle them
1687 early, since later code would consider them to be
1688 misaligned integers. */
1689 if (DECL_BIT_FIELD (field))
1691 for (i = int_bit_position (field) / 8 / 8;
1692 i < (int_bit_position (field)
1693 + tree_low_cst (DECL_SIZE (field), 0)
1696 merge_classes (X86_64_INTEGER_CLASS,
1701 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1702 TREE_TYPE (field), subclasses,
1703 (int_bit_position (field)
1704 + bit_offset) % 256);
1707 for (i = 0; i < num; i++)
1710 (int_bit_position (field) + bit_offset) / 8 / 8;
1712 merge_classes (subclasses[i], classes[i + pos]);
1718 /* Arrays are handled as small records. */
1719 else if (TREE_CODE (type) == ARRAY_TYPE)
1722 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1723 TREE_TYPE (type), subclasses, bit_offset);
1727 /* The partial classes are now full classes. */
1728 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1729 subclasses[0] = X86_64_SSE_CLASS;
1730 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1731 subclasses[0] = X86_64_INTEGER_CLASS;
1733 for (i = 0; i < words; i++)
1734 classes[i] = subclasses[i % num];
1736 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1737 else if (TREE_CODE (type) == UNION_TYPE
1738 || TREE_CODE (type) == QUAL_UNION_TYPE)
1740 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1742 if (TREE_CODE (field) == FIELD_DECL)
1745 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1746 TREE_TYPE (field), subclasses,
1750 for (i = 0; i < num; i++)
1751 classes[i] = merge_classes (subclasses[i], classes[i]);
1758 /* Final merger cleanup. */
1759 for (i = 0; i < words; i++)
1761 /* If one class is MEMORY, everything should be passed in
1763 if (classes[i] == X86_64_MEMORY_CLASS)
1766 /* The X86_64_SSEUP_CLASS should be always preceded by
1767 X86_64_SSE_CLASS. */
1768 if (classes[i] == X86_64_SSEUP_CLASS
1769 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1770 classes[i] = X86_64_SSE_CLASS;
1772 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1773 if (classes[i] == X86_64_X87UP_CLASS
1774 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1775 classes[i] = X86_64_SSE_CLASS;
1780 /* Compute alignment needed. We align all types to natural boundaries with
1781 exception of XFmode that is aligned to 64bits. */
1782 if (mode != VOIDmode && mode != BLKmode)
1784 int mode_alignment = GET_MODE_BITSIZE (mode);
1787 mode_alignment = 128;
1788 else if (mode == XCmode)
1789 mode_alignment = 256;
1790 /* Misaligned fields are always returned in memory. */
1791 if (bit_offset % mode_alignment)
1795 /* Classification of atomic types. */
1805 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1806 classes[0] = X86_64_INTEGERSI_CLASS;
1808 classes[0] = X86_64_INTEGER_CLASS;
1812 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1815 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1816 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1819 if (!(bit_offset % 64))
1820 classes[0] = X86_64_SSESF_CLASS;
1822 classes[0] = X86_64_SSE_CLASS;
1825 classes[0] = X86_64_SSEDF_CLASS;
1828 classes[0] = X86_64_X87_CLASS;
1829 classes[1] = X86_64_X87UP_CLASS;
1832 classes[0] = X86_64_X87_CLASS;
1833 classes[1] = X86_64_X87UP_CLASS;
1834 classes[2] = X86_64_X87_CLASS;
1835 classes[3] = X86_64_X87UP_CLASS;
1838 classes[0] = X86_64_SSEDF_CLASS;
1839 classes[1] = X86_64_SSEDF_CLASS;
1842 classes[0] = X86_64_SSE_CLASS;
1846 classes[0] = X86_64_SSE_CLASS;
1847 classes[1] = X86_64_SSEUP_CLASS;
1853 classes[0] = X86_64_SSE_CLASS;
1863 /* Examine the argument and return set number of register required in each
1864 class. Return 0 iff parameter should be passed in memory. */
1866 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1867 enum machine_mode mode;
1869 int *int_nregs, *sse_nregs;
1872 enum x86_64_reg_class class[MAX_CLASSES];
1873 int n = classify_argument (mode, type, class, 0);
1879 for (n--; n >= 0; n--)
1882 case X86_64_INTEGER_CLASS:
1883 case X86_64_INTEGERSI_CLASS:
1886 case X86_64_SSE_CLASS:
1887 case X86_64_SSESF_CLASS:
1888 case X86_64_SSEDF_CLASS:
1891 case X86_64_NO_CLASS:
1892 case X86_64_SSEUP_CLASS:
1894 case X86_64_X87_CLASS:
1895 case X86_64_X87UP_CLASS:
1899 case X86_64_MEMORY_CLASS:
1904 /* Construct container for the argument used by GCC interface. See
1905 FUNCTION_ARG for the detailed description. */
1907 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1908 enum machine_mode mode;
1911 int nintregs, nsseregs;
1915 enum machine_mode tmpmode;
1917 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1918 enum x86_64_reg_class class[MAX_CLASSES];
1922 int needed_sseregs, needed_intregs;
1923 rtx exp[MAX_CLASSES];
1926 n = classify_argument (mode, type, class, 0);
1927 if (TARGET_DEBUG_ARG)
1930 fprintf (stderr, "Memory class\n");
1933 fprintf (stderr, "Classes:");
1934 for (i = 0; i < n; i++)
1936 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1938 fprintf (stderr, "\n");
1943 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1945 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1948 /* First construct simple cases. Avoid SCmode, since we want to use
1949 single register to pass this type. */
1950 if (n == 1 && mode != SCmode)
1953 case X86_64_INTEGER_CLASS:
1954 case X86_64_INTEGERSI_CLASS:
1955 return gen_rtx_REG (mode, intreg[0]);
1956 case X86_64_SSE_CLASS:
1957 case X86_64_SSESF_CLASS:
1958 case X86_64_SSEDF_CLASS:
1959 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1960 case X86_64_X87_CLASS:
1961 return gen_rtx_REG (mode, FIRST_STACK_REG);
1962 case X86_64_NO_CLASS:
1963 /* Zero sized array, struct or class. */
1968 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1969 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1971 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1972 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1973 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1974 && class[1] == X86_64_INTEGER_CLASS
1975 && (mode == CDImode || mode == TImode)
1976 && intreg[0] + 1 == intreg[1])
1977 return gen_rtx_REG (mode, intreg[0]);
1979 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1980 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1981 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1983 /* Otherwise figure out the entries of the PARALLEL. */
1984 for (i = 0; i < n; i++)
1988 case X86_64_NO_CLASS:
1990 case X86_64_INTEGER_CLASS:
1991 case X86_64_INTEGERSI_CLASS:
1992 /* Merge TImodes on aligned occassions here too. */
1993 if (i * 8 + 8 > bytes)
1994 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
1995 else if (class[i] == X86_64_INTEGERSI_CLASS)
1999 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2000 if (tmpmode == BLKmode)
2002 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2003 gen_rtx_REG (tmpmode, *intreg),
2007 case X86_64_SSESF_CLASS:
2008 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2009 gen_rtx_REG (SFmode,
2010 SSE_REGNO (sse_regno)),
2014 case X86_64_SSEDF_CLASS:
2015 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2016 gen_rtx_REG (DFmode,
2017 SSE_REGNO (sse_regno)),
2021 case X86_64_SSE_CLASS:
2022 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
2023 tmpmode = TImode, i++;
2026 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2027 gen_rtx_REG (tmpmode,
2028 SSE_REGNO (sse_regno)),
2036 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2037 for (i = 0; i < nexps; i++)
2038 XVECEXP (ret, 0, i) = exp [i];
2042 /* Update the data in CUM to advance over an argument
2043 of mode MODE and data type TYPE.
2044 (TYPE is null for libcalls where that information may not be available.) */
2047 function_arg_advance (cum, mode, type, named)
2048 CUMULATIVE_ARGS *cum; /* current arg information */
2049 enum machine_mode mode; /* current arg mode */
2050 tree type; /* type of the argument or 0 if lib support */
2051 int named; /* whether or not the argument was named */
2054 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2055 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2057 if (TARGET_DEBUG_ARG)
2059 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2060 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2063 int int_nregs, sse_nregs;
2064 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2065 cum->words += words;
2066 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2068 cum->nregs -= int_nregs;
2069 cum->sse_nregs -= sse_nregs;
2070 cum->regno += int_nregs;
2071 cum->sse_regno += sse_nregs;
2074 cum->words += words;
2078 if (TARGET_SSE && mode == TImode)
2080 cum->sse_words += words;
2081 cum->sse_nregs -= 1;
2082 cum->sse_regno += 1;
2083 if (cum->sse_nregs <= 0)
2091 cum->words += words;
2092 cum->nregs -= words;
2093 cum->regno += words;
2095 if (cum->nregs <= 0)
2105 /* Define where to put the arguments to a function.
2106 Value is zero to push the argument on the stack,
2107 or a hard register in which to store the argument.
2109 MODE is the argument's machine mode.
2110 TYPE is the data type of the argument (as a tree).
2111 This is null for libcalls where that information may
2113 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2114 the preceding args and about the function being called.
2115 NAMED is nonzero if this argument is a named parameter
2116 (otherwise it is an extra parameter matching an ellipsis). */
2119 function_arg (cum, mode, type, named)
2120 CUMULATIVE_ARGS *cum; /* current arg information */
2121 enum machine_mode mode; /* current arg mode */
2122 tree type; /* type of the argument or 0 if lib support */
2123 int named; /* != 0 for normal args, == 0 for ... args */
2127 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2128 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2130 /* Handle an hidden AL argument containing number of registers for varargs
2131 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2133 if (mode == VOIDmode)
2136 return GEN_INT (cum->maybe_vaarg
2137 ? (cum->sse_nregs < 0
2145 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2146 &x86_64_int_parameter_registers [cum->regno],
2151 /* For now, pass fp/complex values on the stack. */
2160 if (words <= cum->nregs)
2161 ret = gen_rtx_REG (mode, cum->regno);
2165 ret = gen_rtx_REG (mode, cum->sse_regno);
2169 if (TARGET_DEBUG_ARG)
2172 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
2173 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2176 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO (ret) ]);
2178 fprintf (stderr, ", stack");
2180 fprintf (stderr, " )\n");
2186 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2190 ix86_function_arg_boundary (mode, type)
2191 enum machine_mode mode;
2196 return PARM_BOUNDARY;
2198 align = TYPE_ALIGN (type);
2200 align = GET_MODE_ALIGNMENT (mode);
2201 if (align < PARM_BOUNDARY)
2202 align = PARM_BOUNDARY;
2208 /* Return true if N is a possible register number of function value. */
2210 ix86_function_value_regno_p (regno)
2215 return ((regno) == 0
2216 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2217 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2219 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2220 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2221 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2224 /* Define how to find the value returned by a function.
2225 VALTYPE is the data type of the value (as a tree).
2226 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2227 otherwise, FUNC is 0. */
2229 ix86_function_value (valtype)
2234 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2235 REGPARM_MAX, SSE_REGPARM_MAX,
2236 x86_64_int_return_registers, 0);
2237 /* For zero sized structures, construct_continer return NULL, but we need
2238 to keep rest of compiler happy by returning meaningfull value. */
2240 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2244 return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
2247 /* Return false iff type is returned in memory. */
2249 ix86_return_in_memory (type)
2252 int needed_intregs, needed_sseregs;
2255 return !examine_argument (TYPE_MODE (type), type, 1,
2256 &needed_intregs, &needed_sseregs);
2260 if (TYPE_MODE (type) == BLKmode
2261 || (VECTOR_MODE_P (TYPE_MODE (type))
2262 && int_size_in_bytes (type) == 8)
2263 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2264 && TYPE_MODE (type) != TFmode
2265 && !VECTOR_MODE_P (TYPE_MODE (type))))
2271 /* Define how to find the value returned by a library function
2272 assuming the value has mode MODE. */
2274 ix86_libcall_value (mode)
2275 enum machine_mode mode;
2285 return gen_rtx_REG (mode, FIRST_SSE_REG);
2288 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2290 return gen_rtx_REG (mode, 0);
2294 return gen_rtx_REG (mode, VALUE_REGNO (mode));
2297 /* Create the va_list data type. */
2300 ix86_build_va_list ()
2302 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2304 /* For i386 we use plain pointer to argument area. */
2306 return build_pointer_type (char_type_node);
2308 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2309 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2311 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2312 unsigned_type_node);
2313 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2314 unsigned_type_node);
2315 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2317 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2320 DECL_FIELD_CONTEXT (f_gpr) = record;
2321 DECL_FIELD_CONTEXT (f_fpr) = record;
2322 DECL_FIELD_CONTEXT (f_ovf) = record;
2323 DECL_FIELD_CONTEXT (f_sav) = record;
2325 TREE_CHAIN (record) = type_decl;
2326 TYPE_NAME (record) = type_decl;
2327 TYPE_FIELDS (record) = f_gpr;
2328 TREE_CHAIN (f_gpr) = f_fpr;
2329 TREE_CHAIN (f_fpr) = f_ovf;
2330 TREE_CHAIN (f_ovf) = f_sav;
2332 layout_type (record);
2334 /* The correct type is an array type of one element. */
2335 return build_array_type (record, build_index_type (size_zero_node));
2338 /* Perform any needed actions needed for a function that is receiving a
2339 variable number of arguments.
2343 MODE and TYPE are the mode and type of the current parameter.
2345 PRETEND_SIZE is a variable that should be set to the amount of stack
2346 that must be pushed by the prolog to pretend that our caller pushed
2349 Normally, this macro will push all remaining incoming registers on the
2350 stack and set PRETEND_SIZE to the length of the registers pushed. */
2353 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2354 CUMULATIVE_ARGS *cum;
2355 enum machine_mode mode;
2357 int *pretend_size ATTRIBUTE_UNUSED;
2361 CUMULATIVE_ARGS next_cum;
2362 rtx save_area = NULL_RTX, mem;
2375 /* Indicate to allocate space on the stack for varargs save area. */
2376 ix86_save_varrargs_registers = 1;
2378 fntype = TREE_TYPE (current_function_decl);
2379 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2380 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2381 != void_type_node));
2383 /* For varargs, we do not want to skip the dummy va_dcl argument.
2384 For stdargs, we do want to skip the last named argument. */
2387 function_arg_advance (&next_cum, mode, type, 1);
2390 save_area = frame_pointer_rtx;
2392 set = get_varargs_alias_set ();
2394 for (i = next_cum.regno; i < ix86_regparm; i++)
2396 mem = gen_rtx_MEM (Pmode,
2397 plus_constant (save_area, i * UNITS_PER_WORD));
2398 set_mem_alias_set (mem, set);
2399 emit_move_insn (mem, gen_rtx_REG (Pmode,
2400 x86_64_int_parameter_registers[i]));
2403 if (next_cum.sse_nregs)
2405 /* Now emit code to save SSE registers. The AX parameter contains number
2406 of SSE parameter regsiters used to call this function. We use
2407 sse_prologue_save insn template that produces computed jump across
2408 SSE saves. We need some preparation work to get this working. */
2410 label = gen_label_rtx ();
2411 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2413 /* Compute address to jump to :
2414 label - 5*eax + nnamed_sse_arguments*5 */
2415 tmp_reg = gen_reg_rtx (Pmode);
2416 nsse_reg = gen_reg_rtx (Pmode);
2417 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2418 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2419 gen_rtx_MULT (Pmode, nsse_reg,
2421 if (next_cum.sse_regno)
2424 gen_rtx_CONST (DImode,
2425 gen_rtx_PLUS (DImode,
2427 GEN_INT (next_cum.sse_regno * 4))));
2429 emit_move_insn (nsse_reg, label_ref);
2430 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2432 /* Compute address of memory block we save into. We always use pointer
2433 pointing 127 bytes after first byte to store - this is needed to keep
2434 instruction size limited by 4 bytes. */
2435 tmp_reg = gen_reg_rtx (Pmode);
2436 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2437 plus_constant (save_area,
2438 8 * REGPARM_MAX + 127)));
2439 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2440 set_mem_alias_set (mem, set);
2441 set_mem_align (mem, BITS_PER_WORD);
2443 /* And finally do the dirty job! */
2444 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2445 GEN_INT (next_cum.sse_regno), label));
2450 /* Implement va_start. */
2453 ix86_va_start (stdarg_p, valist, nextarg)
2458 HOST_WIDE_INT words, n_gpr, n_fpr;
2459 tree f_gpr, f_fpr, f_ovf, f_sav;
2460 tree gpr, fpr, ovf, sav, t;
2462 /* Only 64bit target needs something special. */
2465 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
2469 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2470 f_fpr = TREE_CHAIN (f_gpr);
2471 f_ovf = TREE_CHAIN (f_fpr);
2472 f_sav = TREE_CHAIN (f_ovf);
2474 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2475 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2476 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2477 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2478 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2480 /* Count number of gp and fp argument registers used. */
2481 words = current_function_args_info.words;
2482 n_gpr = current_function_args_info.regno;
2483 n_fpr = current_function_args_info.sse_regno;
2485 if (TARGET_DEBUG_ARG)
2486 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2487 (int) words, (int) n_gpr, (int) n_fpr);
2489 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2490 build_int_2 (n_gpr * 8, 0));
2491 TREE_SIDE_EFFECTS (t) = 1;
2492 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2494 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2495 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2496 TREE_SIDE_EFFECTS (t) = 1;
2497 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2499 /* Find the overflow area. */
2500 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2502 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2503 build_int_2 (words * UNITS_PER_WORD, 0));
2504 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2505 TREE_SIDE_EFFECTS (t) = 1;
2506 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2508 /* Find the register save area.
2509 Prologue of the function save it right above stack frame. */
2510 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2511 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2512 TREE_SIDE_EFFECTS (t) = 1;
2513 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2516 /* Implement va_arg. */
2518 ix86_va_arg (valist, type)
2521 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2522 tree f_gpr, f_fpr, f_ovf, f_sav;
2523 tree gpr, fpr, ovf, sav, t;
2525 rtx lab_false, lab_over = NULL_RTX;
2529 /* Only 64bit target needs something special. */
2532 return std_expand_builtin_va_arg (valist, type);
2535 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2536 f_fpr = TREE_CHAIN (f_gpr);
2537 f_ovf = TREE_CHAIN (f_fpr);
2538 f_sav = TREE_CHAIN (f_ovf);
2540 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2541 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2542 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2543 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2544 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2546 size = int_size_in_bytes (type);
2547 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2549 container = construct_container (TYPE_MODE (type), type, 0,
2550 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2552 * Pull the value out of the saved registers ...
2555 addr_rtx = gen_reg_rtx (Pmode);
2559 rtx int_addr_rtx, sse_addr_rtx;
2560 int needed_intregs, needed_sseregs;
2563 lab_over = gen_label_rtx ();
2564 lab_false = gen_label_rtx ();
2566 examine_argument (TYPE_MODE (type), type, 0,
2567 &needed_intregs, &needed_sseregs);
2570 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2571 || TYPE_ALIGN (type) > 128);
2573 /* In case we are passing structure, verify that it is consetuctive block
2574 on the register save area. If not we need to do moves. */
2575 if (!need_temp && !REG_P (container))
2577 /* Verify that all registers are strictly consetuctive */
2578 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2582 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2584 rtx slot = XVECEXP (container, 0, i);
2585 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2586 || INTVAL (XEXP (slot, 1)) != i * 16)
2594 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2596 rtx slot = XVECEXP (container, 0, i);
2597 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2598 || INTVAL (XEXP (slot, 1)) != i * 8)
2605 int_addr_rtx = addr_rtx;
2606 sse_addr_rtx = addr_rtx;
2610 int_addr_rtx = gen_reg_rtx (Pmode);
2611 sse_addr_rtx = gen_reg_rtx (Pmode);
2613 /* First ensure that we fit completely in registers. */
2616 emit_cmp_and_jump_insns (expand_expr
2617 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2618 GEN_INT ((REGPARM_MAX - needed_intregs +
2619 1) * 8), GE, const1_rtx, SImode,
2624 emit_cmp_and_jump_insns (expand_expr
2625 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2626 GEN_INT ((SSE_REGPARM_MAX -
2627 needed_sseregs + 1) * 16 +
2628 REGPARM_MAX * 8), GE, const1_rtx,
2629 SImode, 1, lab_false);
2632 /* Compute index to start of area used for integer regs. */
2635 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2636 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2637 if (r != int_addr_rtx)
2638 emit_move_insn (int_addr_rtx, r);
2642 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2643 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2644 if (r != sse_addr_rtx)
2645 emit_move_insn (sse_addr_rtx, r);
2652 /* Never use the memory itself, as it has the alias set. */
2653 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2654 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2655 set_mem_alias_set (mem, get_varargs_alias_set ());
2656 set_mem_align (mem, BITS_PER_UNIT);
2658 for (i = 0; i < XVECLEN (container, 0); i++)
2660 rtx slot = XVECEXP (container, 0, i);
2661 rtx reg = XEXP (slot, 0);
2662 enum machine_mode mode = GET_MODE (reg);
2668 if (SSE_REGNO_P (REGNO (reg)))
2670 src_addr = sse_addr_rtx;
2671 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2675 src_addr = int_addr_rtx;
2676 src_offset = REGNO (reg) * 8;
2678 src_mem = gen_rtx_MEM (mode, src_addr);
2679 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2680 src_mem = adjust_address (src_mem, mode, src_offset);
2681 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2682 emit_move_insn (dest_mem, src_mem);
2689 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2690 build_int_2 (needed_intregs * 8, 0));
2691 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2692 TREE_SIDE_EFFECTS (t) = 1;
2693 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2698 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2699 build_int_2 (needed_sseregs * 16, 0));
2700 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2701 TREE_SIDE_EFFECTS (t) = 1;
2702 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2705 emit_jump_insn (gen_jump (lab_over));
2707 emit_label (lab_false);
2710 /* ... otherwise out of the overflow area. */
2712 /* Care for on-stack alignment if needed. */
2713 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2717 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2718 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2719 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2723 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2725 emit_move_insn (addr_rtx, r);
2728 build (PLUS_EXPR, TREE_TYPE (t), t,
2729 build_int_2 (rsize * UNITS_PER_WORD, 0));
2730 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2731 TREE_SIDE_EFFECTS (t) = 1;
2732 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2735 emit_label (lab_over);
2740 /* Return nonzero if OP is general operand representable on x86_64. */
2743 x86_64_general_operand (op, mode)
2745 enum machine_mode mode;
2748 return general_operand (op, mode);
2749 if (nonimmediate_operand (op, mode))
2751 return x86_64_sign_extended_value (op);
2754 /* Return nonzero if OP is general operand representable on x86_64
2755 as either sign extended or zero extended constant. */
2758 x86_64_szext_general_operand (op, mode)
2760 enum machine_mode mode;
2763 return general_operand (op, mode);
2764 if (nonimmediate_operand (op, mode))
2766 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2769 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2772 x86_64_nonmemory_operand (op, mode)
2774 enum machine_mode mode;
2777 return nonmemory_operand (op, mode);
2778 if (register_operand (op, mode))
2780 return x86_64_sign_extended_value (op);
2783 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2786 x86_64_movabs_operand (op, mode)
2788 enum machine_mode mode;
2790 if (!TARGET_64BIT || !flag_pic)
2791 return nonmemory_operand (op, mode);
2792 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2794 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2799 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2802 x86_64_szext_nonmemory_operand (op, mode)
2804 enum machine_mode mode;
2807 return nonmemory_operand (op, mode);
2808 if (register_operand (op, mode))
2810 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2813 /* Return nonzero if OP is immediate operand representable on x86_64. */
2816 x86_64_immediate_operand (op, mode)
2818 enum machine_mode mode;
2821 return immediate_operand (op, mode);
2822 return x86_64_sign_extended_value (op);
2825 /* Return nonzero if OP is immediate operand representable on x86_64. */
2828 x86_64_zext_immediate_operand (op, mode)
2830 enum machine_mode mode ATTRIBUTE_UNUSED;
2832 return x86_64_zero_extended_value (op);
2835 /* Return nonzero if OP is (const_int 1), else return zero. */
2838 const_int_1_operand (op, mode)
2840 enum machine_mode mode ATTRIBUTE_UNUSED;
2842 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2845 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2846 reference and a constant. */
2849 symbolic_operand (op, mode)
2851 enum machine_mode mode ATTRIBUTE_UNUSED;
2853 switch (GET_CODE (op))
2861 if (GET_CODE (op) == SYMBOL_REF
2862 || GET_CODE (op) == LABEL_REF
2863 || (GET_CODE (op) == UNSPEC
2864 && (XINT (op, 1) == UNSPEC_GOT
2865 || XINT (op, 1) == UNSPEC_GOTOFF
2866 || XINT (op, 1) == UNSPEC_GOTPCREL)))
2868 if (GET_CODE (op) != PLUS
2869 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2873 if (GET_CODE (op) == SYMBOL_REF
2874 || GET_CODE (op) == LABEL_REF)
2876 /* Only @GOTOFF gets offsets. */
2877 if (GET_CODE (op) != UNSPEC
2878 || XINT (op, 1) != UNSPEC_GOTOFF)
2881 op = XVECEXP (op, 0, 0);
2882 if (GET_CODE (op) == SYMBOL_REF
2883 || GET_CODE (op) == LABEL_REF)
2892 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2895 pic_symbolic_operand (op, mode)
2897 enum machine_mode mode ATTRIBUTE_UNUSED;
2899 if (GET_CODE (op) != CONST)
2904 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2909 if (GET_CODE (op) == UNSPEC)
2911 if (GET_CODE (op) != PLUS
2912 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2915 if (GET_CODE (op) == UNSPEC)
2921 /* Return true if OP is a symbolic operand that resolves locally. */
2924 local_symbolic_operand (op, mode)
2926 enum machine_mode mode ATTRIBUTE_UNUSED;
2928 if (GET_CODE (op) == LABEL_REF)
2931 if (GET_CODE (op) == CONST
2932 && GET_CODE (XEXP (op, 0)) == PLUS
2933 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2934 op = XEXP (XEXP (op, 0), 0);
2936 if (GET_CODE (op) != SYMBOL_REF)
2939 /* These we've been told are local by varasm and encode_section_info
2941 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2944 /* There is, however, a not insubstantial body of code in the rest of
2945 the compiler that assumes it can just stick the results of
2946 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2947 /* ??? This is a hack. Should update the body of the compiler to
2948 always create a DECL an invoke ENCODE_SECTION_INFO. */
2949 if (strncmp (XSTR (op, 0), internal_label_prefix,
2950 internal_label_prefix_len) == 0)
2956 /* Test for a valid operand for a call instruction. Don't allow the
2957 arg pointer register or virtual regs since they may decay into
2958 reg + const, which the patterns can't handle. */
2961 call_insn_operand (op, mode)
2963 enum machine_mode mode ATTRIBUTE_UNUSED;
2965 /* Disallow indirect through a virtual register. This leads to
2966 compiler aborts when trying to eliminate them. */
2967 if (GET_CODE (op) == REG
2968 && (op == arg_pointer_rtx
2969 || op == frame_pointer_rtx
2970 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
2971 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
2974 /* Disallow `call 1234'. Due to varying assembler lameness this
2975 gets either rejected or translated to `call .+1234'. */
2976 if (GET_CODE (op) == CONST_INT)
2979 /* Explicitly allow SYMBOL_REF even if pic. */
2980 if (GET_CODE (op) == SYMBOL_REF)
2983 /* Half-pic doesn't allow anything but registers and constants.
2984 We've just taken care of the later. */
2986 return register_operand (op, Pmode);
2988 /* Otherwise we can allow any general_operand in the address. */
2989 return general_operand (op, Pmode);
2993 constant_call_address_operand (op, mode)
2995 enum machine_mode mode ATTRIBUTE_UNUSED;
2997 if (GET_CODE (op) == CONST
2998 && GET_CODE (XEXP (op, 0)) == PLUS
2999 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3000 op = XEXP (XEXP (op, 0), 0);
3001 return GET_CODE (op) == SYMBOL_REF;
3004 /* Match exactly zero and one. */
3007 const0_operand (op, mode)
3009 enum machine_mode mode;
3011 return op == CONST0_RTX (mode);
3015 const1_operand (op, mode)
3017 enum machine_mode mode ATTRIBUTE_UNUSED;
3019 return op == const1_rtx;
3022 /* Match 2, 4, or 8. Used for leal multiplicands. */
3025 const248_operand (op, mode)
3027 enum machine_mode mode ATTRIBUTE_UNUSED;
3029 return (GET_CODE (op) == CONST_INT
3030 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3033 /* True if this is a constant appropriate for an increment or decremenmt. */
3036 incdec_operand (op, mode)
3038 enum machine_mode mode ATTRIBUTE_UNUSED;
3040 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3041 registers, since carry flag is not set. */
3042 if (TARGET_PENTIUM4 && !optimize_size)
3044 return op == const1_rtx || op == constm1_rtx;
3047 /* Return nonzero if OP is acceptable as operand of DImode shift
3051 shiftdi_operand (op, mode)
3053 enum machine_mode mode ATTRIBUTE_UNUSED;
3056 return nonimmediate_operand (op, mode);
3058 return register_operand (op, mode);
3061 /* Return false if this is the stack pointer, or any other fake
3062 register eliminable to the stack pointer. Otherwise, this is
3065 This is used to prevent esp from being used as an index reg.
3066 Which would only happen in pathological cases. */
3069 reg_no_sp_operand (op, mode)
3071 enum machine_mode mode;
3074 if (GET_CODE (t) == SUBREG)
3076 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3079 return register_operand (op, mode);
3083 mmx_reg_operand (op, mode)
3085 enum machine_mode mode ATTRIBUTE_UNUSED;
3087 return MMX_REG_P (op);
3090 /* Return false if this is any eliminable register. Otherwise
3094 general_no_elim_operand (op, mode)
3096 enum machine_mode mode;
3099 if (GET_CODE (t) == SUBREG)
3101 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3102 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3103 || t == virtual_stack_dynamic_rtx)
3106 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3107 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3110 return general_operand (op, mode);
3113 /* Return false if this is any eliminable register. Otherwise
3114 register_operand or const_int. */
3117 nonmemory_no_elim_operand (op, mode)
3119 enum machine_mode mode;
3122 if (GET_CODE (t) == SUBREG)
3124 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3125 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3126 || t == virtual_stack_dynamic_rtx)
3129 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3132 /* Return true if op is a Q_REGS class register. */
3135 q_regs_operand (op, mode)
3137 enum machine_mode mode;
3139 if (mode != VOIDmode && GET_MODE (op) != mode)
3141 if (GET_CODE (op) == SUBREG)
3142 op = SUBREG_REG (op);
3143 return ANY_QI_REG_P (op);
3146 /* Return true if op is a NON_Q_REGS class register. */
3149 non_q_regs_operand (op, mode)
3151 enum machine_mode mode;
3153 if (mode != VOIDmode && GET_MODE (op) != mode)
3155 if (GET_CODE (op) == SUBREG)
3156 op = SUBREG_REG (op);
3157 return NON_QI_REG_P (op);
3160 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3163 sse_comparison_operator (op, mode)
3165 enum machine_mode mode ATTRIBUTE_UNUSED;
3167 enum rtx_code code = GET_CODE (op);
3170 /* Operations supported directly. */
3180 /* These are equivalent to ones above in non-IEEE comparisons. */
3187 return !TARGET_IEEE_FP;
3192 /* Return 1 if OP is a valid comparison operator in valid mode. */
3194 ix86_comparison_operator (op, mode)
3196 enum machine_mode mode;
3198 enum machine_mode inmode;
3199 enum rtx_code code = GET_CODE (op);
3200 if (mode != VOIDmode && GET_MODE (op) != mode)
3202 if (GET_RTX_CLASS (code) != '<')
3204 inmode = GET_MODE (XEXP (op, 0));
3206 if (inmode == CCFPmode || inmode == CCFPUmode)
3208 enum rtx_code second_code, bypass_code;
3209 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3210 return (bypass_code == NIL && second_code == NIL);
3217 if (inmode == CCmode || inmode == CCGCmode
3218 || inmode == CCGOCmode || inmode == CCNOmode)
3221 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3222 if (inmode == CCmode)
3226 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3234 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3237 fcmov_comparison_operator (op, mode)
3239 enum machine_mode mode;
3241 enum machine_mode inmode;
3242 enum rtx_code code = GET_CODE (op);
3243 if (mode != VOIDmode && GET_MODE (op) != mode)
3245 if (GET_RTX_CLASS (code) != '<')
3247 inmode = GET_MODE (XEXP (op, 0));
3248 if (inmode == CCFPmode || inmode == CCFPUmode)
3250 enum rtx_code second_code, bypass_code;
3251 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3252 if (bypass_code != NIL || second_code != NIL)
3254 code = ix86_fp_compare_code_to_integer (code);
3256 /* i387 supports just limited amount of conditional codes. */
3259 case LTU: case GTU: case LEU: case GEU:
3260 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3263 case ORDERED: case UNORDERED:
3271 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3274 promotable_binary_operator (op, mode)
3276 enum machine_mode mode ATTRIBUTE_UNUSED;
3278 switch (GET_CODE (op))
3281 /* Modern CPUs have same latency for HImode and SImode multiply,
3282 but 386 and 486 do HImode multiply faster. */
3283 return ix86_cpu > PROCESSOR_I486;
3295 /* Nearly general operand, but accept any const_double, since we wish
3296 to be able to drop them into memory rather than have them get pulled
3300 cmp_fp_expander_operand (op, mode)
3302 enum machine_mode mode;
3304 if (mode != VOIDmode && mode != GET_MODE (op))
3306 if (GET_CODE (op) == CONST_DOUBLE)
3308 return general_operand (op, mode);
3311 /* Match an SI or HImode register for a zero_extract. */
3314 ext_register_operand (op, mode)
3316 enum machine_mode mode ATTRIBUTE_UNUSED;
3319 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3320 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3323 if (!register_operand (op, VOIDmode))
3326 /* Be curefull to accept only registers having upper parts. */
3327 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3328 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3331 /* Return 1 if this is a valid binary floating-point operation.
3332 OP is the expression matched, and MODE is its mode. */
3335 binary_fp_operator (op, mode)
3337 enum machine_mode mode;
3339 if (mode != VOIDmode && mode != GET_MODE (op))
3342 switch (GET_CODE (op))
3348 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3356 mult_operator (op, mode)
3358 enum machine_mode mode ATTRIBUTE_UNUSED;
3360 return GET_CODE (op) == MULT;
3364 div_operator (op, mode)
3366 enum machine_mode mode ATTRIBUTE_UNUSED;
3368 return GET_CODE (op) == DIV;
3372 arith_or_logical_operator (op, mode)
3374 enum machine_mode mode;
3376 return ((mode == VOIDmode || GET_MODE (op) == mode)
3377 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3378 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3381 /* Returns 1 if OP is memory operand with a displacement. */
3384 memory_displacement_operand (op, mode)
3386 enum machine_mode mode;
3388 struct ix86_address parts;
3390 if (! memory_operand (op, mode))
3393 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3396 return parts.disp != NULL_RTX;
3399 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3400 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3402 ??? It seems likely that this will only work because cmpsi is an
3403 expander, and no actual insns use this. */
3406 cmpsi_operand (op, mode)
3408 enum machine_mode mode;
3410 if (nonimmediate_operand (op, mode))
3413 if (GET_CODE (op) == AND
3414 && GET_MODE (op) == SImode
3415 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3416 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3417 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3418 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3419 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3420 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3426 /* Returns 1 if OP is memory operand that can not be represented by the
3430 long_memory_operand (op, mode)
3432 enum machine_mode mode;
3434 if (! memory_operand (op, mode))
3437 return memory_address_length (op) != 0;
3440 /* Return nonzero if the rtx is known aligned. */
3443 aligned_operand (op, mode)
3445 enum machine_mode mode;
3447 struct ix86_address parts;
3449 if (!general_operand (op, mode))
3452 /* Registers and immediate operands are always "aligned". */
3453 if (GET_CODE (op) != MEM)
3456 /* Don't even try to do any aligned optimizations with volatiles. */
3457 if (MEM_VOLATILE_P (op))
3462 /* Pushes and pops are only valid on the stack pointer. */
3463 if (GET_CODE (op) == PRE_DEC
3464 || GET_CODE (op) == POST_INC)
3467 /* Decode the address. */
3468 if (! ix86_decompose_address (op, &parts))
3471 if (parts.base && GET_CODE (parts.base) == SUBREG)
3472 parts.base = SUBREG_REG (parts.base);
3473 if (parts.index && GET_CODE (parts.index) == SUBREG)
3474 parts.index = SUBREG_REG (parts.index);
3476 /* Look for some component that isn't known to be aligned. */
3480 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3485 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3490 if (GET_CODE (parts.disp) != CONST_INT
3491 || (INTVAL (parts.disp) & 3) != 0)
3495 /* Didn't find one -- this must be an aligned address. */
3499 /* Return true if the constant is something that can be loaded with
3500 a special instruction. Only handle 0.0 and 1.0; others are less
3504 standard_80387_constant_p (x)
3507 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3509 /* Note that on the 80387, other constants, such as pi, that we should support
3510 too. On some machines, these are much slower to load as standard constant,
3511 than to load from doubles in memory. */
3512 if (x == CONST0_RTX (GET_MODE (x)))
3514 if (x == CONST1_RTX (GET_MODE (x)))
3519 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3522 standard_sse_constant_p (x)
3525 if (GET_CODE (x) != CONST_DOUBLE)
3527 return (x == CONST0_RTX (GET_MODE (x)));
3530 /* Returns 1 if OP contains a symbol reference */
3533 symbolic_reference_mentioned_p (op)
3536 register const char *fmt;
3539 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3542 fmt = GET_RTX_FORMAT (GET_CODE (op));
3543 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3549 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3550 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3554 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3561 /* Return 1 if it is appropriate to emit `ret' instructions in the
3562 body of a function. Do this only if the epilogue is simple, needing a
3563 couple of insns. Prior to reloading, we can't tell how many registers
3564 must be saved, so return 0 then. Return 0 if there is no frame
3565 marker to de-allocate.
3567 If NON_SAVING_SETJMP is defined and true, then it is not possible
3568 for the epilogue to be simple, so return 0. This is a special case
3569 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3570 until final, but jump_optimize may need to know sooner if a
3574 ix86_can_use_return_insn_p ()
3576 struct ix86_frame frame;
3578 #ifdef NON_SAVING_SETJMP
3579 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3583 if (! reload_completed || frame_pointer_needed)
3586 /* Don't allow more than 32 pop, since that's all we can do
3587 with one instruction. */
3588 if (current_function_pops_args
3589 && current_function_args_size >= 32768)
3592 ix86_compute_frame_layout (&frame);
3593 return frame.to_allocate == 0 && frame.nregs == 0;
3596 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3598 x86_64_sign_extended_value (value)
3601 switch (GET_CODE (value))
3603 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3604 to be at least 32 and this all acceptable constants are
3605 represented as CONST_INT. */
3607 if (HOST_BITS_PER_WIDE_INT == 32)
3611 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3612 return trunc_int_for_mode (val, SImode) == val;
3616 /* For certain code models, the symbolic references are known to fit. */
3618 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3620 /* For certain code models, the code is near as well. */
3622 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3624 /* We also may accept the offsetted memory references in certain special
3627 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3628 && XINT (XEXP (value, 0), 1) == UNSPEC_GOTPCREL)
3630 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3632 rtx op1 = XEXP (XEXP (value, 0), 0);
3633 rtx op2 = XEXP (XEXP (value, 0), 1);
3634 HOST_WIDE_INT offset;
3636 if (ix86_cmodel == CM_LARGE)
3638 if (GET_CODE (op2) != CONST_INT)
3640 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3641 switch (GET_CODE (op1))
3644 /* For CM_SMALL assume that latest object is 1MB before
3645 end of 31bits boundary. We may also accept pretty
3646 large negative constants knowing that all objects are
3647 in the positive half of address space. */
3648 if (ix86_cmodel == CM_SMALL
3649 && offset < 1024*1024*1024
3650 && trunc_int_for_mode (offset, SImode) == offset)
3652 /* For CM_KERNEL we know that all object resist in the
3653 negative half of 32bits address space. We may not
3654 accept negative offsets, since they may be just off
3655 and we may accept pretty large positive ones. */
3656 if (ix86_cmodel == CM_KERNEL
3658 && trunc_int_for_mode (offset, SImode) == offset)
3662 /* These conditions are similar to SYMBOL_REF ones, just the
3663 constraints for code models differ. */
3664 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3665 && offset < 1024*1024*1024
3666 && trunc_int_for_mode (offset, SImode) == offset)
3668 if (ix86_cmodel == CM_KERNEL
3670 && trunc_int_for_mode (offset, SImode) == offset)
3683 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3685 x86_64_zero_extended_value (value)
3688 switch (GET_CODE (value))
3691 if (HOST_BITS_PER_WIDE_INT == 32)
3692 return (GET_MODE (value) == VOIDmode
3693 && !CONST_DOUBLE_HIGH (value));
3697 if (HOST_BITS_PER_WIDE_INT == 32)
3698 return INTVAL (value) >= 0;
3700 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3703 /* For certain code models, the symbolic references are known to fit. */
3705 return ix86_cmodel == CM_SMALL;
3707 /* For certain code models, the code is near as well. */
3709 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3711 /* We also may accept the offsetted memory references in certain special
3714 if (GET_CODE (XEXP (value, 0)) == PLUS)
3716 rtx op1 = XEXP (XEXP (value, 0), 0);
3717 rtx op2 = XEXP (XEXP (value, 0), 1);
3719 if (ix86_cmodel == CM_LARGE)
3721 switch (GET_CODE (op1))
3725 /* For small code model we may accept pretty large positive
3726 offsets, since one bit is available for free. Negative
3727 offsets are limited by the size of NULL pointer area
3728 specified by the ABI. */
3729 if (ix86_cmodel == CM_SMALL
3730 && GET_CODE (op2) == CONST_INT
3731 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3732 && (trunc_int_for_mode (INTVAL (op2), SImode)
3735 /* ??? For the kernel, we may accept adjustment of
3736 -0x10000000, since we know that it will just convert
3737 negative address space to positive, but perhaps this
3738 is not worthwhile. */
3741 /* These conditions are similar to SYMBOL_REF ones, just the
3742 constraints for code models differ. */
3743 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3744 && GET_CODE (op2) == CONST_INT
3745 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3746 && (trunc_int_for_mode (INTVAL (op2), SImode)
3760 /* Value should be nonzero if functions must have frame pointers.
3761 Zero means the frame pointer need not be set up (and parms may
3762 be accessed via the stack pointer) in functions that seem suitable. */
3765 ix86_frame_pointer_required ()
3767 /* If we accessed previous frames, then the generated code expects
3768 to be able to access the saved ebp value in our frame. */
3769 if (cfun->machine->accesses_prev_frame)
3772 /* Several x86 os'es need a frame pointer for other reasons,
3773 usually pertaining to setjmp. */
3774 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3777 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3778 the frame pointer by default. Turn it back on now if we've not
3779 got a leaf function. */
3780 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
3786 /* Record that the current function accesses previous call frames. */
3789 ix86_setup_frame_addresses ()
3791 cfun->machine->accesses_prev_frame = 1;
3794 static char pic_label_name[32];
3796 /* This function generates code for -fpic that loads %ebx with
3797 the return address of the caller and then returns. */
3800 ix86_asm_file_end (file)
3805 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
3808 /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related
3809 to updating relocations to a section being discarded such that this
3810 doesn't work. Ought to detect this at configure time. */
3812 /* The trick here is to create a linkonce section containing the
3813 pic label thunk, but to refer to it with an internal label.
3814 Because the label is internal, we don't have inter-dso name
3815 binding issues on hosts that don't support ".hidden".
3817 In order to use these macros, however, we must create a fake
3819 if (targetm.have_named_sections)
3821 tree decl = build_decl (FUNCTION_DECL,
3822 get_identifier ("i686.get_pc_thunk"),
3824 DECL_ONE_ONLY (decl) = 1;
3825 (*targetm.asm_out.unique_section) (decl, 0);
3826 named_section (decl, NULL);
3833 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
3834 internal (non-global) label that's being emitted, it didn't make
3835 sense to have .type information for local labels. This caused
3836 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
3837 me debug info for a label that you're declaring non-global?) this
3838 was changed to call ASM_OUTPUT_LABEL() instead. */
3840 ASM_OUTPUT_LABEL (file, pic_label_name);
3842 xops[0] = pic_offset_table_rtx;
3843 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3844 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3845 output_asm_insn ("ret", xops);
3849 load_pic_register ()
3856 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3858 if (TARGET_DEEP_BRANCH_PREDICTION)
3860 if (! pic_label_name[0])
3861 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
3862 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
3866 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
3869 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
3871 if (! TARGET_DEEP_BRANCH_PREDICTION)
3872 emit_insn (gen_popsi1 (pic_offset_table_rtx));
3874 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
3877 /* Generate an "push" pattern for input ARG. */
3883 return gen_rtx_SET (VOIDmode,
3885 gen_rtx_PRE_DEC (Pmode,
3886 stack_pointer_rtx)),
3890 /* Return 1 if we need to save REGNO. */
3892 ix86_save_reg (regno, maybe_eh_return)
3894 int maybe_eh_return;
3896 if (regno == PIC_OFFSET_TABLE_REGNUM
3897 && (current_function_uses_pic_offset_table
3898 || current_function_uses_const_pool
3899 || current_function_calls_eh_return))
3902 if (current_function_calls_eh_return && maybe_eh_return)
3907 unsigned test = EH_RETURN_DATA_REGNO (i);
3908 if (test == INVALID_REGNUM)
3915 return (regs_ever_live[regno]
3916 && !call_used_regs[regno]
3917 && !fixed_regs[regno]
3918 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
3921 /* Return number of registers to be saved on the stack. */
3929 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
3930 if (ix86_save_reg (regno, true))
3935 /* Return the offset between two registers, one to be eliminated, and the other
3936 its replacement, at the start of a routine. */
3939 ix86_initial_elimination_offset (from, to)
3943 struct ix86_frame frame;
3944 ix86_compute_frame_layout (&frame);
3946 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3947 return frame.hard_frame_pointer_offset;
3948 else if (from == FRAME_POINTER_REGNUM
3949 && to == HARD_FRAME_POINTER_REGNUM)
3950 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
3953 if (to != STACK_POINTER_REGNUM)
3955 else if (from == ARG_POINTER_REGNUM)
3956 return frame.stack_pointer_offset;
3957 else if (from != FRAME_POINTER_REGNUM)
3960 return frame.stack_pointer_offset - frame.frame_pointer_offset;
3964 /* Fill structure ix86_frame about frame of currently computed function. */
3967 ix86_compute_frame_layout (frame)
3968 struct ix86_frame *frame;
3970 HOST_WIDE_INT total_size;
3971 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
3973 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
3974 HOST_WIDE_INT size = get_frame_size ();
3976 frame->nregs = ix86_nsaved_regs ();
3979 /* Skip return address and saved base pointer. */
3980 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
3982 frame->hard_frame_pointer_offset = offset;
3984 /* Do some sanity checking of stack_alignment_needed and
3985 preferred_alignment, since i386 port is the only using those features
3986 that may break easily. */
3988 if (size && !stack_alignment_needed)
3990 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
3992 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3994 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3997 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
3998 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4000 /* Register save area */
4001 offset += frame->nregs * UNITS_PER_WORD;
4004 if (ix86_save_varrargs_registers)
4006 offset += X86_64_VARARGS_SIZE;
4007 frame->va_arg_size = X86_64_VARARGS_SIZE;
4010 frame->va_arg_size = 0;
4012 /* Align start of frame for local function. */
4013 frame->padding1 = ((offset + stack_alignment_needed - 1)
4014 & -stack_alignment_needed) - offset;
4016 offset += frame->padding1;
4018 /* Frame pointer points here. */
4019 frame->frame_pointer_offset = offset;
4023 /* Add outgoing arguments area. */
4024 if (ACCUMULATE_OUTGOING_ARGS)
4026 offset += current_function_outgoing_args_size;
4027 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4030 frame->outgoing_arguments_size = 0;
4032 /* Align stack boundary. */
4033 frame->padding2 = ((offset + preferred_alignment - 1)
4034 & -preferred_alignment) - offset;
4036 offset += frame->padding2;
4038 /* We've reached end of stack frame. */
4039 frame->stack_pointer_offset = offset;
4041 /* Size prologue needs to allocate. */
4042 frame->to_allocate =
4043 (size + frame->padding1 + frame->padding2
4044 + frame->outgoing_arguments_size + frame->va_arg_size);
4046 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4047 && current_function_is_leaf)
4049 frame->red_zone_size = frame->to_allocate;
4050 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4051 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4054 frame->red_zone_size = 0;
4055 frame->to_allocate -= frame->red_zone_size;
4056 frame->stack_pointer_offset -= frame->red_zone_size;
4058 fprintf (stderr, "nregs: %i\n", frame->nregs);
4059 fprintf (stderr, "size: %i\n", size);
4060 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4061 fprintf (stderr, "padding1: %i\n", frame->padding1);
4062 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4063 fprintf (stderr, "padding2: %i\n", frame->padding2);
4064 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4065 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4066 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4067 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4068 frame->hard_frame_pointer_offset);
4069 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4073 /* Emit code to save registers in the prologue. */
4076 ix86_emit_save_regs ()
4081 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4082 if (ix86_save_reg (regno, true))
4084 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4085 RTX_FRAME_RELATED_P (insn) = 1;
4089 /* Emit code to save registers using MOV insns. First register
4090 is restored from POINTER + OFFSET. */
4092 ix86_emit_save_regs_using_mov (pointer, offset)
4094 HOST_WIDE_INT offset;
4099 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4100 if (ix86_save_reg (regno, true))
4102 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4104 gen_rtx_REG (Pmode, regno));
4105 RTX_FRAME_RELATED_P (insn) = 1;
4106 offset += UNITS_PER_WORD;
4110 /* Expand the prologue into a bunch of separate insns. */
4113 ix86_expand_prologue ()
4116 int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
4117 || current_function_uses_const_pool)
4119 struct ix86_frame frame;
4121 HOST_WIDE_INT allocate;
4125 use_fast_prologue_epilogue
4126 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4127 if (TARGET_PROLOGUE_USING_MOVE)
4128 use_mov = use_fast_prologue_epilogue;
4130 ix86_compute_frame_layout (&frame);
4132 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4133 slower on all targets. Also sdb doesn't like it. */
4135 if (frame_pointer_needed)
4137 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4138 RTX_FRAME_RELATED_P (insn) = 1;
4140 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4141 RTX_FRAME_RELATED_P (insn) = 1;
4144 allocate = frame.to_allocate;
4145 /* In case we are dealing only with single register and empty frame,
4146 push is equivalent of the mov+add sequence. */
4147 if (allocate == 0 && frame.nregs <= 1)
4151 ix86_emit_save_regs ();
4153 allocate += frame.nregs * UNITS_PER_WORD;
4157 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4159 insn = emit_insn (gen_pro_epilogue_adjust_stack
4160 (stack_pointer_rtx, stack_pointer_rtx,
4161 GEN_INT (-allocate)));
4162 RTX_FRAME_RELATED_P (insn) = 1;
4166 /* ??? Is this only valid for Win32? */
4173 arg0 = gen_rtx_REG (SImode, 0);
4174 emit_move_insn (arg0, GEN_INT (allocate));
4176 sym = gen_rtx_MEM (FUNCTION_MODE,
4177 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4178 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4180 CALL_INSN_FUNCTION_USAGE (insn)
4181 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4182 CALL_INSN_FUNCTION_USAGE (insn));
4186 if (!frame_pointer_needed || !frame.to_allocate)
4187 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4189 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4190 -frame.nregs * UNITS_PER_WORD);
4193 #ifdef SUBTARGET_PROLOGUE
4198 load_pic_register ();
4200 /* If we are profiling, make sure no instructions are scheduled before
4201 the call to mcount. However, if -fpic, the above call will have
4203 if (current_function_profile && ! pic_reg_used)
4204 emit_insn (gen_blockage ());
4207 /* Emit code to restore saved registers using MOV insns. First register
4208 is restored from POINTER + OFFSET. */
4210 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4213 int maybe_eh_return;
4217 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4218 if (ix86_save_reg (regno, maybe_eh_return))
4220 emit_move_insn (gen_rtx_REG (Pmode, regno),
4221 adjust_address (gen_rtx_MEM (Pmode, pointer),
4223 offset += UNITS_PER_WORD;
4227 /* Restore function stack, frame, and registers. */
4230 ix86_expand_epilogue (style)
4234 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4235 struct ix86_frame frame;
4236 HOST_WIDE_INT offset;
4238 ix86_compute_frame_layout (&frame);
4240 /* Calculate start of saved registers relative to ebp. Special care
4241 must be taken for the normal return case of a function using
4242 eh_return: the eax and edx registers are marked as saved, but not
4243 restored along this path. */
4244 offset = frame.nregs;
4245 if (current_function_calls_eh_return && style != 2)
4247 offset *= -UNITS_PER_WORD;
4249 /* If we're only restoring one register and sp is not valid then
4250 using a move instruction to restore the register since it's
4251 less work than reloading sp and popping the register.
4253 The default code result in stack adjustment using add/lea instruction,
4254 while this code results in LEAVE instruction (or discrete equivalent),
4255 so it is profitable in some other cases as well. Especially when there
4256 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4257 and there is exactly one register to pop. This heruistic may need some
4258 tuning in future. */
4259 if ((!sp_valid && frame.nregs <= 1)
4260 || (TARGET_EPILOGUE_USING_MOVE
4261 && use_fast_prologue_epilogue
4262 && (frame.nregs > 1 || frame.to_allocate))
4263 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4264 || (frame_pointer_needed && TARGET_USE_LEAVE
4265 && use_fast_prologue_epilogue && frame.nregs == 1)
4266 || current_function_calls_eh_return)
4268 /* Restore registers. We can use ebp or esp to address the memory
4269 locations. If both are available, default to ebp, since offsets
4270 are known to be small. Only exception is esp pointing directly to the
4271 end of block of saved registers, where we may simplify addressing
4274 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4275 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4276 frame.to_allocate, style == 2);
4278 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4279 offset, style == 2);
4281 /* eh_return epilogues need %ecx added to the stack pointer. */
4284 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4286 if (frame_pointer_needed)
4288 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4289 tmp = plus_constant (tmp, UNITS_PER_WORD);
4290 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4292 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4293 emit_move_insn (hard_frame_pointer_rtx, tmp);
4295 emit_insn (gen_pro_epilogue_adjust_stack
4296 (stack_pointer_rtx, sa, const0_rtx));
4300 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4301 tmp = plus_constant (tmp, (frame.to_allocate
4302 + frame.nregs * UNITS_PER_WORD));
4303 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4306 else if (!frame_pointer_needed)
4307 emit_insn (gen_pro_epilogue_adjust_stack
4308 (stack_pointer_rtx, stack_pointer_rtx,
4309 GEN_INT (frame.to_allocate
4310 + frame.nregs * UNITS_PER_WORD)));
4311 /* If not an i386, mov & pop is faster than "leave". */
4312 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4313 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4316 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4317 hard_frame_pointer_rtx,
4320 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4322 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4327 /* First step is to deallocate the stack frame so that we can
4328 pop the registers. */
4331 if (!frame_pointer_needed)
4333 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4334 hard_frame_pointer_rtx,
4337 else if (frame.to_allocate)
4338 emit_insn (gen_pro_epilogue_adjust_stack
4339 (stack_pointer_rtx, stack_pointer_rtx,
4340 GEN_INT (frame.to_allocate)));
4342 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4343 if (ix86_save_reg (regno, false))
4346 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4348 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4350 if (frame_pointer_needed)
4352 /* Leave results in shorter dependency chains on CPUs that are
4353 able to grok it fast. */
4354 if (TARGET_USE_LEAVE)
4355 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4356 else if (TARGET_64BIT)
4357 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4359 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4363 /* Sibcall epilogues don't want a return instruction. */
4367 if (current_function_pops_args && current_function_args_size)
4369 rtx popc = GEN_INT (current_function_pops_args);
4371 /* i386 can only pop 64K bytes. If asked to pop more, pop
4372 return address, do explicit add, and jump indirectly to the
4375 if (current_function_pops_args >= 65536)
4377 rtx ecx = gen_rtx_REG (SImode, 2);
4379 /* There are is no "pascal" calling convention in 64bit ABI. */
4383 emit_insn (gen_popsi1 (ecx));
4384 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4385 emit_jump_insn (gen_return_indirect_internal (ecx));
4388 emit_jump_insn (gen_return_pop_internal (popc));
4391 emit_jump_insn (gen_return_internal ());
4394 /* Extract the parts of an RTL expression that is a valid memory address
4395 for an instruction. Return 0 if the structure of the address is
4396 grossly off. Return -1 if the address contains ASHIFT, so it is not
4397 strictly valid, but still used for computing length of lea instruction.
4401 ix86_decompose_address (addr, out)
4403 struct ix86_address *out;
4405 rtx base = NULL_RTX;
4406 rtx index = NULL_RTX;
4407 rtx disp = NULL_RTX;
4408 HOST_WIDE_INT scale = 1;
4409 rtx scale_rtx = NULL_RTX;
4412 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4414 else if (GET_CODE (addr) == PLUS)
4416 rtx op0 = XEXP (addr, 0);
4417 rtx op1 = XEXP (addr, 1);
4418 enum rtx_code code0 = GET_CODE (op0);
4419 enum rtx_code code1 = GET_CODE (op1);
4421 if (code0 == REG || code0 == SUBREG)
4423 if (code1 == REG || code1 == SUBREG)
4424 index = op0, base = op1; /* index + base */
4426 base = op0, disp = op1; /* base + displacement */
4428 else if (code0 == MULT)
4430 index = XEXP (op0, 0);
4431 scale_rtx = XEXP (op0, 1);
4432 if (code1 == REG || code1 == SUBREG)
4433 base = op1; /* index*scale + base */
4435 disp = op1; /* index*scale + disp */
4437 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4439 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4440 scale_rtx = XEXP (XEXP (op0, 0), 1);
4441 base = XEXP (op0, 1);
4444 else if (code0 == PLUS)
4446 index = XEXP (op0, 0); /* index + base + disp */
4447 base = XEXP (op0, 1);
4453 else if (GET_CODE (addr) == MULT)
4455 index = XEXP (addr, 0); /* index*scale */
4456 scale_rtx = XEXP (addr, 1);
4458 else if (GET_CODE (addr) == ASHIFT)
4462 /* We're called for lea too, which implements ashift on occasion. */
4463 index = XEXP (addr, 0);
4464 tmp = XEXP (addr, 1);
4465 if (GET_CODE (tmp) != CONST_INT)
4467 scale = INTVAL (tmp);
4468 if ((unsigned HOST_WIDE_INT) scale > 3)
4474 disp = addr; /* displacement */
4476 /* Extract the integral value of scale. */
4479 if (GET_CODE (scale_rtx) != CONST_INT)
4481 scale = INTVAL (scale_rtx);
4484 /* Allow arg pointer and stack pointer as index if there is not scaling */
4485 if (base && index && scale == 1
4486 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4487 || index == stack_pointer_rtx))
4494 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4495 if ((base == hard_frame_pointer_rtx
4496 || base == frame_pointer_rtx
4497 || base == arg_pointer_rtx) && !disp)
4500 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4501 Avoid this by transforming to [%esi+0]. */
4502 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4503 && base && !index && !disp
4505 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4508 /* Special case: encode reg+reg instead of reg*2. */
4509 if (!base && index && scale && scale == 2)
4510 base = index, scale = 1;
4512 /* Special case: scaling cannot be encoded without base or displacement. */
4513 if (!base && !disp && index && scale != 1)
4524 /* Return cost of the memory address x.
4525 For i386, it is better to use a complex address than let gcc copy
4526 the address into a reg and make a new pseudo. But not if the address
4527 requires to two regs - that would mean more pseudos with longer
4530 ix86_address_cost (x)
4533 struct ix86_address parts;
4536 if (!ix86_decompose_address (x, &parts))
4539 if (parts.base && GET_CODE (parts.base) == SUBREG)
4540 parts.base = SUBREG_REG (parts.base);
4541 if (parts.index && GET_CODE (parts.index) == SUBREG)
4542 parts.index = SUBREG_REG (parts.index);
4544 /* More complex memory references are better. */
4545 if (parts.disp && parts.disp != const0_rtx)
4548 /* Attempt to minimize number of registers in the address. */
4550 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4552 && (!REG_P (parts.index)
4553 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4557 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4559 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4560 && parts.base != parts.index)
4563 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
4564 since it's predecode logic can't detect the length of instructions
4565 and it degenerates to vector decoded. Increase cost of such
4566 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4567 to split such addresses or even refuse such addresses at all.
4569 Following addressing modes are affected:
4574 The first and last case may be avoidable by explicitly coding the zero in
4575 memory address, but I don't have AMD-K6 machine handy to check this
4579 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4580 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4581 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4587 /* If X is a machine specific address (i.e. a symbol or label being
4588 referenced as a displacement from the GOT implemented using an
4589 UNSPEC), then return the base term. Otherwise return X. */
4592 ix86_find_base_term (x)
4599 if (GET_CODE (x) != CONST)
4602 if (GET_CODE (term) == PLUS
4603 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4604 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4605 term = XEXP (term, 0);
4606 if (GET_CODE (term) != UNSPEC
4607 || XINT (term, 1) != UNSPEC_GOTPCREL)
4610 term = XVECEXP (term, 0, 0);
4612 if (GET_CODE (term) != SYMBOL_REF
4613 && GET_CODE (term) != LABEL_REF)
4619 if (GET_CODE (x) != PLUS
4620 || XEXP (x, 0) != pic_offset_table_rtx
4621 || GET_CODE (XEXP (x, 1)) != CONST)
4624 term = XEXP (XEXP (x, 1), 0);
4626 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4627 term = XEXP (term, 0);
4629 if (GET_CODE (term) != UNSPEC
4630 || XINT (term, 1) != UNSPEC_GOTOFF)
4633 term = XVECEXP (term, 0, 0);
4635 if (GET_CODE (term) != SYMBOL_REF
4636 && GET_CODE (term) != LABEL_REF)
4642 /* Determine if a given CONST RTX is a valid memory displacement
4646 legitimate_pic_address_disp_p (disp)
4649 /* In 64bit mode we can allow direct addresses of symbols and labels
4650 when they are not dynamic symbols. */
4654 if (GET_CODE (disp) == CONST)
4656 /* ??? Handle PIC code models */
4657 if (GET_CODE (x) == PLUS
4658 && (GET_CODE (XEXP (x, 1)) == CONST_INT
4659 && ix86_cmodel == CM_SMALL_PIC
4660 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4661 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4663 if (local_symbolic_operand (x, Pmode))
4666 if (GET_CODE (disp) != CONST)
4668 disp = XEXP (disp, 0);
4672 /* We are unsafe to allow PLUS expressions. This limit allowed distance
4673 of GOT tables. We should not need these anyway. */
4674 if (GET_CODE (disp) != UNSPEC
4675 || XINT (disp, 1) != UNSPEC_GOTPCREL)
4678 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4679 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4684 if (GET_CODE (disp) == PLUS)
4686 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4688 disp = XEXP (disp, 0);
4691 if (GET_CODE (disp) != UNSPEC)
4694 /* Must be @GOT or @GOTOFF. */
4695 switch (XINT (disp, 1))
4698 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
4700 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4706 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
4707 memory address for an instruction. The MODE argument is the machine mode
4708 for the MEM expression that wants to use this address.
4710 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
4711 convert common non-canonical forms to canonical form so that they will
4715 legitimate_address_p (mode, addr, strict)
4716 enum machine_mode mode;
4720 struct ix86_address parts;
4721 rtx base, index, disp;
4722 HOST_WIDE_INT scale;
4723 const char *reason = NULL;
4724 rtx reason_rtx = NULL_RTX;
4726 if (TARGET_DEBUG_ADDR)
4729 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
4730 GET_MODE_NAME (mode), strict);
4734 if (ix86_decompose_address (addr, &parts) <= 0)
4736 reason = "decomposition failed";
4741 index = parts.index;
4743 scale = parts.scale;
4745 /* Validate base register.
4747 Don't allow SUBREG's here, it can lead to spill failures when the base
4748 is one word out of a two word structure, which is represented internally
4756 if (GET_CODE (base) == SUBREG)
4757 reg = SUBREG_REG (base);
4761 if (GET_CODE (reg) != REG)
4763 reason = "base is not a register";
4767 if (GET_MODE (base) != Pmode)
4769 reason = "base is not in Pmode";
4773 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
4774 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
4776 reason = "base is not valid";
4781 /* Validate index register.
4783 Don't allow SUBREG's here, it can lead to spill failures when the index
4784 is one word out of a two word structure, which is represented internally
4792 if (GET_CODE (index) == SUBREG)
4793 reg = SUBREG_REG (index);
4797 if (GET_CODE (reg) != REG)
4799 reason = "index is not a register";
4803 if (GET_MODE (index) != Pmode)
4805 reason = "index is not in Pmode";
4809 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
4810 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
4812 reason = "index is not valid";
4817 /* Validate scale factor. */
4820 reason_rtx = GEN_INT (scale);
4823 reason = "scale without index";
4827 if (scale != 2 && scale != 4 && scale != 8)
4829 reason = "scale is not a valid multiplier";
4834 /* Validate displacement. */
4839 if (!CONSTANT_ADDRESS_P (disp))
4841 reason = "displacement is not constant";
4847 if (!x86_64_sign_extended_value (disp))
4849 reason = "displacement is out of range";
4855 if (GET_CODE (disp) == CONST_DOUBLE)
4857 reason = "displacement is a const_double";
4862 if (flag_pic && SYMBOLIC_CONST (disp))
4864 if (TARGET_64BIT && (index || base))
4866 reason = "non-constant pic memory reference";
4869 if (! legitimate_pic_address_disp_p (disp))
4871 reason = "displacement is an invalid pic construct";
4875 /* This code used to verify that a symbolic pic displacement
4876 includes the pic_offset_table_rtx register.
4878 While this is good idea, unfortunately these constructs may
4879 be created by "adds using lea" optimization for incorrect
4888 This code is nonsensical, but results in addressing
4889 GOT table with pic_offset_table_rtx base. We can't
4890 just refuse it easily, since it gets matched by
4891 "addsi3" pattern, that later gets split to lea in the
4892 case output register differs from input. While this
4893 can be handled by separate addsi pattern for this case
4894 that never results in lea, this seems to be easier and
4895 correct fix for crash to disable this test. */
4897 else if (HALF_PIC_P ())
4899 if (! HALF_PIC_ADDRESS_P (disp)
4900 || (base != NULL_RTX || index != NULL_RTX))
4902 reason = "displacement is an invalid half-pic reference";
4908 /* Everything looks valid. */
4909 if (TARGET_DEBUG_ADDR)
4910 fprintf (stderr, "Success.\n");
4914 if (TARGET_DEBUG_ADDR)
4916 fprintf (stderr, "Error: %s\n", reason);
4917 debug_rtx (reason_rtx);
4922 /* Return an unique alias set for the GOT. */
4924 static HOST_WIDE_INT
4925 ix86_GOT_alias_set ()
4927 static HOST_WIDE_INT set = -1;
4929 set = new_alias_set ();
4933 /* Return a legitimate reference for ORIG (an address) using the
4934 register REG. If REG is 0, a new pseudo is generated.
4936 There are two types of references that must be handled:
4938 1. Global data references must load the address from the GOT, via
4939 the PIC reg. An insn is emitted to do this load, and the reg is
4942 2. Static data references, constant pool addresses, and code labels
4943 compute the address as an offset from the GOT, whose base is in
4944 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
4945 differentiate them from global data objects. The returned
4946 address is the PIC reg + an unspec constant.
4948 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
4949 reg also appears in the address. */
4952 legitimize_pic_address (orig, reg)
4960 if (local_symbolic_operand (addr, Pmode))
4962 /* In 64bit mode we can address such objects directly. */
4967 /* This symbol may be referenced via a displacement from the PIC
4968 base address (@GOTOFF). */
4970 current_function_uses_pic_offset_table = 1;
4971 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4972 new = gen_rtx_CONST (Pmode, new);
4973 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4977 emit_move_insn (reg, new);
4982 else if (GET_CODE (addr) == SYMBOL_REF)
4986 current_function_uses_pic_offset_table = 1;
4987 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
4988 new = gen_rtx_CONST (Pmode, new);
4989 new = gen_rtx_MEM (Pmode, new);
4990 RTX_UNCHANGING_P (new) = 1;
4991 set_mem_alias_set (new, ix86_GOT_alias_set ());
4994 reg = gen_reg_rtx (Pmode);
4995 /* Use directly gen_movsi, otherwise the address is loaded
4996 into register for CSE. We don't want to CSE this addresses,
4997 instead we CSE addresses from the GOT table, so skip this. */
4998 emit_insn (gen_movsi (reg, new));
5003 /* This symbol must be referenced via a load from the
5004 Global Offset Table (@GOT). */
5006 current_function_uses_pic_offset_table = 1;
5007 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5008 new = gen_rtx_CONST (Pmode, new);
5009 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5010 new = gen_rtx_MEM (Pmode, new);
5011 RTX_UNCHANGING_P (new) = 1;
5012 set_mem_alias_set (new, ix86_GOT_alias_set ());
5015 reg = gen_reg_rtx (Pmode);
5016 emit_move_insn (reg, new);
5022 if (GET_CODE (addr) == CONST)
5024 addr = XEXP (addr, 0);
5026 /* We must match stuff we generate before. Assume the only
5027 unspecs that can get here are ours. Not that we could do
5028 anything with them anyway... */
5029 if (GET_CODE (addr) == UNSPEC
5030 || (GET_CODE (addr) == PLUS
5031 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5033 if (GET_CODE (addr) != PLUS)
5036 if (GET_CODE (addr) == PLUS)
5038 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5040 /* Check first to see if this is a constant offset from a @GOTOFF
5041 symbol reference. */
5042 if (local_symbolic_operand (op0, Pmode)
5043 && GET_CODE (op1) == CONST_INT)
5047 current_function_uses_pic_offset_table = 1;
5048 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5050 new = gen_rtx_PLUS (Pmode, new, op1);
5051 new = gen_rtx_CONST (Pmode, new);
5052 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5056 emit_move_insn (reg, new);
5062 /* ??? We need to limit offsets here. */
5067 base = legitimize_pic_address (XEXP (addr, 0), reg);
5068 new = legitimize_pic_address (XEXP (addr, 1),
5069 base == reg ? NULL_RTX : reg);
5071 if (GET_CODE (new) == CONST_INT)
5072 new = plus_constant (base, INTVAL (new));
5075 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5077 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5078 new = XEXP (new, 1);
5080 new = gen_rtx_PLUS (Pmode, base, new);
5088 /* Try machine-dependent ways of modifying an illegitimate address
5089 to be legitimate. If we find one, return the new, valid address.
5090 This macro is used in only one place: `memory_address' in explow.c.
5092 OLDX is the address as it was before break_out_memory_refs was called.
5093 In some cases it is useful to look at this to decide what needs to be done.
5095 MODE and WIN are passed so that this macro can use
5096 GO_IF_LEGITIMATE_ADDRESS.
5098 It is always safe for this macro to do nothing. It exists to recognize
5099 opportunities to optimize the output.
5101 For the 80386, we handle X+REG by loading X into a register R and
5102 using R+REG. R will go in a general reg and indexing will be used.
5103 However, if REG is a broken-out memory address or multiplication,
5104 nothing needs to be done because REG can certainly go in a general reg.
5106 When -fpic is used, special handling is needed for symbolic references.
5107 See comments by legitimize_pic_address in i386.c for details. */
5110 legitimize_address (x, oldx, mode)
5112 register rtx oldx ATTRIBUTE_UNUSED;
5113 enum machine_mode mode;
5118 if (TARGET_DEBUG_ADDR)
5120 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5121 GET_MODE_NAME (mode));
5125 if (flag_pic && SYMBOLIC_CONST (x))
5126 return legitimize_pic_address (x, 0);
5128 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5129 if (GET_CODE (x) == ASHIFT
5130 && GET_CODE (XEXP (x, 1)) == CONST_INT
5131 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5134 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5135 GEN_INT (1 << log));
5138 if (GET_CODE (x) == PLUS)
5140 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5142 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5143 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5144 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5147 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5148 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5149 GEN_INT (1 << log));
5152 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5153 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5154 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5157 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5158 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5159 GEN_INT (1 << log));
5162 /* Put multiply first if it isn't already. */
5163 if (GET_CODE (XEXP (x, 1)) == MULT)
5165 rtx tmp = XEXP (x, 0);
5166 XEXP (x, 0) = XEXP (x, 1);
5171 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5172 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5173 created by virtual register instantiation, register elimination, and
5174 similar optimizations. */
5175 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5178 x = gen_rtx_PLUS (Pmode,
5179 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5180 XEXP (XEXP (x, 1), 0)),
5181 XEXP (XEXP (x, 1), 1));
5185 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5186 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5187 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5188 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5189 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5190 && CONSTANT_P (XEXP (x, 1)))
5193 rtx other = NULL_RTX;
5195 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5197 constant = XEXP (x, 1);
5198 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5200 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5202 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5203 other = XEXP (x, 1);
5211 x = gen_rtx_PLUS (Pmode,
5212 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5213 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5214 plus_constant (other, INTVAL (constant)));
5218 if (changed && legitimate_address_p (mode, x, FALSE))
5221 if (GET_CODE (XEXP (x, 0)) == MULT)
5224 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5227 if (GET_CODE (XEXP (x, 1)) == MULT)
5230 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5234 && GET_CODE (XEXP (x, 1)) == REG
5235 && GET_CODE (XEXP (x, 0)) == REG)
5238 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5241 x = legitimize_pic_address (x, 0);
5244 if (changed && legitimate_address_p (mode, x, FALSE))
5247 if (GET_CODE (XEXP (x, 0)) == REG)
5249 register rtx temp = gen_reg_rtx (Pmode);
5250 register rtx val = force_operand (XEXP (x, 1), temp);
5252 emit_move_insn (temp, val);
5258 else if (GET_CODE (XEXP (x, 1)) == REG)
5260 register rtx temp = gen_reg_rtx (Pmode);
5261 register rtx val = force_operand (XEXP (x, 0), temp);
5263 emit_move_insn (temp, val);
5273 /* Print an integer constant expression in assembler syntax. Addition
5274 and subtraction are the only arithmetic that may appear in these
5275 expressions. FILE is the stdio stream to write to, X is the rtx, and
5276 CODE is the operand print code from the output string. */
5279 output_pic_addr_const (file, x, code)
5286 switch (GET_CODE (x))
5296 assemble_name (file, XSTR (x, 0));
5297 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
5298 fputs ("@PLT", file);
5305 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5306 assemble_name (asm_out_file, buf);
5310 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5314 /* This used to output parentheses around the expression,
5315 but that does not work on the 386 (either ATT or BSD assembler). */
5316 output_pic_addr_const (file, XEXP (x, 0), code);
5320 if (GET_MODE (x) == VOIDmode)
5322 /* We can use %d if the number is <32 bits and positive. */
5323 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5324 fprintf (file, "0x%lx%08lx",
5325 (unsigned long) CONST_DOUBLE_HIGH (x),
5326 (unsigned long) CONST_DOUBLE_LOW (x));
5328 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5331 /* We can't handle floating point constants;
5332 PRINT_OPERAND must handle them. */
5333 output_operand_lossage ("floating constant misused");
5337 /* Some assemblers need integer constants to appear first. */
5338 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5340 output_pic_addr_const (file, XEXP (x, 0), code);
5342 output_pic_addr_const (file, XEXP (x, 1), code);
5344 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5346 output_pic_addr_const (file, XEXP (x, 1), code);
5348 output_pic_addr_const (file, XEXP (x, 0), code);
5355 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5356 output_pic_addr_const (file, XEXP (x, 0), code);
5358 output_pic_addr_const (file, XEXP (x, 1), code);
5359 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5363 if (XVECLEN (x, 0) != 1)
5365 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5366 switch (XINT (x, 1))
5369 fputs ("@GOT", file);
5372 fputs ("@GOTOFF", file);
5375 fputs ("@PLT", file);
5377 case UNSPEC_GOTPCREL:
5378 fputs ("@GOTPCREL(%RIP)", file);
5381 output_operand_lossage ("invalid UNSPEC as operand");
5387 output_operand_lossage ("invalid expression as operand");
5391 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5392 We need to handle our special PIC relocations. */
5395 i386_dwarf_output_addr_const (file, x)
5400 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
5404 fprintf (file, "%s", ASM_LONG);
5407 output_pic_addr_const (file, x, '\0');
5409 output_addr_const (file, x);
5413 /* In the name of slightly smaller debug output, and to cater to
5414 general assembler losage, recognize PIC+GOTOFF and turn it back
5415 into a direct symbol reference. */
5418 i386_simplify_dwarf_addr (orig_x)
5423 if (GET_CODE (x) == MEM)
5428 if (GET_CODE (x) != CONST
5429 || GET_CODE (XEXP (x, 0)) != UNSPEC
5430 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
5431 || GET_CODE (orig_x) != MEM)
5433 return XVECEXP (XEXP (x, 0), 0, 0);
5436 if (GET_CODE (x) != PLUS
5437 || GET_CODE (XEXP (x, 1)) != CONST)
5440 if (GET_CODE (XEXP (x, 0)) == REG
5441 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5442 /* %ebx + GOT/GOTOFF */
5444 else if (GET_CODE (XEXP (x, 0)) == PLUS)
5446 /* %ebx + %reg * scale + GOT/GOTOFF */
5448 if (GET_CODE (XEXP (y, 0)) == REG
5449 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5451 else if (GET_CODE (XEXP (y, 1)) == REG
5452 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5456 if (GET_CODE (y) != REG
5457 && GET_CODE (y) != MULT
5458 && GET_CODE (y) != ASHIFT)
5464 x = XEXP (XEXP (x, 1), 0);
5465 if (GET_CODE (x) == UNSPEC
5466 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
5467 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
5470 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
5471 return XVECEXP (x, 0, 0);
5474 if (GET_CODE (x) == PLUS
5475 && GET_CODE (XEXP (x, 0)) == UNSPEC
5476 && GET_CODE (XEXP (x, 1)) == CONST_INT
5477 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
5478 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
5479 && GET_CODE (orig_x) != MEM)))
5481 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
5483 return gen_rtx_PLUS (Pmode, y, x);
5491 put_condition_code (code, mode, reverse, fp, file)
5493 enum machine_mode mode;
5499 if (mode == CCFPmode || mode == CCFPUmode)
5501 enum rtx_code second_code, bypass_code;
5502 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
5503 if (bypass_code != NIL || second_code != NIL)
5505 code = ix86_fp_compare_code_to_integer (code);
5509 code = reverse_condition (code);
5520 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
5525 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
5526 Those same assemblers have the same but opposite losage on cmov. */
5529 suffix = fp ? "nbe" : "a";
5532 if (mode == CCNOmode || mode == CCGOCmode)
5534 else if (mode == CCmode || mode == CCGCmode)
5545 if (mode == CCNOmode || mode == CCGOCmode)
5547 else if (mode == CCmode || mode == CCGCmode)
5556 suffix = fp ? "nb" : "ae";
5559 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
5569 suffix = fp ? "u" : "p";
5572 suffix = fp ? "nu" : "np";
5577 fputs (suffix, file);
5581 print_reg (x, code, file)
5586 if (REGNO (x) == ARG_POINTER_REGNUM
5587 || REGNO (x) == FRAME_POINTER_REGNUM
5588 || REGNO (x) == FLAGS_REG
5589 || REGNO (x) == FPSR_REG)
5592 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
5595 if (code == 'w' || MMX_REG_P (x))
5597 else if (code == 'b')
5599 else if (code == 'k')
5601 else if (code == 'q')
5603 else if (code == 'y')
5605 else if (code == 'h')
5608 code = GET_MODE_SIZE (GET_MODE (x));
5610 /* Irritatingly, AMD extended registers use different naming convention
5611 from the normal registers. */
5612 if (REX_INT_REG_P (x))
5619 error ("extended registers have no high halves");
5622 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
5625 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
5628 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
5631 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
5634 error ("unsupported operand size for extended register");
5642 if (STACK_TOP_P (x))
5644 fputs ("st(0)", file);
5651 if (! ANY_FP_REG_P (x))
5652 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
5656 fputs (hi_reg_name[REGNO (x)], file);
5659 fputs (qi_reg_name[REGNO (x)], file);
5662 fputs (qi_high_reg_name[REGNO (x)], file);
5670 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
5671 C -- print opcode suffix for set/cmov insn.
5672 c -- like C, but print reversed condition
5673 F,f -- likewise, but for floating-point.
5674 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
5676 R -- print the prefix for register names.
5677 z -- print the opcode suffix for the size of the current operand.
5678 * -- print a star (in certain assembler syntax)
5679 A -- print an absolute memory reference.
5680 w -- print the operand as if it's a "word" (HImode) even if it isn't.
5681 s -- print a shift double count, followed by the assemblers argument
5683 b -- print the QImode name of the register for the indicated operand.
5684 %b0 would print %al if operands[0] is reg 0.
5685 w -- likewise, print the HImode name of the register.
5686 k -- likewise, print the SImode name of the register.
5687 q -- likewise, print the DImode name of the register.
5688 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
5689 y -- print "st(0)" instead of "st" as a register.
5690 D -- print condition for SSE cmp instruction.
5691 P -- if PIC, print an @PLT suffix.
5692 X -- don't print any sort of PIC '@' suffix for a symbol.
5696 print_operand (file, x, code)
5706 if (ASSEMBLER_DIALECT == ASM_ATT)
5711 if (ASSEMBLER_DIALECT == ASM_ATT)
5713 else if (ASSEMBLER_DIALECT == ASM_INTEL)
5715 /* Intel syntax. For absolute addresses, registers should not
5716 be surrounded by braces. */
5717 if (GET_CODE (x) != REG)
5720 PRINT_OPERAND (file, x, 0);
5728 PRINT_OPERAND (file, x, 0);
5733 if (ASSEMBLER_DIALECT == ASM_ATT)
5738 if (ASSEMBLER_DIALECT == ASM_ATT)
5743 if (ASSEMBLER_DIALECT == ASM_ATT)
5748 if (ASSEMBLER_DIALECT == ASM_ATT)
5753 if (ASSEMBLER_DIALECT == ASM_ATT)
5758 if (ASSEMBLER_DIALECT == ASM_ATT)
5763 /* 387 opcodes don't get size suffixes if the operands are
5765 if (STACK_REG_P (x))
5768 /* Likewise if using Intel opcodes. */
5769 if (ASSEMBLER_DIALECT == ASM_INTEL)
5772 /* This is the size of op from size of operand. */
5773 switch (GET_MODE_SIZE (GET_MODE (x)))
5776 #ifdef HAVE_GAS_FILDS_FISTS
5782 if (GET_MODE (x) == SFmode)
5797 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5799 #ifdef GAS_MNEMONICS
5825 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
5827 PRINT_OPERAND (file, x, 0);
5833 /* Little bit of braindamage here. The SSE compare instructions
5834 does use completely different names for the comparisons that the
5835 fp conditional moves. */
5836 switch (GET_CODE (x))
5851 fputs ("unord", file);
5855 fputs ("neq", file);
5859 fputs ("nlt", file);
5863 fputs ("nle", file);
5866 fputs ("ord", file);
5874 #ifdef CMOV_SUN_AS_SYNTAX
5875 if (ASSEMBLER_DIALECT == ASM_ATT)
5877 switch (GET_MODE (x))
5879 case HImode: putc ('w', file); break;
5881 case SFmode: putc ('l', file); break;
5883 case DFmode: putc ('q', file); break;
5891 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
5894 #ifdef CMOV_SUN_AS_SYNTAX
5895 if (ASSEMBLER_DIALECT == ASM_ATT)
5898 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
5901 /* Like above, but reverse condition */
5903 /* Check to see if argument to %c is really a constant
5904 and not a condition code which needs to be reversed. */
5905 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
5907 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
5910 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
5913 #ifdef CMOV_SUN_AS_SYNTAX
5914 if (ASSEMBLER_DIALECT == ASM_ATT)
5917 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
5923 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
5926 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5929 int pred_val = INTVAL (XEXP (x, 0));
5931 if (pred_val < REG_BR_PROB_BASE * 45 / 100
5932 || pred_val > REG_BR_PROB_BASE * 55 / 100)
5934 int taken = pred_val > REG_BR_PROB_BASE / 2;
5935 int cputaken = final_forward_branch_p (current_output_insn) == 0;
5937 /* Emit hints only in the case default branch prediction
5938 heruistics would fail. */
5939 if (taken != cputaken)
5941 /* We use 3e (DS) prefix for taken branches and
5942 2e (CS) prefix for not taken branches. */
5944 fputs ("ds ; ", file);
5946 fputs ("cs ; ", file);
5953 output_operand_lossage ("invalid operand code `%c'", code);
5957 if (GET_CODE (x) == REG)
5959 PRINT_REG (x, code, file);
5962 else if (GET_CODE (x) == MEM)
5964 /* No `byte ptr' prefix for call instructions. */
5965 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
5968 switch (GET_MODE_SIZE (GET_MODE (x)))
5970 case 1: size = "BYTE"; break;
5971 case 2: size = "WORD"; break;
5972 case 4: size = "DWORD"; break;
5973 case 8: size = "QWORD"; break;
5974 case 12: size = "XWORD"; break;
5975 case 16: size = "XMMWORD"; break;
5980 /* Check for explicit size override (codes 'b', 'w' and 'k') */
5983 else if (code == 'w')
5985 else if (code == 'k')
5989 fputs (" PTR ", file);
5993 if (flag_pic && CONSTANT_ADDRESS_P (x))
5994 output_pic_addr_const (file, x, code);
5995 /* Avoid (%rip) for call operands. */
5996 else if (CONSTANT_ADDRESS_P (x) && code =='P'
5997 && GET_CODE (x) != CONST_INT)
5998 output_addr_const (file, x);
5999 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6000 output_operand_lossage ("invalid constraints for operand");
6005 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6010 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6011 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6013 if (ASSEMBLER_DIALECT == ASM_ATT)
6015 fprintf (file, "0x%lx", l);
6018 /* These float cases don't actually occur as immediate operands. */
6019 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6024 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6025 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6026 fprintf (file, "%s", dstr);
6029 else if (GET_CODE (x) == CONST_DOUBLE
6030 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
6035 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6036 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6037 fprintf (file, "%s", dstr);
6043 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6045 if (ASSEMBLER_DIALECT == ASM_ATT)
6048 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6049 || GET_CODE (x) == LABEL_REF)
6051 if (ASSEMBLER_DIALECT == ASM_ATT)
6054 fputs ("OFFSET FLAT:", file);
6057 if (GET_CODE (x) == CONST_INT)
6058 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6060 output_pic_addr_const (file, x, code);
6062 output_addr_const (file, x);
6066 /* Print a memory operand whose address is ADDR. */
6069 print_operand_address (file, addr)
6073 struct ix86_address parts;
6074 rtx base, index, disp;
6077 if (! ix86_decompose_address (addr, &parts))
6081 index = parts.index;
6083 scale = parts.scale;
6085 if (!base && !index)
6087 /* Displacement only requires special attention. */
6089 if (GET_CODE (disp) == CONST_INT)
6091 if (ASSEMBLER_DIALECT == ASM_INTEL)
6093 if (USER_LABEL_PREFIX[0] == 0)
6095 fputs ("ds:", file);
6097 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6100 output_pic_addr_const (file, addr, 0);
6102 output_addr_const (file, addr);
6104 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6105 if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
6106 fputs ("(%rip)", file);
6110 if (ASSEMBLER_DIALECT == ASM_ATT)
6115 output_pic_addr_const (file, disp, 0);
6116 else if (GET_CODE (disp) == LABEL_REF)
6117 output_asm_label (disp);
6119 output_addr_const (file, disp);
6124 PRINT_REG (base, 0, file);
6128 PRINT_REG (index, 0, file);
6130 fprintf (file, ",%d", scale);
6136 rtx offset = NULL_RTX;
6140 /* Pull out the offset of a symbol; print any symbol itself. */
6141 if (GET_CODE (disp) == CONST
6142 && GET_CODE (XEXP (disp, 0)) == PLUS
6143 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6145 offset = XEXP (XEXP (disp, 0), 1);
6146 disp = gen_rtx_CONST (VOIDmode,
6147 XEXP (XEXP (disp, 0), 0));
6151 output_pic_addr_const (file, disp, 0);
6152 else if (GET_CODE (disp) == LABEL_REF)
6153 output_asm_label (disp);
6154 else if (GET_CODE (disp) == CONST_INT)
6157 output_addr_const (file, disp);
6163 PRINT_REG (base, 0, file);
6166 if (INTVAL (offset) >= 0)
6168 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6172 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6179 PRINT_REG (index, 0, file);
6181 fprintf (file, "*%d", scale);
6188 /* Split one or more DImode RTL references into pairs of SImode
6189 references. The RTL can be REG, offsettable MEM, integer constant, or
6190 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6191 split and "num" is its length. lo_half and hi_half are output arrays
6192 that parallel "operands". */
6195 split_di (operands, num, lo_half, hi_half)
6198 rtx lo_half[], hi_half[];
6202 rtx op = operands[num];
6204 /* simplify_subreg refuse to split volatile memory addresses,
6205 but we still have to handle it. */
6206 if (GET_CODE (op) == MEM)
6208 lo_half[num] = adjust_address (op, SImode, 0);
6209 hi_half[num] = adjust_address (op, SImode, 4);
6213 lo_half[num] = simplify_gen_subreg (SImode, op,
6214 GET_MODE (op) == VOIDmode
6215 ? DImode : GET_MODE (op), 0);
6216 hi_half[num] = simplify_gen_subreg (SImode, op,
6217 GET_MODE (op) == VOIDmode
6218 ? DImode : GET_MODE (op), 4);
6222 /* Split one or more TImode RTL references into pairs of SImode
6223 references. The RTL can be REG, offsettable MEM, integer constant, or
6224 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6225 split and "num" is its length. lo_half and hi_half are output arrays
6226 that parallel "operands". */
6229 split_ti (operands, num, lo_half, hi_half)
6232 rtx lo_half[], hi_half[];
6236 rtx op = operands[num];
6238 /* simplify_subreg refuse to split volatile memory addresses, but we
6239 still have to handle it. */
6240 if (GET_CODE (op) == MEM)
6242 lo_half[num] = adjust_address (op, DImode, 0);
6243 hi_half[num] = adjust_address (op, DImode, 8);
6247 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6248 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6253 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6254 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6255 is the expression of the binary operation. The output may either be
6256 emitted here, or returned to the caller, like all output_* functions.
6258 There is no guarantee that the operands are the same mode, as they
6259 might be within FLOAT or FLOAT_EXTEND expressions. */
6261 #ifndef SYSV386_COMPAT
6262 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6263 wants to fix the assemblers because that causes incompatibility
6264 with gcc. No-one wants to fix gcc because that causes
6265 incompatibility with assemblers... You can use the option of
6266 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6267 #define SYSV386_COMPAT 1
6271 output_387_binary_op (insn, operands)
6275 static char buf[30];
6278 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
6280 #ifdef ENABLE_CHECKING
6281 /* Even if we do not want to check the inputs, this documents input
6282 constraints. Which helps in understanding the following code. */
6283 if (STACK_REG_P (operands[0])
6284 && ((REG_P (operands[1])
6285 && REGNO (operands[0]) == REGNO (operands[1])
6286 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6287 || (REG_P (operands[2])
6288 && REGNO (operands[0]) == REGNO (operands[2])
6289 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6290 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6296 switch (GET_CODE (operands[3]))
6299 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6300 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6308 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6309 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6317 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6318 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6326 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6327 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6341 if (GET_MODE (operands[0]) == SFmode)
6342 strcat (buf, "ss\t{%2, %0|%0, %2}");
6344 strcat (buf, "sd\t{%2, %0|%0, %2}");
6349 switch (GET_CODE (operands[3]))
6353 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6355 rtx temp = operands[2];
6356 operands[2] = operands[1];
6360 /* know operands[0] == operands[1]. */
6362 if (GET_CODE (operands[2]) == MEM)
6368 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6370 if (STACK_TOP_P (operands[0]))
6371 /* How is it that we are storing to a dead operand[2]?
6372 Well, presumably operands[1] is dead too. We can't
6373 store the result to st(0) as st(0) gets popped on this
6374 instruction. Instead store to operands[2] (which I
6375 think has to be st(1)). st(1) will be popped later.
6376 gcc <= 2.8.1 didn't have this check and generated
6377 assembly code that the Unixware assembler rejected. */
6378 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6380 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6384 if (STACK_TOP_P (operands[0]))
6385 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6387 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6392 if (GET_CODE (operands[1]) == MEM)
6398 if (GET_CODE (operands[2]) == MEM)
6404 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6407 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6408 derived assemblers, confusingly reverse the direction of
6409 the operation for fsub{r} and fdiv{r} when the
6410 destination register is not st(0). The Intel assembler
6411 doesn't have this brain damage. Read !SYSV386_COMPAT to
6412 figure out what the hardware really does. */
6413 if (STACK_TOP_P (operands[0]))
6414 p = "{p\t%0, %2|rp\t%2, %0}";
6416 p = "{rp\t%2, %0|p\t%0, %2}";
6418 if (STACK_TOP_P (operands[0]))
6419 /* As above for fmul/fadd, we can't store to st(0). */
6420 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6422 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6427 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6430 if (STACK_TOP_P (operands[0]))
6431 p = "{rp\t%0, %1|p\t%1, %0}";
6433 p = "{p\t%1, %0|rp\t%0, %1}";
6435 if (STACK_TOP_P (operands[0]))
6436 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
6438 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
6443 if (STACK_TOP_P (operands[0]))
6445 if (STACK_TOP_P (operands[1]))
6446 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6448 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
6451 else if (STACK_TOP_P (operands[1]))
6454 p = "{\t%1, %0|r\t%0, %1}";
6456 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
6462 p = "{r\t%2, %0|\t%0, %2}";
6464 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6477 /* Output code to initialize control word copies used by
6478 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
6479 is set to control word rounding downwards. */
6481 emit_i387_cw_initialization (normal, round_down)
6482 rtx normal, round_down;
6484 rtx reg = gen_reg_rtx (HImode);
6486 emit_insn (gen_x86_fnstcw_1 (normal));
6487 emit_move_insn (reg, normal);
6488 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
6490 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
6492 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
6493 emit_move_insn (round_down, reg);
6496 /* Output code for INSN to convert a float to a signed int. OPERANDS
6497 are the insn operands. The output may be [HSD]Imode and the input
6498 operand may be [SDX]Fmode. */
6501 output_fix_trunc (insn, operands)
6505 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6506 int dimode_p = GET_MODE (operands[0]) == DImode;
6508 /* Jump through a hoop or two for DImode, since the hardware has no
6509 non-popping instruction. We used to do this a different way, but
6510 that was somewhat fragile and broke with post-reload splitters. */
6511 if (dimode_p && !stack_top_dies)
6512 output_asm_insn ("fld\t%y1", operands);
6514 if (!STACK_TOP_P (operands[1]))
6517 if (GET_CODE (operands[0]) != MEM)
6520 output_asm_insn ("fldcw\t%3", operands);
6521 if (stack_top_dies || dimode_p)
6522 output_asm_insn ("fistp%z0\t%0", operands);
6524 output_asm_insn ("fist%z0\t%0", operands);
6525 output_asm_insn ("fldcw\t%2", operands);
6530 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
6531 should be used and 2 when fnstsw should be used. UNORDERED_P is true
6532 when fucom should be used. */
6535 output_fp_compare (insn, operands, eflags_p, unordered_p)
6538 int eflags_p, unordered_p;
6541 rtx cmp_op0 = operands[0];
6542 rtx cmp_op1 = operands[1];
6543 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
6548 cmp_op1 = operands[2];
6552 if (GET_MODE (operands[0]) == SFmode)
6554 return "ucomiss\t{%1, %0|%0, %1}";
6556 return "comiss\t{%1, %0|%0, %y}";
6559 return "ucomisd\t{%1, %0|%0, %1}";
6561 return "comisd\t{%1, %0|%0, %y}";
6564 if (! STACK_TOP_P (cmp_op0))
6567 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6569 if (STACK_REG_P (cmp_op1)
6571 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
6572 && REGNO (cmp_op1) != FIRST_STACK_REG)
6574 /* If both the top of the 387 stack dies, and the other operand
6575 is also a stack register that dies, then this must be a
6576 `fcompp' float compare */
6580 /* There is no double popping fcomi variant. Fortunately,
6581 eflags is immune from the fstp's cc clobbering. */
6583 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
6585 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
6593 return "fucompp\n\tfnstsw\t%0";
6595 return "fcompp\n\tfnstsw\t%0";
6608 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
6610 static const char * const alt[24] =
6622 "fcomi\t{%y1, %0|%0, %y1}",
6623 "fcomip\t{%y1, %0|%0, %y1}",
6624 "fucomi\t{%y1, %0|%0, %y1}",
6625 "fucomip\t{%y1, %0|%0, %y1}",
6632 "fcom%z2\t%y2\n\tfnstsw\t%0",
6633 "fcomp%z2\t%y2\n\tfnstsw\t%0",
6634 "fucom%z2\t%y2\n\tfnstsw\t%0",
6635 "fucomp%z2\t%y2\n\tfnstsw\t%0",
6637 "ficom%z2\t%y2\n\tfnstsw\t%0",
6638 "ficomp%z2\t%y2\n\tfnstsw\t%0",
6646 mask = eflags_p << 3;
6647 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
6648 mask |= unordered_p << 1;
6649 mask |= stack_top_dies;
6662 ix86_output_addr_vec_elt (file, value)
6666 const char *directive = ASM_LONG;
6671 directive = ASM_QUAD;
6677 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
6681 ix86_output_addr_diff_elt (file, value, rel)
6686 fprintf (file, "%s%s%d-%s%d\n",
6687 ASM_LONG, LPREFIX, value, LPREFIX, rel);
6688 else if (HAVE_AS_GOTOFF_IN_DATA)
6689 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
6691 asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
6692 ASM_LONG, LPREFIX, value);
6695 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
6699 ix86_expand_clear (dest)
6704 /* We play register width games, which are only valid after reload. */
6705 if (!reload_completed)
6708 /* Avoid HImode and its attendant prefix byte. */
6709 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
6710 dest = gen_rtx_REG (SImode, REGNO (dest));
6712 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
6714 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
6715 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
6717 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
6718 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6725 ix86_expand_move (mode, operands)
6726 enum machine_mode mode;
6729 int strict = (reload_in_progress || reload_completed);
6732 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
6734 /* Emit insns to move operands[1] into operands[0]. */
6736 if (GET_CODE (operands[0]) == MEM)
6737 operands[1] = force_reg (Pmode, operands[1]);
6740 rtx temp = operands[0];
6741 if (GET_CODE (temp) != REG)
6742 temp = gen_reg_rtx (Pmode);
6743 temp = legitimize_pic_address (operands[1], temp);
6744 if (temp == operands[0])
6751 if (GET_CODE (operands[0]) == MEM
6752 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
6753 || !push_operand (operands[0], mode))
6754 && GET_CODE (operands[1]) == MEM)
6755 operands[1] = force_reg (mode, operands[1]);
6757 if (push_operand (operands[0], mode)
6758 && ! general_no_elim_operand (operands[1], mode))
6759 operands[1] = copy_to_mode_reg (mode, operands[1]);
6761 /* Force large constants in 64bit compilation into register
6762 to get them CSEed. */
6763 if (TARGET_64BIT && mode == DImode
6764 && immediate_operand (operands[1], mode)
6765 && !x86_64_zero_extended_value (operands[1])
6766 && !register_operand (operands[0], mode)
6767 && optimize && !reload_completed && !reload_in_progress)
6768 operands[1] = copy_to_mode_reg (mode, operands[1]);
6770 if (FLOAT_MODE_P (mode))
6772 /* If we are loading a floating point constant to a register,
6773 force the value to memory now, since we'll get better code
6774 out the back end. */
6778 else if (GET_CODE (operands[1]) == CONST_DOUBLE
6779 && register_operand (operands[0], mode))
6780 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
6784 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
6790 ix86_expand_vector_move (mode, operands)
6791 enum machine_mode mode;
6794 /* Force constants other than zero into memory. We do not know how
6795 the instructions used to build constants modify the upper 64 bits
6796 of the register, once we have that information we may be able
6797 to handle some of them more efficiently. */
6798 if ((reload_in_progress | reload_completed) == 0
6799 && register_operand (operands[0], mode)
6800 && CONSTANT_P (operands[1]))
6802 rtx addr = gen_reg_rtx (Pmode);
6803 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
6804 operands[1] = gen_rtx_MEM (mode, addr);
6807 /* Make operand1 a register if it isn't already. */
6808 if ((reload_in_progress | reload_completed) == 0
6809 && !register_operand (operands[0], mode)
6810 && !register_operand (operands[1], mode)
6811 && operands[1] != CONST0_RTX (mode))
6813 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
6814 emit_move_insn (operands[0], temp);
6818 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
6821 /* Attempt to expand a binary operator. Make the expansion closer to the
6822 actual machine, then just general_operand, which will allow 3 separate
6823 memory references (one output, two input) in a single insn. */
6826 ix86_expand_binary_operator (code, mode, operands)
6828 enum machine_mode mode;
6831 int matching_memory;
6832 rtx src1, src2, dst, op, clob;
6838 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
6839 if (GET_RTX_CLASS (code) == 'c'
6840 && (rtx_equal_p (dst, src2)
6841 || immediate_operand (src1, mode)))
6848 /* If the destination is memory, and we do not have matching source
6849 operands, do things in registers. */
6850 matching_memory = 0;
6851 if (GET_CODE (dst) == MEM)
6853 if (rtx_equal_p (dst, src1))
6854 matching_memory = 1;
6855 else if (GET_RTX_CLASS (code) == 'c'
6856 && rtx_equal_p (dst, src2))
6857 matching_memory = 2;
6859 dst = gen_reg_rtx (mode);
6862 /* Both source operands cannot be in memory. */
6863 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
6865 if (matching_memory != 2)
6866 src2 = force_reg (mode, src2);
6868 src1 = force_reg (mode, src1);
6871 /* If the operation is not commutable, source 1 cannot be a constant
6872 or non-matching memory. */
6873 if ((CONSTANT_P (src1)
6874 || (!matching_memory && GET_CODE (src1) == MEM))
6875 && GET_RTX_CLASS (code) != 'c')
6876 src1 = force_reg (mode, src1);
6878 /* If optimizing, copy to regs to improve CSE */
6879 if (optimize && ! no_new_pseudos)
6881 if (GET_CODE (dst) == MEM)
6882 dst = gen_reg_rtx (mode);
6883 if (GET_CODE (src1) == MEM)
6884 src1 = force_reg (mode, src1);
6885 if (GET_CODE (src2) == MEM)
6886 src2 = force_reg (mode, src2);
6889 /* Emit the instruction. */
6891 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
6892 if (reload_in_progress)
6894 /* Reload doesn't know about the flags register, and doesn't know that
6895 it doesn't want to clobber it. We can only do this with PLUS. */
6902 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6903 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6906 /* Fix up the destination if needed. */
6907 if (dst != operands[0])
6908 emit_move_insn (operands[0], dst);
6911 /* Return TRUE or FALSE depending on whether the binary operator meets the
6912 appropriate constraints. */
6915 ix86_binary_operator_ok (code, mode, operands)
6917 enum machine_mode mode ATTRIBUTE_UNUSED;
6920 /* Both source operands cannot be in memory. */
6921 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
6923 /* If the operation is not commutable, source 1 cannot be a constant. */
6924 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
6926 /* If the destination is memory, we must have a matching source operand. */
6927 if (GET_CODE (operands[0]) == MEM
6928 && ! (rtx_equal_p (operands[0], operands[1])
6929 || (GET_RTX_CLASS (code) == 'c'
6930 && rtx_equal_p (operands[0], operands[2]))))
6932 /* If the operation is not commutable and the source 1 is memory, we must
6933 have a matching destination. */
6934 if (GET_CODE (operands[1]) == MEM
6935 && GET_RTX_CLASS (code) != 'c'
6936 && ! rtx_equal_p (operands[0], operands[1]))
6941 /* Attempt to expand a unary operator. Make the expansion closer to the
6942 actual machine, then just general_operand, which will allow 2 separate
6943 memory references (one output, one input) in a single insn. */
6946 ix86_expand_unary_operator (code, mode, operands)
6948 enum machine_mode mode;
6951 int matching_memory;
6952 rtx src, dst, op, clob;
6957 /* If the destination is memory, and we do not have matching source
6958 operands, do things in registers. */
6959 matching_memory = 0;
6960 if (GET_CODE (dst) == MEM)
6962 if (rtx_equal_p (dst, src))
6963 matching_memory = 1;
6965 dst = gen_reg_rtx (mode);
6968 /* When source operand is memory, destination must match. */
6969 if (!matching_memory && GET_CODE (src) == MEM)
6970 src = force_reg (mode, src);
6972 /* If optimizing, copy to regs to improve CSE */
6973 if (optimize && ! no_new_pseudos)
6975 if (GET_CODE (dst) == MEM)
6976 dst = gen_reg_rtx (mode);
6977 if (GET_CODE (src) == MEM)
6978 src = force_reg (mode, src);
6981 /* Emit the instruction. */
6983 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
6984 if (reload_in_progress || code == NOT)
6986 /* Reload doesn't know about the flags register, and doesn't know that
6987 it doesn't want to clobber it. */
6994 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6995 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6998 /* Fix up the destination if needed. */
6999 if (dst != operands[0])
7000 emit_move_insn (operands[0], dst);
7003 /* Return TRUE or FALSE depending on whether the unary operator meets the
7004 appropriate constraints. */
7007 ix86_unary_operator_ok (code, mode, operands)
7008 enum rtx_code code ATTRIBUTE_UNUSED;
7009 enum machine_mode mode ATTRIBUTE_UNUSED;
7010 rtx operands[2] ATTRIBUTE_UNUSED;
7012 /* If one of operands is memory, source and destination must match. */
7013 if ((GET_CODE (operands[0]) == MEM
7014 || GET_CODE (operands[1]) == MEM)
7015 && ! rtx_equal_p (operands[0], operands[1]))
7020 /* Return TRUE or FALSE depending on whether the first SET in INSN
7021 has source and destination with matching CC modes, and that the
7022 CC mode is at least as constrained as REQ_MODE. */
7025 ix86_match_ccmode (insn, req_mode)
7027 enum machine_mode req_mode;
7030 enum machine_mode set_mode;
7032 set = PATTERN (insn);
7033 if (GET_CODE (set) == PARALLEL)
7034 set = XVECEXP (set, 0, 0);
7035 if (GET_CODE (set) != SET)
7037 if (GET_CODE (SET_SRC (set)) != COMPARE)
7040 set_mode = GET_MODE (SET_DEST (set));
7044 if (req_mode != CCNOmode
7045 && (req_mode != CCmode
7046 || XEXP (SET_SRC (set), 1) != const0_rtx))
7050 if (req_mode == CCGCmode)
7054 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7058 if (req_mode == CCZmode)
7068 return (GET_MODE (SET_SRC (set)) == set_mode);
7071 /* Generate insn patterns to do an integer compare of OPERANDS. */
7074 ix86_expand_int_compare (code, op0, op1)
7078 enum machine_mode cmpmode;
7081 cmpmode = SELECT_CC_MODE (code, op0, op1);
7082 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7084 /* This is very simple, but making the interface the same as in the
7085 FP case makes the rest of the code easier. */
7086 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7087 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7089 /* Return the test that should be put into the flags user, i.e.
7090 the bcc, scc, or cmov instruction. */
7091 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7094 /* Figure out whether to use ordered or unordered fp comparisons.
7095 Return the appropriate mode to use. */
7098 ix86_fp_compare_mode (code)
7099 enum rtx_code code ATTRIBUTE_UNUSED;
7101 /* ??? In order to make all comparisons reversible, we do all comparisons
7102 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7103 all forms trapping and nontrapping comparisons, we can make inequality
7104 comparisons trapping again, since it results in better code when using
7105 FCOM based compares. */
7106 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7110 ix86_cc_mode (code, op0, op1)
7114 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7115 return ix86_fp_compare_mode (code);
7118 /* Only zero flag is needed. */
7120 case NE: /* ZF!=0 */
7122 /* Codes needing carry flag. */
7123 case GEU: /* CF=0 */
7124 case GTU: /* CF=0 & ZF=0 */
7125 case LTU: /* CF=1 */
7126 case LEU: /* CF=1 | ZF=1 */
7128 /* Codes possibly doable only with sign flag when
7129 comparing against zero. */
7130 case GE: /* SF=OF or SF=0 */
7131 case LT: /* SF<>OF or SF=1 */
7132 if (op1 == const0_rtx)
7135 /* For other cases Carry flag is not required. */
7137 /* Codes doable only with sign flag when comparing
7138 against zero, but we miss jump instruction for it
7139 so we need to use relational tests agains overflow
7140 that thus needs to be zero. */
7141 case GT: /* ZF=0 & SF=OF */
7142 case LE: /* ZF=1 | SF<>OF */
7143 if (op1 == const0_rtx)
7147 /* strcmp pattern do (use flags) and combine may ask us for proper
7156 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7159 ix86_use_fcomi_compare (code)
7160 enum rtx_code code ATTRIBUTE_UNUSED;
7162 enum rtx_code swapped_code = swap_condition (code);
7163 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7164 || (ix86_fp_comparison_cost (swapped_code)
7165 == ix86_fp_comparison_fcomi_cost (swapped_code)));
7168 /* Swap, force into registers, or otherwise massage the two operands
7169 to a fp comparison. The operands are updated in place; the new
7170 comparsion code is returned. */
7172 static enum rtx_code
7173 ix86_prepare_fp_compare_args (code, pop0, pop1)
7177 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7178 rtx op0 = *pop0, op1 = *pop1;
7179 enum machine_mode op_mode = GET_MODE (op0);
7180 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7182 /* All of the unordered compare instructions only work on registers.
7183 The same is true of the XFmode compare instructions. The same is
7184 true of the fcomi compare instructions. */
7187 && (fpcmp_mode == CCFPUmode
7188 || op_mode == XFmode
7189 || op_mode == TFmode
7190 || ix86_use_fcomi_compare (code)))
7192 op0 = force_reg (op_mode, op0);
7193 op1 = force_reg (op_mode, op1);
7197 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7198 things around if they appear profitable, otherwise force op0
7201 if (standard_80387_constant_p (op0) == 0
7202 || (GET_CODE (op0) == MEM
7203 && ! (standard_80387_constant_p (op1) == 0
7204 || GET_CODE (op1) == MEM)))
7207 tmp = op0, op0 = op1, op1 = tmp;
7208 code = swap_condition (code);
7211 if (GET_CODE (op0) != REG)
7212 op0 = force_reg (op_mode, op0);
7214 if (CONSTANT_P (op1))
7216 if (standard_80387_constant_p (op1))
7217 op1 = force_reg (op_mode, op1);
7219 op1 = validize_mem (force_const_mem (op_mode, op1));
7223 /* Try to rearrange the comparison to make it cheaper. */
7224 if (ix86_fp_comparison_cost (code)
7225 > ix86_fp_comparison_cost (swap_condition (code))
7226 && (GET_CODE (op1) == REG || !no_new_pseudos))
7229 tmp = op0, op0 = op1, op1 = tmp;
7230 code = swap_condition (code);
7231 if (GET_CODE (op0) != REG)
7232 op0 = force_reg (op_mode, op0);
7240 /* Convert comparison codes we use to represent FP comparison to integer
7241 code that will result in proper branch. Return UNKNOWN if no such code
7243 static enum rtx_code
7244 ix86_fp_compare_code_to_integer (code)
7274 /* Split comparison code CODE into comparisons we can do using branch
7275 instructions. BYPASS_CODE is comparison code for branch that will
7276 branch around FIRST_CODE and SECOND_CODE. If some of branches
7277 is not required, set value to NIL.
7278 We never require more than two branches. */
7280 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
7281 enum rtx_code code, *bypass_code, *first_code, *second_code;
7287 /* The fcomi comparison sets flags as follows:
7297 case GT: /* GTU - CF=0 & ZF=0 */
7298 case GE: /* GEU - CF=0 */
7299 case ORDERED: /* PF=0 */
7300 case UNORDERED: /* PF=1 */
7301 case UNEQ: /* EQ - ZF=1 */
7302 case UNLT: /* LTU - CF=1 */
7303 case UNLE: /* LEU - CF=1 | ZF=1 */
7304 case LTGT: /* EQ - ZF=0 */
7306 case LT: /* LTU - CF=1 - fails on unordered */
7308 *bypass_code = UNORDERED;
7310 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
7312 *bypass_code = UNORDERED;
7314 case EQ: /* EQ - ZF=1 - fails on unordered */
7316 *bypass_code = UNORDERED;
7318 case NE: /* NE - ZF=0 - fails on unordered */
7320 *second_code = UNORDERED;
7322 case UNGE: /* GEU - CF=0 - fails on unordered */
7324 *second_code = UNORDERED;
7326 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
7328 *second_code = UNORDERED;
7333 if (!TARGET_IEEE_FP)
7340 /* Return cost of comparison done fcom + arithmetics operations on AX.
7341 All following functions do use number of instructions as an cost metrics.
7342 In future this should be tweaked to compute bytes for optimize_size and
7343 take into account performance of various instructions on various CPUs. */
7345 ix86_fp_comparison_arithmetics_cost (code)
7348 if (!TARGET_IEEE_FP)
7350 /* The cost of code output by ix86_expand_fp_compare. */
7378 /* Return cost of comparison done using fcomi operation.
7379 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7381 ix86_fp_comparison_fcomi_cost (code)
7384 enum rtx_code bypass_code, first_code, second_code;
7385 /* Return arbitarily high cost when instruction is not supported - this
7386 prevents gcc from using it. */
7389 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7390 return (bypass_code != NIL || second_code != NIL) + 2;
7393 /* Return cost of comparison done using sahf operation.
7394 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7396 ix86_fp_comparison_sahf_cost (code)
7399 enum rtx_code bypass_code, first_code, second_code;
7400 /* Return arbitarily high cost when instruction is not preferred - this
7401 avoids gcc from using it. */
7402 if (!TARGET_USE_SAHF && !optimize_size)
7404 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7405 return (bypass_code != NIL || second_code != NIL) + 3;
7408 /* Compute cost of the comparison done using any method.
7409 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7411 ix86_fp_comparison_cost (code)
7414 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
7417 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
7418 sahf_cost = ix86_fp_comparison_sahf_cost (code);
7420 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
7421 if (min > sahf_cost)
7423 if (min > fcomi_cost)
7428 /* Generate insn patterns to do a floating point compare of OPERANDS. */
7431 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
7433 rtx op0, op1, scratch;
7437 enum machine_mode fpcmp_mode, intcmp_mode;
7439 int cost = ix86_fp_comparison_cost (code);
7440 enum rtx_code bypass_code, first_code, second_code;
7442 fpcmp_mode = ix86_fp_compare_mode (code);
7443 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
7446 *second_test = NULL_RTX;
7448 *bypass_test = NULL_RTX;
7450 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7452 /* Do fcomi/sahf based test when profitable. */
7453 if ((bypass_code == NIL || bypass_test)
7454 && (second_code == NIL || second_test)
7455 && ix86_fp_comparison_arithmetics_cost (code) > cost)
7459 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7460 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
7466 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7467 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
7469 scratch = gen_reg_rtx (HImode);
7470 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7471 emit_insn (gen_x86_sahf_1 (scratch));
7474 /* The FP codes work out to act like unsigned. */
7475 intcmp_mode = fpcmp_mode;
7477 if (bypass_code != NIL)
7478 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
7479 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7481 if (second_code != NIL)
7482 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
7483 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7488 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
7489 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7490 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
7492 scratch = gen_reg_rtx (HImode);
7493 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7495 /* In the unordered case, we have to check C2 for NaN's, which
7496 doesn't happen to work out to anything nice combination-wise.
7497 So do some bit twiddling on the value we've got in AH to come
7498 up with an appropriate set of condition codes. */
7500 intcmp_mode = CCNOmode;
7505 if (code == GT || !TARGET_IEEE_FP)
7507 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7512 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7513 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7514 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
7515 intcmp_mode = CCmode;
7521 if (code == LT && TARGET_IEEE_FP)
7523 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7524 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
7525 intcmp_mode = CCmode;
7530 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
7536 if (code == GE || !TARGET_IEEE_FP)
7538 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
7543 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7544 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7551 if (code == LE && TARGET_IEEE_FP)
7553 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7554 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7555 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7556 intcmp_mode = CCmode;
7561 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7567 if (code == EQ && TARGET_IEEE_FP)
7569 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7570 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7571 intcmp_mode = CCmode;
7576 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7583 if (code == NE && TARGET_IEEE_FP)
7585 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7586 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7592 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7598 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7602 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7611 /* Return the test that should be put into the flags user, i.e.
7612 the bcc, scc, or cmov instruction. */
7613 return gen_rtx_fmt_ee (code, VOIDmode,
7614 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7619 ix86_expand_compare (code, second_test, bypass_test)
7621 rtx *second_test, *bypass_test;
7624 op0 = ix86_compare_op0;
7625 op1 = ix86_compare_op1;
7628 *second_test = NULL_RTX;
7630 *bypass_test = NULL_RTX;
7632 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7633 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
7634 second_test, bypass_test);
7636 ret = ix86_expand_int_compare (code, op0, op1);
7641 /* Return true if the CODE will result in nontrivial jump sequence. */
7643 ix86_fp_jump_nontrivial_p (code)
7646 enum rtx_code bypass_code, first_code, second_code;
7649 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7650 return bypass_code != NIL || second_code != NIL;
7654 ix86_expand_branch (code, label)
7660 switch (GET_MODE (ix86_compare_op0))
7666 tmp = ix86_expand_compare (code, NULL, NULL);
7667 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7668 gen_rtx_LABEL_REF (VOIDmode, label),
7670 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
7680 enum rtx_code bypass_code, first_code, second_code;
7682 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
7685 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7687 /* Check whether we will use the natural sequence with one jump. If
7688 so, we can expand jump early. Otherwise delay expansion by
7689 creating compound insn to not confuse optimizers. */
7690 if (bypass_code == NIL && second_code == NIL
7693 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
7694 gen_rtx_LABEL_REF (VOIDmode, label),
7699 tmp = gen_rtx_fmt_ee (code, VOIDmode,
7700 ix86_compare_op0, ix86_compare_op1);
7701 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7702 gen_rtx_LABEL_REF (VOIDmode, label),
7704 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
7706 use_fcomi = ix86_use_fcomi_compare (code);
7707 vec = rtvec_alloc (3 + !use_fcomi);
7708 RTVEC_ELT (vec, 0) = tmp;
7710 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
7712 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
7715 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
7717 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
7725 /* Expand DImode branch into multiple compare+branch. */
7727 rtx lo[2], hi[2], label2;
7728 enum rtx_code code1, code2, code3;
7730 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
7732 tmp = ix86_compare_op0;
7733 ix86_compare_op0 = ix86_compare_op1;
7734 ix86_compare_op1 = tmp;
7735 code = swap_condition (code);
7737 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
7738 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
7740 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
7741 avoid two branches. This costs one extra insn, so disable when
7742 optimizing for size. */
7744 if ((code == EQ || code == NE)
7746 || hi[1] == const0_rtx || lo[1] == const0_rtx))
7751 if (hi[1] != const0_rtx)
7752 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
7753 NULL_RTX, 0, OPTAB_WIDEN);
7756 if (lo[1] != const0_rtx)
7757 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
7758 NULL_RTX, 0, OPTAB_WIDEN);
7760 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
7761 NULL_RTX, 0, OPTAB_WIDEN);
7763 ix86_compare_op0 = tmp;
7764 ix86_compare_op1 = const0_rtx;
7765 ix86_expand_branch (code, label);
7769 /* Otherwise, if we are doing less-than or greater-or-equal-than,
7770 op1 is a constant and the low word is zero, then we can just
7771 examine the high word. */
7773 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
7776 case LT: case LTU: case GE: case GEU:
7777 ix86_compare_op0 = hi[0];
7778 ix86_compare_op1 = hi[1];
7779 ix86_expand_branch (code, label);
7785 /* Otherwise, we need two or three jumps. */
7787 label2 = gen_label_rtx ();
7790 code2 = swap_condition (code);
7791 code3 = unsigned_condition (code);
7795 case LT: case GT: case LTU: case GTU:
7798 case LE: code1 = LT; code2 = GT; break;
7799 case GE: code1 = GT; code2 = LT; break;
7800 case LEU: code1 = LTU; code2 = GTU; break;
7801 case GEU: code1 = GTU; code2 = LTU; break;
7803 case EQ: code1 = NIL; code2 = NE; break;
7804 case NE: code2 = NIL; break;
7812 * if (hi(a) < hi(b)) goto true;
7813 * if (hi(a) > hi(b)) goto false;
7814 * if (lo(a) < lo(b)) goto true;
7818 ix86_compare_op0 = hi[0];
7819 ix86_compare_op1 = hi[1];
7822 ix86_expand_branch (code1, label);
7824 ix86_expand_branch (code2, label2);
7826 ix86_compare_op0 = lo[0];
7827 ix86_compare_op1 = lo[1];
7828 ix86_expand_branch (code3, label);
7831 emit_label (label2);
7840 /* Split branch based on floating point condition. */
7842 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
7844 rtx op1, op2, target1, target2, tmp;
7847 rtx label = NULL_RTX;
7849 int bypass_probability = -1, second_probability = -1, probability = -1;
7852 if (target2 != pc_rtx)
7855 code = reverse_condition_maybe_unordered (code);
7860 condition = ix86_expand_fp_compare (code, op1, op2,
7861 tmp, &second, &bypass);
7863 if (split_branch_probability >= 0)
7865 /* Distribute the probabilities across the jumps.
7866 Assume the BYPASS and SECOND to be always test
7868 probability = split_branch_probability;
7870 /* Value of 1 is low enough to make no need for probability
7871 to be updated. Later we may run some experiments and see
7872 if unordered values are more frequent in practice. */
7874 bypass_probability = 1;
7876 second_probability = 1;
7878 if (bypass != NULL_RTX)
7880 label = gen_label_rtx ();
7881 i = emit_jump_insn (gen_rtx_SET
7883 gen_rtx_IF_THEN_ELSE (VOIDmode,
7885 gen_rtx_LABEL_REF (VOIDmode,
7888 if (bypass_probability >= 0)
7890 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7891 GEN_INT (bypass_probability),
7894 i = emit_jump_insn (gen_rtx_SET
7896 gen_rtx_IF_THEN_ELSE (VOIDmode,
7897 condition, target1, target2)));
7898 if (probability >= 0)
7900 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7901 GEN_INT (probability),
7903 if (second != NULL_RTX)
7905 i = emit_jump_insn (gen_rtx_SET
7907 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
7909 if (second_probability >= 0)
7911 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7912 GEN_INT (second_probability),
7915 if (label != NULL_RTX)
7920 ix86_expand_setcc (code, dest)
7924 rtx ret, tmp, tmpreg;
7925 rtx second_test, bypass_test;
7927 if (GET_MODE (ix86_compare_op0) == DImode
7929 return 0; /* FAIL */
7931 if (GET_MODE (dest) != QImode)
7934 ret = ix86_expand_compare (code, &second_test, &bypass_test);
7935 PUT_MODE (ret, QImode);
7940 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
7941 if (bypass_test || second_test)
7943 rtx test = second_test;
7945 rtx tmp2 = gen_reg_rtx (QImode);
7952 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
7954 PUT_MODE (test, QImode);
7955 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
7958 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
7960 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
7963 return 1; /* DONE */
7967 ix86_expand_int_movcc (operands)
7970 enum rtx_code code = GET_CODE (operands[1]), compare_code;
7971 rtx compare_seq, compare_op;
7972 rtx second_test, bypass_test;
7973 enum machine_mode mode = GET_MODE (operands[0]);
7975 /* When the compare code is not LTU or GEU, we can not use sbbl case.
7976 In case comparsion is done with immediate, we can convert it to LTU or
7977 GEU by altering the integer. */
7979 if ((code == LEU || code == GTU)
7980 && GET_CODE (ix86_compare_op1) == CONST_INT
7982 && (unsigned int) INTVAL (ix86_compare_op1) != 0xffffffff
7983 /* The operand still must be representable as sign extended value. */
7985 || GET_MODE (ix86_compare_op0) != DImode
7986 || (unsigned int) INTVAL (ix86_compare_op1) != 0x7fffffff)
7987 && GET_CODE (operands[2]) == CONST_INT
7988 && GET_CODE (operands[3]) == CONST_INT)
7995 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
7996 GET_MODE (ix86_compare_op0));
8000 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8001 compare_seq = gen_sequence ();
8004 compare_code = GET_CODE (compare_op);
8006 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8007 HImode insns, we'd be swallowed in word prefix ops. */
8010 && (mode != DImode || TARGET_64BIT)
8011 && GET_CODE (operands[2]) == CONST_INT
8012 && GET_CODE (operands[3]) == CONST_INT)
8014 rtx out = operands[0];
8015 HOST_WIDE_INT ct = INTVAL (operands[2]);
8016 HOST_WIDE_INT cf = INTVAL (operands[3]);
8019 if ((compare_code == LTU || compare_code == GEU)
8020 && !second_test && !bypass_test)
8023 /* Detect overlap between destination and compare sources. */
8026 /* To simplify rest of code, restrict to the GEU case. */
8027 if (compare_code == LTU)
8032 compare_code = reverse_condition (compare_code);
8033 code = reverse_condition (code);
8037 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8038 || reg_overlap_mentioned_p (out, ix86_compare_op1))
8039 tmp = gen_reg_rtx (mode);
8041 emit_insn (compare_seq);
8043 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8045 emit_insn (gen_x86_movsicc_0_m1 (tmp));
8057 tmp = expand_simple_binop (mode, PLUS,
8059 tmp, 1, OPTAB_DIRECT);
8070 tmp = expand_simple_binop (mode, IOR,
8072 tmp, 1, OPTAB_DIRECT);
8074 else if (diff == -1 && ct)
8084 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8086 tmp = expand_simple_binop (mode, PLUS,
8088 tmp, 1, OPTAB_DIRECT);
8095 * andl cf - ct, dest
8100 tmp = expand_simple_binop (mode, AND,
8102 gen_int_mode (cf - ct, mode),
8103 tmp, 1, OPTAB_DIRECT);
8105 tmp = expand_simple_binop (mode, PLUS,
8107 tmp, 1, OPTAB_DIRECT);
8111 emit_move_insn (out, tmp);
8113 return 1; /* DONE */
8120 tmp = ct, ct = cf, cf = tmp;
8122 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8124 /* We may be reversing unordered compare to normal compare, that
8125 is not valid in general (we may convert non-trapping condition
8126 to trapping one), however on i386 we currently emit all
8127 comparisons unordered. */
8128 compare_code = reverse_condition_maybe_unordered (compare_code);
8129 code = reverse_condition_maybe_unordered (code);
8133 compare_code = reverse_condition (compare_code);
8134 code = reverse_condition (code);
8139 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
8140 && GET_CODE (ix86_compare_op1) == CONST_INT)
8142 if (ix86_compare_op1 == const0_rtx
8143 && (code == LT || code == GE))
8144 compare_code = code;
8145 else if (ix86_compare_op1 == constm1_rtx)
8149 else if (code == GT)
8154 /* Optimize dest = (op0 < 0) ? -1 : cf. */
8155 if (compare_code != NIL
8156 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
8157 && (cf == -1 || ct == -1))
8159 /* If lea code below could be used, only optimize
8160 if it results in a 2 insn sequence. */
8162 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
8163 || diff == 3 || diff == 5 || diff == 9)
8164 || (compare_code == LT && ct == -1)
8165 || (compare_code == GE && cf == -1))
8168 * notl op1 (if necessary)
8176 code = reverse_condition (code);
8179 out = emit_store_flag (out, code, ix86_compare_op0,
8180 ix86_compare_op1, VOIDmode, 0, -1);
8182 out = expand_simple_binop (mode, IOR,
8184 out, 1, OPTAB_DIRECT);
8185 if (out != operands[0])
8186 emit_move_insn (operands[0], out);
8188 return 1; /* DONE */
8192 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
8193 || diff == 3 || diff == 5 || diff == 9)
8194 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
8200 * lea cf(dest*(ct-cf)),dest
8204 * This also catches the degenerate setcc-only case.
8210 out = emit_store_flag (out, code, ix86_compare_op0,
8211 ix86_compare_op1, VOIDmode, 0, 1);
8214 /* On x86_64 the lea instruction operates on Pmode, so we need to get arithmetics
8215 done in proper mode to match. */
8222 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
8226 tmp = gen_rtx_PLUS (mode, tmp, out1);
8232 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
8236 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
8242 clob = gen_rtx_REG (CCmode, FLAGS_REG);
8243 clob = gen_rtx_CLOBBER (VOIDmode, clob);
8245 tmp = gen_rtx_SET (VOIDmode, out, tmp);
8246 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8250 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8252 if (out != operands[0])
8253 emit_move_insn (operands[0], out);
8255 return 1; /* DONE */
8259 * General case: Jumpful:
8260 * xorl dest,dest cmpl op1, op2
8261 * cmpl op1, op2 movl ct, dest
8263 * decl dest movl cf, dest
8264 * andl (cf-ct),dest 1:
8269 * This is reasonably steep, but branch mispredict costs are
8270 * high on modern cpus, so consider failing only if optimizing
8273 * %%% Parameterize branch_cost on the tuning architecture, then
8274 * use that. The 80386 couldn't care less about mispredicts.
8277 if (!optimize_size && !TARGET_CMOVE)
8283 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8284 /* We may be reversing unordered compare to normal compare,
8285 that is not valid in general (we may convert non-trapping
8286 condition to trapping one), however on i386 we currently
8287 emit all comparisons unordered. */
8288 code = reverse_condition_maybe_unordered (code);
8291 code = reverse_condition (code);
8292 if (compare_code != NIL)
8293 compare_code = reverse_condition (compare_code);
8297 if (compare_code != NIL)
8299 /* notl op1 (if needed)
8304 For x < 0 (resp. x <= -1) there will be no notl,
8305 so if possible swap the constants to get rid of the
8307 True/false will be -1/0 while code below (store flag
8308 followed by decrement) is 0/-1, so the constants need
8309 to be exchanged once more. */
8311 if (compare_code == GE || !cf)
8313 code = reverse_condition (code);
8318 HOST_WIDE_INT tmp = cf;
8323 out = emit_store_flag (out, code, ix86_compare_op0,
8324 ix86_compare_op1, VOIDmode, 0, -1);
8328 out = emit_store_flag (out, code, ix86_compare_op0,
8329 ix86_compare_op1, VOIDmode, 0, 1);
8331 out = expand_simple_binop (mode, PLUS,
8333 out, 1, OPTAB_DIRECT);
8336 out = expand_simple_binop (mode, AND,
8338 gen_int_mode (cf - ct, mode),
8339 out, 1, OPTAB_DIRECT);
8340 out = expand_simple_binop (mode, PLUS,
8342 out, 1, OPTAB_DIRECT);
8343 if (out != operands[0])
8344 emit_move_insn (operands[0], out);
8346 return 1; /* DONE */
8352 /* Try a few things more with specific constants and a variable. */
8355 rtx var, orig_out, out, tmp;
8358 return 0; /* FAIL */
8360 /* If one of the two operands is an interesting constant, load a
8361 constant with the above and mask it in with a logical operation. */
8363 if (GET_CODE (operands[2]) == CONST_INT)
8366 if (INTVAL (operands[2]) == 0)
8367 operands[3] = constm1_rtx, op = and_optab;
8368 else if (INTVAL (operands[2]) == -1)
8369 operands[3] = const0_rtx, op = ior_optab;
8371 return 0; /* FAIL */
8373 else if (GET_CODE (operands[3]) == CONST_INT)
8376 if (INTVAL (operands[3]) == 0)
8377 operands[2] = constm1_rtx, op = and_optab;
8378 else if (INTVAL (operands[3]) == -1)
8379 operands[2] = const0_rtx, op = ior_optab;
8381 return 0; /* FAIL */
8384 return 0; /* FAIL */
8386 orig_out = operands[0];
8387 tmp = gen_reg_rtx (mode);
8390 /* Recurse to get the constant loaded. */
8391 if (ix86_expand_int_movcc (operands) == 0)
8392 return 0; /* FAIL */
8394 /* Mask in the interesting variable. */
8395 out = expand_binop (mode, op, var, tmp, orig_out, 0,
8397 if (out != orig_out)
8398 emit_move_insn (orig_out, out);
8400 return 1; /* DONE */
8404 * For comparison with above,
8414 if (! nonimmediate_operand (operands[2], mode))
8415 operands[2] = force_reg (mode, operands[2]);
8416 if (! nonimmediate_operand (operands[3], mode))
8417 operands[3] = force_reg (mode, operands[3]);
8419 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8421 rtx tmp = gen_reg_rtx (mode);
8422 emit_move_insn (tmp, operands[3]);
8425 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8427 rtx tmp = gen_reg_rtx (mode);
8428 emit_move_insn (tmp, operands[2]);
8431 if (! register_operand (operands[2], VOIDmode)
8432 && ! register_operand (operands[3], VOIDmode))
8433 operands[2] = force_reg (mode, operands[2]);
8435 emit_insn (compare_seq);
8436 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8437 gen_rtx_IF_THEN_ELSE (mode,
8438 compare_op, operands[2],
8441 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8442 gen_rtx_IF_THEN_ELSE (mode,
8447 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8448 gen_rtx_IF_THEN_ELSE (mode,
8453 return 1; /* DONE */
8457 ix86_expand_fp_movcc (operands)
8462 rtx compare_op, second_test, bypass_test;
8464 /* For SF/DFmode conditional moves based on comparisons
8465 in same mode, we may want to use SSE min/max instructions. */
8466 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
8467 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
8468 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
8469 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
8471 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
8472 /* We may be called from the post-reload splitter. */
8473 && (!REG_P (operands[0])
8474 || SSE_REG_P (operands[0])
8475 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
8477 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
8478 code = GET_CODE (operands[1]);
8480 /* See if we have (cross) match between comparison operands and
8481 conditional move operands. */
8482 if (rtx_equal_p (operands[2], op1))
8487 code = reverse_condition_maybe_unordered (code);
8489 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
8491 /* Check for min operation. */
8494 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8495 if (memory_operand (op0, VOIDmode))
8496 op0 = force_reg (GET_MODE (operands[0]), op0);
8497 if (GET_MODE (operands[0]) == SFmode)
8498 emit_insn (gen_minsf3 (operands[0], op0, op1));
8500 emit_insn (gen_mindf3 (operands[0], op0, op1));
8503 /* Check for max operation. */
8506 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8507 if (memory_operand (op0, VOIDmode))
8508 op0 = force_reg (GET_MODE (operands[0]), op0);
8509 if (GET_MODE (operands[0]) == SFmode)
8510 emit_insn (gen_maxsf3 (operands[0], op0, op1));
8512 emit_insn (gen_maxdf3 (operands[0], op0, op1));
8516 /* Manage condition to be sse_comparison_operator. In case we are
8517 in non-ieee mode, try to canonicalize the destination operand
8518 to be first in the comparison - this helps reload to avoid extra
8520 if (!sse_comparison_operator (operands[1], VOIDmode)
8521 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
8523 rtx tmp = ix86_compare_op0;
8524 ix86_compare_op0 = ix86_compare_op1;
8525 ix86_compare_op1 = tmp;
8526 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
8527 VOIDmode, ix86_compare_op0,
8530 /* Similary try to manage result to be first operand of conditional
8531 move. We also don't support the NE comparison on SSE, so try to
8533 if ((rtx_equal_p (operands[0], operands[3])
8534 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
8535 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
8537 rtx tmp = operands[2];
8538 operands[2] = operands[3];
8540 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
8541 (GET_CODE (operands[1])),
8542 VOIDmode, ix86_compare_op0,
8545 if (GET_MODE (operands[0]) == SFmode)
8546 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
8547 operands[2], operands[3],
8548 ix86_compare_op0, ix86_compare_op1));
8550 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
8551 operands[2], operands[3],
8552 ix86_compare_op0, ix86_compare_op1));
8556 /* The floating point conditional move instructions don't directly
8557 support conditions resulting from a signed integer comparison. */
8559 code = GET_CODE (operands[1]);
8560 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8562 /* The floating point conditional move instructions don't directly
8563 support signed integer comparisons. */
8565 if (!fcmov_comparison_operator (compare_op, VOIDmode))
8567 if (second_test != NULL || bypass_test != NULL)
8569 tmp = gen_reg_rtx (QImode);
8570 ix86_expand_setcc (code, tmp);
8572 ix86_compare_op0 = tmp;
8573 ix86_compare_op1 = const0_rtx;
8574 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8576 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8578 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8579 emit_move_insn (tmp, operands[3]);
8582 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8584 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8585 emit_move_insn (tmp, operands[2]);
8589 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8590 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8595 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8596 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8601 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8602 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8610 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
8611 works for floating pointer parameters and nonoffsetable memories.
8612 For pushes, it returns just stack offsets; the values will be saved
8613 in the right order. Maximally three parts are generated. */
8616 ix86_split_to_parts (operand, parts, mode)
8619 enum machine_mode mode;
8624 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
8626 size = (GET_MODE_SIZE (mode) + 4) / 8;
8628 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
8630 if (size < 2 || size > 3)
8633 /* Optimize constant pool reference to immediates. This is used by fp moves,
8634 that force all constants to memory to allow combining. */
8636 if (GET_CODE (operand) == MEM
8637 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
8638 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
8639 operand = get_pool_constant (XEXP (operand, 0));
8641 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
8643 /* The only non-offsetable memories we handle are pushes. */
8644 if (! push_operand (operand, VOIDmode))
8647 operand = copy_rtx (operand);
8648 PUT_MODE (operand, Pmode);
8649 parts[0] = parts[1] = parts[2] = operand;
8651 else if (!TARGET_64BIT)
8654 split_di (&operand, 1, &parts[0], &parts[1]);
8657 if (REG_P (operand))
8659 if (!reload_completed)
8661 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
8662 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8664 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
8666 else if (offsettable_memref_p (operand))
8668 operand = adjust_address (operand, SImode, 0);
8670 parts[1] = adjust_address (operand, SImode, 4);
8672 parts[2] = adjust_address (operand, SImode, 8);
8674 else if (GET_CODE (operand) == CONST_DOUBLE)
8679 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8684 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8685 parts[2] = gen_int_mode (l[2], SImode);
8688 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
8693 parts[1] = gen_int_mode (l[1], SImode);
8694 parts[0] = gen_int_mode (l[0], SImode);
8703 split_ti (&operand, 1, &parts[0], &parts[1]);
8704 if (mode == XFmode || mode == TFmode)
8706 if (REG_P (operand))
8708 if (!reload_completed)
8710 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
8711 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8713 else if (offsettable_memref_p (operand))
8715 operand = adjust_address (operand, DImode, 0);
8717 parts[1] = adjust_address (operand, SImode, 8);
8719 else if (GET_CODE (operand) == CONST_DOUBLE)
8724 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8725 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8726 /* Do not use shift by 32 to avoid warning on 32bit systems. */
8727 if (HOST_BITS_PER_WIDE_INT >= 64)
8730 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
8731 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
8734 parts[0] = immed_double_const (l[0], l[1], DImode);
8735 parts[1] = gen_int_mode (l[2], SImode);
8745 /* Emit insns to perform a move or push of DI, DF, and XF values.
8746 Return false when normal moves are needed; true when all required
8747 insns have been emitted. Operands 2-4 contain the input values
8748 int the correct order; operands 5-7 contain the output values. */
8751 ix86_split_long_move (operands)
8758 enum machine_mode mode = GET_MODE (operands[0]);
8760 /* The DFmode expanders may ask us to move double.
8761 For 64bit target this is single move. By hiding the fact
8762 here we simplify i386.md splitters. */
8763 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
8765 /* Optimize constant pool reference to immediates. This is used by
8766 fp moves, that force all constants to memory to allow combining. */
8768 if (GET_CODE (operands[1]) == MEM
8769 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8770 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
8771 operands[1] = get_pool_constant (XEXP (operands[1], 0));
8772 if (push_operand (operands[0], VOIDmode))
8774 operands[0] = copy_rtx (operands[0]);
8775 PUT_MODE (operands[0], Pmode);
8778 operands[0] = gen_lowpart (DImode, operands[0]);
8779 operands[1] = gen_lowpart (DImode, operands[1]);
8780 emit_move_insn (operands[0], operands[1]);
8784 /* The only non-offsettable memory we handle is push. */
8785 if (push_operand (operands[0], VOIDmode))
8787 else if (GET_CODE (operands[0]) == MEM
8788 && ! offsettable_memref_p (operands[0]))
8791 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
8792 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
8794 /* When emitting push, take care for source operands on the stack. */
8795 if (push && GET_CODE (operands[1]) == MEM
8796 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
8799 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
8800 XEXP (part[1][2], 0));
8801 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
8802 XEXP (part[1][1], 0));
8805 /* We need to do copy in the right order in case an address register
8806 of the source overlaps the destination. */
8807 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
8809 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
8811 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8814 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
8817 /* Collision in the middle part can be handled by reordering. */
8818 if (collisions == 1 && nparts == 3
8819 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8822 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
8823 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
8826 /* If there are more collisions, we can't handle it by reordering.
8827 Do an lea to the last part and use only one colliding move. */
8828 else if (collisions > 1)
8831 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
8832 XEXP (part[1][0], 0)));
8833 part[1][0] = change_address (part[1][0],
8834 TARGET_64BIT ? DImode : SImode,
8835 part[0][nparts - 1]);
8836 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
8838 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
8848 /* We use only first 12 bytes of TFmode value, but for pushing we
8849 are required to adjust stack as if we were pushing real 16byte
8851 if (mode == TFmode && !TARGET_64BIT)
8852 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
8854 emit_move_insn (part[0][2], part[1][2]);
8859 /* In 64bit mode we don't have 32bit push available. In case this is
8860 register, it is OK - we will just use larger counterpart. We also
8861 retype memory - these comes from attempt to avoid REX prefix on
8862 moving of second half of TFmode value. */
8863 if (GET_MODE (part[1][1]) == SImode)
8865 if (GET_CODE (part[1][1]) == MEM)
8866 part[1][1] = adjust_address (part[1][1], DImode, 0);
8867 else if (REG_P (part[1][1]))
8868 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
8871 if (GET_MODE (part[1][0]) == SImode)
8872 part[1][0] = part[1][1];
8875 emit_move_insn (part[0][1], part[1][1]);
8876 emit_move_insn (part[0][0], part[1][0]);
8880 /* Choose correct order to not overwrite the source before it is copied. */
8881 if ((REG_P (part[0][0])
8882 && REG_P (part[1][1])
8883 && (REGNO (part[0][0]) == REGNO (part[1][1])
8885 && REGNO (part[0][0]) == REGNO (part[1][2]))))
8887 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
8891 operands[2] = part[0][2];
8892 operands[3] = part[0][1];
8893 operands[4] = part[0][0];
8894 operands[5] = part[1][2];
8895 operands[6] = part[1][1];
8896 operands[7] = part[1][0];
8900 operands[2] = part[0][1];
8901 operands[3] = part[0][0];
8902 operands[5] = part[1][1];
8903 operands[6] = part[1][0];
8910 operands[2] = part[0][0];
8911 operands[3] = part[0][1];
8912 operands[4] = part[0][2];
8913 operands[5] = part[1][0];
8914 operands[6] = part[1][1];
8915 operands[7] = part[1][2];
8919 operands[2] = part[0][0];
8920 operands[3] = part[0][1];
8921 operands[5] = part[1][0];
8922 operands[6] = part[1][1];
8925 emit_move_insn (operands[2], operands[5]);
8926 emit_move_insn (operands[3], operands[6]);
8928 emit_move_insn (operands[4], operands[7]);
8934 ix86_split_ashldi (operands, scratch)
8935 rtx *operands, scratch;
8937 rtx low[2], high[2];
8940 if (GET_CODE (operands[2]) == CONST_INT)
8942 split_di (operands, 2, low, high);
8943 count = INTVAL (operands[2]) & 63;
8947 emit_move_insn (high[0], low[1]);
8948 emit_move_insn (low[0], const0_rtx);
8951 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
8955 if (!rtx_equal_p (operands[0], operands[1]))
8956 emit_move_insn (operands[0], operands[1]);
8957 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
8958 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
8963 if (!rtx_equal_p (operands[0], operands[1]))
8964 emit_move_insn (operands[0], operands[1]);
8966 split_di (operands, 1, low, high);
8968 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
8969 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
8971 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8973 if (! no_new_pseudos)
8974 scratch = force_reg (SImode, const0_rtx);
8976 emit_move_insn (scratch, const0_rtx);
8978 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
8982 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
8987 ix86_split_ashrdi (operands, scratch)
8988 rtx *operands, scratch;
8990 rtx low[2], high[2];
8993 if (GET_CODE (operands[2]) == CONST_INT)
8995 split_di (operands, 2, low, high);
8996 count = INTVAL (operands[2]) & 63;
9000 emit_move_insn (low[0], high[1]);
9002 if (! reload_completed)
9003 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
9006 emit_move_insn (high[0], low[0]);
9007 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9011 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
9015 if (!rtx_equal_p (operands[0], operands[1]))
9016 emit_move_insn (operands[0], operands[1]);
9017 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9018 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
9023 if (!rtx_equal_p (operands[0], operands[1]))
9024 emit_move_insn (operands[0], operands[1]);
9026 split_di (operands, 1, low, high);
9028 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9029 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9031 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9033 if (! no_new_pseudos)
9034 scratch = gen_reg_rtx (SImode);
9035 emit_move_insn (scratch, high[0]);
9036 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9037 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9041 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
9046 ix86_split_lshrdi (operands, scratch)
9047 rtx *operands, scratch;
9049 rtx low[2], high[2];
9052 if (GET_CODE (operands[2]) == CONST_INT)
9054 split_di (operands, 2, low, high);
9055 count = INTVAL (operands[2]) & 63;
9059 emit_move_insn (low[0], high[1]);
9060 emit_move_insn (high[0], const0_rtx);
9063 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
9067 if (!rtx_equal_p (operands[0], operands[1]))
9068 emit_move_insn (operands[0], operands[1]);
9069 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9070 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
9075 if (!rtx_equal_p (operands[0], operands[1]))
9076 emit_move_insn (operands[0], operands[1]);
9078 split_di (operands, 1, low, high);
9080 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9081 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
9083 /* Heh. By reversing the arguments, we can reuse this pattern. */
9084 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9086 if (! no_new_pseudos)
9087 scratch = force_reg (SImode, const0_rtx);
9089 emit_move_insn (scratch, const0_rtx);
9091 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9095 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
9099 /* Helper function for the string operations below. Dest VARIABLE whether
9100 it is aligned to VALUE bytes. If true, jump to the label. */
9102 ix86_expand_aligntest (variable, value)
9106 rtx label = gen_label_rtx ();
9107 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
9108 if (GET_MODE (variable) == DImode)
9109 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
9111 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
9112 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
9117 /* Adjust COUNTER by the VALUE. */
9119 ix86_adjust_counter (countreg, value)
9121 HOST_WIDE_INT value;
9123 if (GET_MODE (countreg) == DImode)
9124 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
9126 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
9129 /* Zero extend possibly SImode EXP to Pmode register. */
9131 ix86_zero_extend_to_Pmode (exp)
9135 if (GET_MODE (exp) == VOIDmode)
9136 return force_reg (Pmode, exp);
9137 if (GET_MODE (exp) == Pmode)
9138 return copy_to_mode_reg (Pmode, exp);
9139 r = gen_reg_rtx (Pmode);
9140 emit_insn (gen_zero_extendsidi2 (r, exp));
9144 /* Expand string move (memcpy) operation. Use i386 string operations when
9145 profitable. expand_clrstr contains similar code. */
9147 ix86_expand_movstr (dst, src, count_exp, align_exp)
9148 rtx dst, src, count_exp, align_exp;
9150 rtx srcreg, destreg, countreg;
9151 enum machine_mode counter_mode;
9152 HOST_WIDE_INT align = 0;
9153 unsigned HOST_WIDE_INT count = 0;
9158 if (GET_CODE (align_exp) == CONST_INT)
9159 align = INTVAL (align_exp);
9161 /* This simple hack avoids all inlining code and simplifies code below. */
9162 if (!TARGET_ALIGN_STRINGOPS)
9165 if (GET_CODE (count_exp) == CONST_INT)
9166 count = INTVAL (count_exp);
9168 /* Figure out proper mode for counter. For 32bits it is always SImode,
9169 for 64bits use SImode when possible, otherwise DImode.
9170 Set count to number of bytes copied when known at compile time. */
9171 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9172 || x86_64_zero_extended_value (count_exp))
9173 counter_mode = SImode;
9175 counter_mode = DImode;
9177 if (counter_mode != SImode && counter_mode != DImode)
9180 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9181 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9183 emit_insn (gen_cld ());
9185 /* When optimizing for size emit simple rep ; movsb instruction for
9186 counts not divisible by 4. */
9188 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9190 countreg = ix86_zero_extend_to_Pmode (count_exp);
9192 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
9193 destreg, srcreg, countreg));
9195 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
9196 destreg, srcreg, countreg));
9199 /* For constant aligned (or small unaligned) copies use rep movsl
9200 followed by code copying the rest. For PentiumPro ensure 8 byte
9201 alignment to allow rep movsl acceleration. */
9205 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9206 || optimize_size || count < (unsigned int) 64))
9208 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9209 if (count & ~(size - 1))
9211 countreg = copy_to_mode_reg (counter_mode,
9212 GEN_INT ((count >> (size == 4 ? 2 : 3))
9213 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9214 countreg = ix86_zero_extend_to_Pmode (countreg);
9218 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
9219 destreg, srcreg, countreg));
9221 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
9222 destreg, srcreg, countreg));
9225 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
9226 destreg, srcreg, countreg));
9228 if (size == 8 && (count & 0x04))
9229 emit_insn (gen_strmovsi (destreg, srcreg));
9231 emit_insn (gen_strmovhi (destreg, srcreg));
9233 emit_insn (gen_strmovqi (destreg, srcreg));
9235 /* The generic code based on the glibc implementation:
9236 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9237 allowing accelerated copying there)
9238 - copy the data using rep movsl
9244 int desired_alignment = (TARGET_PENTIUMPRO
9245 && (count == 0 || count >= (unsigned int) 260)
9246 ? 8 : UNITS_PER_WORD);
9248 /* In case we don't know anything about the alignment, default to
9249 library version, since it is usually equally fast and result in
9251 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9257 if (TARGET_SINGLE_STRINGOP)
9258 emit_insn (gen_cld ());
9260 countreg2 = gen_reg_rtx (Pmode);
9261 countreg = copy_to_mode_reg (counter_mode, count_exp);
9263 /* We don't use loops to align destination and to copy parts smaller
9264 than 4 bytes, because gcc is able to optimize such code better (in
9265 the case the destination or the count really is aligned, gcc is often
9266 able to predict the branches) and also it is friendlier to the
9267 hardware branch prediction.
9269 Using loops is benefical for generic case, because we can
9270 handle small counts using the loops. Many CPUs (such as Athlon)
9271 have large REP prefix setup costs.
9273 This is quite costy. Maybe we can revisit this decision later or
9274 add some customizability to this code. */
9276 if (count == 0 && align < desired_alignment)
9278 label = gen_label_rtx ();
9279 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9280 LEU, 0, counter_mode, 1, label);
9284 rtx label = ix86_expand_aligntest (destreg, 1);
9285 emit_insn (gen_strmovqi (destreg, srcreg));
9286 ix86_adjust_counter (countreg, 1);
9288 LABEL_NUSES (label) = 1;
9292 rtx label = ix86_expand_aligntest (destreg, 2);
9293 emit_insn (gen_strmovhi (destreg, srcreg));
9294 ix86_adjust_counter (countreg, 2);
9296 LABEL_NUSES (label) = 1;
9298 if (align <= 4 && desired_alignment > 4)
9300 rtx label = ix86_expand_aligntest (destreg, 4);
9301 emit_insn (gen_strmovsi (destreg, srcreg));
9302 ix86_adjust_counter (countreg, 4);
9304 LABEL_NUSES (label) = 1;
9307 if (label && desired_alignment > 4 && !TARGET_64BIT)
9310 LABEL_NUSES (label) = 1;
9313 if (!TARGET_SINGLE_STRINGOP)
9314 emit_insn (gen_cld ());
9317 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9319 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
9320 destreg, srcreg, countreg2));
9324 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9325 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
9326 destreg, srcreg, countreg2));
9332 LABEL_NUSES (label) = 1;
9334 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9335 emit_insn (gen_strmovsi (destreg, srcreg));
9336 if ((align <= 4 || count == 0) && TARGET_64BIT)
9338 rtx label = ix86_expand_aligntest (countreg, 4);
9339 emit_insn (gen_strmovsi (destreg, srcreg));
9341 LABEL_NUSES (label) = 1;
9343 if (align > 2 && count != 0 && (count & 2))
9344 emit_insn (gen_strmovhi (destreg, srcreg));
9345 if (align <= 2 || count == 0)
9347 rtx label = ix86_expand_aligntest (countreg, 2);
9348 emit_insn (gen_strmovhi (destreg, srcreg));
9350 LABEL_NUSES (label) = 1;
9352 if (align > 1 && count != 0 && (count & 1))
9353 emit_insn (gen_strmovqi (destreg, srcreg));
9354 if (align <= 1 || count == 0)
9356 rtx label = ix86_expand_aligntest (countreg, 1);
9357 emit_insn (gen_strmovqi (destreg, srcreg));
9359 LABEL_NUSES (label) = 1;
9363 insns = get_insns ();
9366 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
9371 /* Expand string clear operation (bzero). Use i386 string operations when
9372 profitable. expand_movstr contains similar code. */
9374 ix86_expand_clrstr (src, count_exp, align_exp)
9375 rtx src, count_exp, align_exp;
9377 rtx destreg, zeroreg, countreg;
9378 enum machine_mode counter_mode;
9379 HOST_WIDE_INT align = 0;
9380 unsigned HOST_WIDE_INT count = 0;
9382 if (GET_CODE (align_exp) == CONST_INT)
9383 align = INTVAL (align_exp);
9385 /* This simple hack avoids all inlining code and simplifies code below. */
9386 if (!TARGET_ALIGN_STRINGOPS)
9389 if (GET_CODE (count_exp) == CONST_INT)
9390 count = INTVAL (count_exp);
9391 /* Figure out proper mode for counter. For 32bits it is always SImode,
9392 for 64bits use SImode when possible, otherwise DImode.
9393 Set count to number of bytes copied when known at compile time. */
9394 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9395 || x86_64_zero_extended_value (count_exp))
9396 counter_mode = SImode;
9398 counter_mode = DImode;
9400 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9402 emit_insn (gen_cld ());
9404 /* When optimizing for size emit simple rep ; movsb instruction for
9405 counts not divisible by 4. */
9407 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9409 countreg = ix86_zero_extend_to_Pmode (count_exp);
9410 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
9412 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
9413 destreg, countreg));
9415 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
9416 destreg, countreg));
9420 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9421 || optimize_size || count < (unsigned int) 64))
9423 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9424 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
9425 if (count & ~(size - 1))
9427 countreg = copy_to_mode_reg (counter_mode,
9428 GEN_INT ((count >> (size == 4 ? 2 : 3))
9429 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9430 countreg = ix86_zero_extend_to_Pmode (countreg);
9434 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
9435 destreg, countreg));
9437 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
9438 destreg, countreg));
9441 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
9442 destreg, countreg));
9444 if (size == 8 && (count & 0x04))
9445 emit_insn (gen_strsetsi (destreg,
9446 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9448 emit_insn (gen_strsethi (destreg,
9449 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9451 emit_insn (gen_strsetqi (destreg,
9452 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9458 /* Compute desired alignment of the string operation. */
9459 int desired_alignment = (TARGET_PENTIUMPRO
9460 && (count == 0 || count >= (unsigned int) 260)
9461 ? 8 : UNITS_PER_WORD);
9463 /* In case we don't know anything about the alignment, default to
9464 library version, since it is usually equally fast and result in
9466 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9469 if (TARGET_SINGLE_STRINGOP)
9470 emit_insn (gen_cld ());
9472 countreg2 = gen_reg_rtx (Pmode);
9473 countreg = copy_to_mode_reg (counter_mode, count_exp);
9474 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
9476 if (count == 0 && align < desired_alignment)
9478 label = gen_label_rtx ();
9479 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
9480 LEU, 0, counter_mode, 1, label);
9484 rtx label = ix86_expand_aligntest (destreg, 1);
9485 emit_insn (gen_strsetqi (destreg,
9486 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9487 ix86_adjust_counter (countreg, 1);
9489 LABEL_NUSES (label) = 1;
9493 rtx label = ix86_expand_aligntest (destreg, 2);
9494 emit_insn (gen_strsethi (destreg,
9495 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9496 ix86_adjust_counter (countreg, 2);
9498 LABEL_NUSES (label) = 1;
9500 if (align <= 4 && desired_alignment > 4)
9502 rtx label = ix86_expand_aligntest (destreg, 4);
9503 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
9504 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
9506 ix86_adjust_counter (countreg, 4);
9508 LABEL_NUSES (label) = 1;
9511 if (label && desired_alignment > 4 && !TARGET_64BIT)
9514 LABEL_NUSES (label) = 1;
9518 if (!TARGET_SINGLE_STRINGOP)
9519 emit_insn (gen_cld ());
9522 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9524 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
9525 destreg, countreg2));
9529 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9530 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
9531 destreg, countreg2));
9536 LABEL_NUSES (label) = 1;
9539 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9540 emit_insn (gen_strsetsi (destreg,
9541 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9542 if (TARGET_64BIT && (align <= 4 || count == 0))
9544 rtx label = ix86_expand_aligntest (countreg, 2);
9545 emit_insn (gen_strsetsi (destreg,
9546 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9548 LABEL_NUSES (label) = 1;
9550 if (align > 2 && count != 0 && (count & 2))
9551 emit_insn (gen_strsethi (destreg,
9552 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9553 if (align <= 2 || count == 0)
9555 rtx label = ix86_expand_aligntest (countreg, 2);
9556 emit_insn (gen_strsethi (destreg,
9557 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9559 LABEL_NUSES (label) = 1;
9561 if (align > 1 && count != 0 && (count & 1))
9562 emit_insn (gen_strsetqi (destreg,
9563 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9564 if (align <= 1 || count == 0)
9566 rtx label = ix86_expand_aligntest (countreg, 1);
9567 emit_insn (gen_strsetqi (destreg,
9568 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9570 LABEL_NUSES (label) = 1;
9575 /* Expand strlen. */
9577 ix86_expand_strlen (out, src, eoschar, align)
9578 rtx out, src, eoschar, align;
9580 rtx addr, scratch1, scratch2, scratch3, scratch4;
9582 /* The generic case of strlen expander is long. Avoid it's
9583 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
9585 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9586 && !TARGET_INLINE_ALL_STRINGOPS
9588 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
9591 addr = force_reg (Pmode, XEXP (src, 0));
9592 scratch1 = gen_reg_rtx (Pmode);
9594 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9597 /* Well it seems that some optimizer does not combine a call like
9598 foo(strlen(bar), strlen(bar));
9599 when the move and the subtraction is done here. It does calculate
9600 the length just once when these instructions are done inside of
9601 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
9602 often used and I use one fewer register for the lifetime of
9603 output_strlen_unroll() this is better. */
9605 emit_move_insn (out, addr);
9607 ix86_expand_strlensi_unroll_1 (out, align);
9609 /* strlensi_unroll_1 returns the address of the zero at the end of
9610 the string, like memchr(), so compute the length by subtracting
9611 the start address. */
9613 emit_insn (gen_subdi3 (out, out, addr));
9615 emit_insn (gen_subsi3 (out, out, addr));
9619 scratch2 = gen_reg_rtx (Pmode);
9620 scratch3 = gen_reg_rtx (Pmode);
9621 scratch4 = force_reg (Pmode, constm1_rtx);
9623 emit_move_insn (scratch3, addr);
9624 eoschar = force_reg (QImode, eoschar);
9626 emit_insn (gen_cld ());
9629 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
9630 align, scratch4, scratch3));
9631 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
9632 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
9636 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
9637 align, scratch4, scratch3));
9638 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
9639 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
9645 /* Expand the appropriate insns for doing strlen if not just doing
9648 out = result, initialized with the start address
9649 align_rtx = alignment of the address.
9650 scratch = scratch register, initialized with the startaddress when
9651 not aligned, otherwise undefined
9653 This is just the body. It needs the initialisations mentioned above and
9654 some address computing at the end. These things are done in i386.md. */
9657 ix86_expand_strlensi_unroll_1 (out, align_rtx)
9662 rtx align_2_label = NULL_RTX;
9663 rtx align_3_label = NULL_RTX;
9664 rtx align_4_label = gen_label_rtx ();
9665 rtx end_0_label = gen_label_rtx ();
9667 rtx tmpreg = gen_reg_rtx (SImode);
9668 rtx scratch = gen_reg_rtx (SImode);
9671 if (GET_CODE (align_rtx) == CONST_INT)
9672 align = INTVAL (align_rtx);
9674 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
9676 /* Is there a known alignment and is it less than 4? */
9679 rtx scratch1 = gen_reg_rtx (Pmode);
9680 emit_move_insn (scratch1, out);
9681 /* Is there a known alignment and is it not 2? */
9684 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
9685 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
9687 /* Leave just the 3 lower bits. */
9688 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
9689 NULL_RTX, 0, OPTAB_WIDEN);
9691 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9692 Pmode, 1, align_4_label);
9693 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
9694 Pmode, 1, align_2_label);
9695 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
9696 Pmode, 1, align_3_label);
9700 /* Since the alignment is 2, we have to check 2 or 0 bytes;
9701 check if is aligned to 4 - byte. */
9703 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
9704 NULL_RTX, 0, OPTAB_WIDEN);
9706 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9707 Pmode, 1, align_4_label);
9710 mem = gen_rtx_MEM (QImode, out);
9712 /* Now compare the bytes. */
9714 /* Compare the first n unaligned byte on a byte per byte basis. */
9715 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
9716 QImode, 1, end_0_label);
9718 /* Increment the address. */
9720 emit_insn (gen_adddi3 (out, out, const1_rtx));
9722 emit_insn (gen_addsi3 (out, out, const1_rtx));
9724 /* Not needed with an alignment of 2 */
9727 emit_label (align_2_label);
9729 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9733 emit_insn (gen_adddi3 (out, out, const1_rtx));
9735 emit_insn (gen_addsi3 (out, out, const1_rtx));
9737 emit_label (align_3_label);
9740 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9744 emit_insn (gen_adddi3 (out, out, const1_rtx));
9746 emit_insn (gen_addsi3 (out, out, const1_rtx));
9749 /* Generate loop to check 4 bytes at a time. It is not a good idea to
9750 align this loop. It gives only huge programs, but does not help to
9752 emit_label (align_4_label);
9754 mem = gen_rtx_MEM (SImode, out);
9755 emit_move_insn (scratch, mem);
9757 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
9759 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
9761 /* This formula yields a nonzero result iff one of the bytes is zero.
9762 This saves three branches inside loop and many cycles. */
9764 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
9765 emit_insn (gen_one_cmplsi2 (scratch, scratch));
9766 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
9767 emit_insn (gen_andsi3 (tmpreg, tmpreg,
9768 gen_int_mode (0x80808080, SImode)));
9769 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
9774 rtx reg = gen_reg_rtx (SImode);
9775 rtx reg2 = gen_reg_rtx (Pmode);
9776 emit_move_insn (reg, tmpreg);
9777 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
9779 /* If zero is not in the first two bytes, move two bytes forward. */
9780 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9781 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9782 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9783 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
9784 gen_rtx_IF_THEN_ELSE (SImode, tmp,
9787 /* Emit lea manually to avoid clobbering of flags. */
9788 emit_insn (gen_rtx_SET (SImode, reg2,
9789 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
9791 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9792 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9793 emit_insn (gen_rtx_SET (VOIDmode, out,
9794 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
9801 rtx end_2_label = gen_label_rtx ();
9802 /* Is zero in the first two bytes? */
9804 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9805 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9806 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
9807 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9808 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
9810 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9811 JUMP_LABEL (tmp) = end_2_label;
9813 /* Not in the first two. Move two bytes forward. */
9814 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
9816 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
9818 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
9820 emit_label (end_2_label);
9824 /* Avoid branch in fixing the byte. */
9825 tmpreg = gen_lowpart (QImode, tmpreg);
9826 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
9828 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
9830 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
9832 emit_label (end_0_label);
9835 /* Clear stack slot assignments remembered from previous functions.
9836 This is called from INIT_EXPANDERS once before RTL is emitted for each
9840 ix86_init_machine_status (p)
9843 p->machine = (struct machine_function *)
9844 xcalloc (1, sizeof (struct machine_function));
9847 /* Mark machine specific bits of P for GC. */
9849 ix86_mark_machine_status (p)
9852 struct machine_function *machine = p->machine;
9853 enum machine_mode mode;
9859 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
9860 mode = (enum machine_mode) ((int) mode + 1))
9861 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
9862 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
9866 ix86_free_machine_status (p)
9873 /* Return a MEM corresponding to a stack slot with mode MODE.
9874 Allocate a new slot if necessary.
9876 The RTL for a function can have several slots available: N is
9877 which slot to use. */
9880 assign_386_stack_local (mode, n)
9881 enum machine_mode mode;
9884 if (n < 0 || n >= MAX_386_STACK_LOCALS)
9887 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
9888 ix86_stack_locals[(int) mode][n]
9889 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
9891 return ix86_stack_locals[(int) mode][n];
9894 /* Calculate the length of the memory address in the instruction
9895 encoding. Does not include the one-byte modrm, opcode, or prefix. */
9898 memory_address_length (addr)
9901 struct ix86_address parts;
9902 rtx base, index, disp;
9905 if (GET_CODE (addr) == PRE_DEC
9906 || GET_CODE (addr) == POST_INC
9907 || GET_CODE (addr) == PRE_MODIFY
9908 || GET_CODE (addr) == POST_MODIFY)
9911 if (! ix86_decompose_address (addr, &parts))
9915 index = parts.index;
9919 /* Register Indirect. */
9920 if (base && !index && !disp)
9922 /* Special cases: ebp and esp need the two-byte modrm form. */
9923 if (addr == stack_pointer_rtx
9924 || addr == arg_pointer_rtx
9925 || addr == frame_pointer_rtx
9926 || addr == hard_frame_pointer_rtx)
9930 /* Direct Addressing. */
9931 else if (disp && !base && !index)
9936 /* Find the length of the displacement constant. */
9939 if (GET_CODE (disp) == CONST_INT
9940 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
9946 /* An index requires the two-byte modrm form. */
9954 /* Compute default value for "length_immediate" attribute. When SHORTFORM is set
9955 expect that insn have 8bit immediate alternative. */
9957 ix86_attr_length_immediate_default (insn, shortform)
9963 extract_insn_cached (insn);
9964 for (i = recog_data.n_operands - 1; i >= 0; --i)
9965 if (CONSTANT_P (recog_data.operand[i]))
9970 && GET_CODE (recog_data.operand[i]) == CONST_INT
9971 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
9975 switch (get_attr_mode (insn))
9986 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
9991 fatal_insn ("unknown insn mode", insn);
9997 /* Compute default value for "length_address" attribute. */
9999 ix86_attr_length_address_default (insn)
10003 extract_insn_cached (insn);
10004 for (i = recog_data.n_operands - 1; i >= 0; --i)
10005 if (GET_CODE (recog_data.operand[i]) == MEM)
10007 return memory_address_length (XEXP (recog_data.operand[i], 0));
10013 /* Return the maximum number of instructions a cpu can issue. */
10020 case PROCESSOR_PENTIUM:
10024 case PROCESSOR_PENTIUMPRO:
10025 case PROCESSOR_PENTIUM4:
10026 case PROCESSOR_ATHLON:
10034 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
10035 by DEP_INSN and nothing set by DEP_INSN. */
10038 ix86_flags_dependant (insn, dep_insn, insn_type)
10039 rtx insn, dep_insn;
10040 enum attr_type insn_type;
10044 /* Simplify the test for uninteresting insns. */
10045 if (insn_type != TYPE_SETCC
10046 && insn_type != TYPE_ICMOV
10047 && insn_type != TYPE_FCMOV
10048 && insn_type != TYPE_IBR)
10051 if ((set = single_set (dep_insn)) != 0)
10053 set = SET_DEST (set);
10056 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
10057 && XVECLEN (PATTERN (dep_insn), 0) == 2
10058 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
10059 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
10061 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10062 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10067 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
10070 /* This test is true if the dependent insn reads the flags but
10071 not any other potentially set register. */
10072 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
10075 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
10081 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
10082 address with operands set by DEP_INSN. */
10085 ix86_agi_dependant (insn, dep_insn, insn_type)
10086 rtx insn, dep_insn;
10087 enum attr_type insn_type;
10091 if (insn_type == TYPE_LEA
10094 addr = PATTERN (insn);
10095 if (GET_CODE (addr) == SET)
10097 else if (GET_CODE (addr) == PARALLEL
10098 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
10099 addr = XVECEXP (addr, 0, 0);
10102 addr = SET_SRC (addr);
10107 extract_insn_cached (insn);
10108 for (i = recog_data.n_operands - 1; i >= 0; --i)
10109 if (GET_CODE (recog_data.operand[i]) == MEM)
10111 addr = XEXP (recog_data.operand[i], 0);
10118 return modified_in_p (addr, dep_insn);
10122 ix86_adjust_cost (insn, link, dep_insn, cost)
10123 rtx insn, link, dep_insn;
10126 enum attr_type insn_type, dep_insn_type;
10127 enum attr_memory memory, dep_memory;
10129 int dep_insn_code_number;
10131 /* Anti and output depenancies have zero cost on all CPUs. */
10132 if (REG_NOTE_KIND (link) != 0)
10135 dep_insn_code_number = recog_memoized (dep_insn);
10137 /* If we can't recognize the insns, we can't really do anything. */
10138 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
10141 insn_type = get_attr_type (insn);
10142 dep_insn_type = get_attr_type (dep_insn);
10146 case PROCESSOR_PENTIUM:
10147 /* Address Generation Interlock adds a cycle of latency. */
10148 if (ix86_agi_dependant (insn, dep_insn, insn_type))
10151 /* ??? Compares pair with jump/setcc. */
10152 if (ix86_flags_dependant (insn, dep_insn, insn_type))
10155 /* Floating point stores require value to be ready one cycle ealier. */
10156 if (insn_type == TYPE_FMOV
10157 && get_attr_memory (insn) == MEMORY_STORE
10158 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10162 case PROCESSOR_PENTIUMPRO:
10163 memory = get_attr_memory (insn);
10164 dep_memory = get_attr_memory (dep_insn);
10166 /* Since we can't represent delayed latencies of load+operation,
10167 increase the cost here for non-imov insns. */
10168 if (dep_insn_type != TYPE_IMOV
10169 && dep_insn_type != TYPE_FMOV
10170 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
10173 /* INT->FP conversion is expensive. */
10174 if (get_attr_fp_int_src (dep_insn))
10177 /* There is one cycle extra latency between an FP op and a store. */
10178 if (insn_type == TYPE_FMOV
10179 && (set = single_set (dep_insn)) != NULL_RTX
10180 && (set2 = single_set (insn)) != NULL_RTX
10181 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
10182 && GET_CODE (SET_DEST (set2)) == MEM)
10185 /* Show ability of reorder buffer to hide latency of load by executing
10186 in parallel with previous instruction in case
10187 previous instruction is not needed to compute the address. */
10188 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10189 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10191 /* Claim moves to take one cycle, as core can issue one load
10192 at time and the next load can start cycle later. */
10193 if (dep_insn_type == TYPE_IMOV
10194 || dep_insn_type == TYPE_FMOV)
10202 memory = get_attr_memory (insn);
10203 dep_memory = get_attr_memory (dep_insn);
10204 /* The esp dependency is resolved before the instruction is really
10206 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
10207 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
10210 /* Since we can't represent delayed latencies of load+operation,
10211 increase the cost here for non-imov insns. */
10212 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10213 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
10215 /* INT->FP conversion is expensive. */
10216 if (get_attr_fp_int_src (dep_insn))
10219 /* Show ability of reorder buffer to hide latency of load by executing
10220 in parallel with previous instruction in case
10221 previous instruction is not needed to compute the address. */
10222 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10223 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10225 /* Claim moves to take one cycle, as core can issue one load
10226 at time and the next load can start cycle later. */
10227 if (dep_insn_type == TYPE_IMOV
10228 || dep_insn_type == TYPE_FMOV)
10237 case PROCESSOR_ATHLON:
10238 memory = get_attr_memory (insn);
10239 dep_memory = get_attr_memory (dep_insn);
10241 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10243 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
10248 /* Show ability of reorder buffer to hide latency of load by executing
10249 in parallel with previous instruction in case
10250 previous instruction is not needed to compute the address. */
10251 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10252 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10254 /* Claim moves to take one cycle, as core can issue one load
10255 at time and the next load can start cycle later. */
10256 if (dep_insn_type == TYPE_IMOV
10257 || dep_insn_type == TYPE_FMOV)
10259 else if (cost >= 3)
10274 struct ppro_sched_data
10277 int issued_this_cycle;
10281 static enum attr_ppro_uops
10282 ix86_safe_ppro_uops (insn)
10285 if (recog_memoized (insn) >= 0)
10286 return get_attr_ppro_uops (insn);
10288 return PPRO_UOPS_MANY;
10292 ix86_dump_ppro_packet (dump)
10295 if (ix86_sched_data.ppro.decode[0])
10297 fprintf (dump, "PPRO packet: %d",
10298 INSN_UID (ix86_sched_data.ppro.decode[0]));
10299 if (ix86_sched_data.ppro.decode[1])
10300 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
10301 if (ix86_sched_data.ppro.decode[2])
10302 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
10303 fputc ('\n', dump);
10307 /* We're beginning a new block. Initialize data structures as necessary. */
10310 ix86_sched_init (dump, sched_verbose, veclen)
10311 FILE *dump ATTRIBUTE_UNUSED;
10312 int sched_verbose ATTRIBUTE_UNUSED;
10313 int veclen ATTRIBUTE_UNUSED;
10315 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
10318 /* Shift INSN to SLOT, and shift everything else down. */
10321 ix86_reorder_insn (insnp, slot)
10328 insnp[0] = insnp[1];
10329 while (++insnp != slot);
10335 ix86_sched_reorder_ppro (ready, e_ready)
10340 enum attr_ppro_uops cur_uops;
10341 int issued_this_cycle;
10345 /* At this point .ppro.decode contains the state of the three
10346 decoders from last "cycle". That is, those insns that were
10347 actually independent. But here we're scheduling for the
10348 decoder, and we may find things that are decodable in the
10351 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
10352 issued_this_cycle = 0;
10355 cur_uops = ix86_safe_ppro_uops (*insnp);
10357 /* If the decoders are empty, and we've a complex insn at the
10358 head of the priority queue, let it issue without complaint. */
10359 if (decode[0] == NULL)
10361 if (cur_uops == PPRO_UOPS_MANY)
10363 decode[0] = *insnp;
10367 /* Otherwise, search for a 2-4 uop unsn to issue. */
10368 while (cur_uops != PPRO_UOPS_FEW)
10370 if (insnp == ready)
10372 cur_uops = ix86_safe_ppro_uops (*--insnp);
10375 /* If so, move it to the head of the line. */
10376 if (cur_uops == PPRO_UOPS_FEW)
10377 ix86_reorder_insn (insnp, e_ready);
10379 /* Issue the head of the queue. */
10380 issued_this_cycle = 1;
10381 decode[0] = *e_ready--;
10384 /* Look for simple insns to fill in the other two slots. */
10385 for (i = 1; i < 3; ++i)
10386 if (decode[i] == NULL)
10388 if (ready >= e_ready)
10392 cur_uops = ix86_safe_ppro_uops (*insnp);
10393 while (cur_uops != PPRO_UOPS_ONE)
10395 if (insnp == ready)
10397 cur_uops = ix86_safe_ppro_uops (*--insnp);
10400 /* Found one. Move it to the head of the queue and issue it. */
10401 if (cur_uops == PPRO_UOPS_ONE)
10403 ix86_reorder_insn (insnp, e_ready);
10404 decode[i] = *e_ready--;
10405 issued_this_cycle++;
10409 /* ??? Didn't find one. Ideally, here we would do a lazy split
10410 of 2-uop insns, issue one and queue the other. */
10414 if (issued_this_cycle == 0)
10415 issued_this_cycle = 1;
10416 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
10419 /* We are about to being issuing insns for this clock cycle.
10420 Override the default sort algorithm to better slot instructions. */
10422 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
10423 FILE *dump ATTRIBUTE_UNUSED;
10424 int sched_verbose ATTRIBUTE_UNUSED;
10427 int clock_var ATTRIBUTE_UNUSED;
10429 int n_ready = *n_readyp;
10430 rtx *e_ready = ready + n_ready - 1;
10440 case PROCESSOR_PENTIUMPRO:
10441 ix86_sched_reorder_ppro (ready, e_ready);
10446 return ix86_issue_rate ();
10449 /* We are about to issue INSN. Return the number of insns left on the
10450 ready queue that can be issued this cycle. */
10453 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
10457 int can_issue_more;
10463 return can_issue_more - 1;
10465 case PROCESSOR_PENTIUMPRO:
10467 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
10469 if (uops == PPRO_UOPS_MANY)
10472 ix86_dump_ppro_packet (dump);
10473 ix86_sched_data.ppro.decode[0] = insn;
10474 ix86_sched_data.ppro.decode[1] = NULL;
10475 ix86_sched_data.ppro.decode[2] = NULL;
10477 ix86_dump_ppro_packet (dump);
10478 ix86_sched_data.ppro.decode[0] = NULL;
10480 else if (uops == PPRO_UOPS_FEW)
10483 ix86_dump_ppro_packet (dump);
10484 ix86_sched_data.ppro.decode[0] = insn;
10485 ix86_sched_data.ppro.decode[1] = NULL;
10486 ix86_sched_data.ppro.decode[2] = NULL;
10490 for (i = 0; i < 3; ++i)
10491 if (ix86_sched_data.ppro.decode[i] == NULL)
10493 ix86_sched_data.ppro.decode[i] = insn;
10501 ix86_dump_ppro_packet (dump);
10502 ix86_sched_data.ppro.decode[0] = NULL;
10503 ix86_sched_data.ppro.decode[1] = NULL;
10504 ix86_sched_data.ppro.decode[2] = NULL;
10508 return --ix86_sched_data.ppro.issued_this_cycle;
10513 ia32_use_dfa_pipeline_interface ()
10515 if (ix86_cpu == PROCESSOR_PENTIUM)
10520 /* How many alternative schedules to try. This should be as wide as the
10521 scheduling freedom in the DFA, but no wider. Making this value too
10522 large results extra work for the scheduler. */
10525 ia32_multipass_dfa_lookahead ()
10527 if (ix86_cpu == PROCESSOR_PENTIUM)
10534 /* Walk through INSNS and look for MEM references whose address is DSTREG or
10535 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
10539 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
10541 rtx dstref, srcref, dstreg, srcreg;
10545 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
10547 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
10551 /* Subroutine of above to actually do the updating by recursively walking
10555 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
10557 rtx dstref, srcref, dstreg, srcreg;
10559 enum rtx_code code = GET_CODE (x);
10560 const char *format_ptr = GET_RTX_FORMAT (code);
10563 if (code == MEM && XEXP (x, 0) == dstreg)
10564 MEM_COPY_ATTRIBUTES (x, dstref);
10565 else if (code == MEM && XEXP (x, 0) == srcreg)
10566 MEM_COPY_ATTRIBUTES (x, srcref);
10568 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
10570 if (*format_ptr == 'e')
10571 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
10573 else if (*format_ptr == 'E')
10574 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10575 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
10580 /* Compute the alignment given to a constant that is being placed in memory.
10581 EXP is the constant and ALIGN is the alignment that the object would
10583 The value of this function is used instead of that alignment to align
10587 ix86_constant_alignment (exp, align)
10591 if (TREE_CODE (exp) == REAL_CST)
10593 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
10595 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
10598 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
10605 /* Compute the alignment for a static variable.
10606 TYPE is the data type, and ALIGN is the alignment that
10607 the object would ordinarily have. The value of this function is used
10608 instead of that alignment to align the object. */
10611 ix86_data_alignment (type, align)
10615 if (AGGREGATE_TYPE_P (type)
10616 && TYPE_SIZE (type)
10617 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10618 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
10619 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
10622 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
10623 to 16byte boundary. */
10626 if (AGGREGATE_TYPE_P (type)
10627 && TYPE_SIZE (type)
10628 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10629 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
10630 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10634 if (TREE_CODE (type) == ARRAY_TYPE)
10636 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10638 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10641 else if (TREE_CODE (type) == COMPLEX_TYPE)
10644 if (TYPE_MODE (type) == DCmode && align < 64)
10646 if (TYPE_MODE (type) == XCmode && align < 128)
10649 else if ((TREE_CODE (type) == RECORD_TYPE
10650 || TREE_CODE (type) == UNION_TYPE
10651 || TREE_CODE (type) == QUAL_UNION_TYPE)
10652 && TYPE_FIELDS (type))
10654 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10656 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10659 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10660 || TREE_CODE (type) == INTEGER_TYPE)
10662 if (TYPE_MODE (type) == DFmode && align < 64)
10664 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10671 /* Compute the alignment for a local variable.
10672 TYPE is the data type, and ALIGN is the alignment that
10673 the object would ordinarily have. The value of this macro is used
10674 instead of that alignment to align the object. */
10677 ix86_local_alignment (type, align)
10681 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
10682 to 16byte boundary. */
10685 if (AGGREGATE_TYPE_P (type)
10686 && TYPE_SIZE (type)
10687 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10688 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
10689 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10692 if (TREE_CODE (type) == ARRAY_TYPE)
10694 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10696 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10699 else if (TREE_CODE (type) == COMPLEX_TYPE)
10701 if (TYPE_MODE (type) == DCmode && align < 64)
10703 if (TYPE_MODE (type) == XCmode && align < 128)
10706 else if ((TREE_CODE (type) == RECORD_TYPE
10707 || TREE_CODE (type) == UNION_TYPE
10708 || TREE_CODE (type) == QUAL_UNION_TYPE)
10709 && TYPE_FIELDS (type))
10711 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10713 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10716 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10717 || TREE_CODE (type) == INTEGER_TYPE)
10720 if (TYPE_MODE (type) == DFmode && align < 64)
10722 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10728 /* Emit RTL insns to initialize the variable parts of a trampoline.
10729 FNADDR is an RTX for the address of the function's pure code.
10730 CXT is an RTX for the static chain value for the function. */
10732 x86_initialize_trampoline (tramp, fnaddr, cxt)
10733 rtx tramp, fnaddr, cxt;
10737 /* Compute offset from the end of the jmp to the target function. */
10738 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
10739 plus_constant (tramp, 10),
10740 NULL_RTX, 1, OPTAB_DIRECT);
10741 emit_move_insn (gen_rtx_MEM (QImode, tramp),
10742 gen_int_mode (0xb9, QImode));
10743 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
10744 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
10745 gen_int_mode (0xe9, QImode));
10746 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
10751 /* Try to load address using shorter movl instead of movabs.
10752 We may want to support movq for kernel mode, but kernel does not use
10753 trampolines at the moment. */
10754 if (x86_64_zero_extended_value (fnaddr))
10756 fnaddr = copy_to_mode_reg (DImode, fnaddr);
10757 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10758 gen_int_mode (0xbb41, HImode));
10759 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
10760 gen_lowpart (SImode, fnaddr));
10765 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10766 gen_int_mode (0xbb49, HImode));
10767 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10771 /* Load static chain using movabs to r10. */
10772 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10773 gen_int_mode (0xba49, HImode));
10774 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10777 /* Jump to the r11 */
10778 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10779 gen_int_mode (0xff49, HImode));
10780 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
10781 gen_int_mode (0xe3, QImode));
10783 if (offset > TRAMPOLINE_SIZE)
10788 #define def_builtin(MASK, NAME, TYPE, CODE) \
10790 if ((MASK) & target_flags) \
10791 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL); \
10794 struct builtin_description
10796 const unsigned int mask;
10797 const enum insn_code icode;
10798 const char *const name;
10799 const enum ix86_builtins code;
10800 const enum rtx_code comparison;
10801 const unsigned int flag;
10804 /* Used for builtins that are enabled both by -msse and -msse2. */
10805 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
10807 static const struct builtin_description bdesc_comi[] =
10809 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
10810 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
10811 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
10812 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
10813 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
10814 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
10815 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
10816 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
10817 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
10818 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
10819 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
10820 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
10821 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
10822 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
10823 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
10824 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
10825 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
10826 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
10827 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
10828 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
10829 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
10830 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
10831 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
10832 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
10835 static const struct builtin_description bdesc_2arg[] =
10838 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
10839 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
10840 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
10841 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
10842 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
10843 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
10844 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
10845 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
10847 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
10848 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
10849 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
10850 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
10851 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
10852 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
10853 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
10854 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
10855 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
10856 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
10857 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
10858 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
10859 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
10860 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
10861 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
10862 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
10863 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
10864 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
10865 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
10866 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
10867 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
10868 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
10869 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
10870 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
10872 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
10873 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
10874 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
10875 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
10877 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
10878 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
10879 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
10880 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
10881 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
10884 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
10885 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
10886 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
10887 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
10888 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
10889 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
10891 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
10892 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
10893 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
10894 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
10895 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
10896 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
10897 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
10898 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
10900 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
10901 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
10902 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
10904 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
10905 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
10906 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
10907 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
10909 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
10910 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
10912 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
10913 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
10914 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
10915 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
10916 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
10917 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
10919 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
10920 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
10921 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
10922 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
10924 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
10925 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
10926 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
10927 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
10928 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
10929 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
10932 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
10933 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
10934 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
10936 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
10937 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
10939 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
10940 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
10941 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
10942 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
10943 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
10944 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
10946 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
10947 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
10948 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
10949 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
10950 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
10951 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
10953 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
10954 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
10955 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
10956 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
10958 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
10959 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
10962 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
10963 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
10964 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
10965 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
10966 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
10967 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
10968 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
10969 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
10971 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
10972 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
10973 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
10974 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
10975 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
10976 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
10977 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
10978 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
10979 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
10980 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
10981 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
10982 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
10983 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
10984 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
10985 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
10986 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 },
10987 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 },
10988 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
10989 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
10990 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
10991 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
10992 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 },
10993 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 },
10994 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
10996 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
10997 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
10998 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
10999 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
11001 { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
11002 { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
11003 { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
11004 { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
11006 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
11007 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
11008 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
11011 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
11012 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
11013 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
11014 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
11015 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
11016 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
11017 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
11018 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
11020 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
11021 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
11022 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
11023 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
11024 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
11025 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
11026 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
11027 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
11029 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
11030 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
11031 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
11032 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
11034 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
11035 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
11036 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
11037 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
11039 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
11040 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
11042 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
11043 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
11044 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
11045 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
11046 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
11047 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
11049 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
11050 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
11051 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
11052 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
11054 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
11055 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
11056 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
11057 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
11058 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
11059 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
11061 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
11062 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
11063 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
11065 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
11066 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
11068 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
11069 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
11070 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
11071 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
11072 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
11073 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
11075 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
11076 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
11077 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
11078 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
11079 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
11080 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
11082 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
11083 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
11084 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
11085 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
11087 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
11089 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
11090 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
11091 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
11094 static const struct builtin_description bdesc_1arg[] =
11096 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
11097 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
11099 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
11100 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
11101 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
11103 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
11104 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
11105 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
11106 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
11108 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
11109 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
11110 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
11112 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
11114 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
11115 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
11117 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
11118 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
11119 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
11120 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
11121 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
11123 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
11125 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
11126 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
11128 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
11129 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
11130 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
11134 ix86_init_builtins ()
11137 ix86_init_mmx_sse_builtins ();
11140 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
11141 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
11144 ix86_init_mmx_sse_builtins ()
11146 const struct builtin_description * d;
11148 tree endlink = void_list_node;
11150 tree pchar_type_node = build_pointer_type (char_type_node);
11151 tree pfloat_type_node = build_pointer_type (float_type_node);
11152 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
11153 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
11154 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
11157 tree int_ftype_v4sf_v4sf
11158 = build_function_type (integer_type_node,
11159 tree_cons (NULL_TREE, V4SF_type_node,
11160 tree_cons (NULL_TREE,
11163 tree v4si_ftype_v4sf_v4sf
11164 = build_function_type (V4SI_type_node,
11165 tree_cons (NULL_TREE, V4SF_type_node,
11166 tree_cons (NULL_TREE,
11169 /* MMX/SSE/integer conversions. */
11170 tree int_ftype_v4sf
11171 = build_function_type (integer_type_node,
11172 tree_cons (NULL_TREE, V4SF_type_node,
11174 tree int_ftype_v8qi
11175 = build_function_type (integer_type_node,
11176 tree_cons (NULL_TREE, V8QI_type_node,
11178 tree v4sf_ftype_v4sf_int
11179 = build_function_type (V4SF_type_node,
11180 tree_cons (NULL_TREE, V4SF_type_node,
11181 tree_cons (NULL_TREE, integer_type_node,
11183 tree v4sf_ftype_v4sf_v2si
11184 = build_function_type (V4SF_type_node,
11185 tree_cons (NULL_TREE, V4SF_type_node,
11186 tree_cons (NULL_TREE, V2SI_type_node,
11188 tree int_ftype_v4hi_int
11189 = build_function_type (integer_type_node,
11190 tree_cons (NULL_TREE, V4HI_type_node,
11191 tree_cons (NULL_TREE, integer_type_node,
11193 tree v4hi_ftype_v4hi_int_int
11194 = build_function_type (V4HI_type_node,
11195 tree_cons (NULL_TREE, V4HI_type_node,
11196 tree_cons (NULL_TREE, integer_type_node,
11197 tree_cons (NULL_TREE,
11200 /* Miscellaneous. */
11201 tree v8qi_ftype_v4hi_v4hi
11202 = build_function_type (V8QI_type_node,
11203 tree_cons (NULL_TREE, V4HI_type_node,
11204 tree_cons (NULL_TREE, V4HI_type_node,
11206 tree v4hi_ftype_v2si_v2si
11207 = build_function_type (V4HI_type_node,
11208 tree_cons (NULL_TREE, V2SI_type_node,
11209 tree_cons (NULL_TREE, V2SI_type_node,
11211 tree v4sf_ftype_v4sf_v4sf_int
11212 = build_function_type (V4SF_type_node,
11213 tree_cons (NULL_TREE, V4SF_type_node,
11214 tree_cons (NULL_TREE, V4SF_type_node,
11215 tree_cons (NULL_TREE,
11218 tree v2si_ftype_v4hi_v4hi
11219 = build_function_type (V2SI_type_node,
11220 tree_cons (NULL_TREE, V4HI_type_node,
11221 tree_cons (NULL_TREE, V4HI_type_node,
11223 tree v4hi_ftype_v4hi_int
11224 = build_function_type (V4HI_type_node,
11225 tree_cons (NULL_TREE, V4HI_type_node,
11226 tree_cons (NULL_TREE, integer_type_node,
11228 tree v4hi_ftype_v4hi_di
11229 = build_function_type (V4HI_type_node,
11230 tree_cons (NULL_TREE, V4HI_type_node,
11231 tree_cons (NULL_TREE,
11232 long_long_integer_type_node,
11234 tree v2si_ftype_v2si_di
11235 = build_function_type (V2SI_type_node,
11236 tree_cons (NULL_TREE, V2SI_type_node,
11237 tree_cons (NULL_TREE,
11238 long_long_integer_type_node,
11240 tree void_ftype_void
11241 = build_function_type (void_type_node, endlink);
11242 tree void_ftype_unsigned
11243 = build_function_type (void_type_node,
11244 tree_cons (NULL_TREE, unsigned_type_node,
11246 tree unsigned_ftype_void
11247 = build_function_type (unsigned_type_node, endlink);
11249 = build_function_type (long_long_unsigned_type_node, endlink);
11250 tree v4sf_ftype_void
11251 = build_function_type (V4SF_type_node, endlink);
11252 tree v2si_ftype_v4sf
11253 = build_function_type (V2SI_type_node,
11254 tree_cons (NULL_TREE, V4SF_type_node,
11256 /* Loads/stores. */
11257 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
11258 tree_cons (NULL_TREE, V8QI_type_node,
11259 tree_cons (NULL_TREE,
11262 tree void_ftype_v8qi_v8qi_pchar
11263 = build_function_type (void_type_node, maskmovq_args);
11264 tree v4sf_ftype_pfloat
11265 = build_function_type (V4SF_type_node,
11266 tree_cons (NULL_TREE, pfloat_type_node,
11268 /* @@@ the type is bogus */
11269 tree v4sf_ftype_v4sf_pv2si
11270 = build_function_type (V4SF_type_node,
11271 tree_cons (NULL_TREE, V4SF_type_node,
11272 tree_cons (NULL_TREE, pv2si_type_node,
11274 tree void_ftype_pv2si_v4sf
11275 = build_function_type (void_type_node,
11276 tree_cons (NULL_TREE, pv2si_type_node,
11277 tree_cons (NULL_TREE, V4SF_type_node,
11279 tree void_ftype_pfloat_v4sf
11280 = build_function_type (void_type_node,
11281 tree_cons (NULL_TREE, pfloat_type_node,
11282 tree_cons (NULL_TREE, V4SF_type_node,
11284 tree void_ftype_pdi_di
11285 = build_function_type (void_type_node,
11286 tree_cons (NULL_TREE, pdi_type_node,
11287 tree_cons (NULL_TREE,
11288 long_long_unsigned_type_node,
11290 tree void_ftype_pv2di_v2di
11291 = build_function_type (void_type_node,
11292 tree_cons (NULL_TREE, pv2di_type_node,
11293 tree_cons (NULL_TREE,
11296 /* Normal vector unops. */
11297 tree v4sf_ftype_v4sf
11298 = build_function_type (V4SF_type_node,
11299 tree_cons (NULL_TREE, V4SF_type_node,
11302 /* Normal vector binops. */
11303 tree v4sf_ftype_v4sf_v4sf
11304 = build_function_type (V4SF_type_node,
11305 tree_cons (NULL_TREE, V4SF_type_node,
11306 tree_cons (NULL_TREE, V4SF_type_node,
11308 tree v8qi_ftype_v8qi_v8qi
11309 = build_function_type (V8QI_type_node,
11310 tree_cons (NULL_TREE, V8QI_type_node,
11311 tree_cons (NULL_TREE, V8QI_type_node,
11313 tree v4hi_ftype_v4hi_v4hi
11314 = build_function_type (V4HI_type_node,
11315 tree_cons (NULL_TREE, V4HI_type_node,
11316 tree_cons (NULL_TREE, V4HI_type_node,
11318 tree v2si_ftype_v2si_v2si
11319 = build_function_type (V2SI_type_node,
11320 tree_cons (NULL_TREE, V2SI_type_node,
11321 tree_cons (NULL_TREE, V2SI_type_node,
11323 tree di_ftype_di_di
11324 = build_function_type (long_long_unsigned_type_node,
11325 tree_cons (NULL_TREE, long_long_unsigned_type_node,
11326 tree_cons (NULL_TREE,
11327 long_long_unsigned_type_node,
11330 tree v2si_ftype_v2sf
11331 = build_function_type (V2SI_type_node,
11332 tree_cons (NULL_TREE, V2SF_type_node,
11334 tree v2sf_ftype_v2si
11335 = build_function_type (V2SF_type_node,
11336 tree_cons (NULL_TREE, V2SI_type_node,
11338 tree v2si_ftype_v2si
11339 = build_function_type (V2SI_type_node,
11340 tree_cons (NULL_TREE, V2SI_type_node,
11342 tree v2sf_ftype_v2sf
11343 = build_function_type (V2SF_type_node,
11344 tree_cons (NULL_TREE, V2SF_type_node,
11346 tree v2sf_ftype_v2sf_v2sf
11347 = build_function_type (V2SF_type_node,
11348 tree_cons (NULL_TREE, V2SF_type_node,
11349 tree_cons (NULL_TREE,
11352 tree v2si_ftype_v2sf_v2sf
11353 = build_function_type (V2SI_type_node,
11354 tree_cons (NULL_TREE, V2SF_type_node,
11355 tree_cons (NULL_TREE,
11358 tree pint_type_node = build_pointer_type (integer_type_node);
11359 tree pdouble_type_node = build_pointer_type (double_type_node);
11360 tree int_ftype_v2df_v2df
11361 = build_function_type (integer_type_node,
11362 tree_cons (NULL_TREE, V2DF_type_node,
11363 tree_cons (NULL_TREE, V2DF_type_node, endlink)));
11366 = build_function_type (intTI_type_node, endlink);
11367 tree ti_ftype_ti_ti
11368 = build_function_type (intTI_type_node,
11369 tree_cons (NULL_TREE, intTI_type_node,
11370 tree_cons (NULL_TREE, intTI_type_node,
11372 tree void_ftype_pvoid
11373 = build_function_type (void_type_node,
11374 tree_cons (NULL_TREE, ptr_type_node, endlink));
11376 = build_function_type (V2DI_type_node,
11377 tree_cons (NULL_TREE, long_long_unsigned_type_node,
11379 tree v4sf_ftype_v4si
11380 = build_function_type (V4SF_type_node,
11381 tree_cons (NULL_TREE, V4SI_type_node, endlink));
11382 tree v4si_ftype_v4sf
11383 = build_function_type (V4SI_type_node,
11384 tree_cons (NULL_TREE, V4SF_type_node, endlink));
11385 tree v2df_ftype_v4si
11386 = build_function_type (V2DF_type_node,
11387 tree_cons (NULL_TREE, V4SI_type_node, endlink));
11388 tree v4si_ftype_v2df
11389 = build_function_type (V4SI_type_node,
11390 tree_cons (NULL_TREE, V2DF_type_node, endlink));
11391 tree v2si_ftype_v2df
11392 = build_function_type (V2SI_type_node,
11393 tree_cons (NULL_TREE, V2DF_type_node, endlink));
11394 tree v4sf_ftype_v2df
11395 = build_function_type (V4SF_type_node,
11396 tree_cons (NULL_TREE, V2DF_type_node, endlink));
11397 tree v2df_ftype_v2si
11398 = build_function_type (V2DF_type_node,
11399 tree_cons (NULL_TREE, V2SI_type_node, endlink));
11400 tree v2df_ftype_v4sf
11401 = build_function_type (V2DF_type_node,
11402 tree_cons (NULL_TREE, V4SF_type_node, endlink));
11403 tree int_ftype_v2df
11404 = build_function_type (integer_type_node,
11405 tree_cons (NULL_TREE, V2DF_type_node, endlink));
11406 tree v2df_ftype_v2df_int
11407 = build_function_type (V2DF_type_node,
11408 tree_cons (NULL_TREE, V2DF_type_node,
11409 tree_cons (NULL_TREE, integer_type_node,
11411 tree v4sf_ftype_v4sf_v2df
11412 = build_function_type (V4SF_type_node,
11413 tree_cons (NULL_TREE, V4SF_type_node,
11414 tree_cons (NULL_TREE, V2DF_type_node,
11416 tree v2df_ftype_v2df_v4sf
11417 = build_function_type (V2DF_type_node,
11418 tree_cons (NULL_TREE, V2DF_type_node,
11419 tree_cons (NULL_TREE, V4SF_type_node,
11421 tree v2df_ftype_v2df_v2df_int
11422 = build_function_type (V2DF_type_node,
11423 tree_cons (NULL_TREE, V2DF_type_node,
11424 tree_cons (NULL_TREE, V2DF_type_node,
11425 tree_cons (NULL_TREE,
11428 tree v2df_ftype_v2df_pv2si
11429 = build_function_type (V2DF_type_node,
11430 tree_cons (NULL_TREE, V2DF_type_node,
11431 tree_cons (NULL_TREE, pv2si_type_node,
11433 tree void_ftype_pv2si_v2df
11434 = build_function_type (void_type_node,
11435 tree_cons (NULL_TREE, pv2si_type_node,
11436 tree_cons (NULL_TREE, V2DF_type_node,
11438 tree void_ftype_pdouble_v2df
11439 = build_function_type (void_type_node,
11440 tree_cons (NULL_TREE, pdouble_type_node,
11441 tree_cons (NULL_TREE, V2DF_type_node,
11443 tree void_ftype_pint_int
11444 = build_function_type (void_type_node,
11445 tree_cons (NULL_TREE, pint_type_node,
11446 tree_cons (NULL_TREE, integer_type_node,
11448 tree maskmovdqu_args = tree_cons (NULL_TREE, V16QI_type_node,
11449 tree_cons (NULL_TREE, V16QI_type_node,
11450 tree_cons (NULL_TREE,
11453 tree void_ftype_v16qi_v16qi_pchar
11454 = build_function_type (void_type_node, maskmovdqu_args);
11455 tree v2df_ftype_pdouble
11456 = build_function_type (V2DF_type_node,
11457 tree_cons (NULL_TREE, pdouble_type_node,
11459 tree v2df_ftype_v2df_v2df
11460 = build_function_type (V2DF_type_node,
11461 tree_cons (NULL_TREE, V2DF_type_node,
11462 tree_cons (NULL_TREE, V2DF_type_node,
11464 tree v16qi_ftype_v16qi_v16qi
11465 = build_function_type (V16QI_type_node,
11466 tree_cons (NULL_TREE, V16QI_type_node,
11467 tree_cons (NULL_TREE, V16QI_type_node,
11469 tree v8hi_ftype_v8hi_v8hi
11470 = build_function_type (V8HI_type_node,
11471 tree_cons (NULL_TREE, V8HI_type_node,
11472 tree_cons (NULL_TREE, V8HI_type_node,
11474 tree v4si_ftype_v4si_v4si
11475 = build_function_type (V4SI_type_node,
11476 tree_cons (NULL_TREE, V4SI_type_node,
11477 tree_cons (NULL_TREE, V4SI_type_node,
11479 tree v2di_ftype_v2di_v2di
11480 = build_function_type (V2DI_type_node,
11481 tree_cons (NULL_TREE, V2DI_type_node,
11482 tree_cons (NULL_TREE, V2DI_type_node,
11484 tree v2di_ftype_v2df_v2df
11485 = build_function_type (V2DI_type_node,
11486 tree_cons (NULL_TREE, V2DF_type_node,
11487 tree_cons (NULL_TREE, V2DF_type_node,
11489 tree v2df_ftype_v2df
11490 = build_function_type (V2DF_type_node,
11491 tree_cons (NULL_TREE, V2DF_type_node,
11493 tree v2df_ftype_double
11494 = build_function_type (V2DF_type_node,
11495 tree_cons (NULL_TREE, double_type_node,
11497 tree v2df_ftype_double_double
11498 = build_function_type (V2DF_type_node,
11499 tree_cons (NULL_TREE, double_type_node,
11500 tree_cons (NULL_TREE, double_type_node,
11502 tree int_ftype_v8hi_int
11503 = build_function_type (integer_type_node,
11504 tree_cons (NULL_TREE, V8HI_type_node,
11505 tree_cons (NULL_TREE, integer_type_node,
11507 tree v8hi_ftype_v8hi_int_int
11508 = build_function_type (V8HI_type_node,
11509 tree_cons (NULL_TREE, V8HI_type_node,
11510 tree_cons (NULL_TREE, integer_type_node,
11511 tree_cons (NULL_TREE,
11514 tree v2di_ftype_v2di_int
11515 = build_function_type (V2DI_type_node,
11516 tree_cons (NULL_TREE, V2DI_type_node,
11517 tree_cons (NULL_TREE, integer_type_node,
11519 tree v4si_ftype_v4si_int
11520 = build_function_type (V4SI_type_node,
11521 tree_cons (NULL_TREE, V4SI_type_node,
11522 tree_cons (NULL_TREE, integer_type_node,
11524 tree v8hi_ftype_v8hi_int
11525 = build_function_type (V8HI_type_node,
11526 tree_cons (NULL_TREE, V8HI_type_node,
11527 tree_cons (NULL_TREE, integer_type_node,
11529 tree v8hi_ftype_v8hi_v2di
11530 = build_function_type (V8HI_type_node,
11531 tree_cons (NULL_TREE, V8HI_type_node,
11532 tree_cons (NULL_TREE, V2DI_type_node,
11534 tree v4si_ftype_v4si_v2di
11535 = build_function_type (V4SI_type_node,
11536 tree_cons (NULL_TREE, V4SI_type_node,
11537 tree_cons (NULL_TREE, V2DI_type_node,
11539 tree v4si_ftype_v8hi_v8hi
11540 = build_function_type (V4SI_type_node,
11541 tree_cons (NULL_TREE, V8HI_type_node,
11542 tree_cons (NULL_TREE, V8HI_type_node,
11544 tree di_ftype_v8qi_v8qi
11545 = build_function_type (long_long_unsigned_type_node,
11546 tree_cons (NULL_TREE, V8QI_type_node,
11547 tree_cons (NULL_TREE, V8QI_type_node,
11549 tree v2di_ftype_v16qi_v16qi
11550 = build_function_type (V2DI_type_node,
11551 tree_cons (NULL_TREE, V16QI_type_node,
11552 tree_cons (NULL_TREE, V16QI_type_node,
11554 tree int_ftype_v16qi
11555 = build_function_type (integer_type_node,
11556 tree_cons (NULL_TREE, V16QI_type_node, endlink));
11558 /* Add all builtins that are more or less simple operations on two
11560 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
11562 /* Use one of the operands; the target can have a different mode for
11563 mask-generating compares. */
11564 enum machine_mode mode;
11569 mode = insn_data[d->icode].operand[1].mode;
11574 type = v16qi_ftype_v16qi_v16qi;
11577 type = v8hi_ftype_v8hi_v8hi;
11580 type = v4si_ftype_v4si_v4si;
11583 type = v2di_ftype_v2di_v2di;
11586 type = v2df_ftype_v2df_v2df;
11589 type = ti_ftype_ti_ti;
11592 type = v4sf_ftype_v4sf_v4sf;
11595 type = v8qi_ftype_v8qi_v8qi;
11598 type = v4hi_ftype_v4hi_v4hi;
11601 type = v2si_ftype_v2si_v2si;
11604 type = di_ftype_di_di;
11611 /* Override for comparisons. */
11612 if (d->icode == CODE_FOR_maskcmpv4sf3
11613 || d->icode == CODE_FOR_maskncmpv4sf3
11614 || d->icode == CODE_FOR_vmmaskcmpv4sf3
11615 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
11616 type = v4si_ftype_v4sf_v4sf;
11618 if (d->icode == CODE_FOR_maskcmpv2df3
11619 || d->icode == CODE_FOR_maskncmpv2df3
11620 || d->icode == CODE_FOR_vmmaskcmpv2df3
11621 || d->icode == CODE_FOR_vmmaskncmpv2df3)
11622 type = v2di_ftype_v2df_v2df;
11624 def_builtin (d->mask, d->name, type, d->code);
11627 /* Add the remaining MMX insns with somewhat more complicated types. */
11628 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
11629 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
11630 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
11631 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
11632 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
11633 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
11634 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
11636 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
11637 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
11638 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
11640 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
11641 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
11643 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
11644 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
11646 /* comi/ucomi insns. */
11647 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
11648 if (d->mask == MASK_SSE2)
11649 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
11651 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
11653 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
11654 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
11655 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
11657 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
11658 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
11659 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
11660 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
11661 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
11662 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
11664 def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
11665 def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
11666 def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
11667 def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
11669 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
11670 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
11672 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
11674 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
11675 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
11676 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
11677 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
11678 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
11679 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
11681 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
11682 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
11683 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
11684 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
11686 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
11687 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
11688 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
11689 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
11691 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
11693 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
11695 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
11696 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
11697 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
11698 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
11699 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
11700 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
11702 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
11704 /* Original 3DNow! */
11705 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
11706 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
11707 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
11708 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
11709 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
11710 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
11711 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
11712 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
11713 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
11714 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
11715 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
11716 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
11717 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
11718 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
11719 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
11720 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
11721 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
11722 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
11723 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
11724 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
11726 /* 3DNow! extension as used in the Athlon CPU. */
11727 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
11728 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
11729 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
11730 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
11731 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
11732 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
11734 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
11737 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
11738 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
11740 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
11741 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
11743 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
11744 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
11745 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
11746 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
11747 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
11748 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
11750 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
11751 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
11752 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
11753 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
11755 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
11756 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
11757 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
11758 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
11759 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
11761 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
11762 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
11763 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
11764 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
11766 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
11767 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
11769 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
11771 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
11772 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
11774 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
11775 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
11776 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
11777 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
11778 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
11780 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
11782 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
11783 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
11785 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
11786 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
11787 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
11789 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
11790 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
11791 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
11793 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
11794 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
11795 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
11796 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
11797 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
11798 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
11799 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
11801 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
11802 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
11803 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
11805 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
11806 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
11807 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
11809 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
11810 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
11811 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
11813 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
11814 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
11816 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
11817 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
11818 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
11820 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
11821 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
11822 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
11824 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
11825 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
11827 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
11830 /* Errors in the source file can cause expand_expr to return const0_rtx
11831 where we expect a vector. To avoid crashing, use one of the vector
11832 clear instructions. */
11834 safe_vector_operand (x, mode)
11836 enum machine_mode mode;
11838 if (x != const0_rtx)
11840 x = gen_reg_rtx (mode);
11842 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
11843 emit_insn (gen_mmx_clrdi (mode == DImode ? x
11844 : gen_rtx_SUBREG (DImode, x, 0)));
11846 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
11847 : gen_rtx_SUBREG (V4SFmode, x, 0)));
11851 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
11854 ix86_expand_binop_builtin (icode, arglist, target)
11855 enum insn_code icode;
11860 tree arg0 = TREE_VALUE (arglist);
11861 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11862 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11863 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11864 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11865 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11866 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
11868 if (VECTOR_MODE_P (mode0))
11869 op0 = safe_vector_operand (op0, mode0);
11870 if (VECTOR_MODE_P (mode1))
11871 op1 = safe_vector_operand (op1, mode1);
11874 || GET_MODE (target) != tmode
11875 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11876 target = gen_reg_rtx (tmode);
11878 /* In case the insn wants input operands in modes different from
11879 the result, abort. */
11880 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
11883 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11884 op0 = copy_to_mode_reg (mode0, op0);
11885 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11886 op1 = copy_to_mode_reg (mode1, op1);
11888 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
11889 yet one of the two must not be a memory. This is normally enforced
11890 by expanders, but we didn't bother to create one here. */
11891 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
11892 op0 = copy_to_mode_reg (mode0, op0);
11894 pat = GEN_FCN (icode) (target, op0, op1);
11901 /* In type_for_mode we restrict the ability to create TImode types
11902 to hosts with 64-bit H_W_I. So we've defined the SSE logicals
11903 to have a V4SFmode signature. Convert them in-place to TImode. */
11906 ix86_expand_timode_binop_builtin (icode, arglist, target)
11907 enum insn_code icode;
11912 tree arg0 = TREE_VALUE (arglist);
11913 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11914 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11915 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11917 op0 = gen_lowpart (TImode, op0);
11918 op1 = gen_lowpart (TImode, op1);
11919 target = gen_reg_rtx (TImode);
11921 if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
11922 op0 = copy_to_mode_reg (TImode, op0);
11923 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
11924 op1 = copy_to_mode_reg (TImode, op1);
11926 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
11927 yet one of the two must not be a memory. This is normally enforced
11928 by expanders, but we didn't bother to create one here. */
11929 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
11930 op0 = copy_to_mode_reg (TImode, op0);
11932 pat = GEN_FCN (icode) (target, op0, op1);
11937 return gen_lowpart (V4SFmode, target);
11940 /* Subroutine of ix86_expand_builtin to take care of stores. */
11943 ix86_expand_store_builtin (icode, arglist)
11944 enum insn_code icode;
11948 tree arg0 = TREE_VALUE (arglist);
11949 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11950 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11951 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11952 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
11953 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
11955 if (VECTOR_MODE_P (mode1))
11956 op1 = safe_vector_operand (op1, mode1);
11958 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11960 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11961 op1 = copy_to_mode_reg (mode1, op1);
11963 pat = GEN_FCN (icode) (op0, op1);
11969 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
11972 ix86_expand_unop_builtin (icode, arglist, target, do_load)
11973 enum insn_code icode;
11979 tree arg0 = TREE_VALUE (arglist);
11980 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11981 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11982 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11985 || GET_MODE (target) != tmode
11986 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11987 target = gen_reg_rtx (tmode);
11989 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11992 if (VECTOR_MODE_P (mode0))
11993 op0 = safe_vector_operand (op0, mode0);
11995 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11996 op0 = copy_to_mode_reg (mode0, op0);
11999 pat = GEN_FCN (icode) (target, op0);
12006 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12007 sqrtss, rsqrtss, rcpss. */
12010 ix86_expand_unop1_builtin (icode, arglist, target)
12011 enum insn_code icode;
12016 tree arg0 = TREE_VALUE (arglist);
12017 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12018 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12019 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12022 || GET_MODE (target) != tmode
12023 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12024 target = gen_reg_rtx (tmode);
12026 if (VECTOR_MODE_P (mode0))
12027 op0 = safe_vector_operand (op0, mode0);
12029 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12030 op0 = copy_to_mode_reg (mode0, op0);
12033 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12034 op1 = copy_to_mode_reg (mode0, op1);
12036 pat = GEN_FCN (icode) (target, op0, op1);
12043 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12046 ix86_expand_sse_compare (d, arglist, target)
12047 const struct builtin_description *d;
12052 tree arg0 = TREE_VALUE (arglist);
12053 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12054 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12055 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12057 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12058 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12059 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12060 enum rtx_code comparison = d->comparison;
12062 if (VECTOR_MODE_P (mode0))
12063 op0 = safe_vector_operand (op0, mode0);
12064 if (VECTOR_MODE_P (mode1))
12065 op1 = safe_vector_operand (op1, mode1);
12067 /* Swap operands if we have a comparison that isn't available in
12071 rtx tmp = gen_reg_rtx (mode1);
12072 emit_move_insn (tmp, op1);
12078 || GET_MODE (target) != tmode
12079 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
12080 target = gen_reg_rtx (tmode);
12082 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12083 op0 = copy_to_mode_reg (mode0, op0);
12084 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12085 op1 = copy_to_mode_reg (mode1, op1);
12087 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12088 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12095 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
12098 ix86_expand_sse_comi (d, arglist, target)
12099 const struct builtin_description *d;
12104 tree arg0 = TREE_VALUE (arglist);
12105 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12106 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12107 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12109 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
12110 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
12111 enum rtx_code comparison = d->comparison;
12113 if (VECTOR_MODE_P (mode0))
12114 op0 = safe_vector_operand (op0, mode0);
12115 if (VECTOR_MODE_P (mode1))
12116 op1 = safe_vector_operand (op1, mode1);
12118 /* Swap operands if we have a comparison that isn't available in
12127 target = gen_reg_rtx (SImode);
12128 emit_move_insn (target, const0_rtx);
12129 target = gen_rtx_SUBREG (QImode, target, 0);
12131 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12132 op0 = copy_to_mode_reg (mode0, op0);
12133 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12134 op1 = copy_to_mode_reg (mode1, op1);
12136 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12137 pat = GEN_FCN (d->icode) (op0, op1, op2);
12141 emit_insn (gen_rtx_SET (VOIDmode,
12142 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
12143 gen_rtx_fmt_ee (comparison, QImode,
12144 gen_rtx_REG (CCmode, FLAGS_REG),
12147 return SUBREG_REG (target);
12150 /* Expand an expression EXP that calls a built-in function,
12151 with result going to TARGET if that's convenient
12152 (and in mode MODE if that's convenient).
12153 SUBTARGET may be used as the target for computing one of EXP's operands.
12154 IGNORE is nonzero if the value is to be ignored. */
12157 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
12160 rtx subtarget ATTRIBUTE_UNUSED;
12161 enum machine_mode mode ATTRIBUTE_UNUSED;
12162 int ignore ATTRIBUTE_UNUSED;
12164 const struct builtin_description *d;
12166 enum insn_code icode;
12167 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12168 tree arglist = TREE_OPERAND (exp, 1);
12169 tree arg0, arg1, arg2;
12170 rtx op0, op1, op2, pat;
12171 enum machine_mode tmode, mode0, mode1, mode2;
12172 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12176 case IX86_BUILTIN_EMMS:
12177 emit_insn (gen_emms ());
12180 case IX86_BUILTIN_SFENCE:
12181 emit_insn (gen_sfence ());
12184 case IX86_BUILTIN_PEXTRW:
12185 case IX86_BUILTIN_PEXTRW128:
12186 icode = (fcode == IX86_BUILTIN_PEXTRW
12187 ? CODE_FOR_mmx_pextrw
12188 : CODE_FOR_sse2_pextrw);
12189 arg0 = TREE_VALUE (arglist);
12190 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12191 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12192 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12193 tmode = insn_data[icode].operand[0].mode;
12194 mode0 = insn_data[icode].operand[1].mode;
12195 mode1 = insn_data[icode].operand[2].mode;
12197 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12198 op0 = copy_to_mode_reg (mode0, op0);
12199 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12201 /* @@@ better error message */
12202 error ("selector must be an immediate");
12203 return gen_reg_rtx (tmode);
12206 || GET_MODE (target) != tmode
12207 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12208 target = gen_reg_rtx (tmode);
12209 pat = GEN_FCN (icode) (target, op0, op1);
12215 case IX86_BUILTIN_PINSRW:
12216 case IX86_BUILTIN_PINSRW128:
12217 icode = (fcode == IX86_BUILTIN_PINSRW
12218 ? CODE_FOR_mmx_pinsrw
12219 : CODE_FOR_sse2_pinsrw);
12220 arg0 = TREE_VALUE (arglist);
12221 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12222 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12223 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12224 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12225 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12226 tmode = insn_data[icode].operand[0].mode;
12227 mode0 = insn_data[icode].operand[1].mode;
12228 mode1 = insn_data[icode].operand[2].mode;
12229 mode2 = insn_data[icode].operand[3].mode;
12231 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12232 op0 = copy_to_mode_reg (mode0, op0);
12233 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12234 op1 = copy_to_mode_reg (mode1, op1);
12235 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12237 /* @@@ better error message */
12238 error ("selector must be an immediate");
12242 || GET_MODE (target) != tmode
12243 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12244 target = gen_reg_rtx (tmode);
12245 pat = GEN_FCN (icode) (target, op0, op1, op2);
12251 case IX86_BUILTIN_MASKMOVQ:
12252 icode = (fcode == IX86_BUILTIN_MASKMOVQ
12253 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
12254 : CODE_FOR_sse2_maskmovdqu);
12255 /* Note the arg order is different from the operand order. */
12256 arg1 = TREE_VALUE (arglist);
12257 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
12258 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12259 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12260 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12261 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12262 mode0 = insn_data[icode].operand[0].mode;
12263 mode1 = insn_data[icode].operand[1].mode;
12264 mode2 = insn_data[icode].operand[2].mode;
12266 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12267 op0 = copy_to_mode_reg (mode0, op0);
12268 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12269 op1 = copy_to_mode_reg (mode1, op1);
12270 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
12271 op2 = copy_to_mode_reg (mode2, op2);
12272 pat = GEN_FCN (icode) (op0, op1, op2);
12278 case IX86_BUILTIN_SQRTSS:
12279 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
12280 case IX86_BUILTIN_RSQRTSS:
12281 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
12282 case IX86_BUILTIN_RCPSS:
12283 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
12285 case IX86_BUILTIN_ANDPS:
12286 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
12288 case IX86_BUILTIN_ANDNPS:
12289 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
12291 case IX86_BUILTIN_ORPS:
12292 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
12294 case IX86_BUILTIN_XORPS:
12295 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
12298 case IX86_BUILTIN_LOADAPS:
12299 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
12301 case IX86_BUILTIN_LOADUPS:
12302 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
12304 case IX86_BUILTIN_STOREAPS:
12305 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
12306 case IX86_BUILTIN_STOREUPS:
12307 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
12309 case IX86_BUILTIN_LOADSS:
12310 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
12312 case IX86_BUILTIN_STORESS:
12313 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
12315 case IX86_BUILTIN_LOADHPS:
12316 case IX86_BUILTIN_LOADLPS:
12317 case IX86_BUILTIN_LOADHPD:
12318 case IX86_BUILTIN_LOADLPD:
12319 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
12320 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
12321 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
12322 : CODE_FOR_sse2_movlpd);
12323 arg0 = TREE_VALUE (arglist);
12324 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12325 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12326 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12327 tmode = insn_data[icode].operand[0].mode;
12328 mode0 = insn_data[icode].operand[1].mode;
12329 mode1 = insn_data[icode].operand[2].mode;
12331 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12332 op0 = copy_to_mode_reg (mode0, op0);
12333 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
12335 || GET_MODE (target) != tmode
12336 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12337 target = gen_reg_rtx (tmode);
12338 pat = GEN_FCN (icode) (target, op0, op1);
12344 case IX86_BUILTIN_STOREHPS:
12345 case IX86_BUILTIN_STORELPS:
12346 case IX86_BUILTIN_STOREHPD:
12347 case IX86_BUILTIN_STORELPD:
12348 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
12349 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
12350 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
12351 : CODE_FOR_sse2_movlpd);
12352 arg0 = TREE_VALUE (arglist);
12353 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12354 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12355 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12356 mode0 = insn_data[icode].operand[1].mode;
12357 mode1 = insn_data[icode].operand[2].mode;
12359 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12360 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12361 op1 = copy_to_mode_reg (mode1, op1);
12363 pat = GEN_FCN (icode) (op0, op0, op1);
12369 case IX86_BUILTIN_MOVNTPS:
12370 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
12371 case IX86_BUILTIN_MOVNTQ:
12372 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
12374 case IX86_BUILTIN_LDMXCSR:
12375 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
12376 target = assign_386_stack_local (SImode, 0);
12377 emit_move_insn (target, op0);
12378 emit_insn (gen_ldmxcsr (target));
12381 case IX86_BUILTIN_STMXCSR:
12382 target = assign_386_stack_local (SImode, 0);
12383 emit_insn (gen_stmxcsr (target));
12384 return copy_to_mode_reg (SImode, target);
12386 case IX86_BUILTIN_SHUFPS:
12387 case IX86_BUILTIN_SHUFPD:
12388 icode = (fcode == IX86_BUILTIN_SHUFPS
12389 ? CODE_FOR_sse_shufps
12390 : CODE_FOR_sse2_shufpd);
12391 arg0 = TREE_VALUE (arglist);
12392 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12393 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12394 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12395 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12396 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12397 tmode = insn_data[icode].operand[0].mode;
12398 mode0 = insn_data[icode].operand[1].mode;
12399 mode1 = insn_data[icode].operand[2].mode;
12400 mode2 = insn_data[icode].operand[3].mode;
12402 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12403 op0 = copy_to_mode_reg (mode0, op0);
12404 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12405 op1 = copy_to_mode_reg (mode1, op1);
12406 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12408 /* @@@ better error message */
12409 error ("mask must be an immediate");
12410 return gen_reg_rtx (tmode);
12413 || GET_MODE (target) != tmode
12414 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12415 target = gen_reg_rtx (tmode);
12416 pat = GEN_FCN (icode) (target, op0, op1, op2);
12422 case IX86_BUILTIN_PSHUFW:
12423 case IX86_BUILTIN_PSHUFD:
12424 case IX86_BUILTIN_PSHUFHW:
12425 case IX86_BUILTIN_PSHUFLW:
12426 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
12427 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
12428 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
12429 : CODE_FOR_mmx_pshufw);
12430 arg0 = TREE_VALUE (arglist);
12431 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12432 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12433 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12434 tmode = insn_data[icode].operand[0].mode;
12435 mode1 = insn_data[icode].operand[1].mode;
12436 mode2 = insn_data[icode].operand[2].mode;
12438 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
12439 op0 = copy_to_mode_reg (mode1, op0);
12440 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
12442 /* @@@ better error message */
12443 error ("mask must be an immediate");
12447 || GET_MODE (target) != tmode
12448 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12449 target = gen_reg_rtx (tmode);
12450 pat = GEN_FCN (icode) (target, op0, op1);
12456 case IX86_BUILTIN_FEMMS:
12457 emit_insn (gen_femms ());
12460 case IX86_BUILTIN_PAVGUSB:
12461 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
12463 case IX86_BUILTIN_PF2ID:
12464 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
12466 case IX86_BUILTIN_PFACC:
12467 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
12469 case IX86_BUILTIN_PFADD:
12470 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
12472 case IX86_BUILTIN_PFCMPEQ:
12473 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
12475 case IX86_BUILTIN_PFCMPGE:
12476 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
12478 case IX86_BUILTIN_PFCMPGT:
12479 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
12481 case IX86_BUILTIN_PFMAX:
12482 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
12484 case IX86_BUILTIN_PFMIN:
12485 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
12487 case IX86_BUILTIN_PFMUL:
12488 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
12490 case IX86_BUILTIN_PFRCP:
12491 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
12493 case IX86_BUILTIN_PFRCPIT1:
12494 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
12496 case IX86_BUILTIN_PFRCPIT2:
12497 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
12499 case IX86_BUILTIN_PFRSQIT1:
12500 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
12502 case IX86_BUILTIN_PFRSQRT:
12503 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
12505 case IX86_BUILTIN_PFSUB:
12506 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
12508 case IX86_BUILTIN_PFSUBR:
12509 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
12511 case IX86_BUILTIN_PI2FD:
12512 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
12514 case IX86_BUILTIN_PMULHRW:
12515 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
12517 case IX86_BUILTIN_PF2IW:
12518 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
12520 case IX86_BUILTIN_PFNACC:
12521 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
12523 case IX86_BUILTIN_PFPNACC:
12524 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
12526 case IX86_BUILTIN_PI2FW:
12527 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
12529 case IX86_BUILTIN_PSWAPDSI:
12530 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
12532 case IX86_BUILTIN_PSWAPDSF:
12533 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
12535 case IX86_BUILTIN_SSE_ZERO:
12536 target = gen_reg_rtx (V4SFmode);
12537 emit_insn (gen_sse_clrv4sf (target));
12540 case IX86_BUILTIN_MMX_ZERO:
12541 target = gen_reg_rtx (DImode);
12542 emit_insn (gen_mmx_clrdi (target));
12545 case IX86_BUILTIN_SQRTSD:
12546 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
12547 case IX86_BUILTIN_LOADAPD:
12548 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
12549 case IX86_BUILTIN_LOADUPD:
12550 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
12552 case IX86_BUILTIN_STOREAPD:
12553 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
12554 case IX86_BUILTIN_STOREUPD:
12555 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
12557 case IX86_BUILTIN_LOADSD:
12558 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
12560 case IX86_BUILTIN_STORESD:
12561 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
12563 case IX86_BUILTIN_SETPD1:
12564 target = assign_386_stack_local (DFmode, 0);
12565 arg0 = TREE_VALUE (arglist);
12566 emit_move_insn (adjust_address (target, DFmode, 0),
12567 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
12568 op0 = gen_reg_rtx (V2DFmode);
12569 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
12570 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
12573 case IX86_BUILTIN_SETPD:
12574 target = assign_386_stack_local (V2DFmode, 0);
12575 arg0 = TREE_VALUE (arglist);
12576 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12577 emit_move_insn (adjust_address (target, DFmode, 0),
12578 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
12579 emit_move_insn (adjust_address (target, DFmode, 8),
12580 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
12581 op0 = gen_reg_rtx (V2DFmode);
12582 emit_insn (gen_sse2_movapd (op0, target));
12585 case IX86_BUILTIN_LOADRPD:
12586 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
12587 gen_reg_rtx (V2DFmode), 1);
12588 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
12591 case IX86_BUILTIN_LOADPD1:
12592 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
12593 gen_reg_rtx (V2DFmode), 1);
12594 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
12597 case IX86_BUILTIN_STOREPD1:
12598 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
12599 case IX86_BUILTIN_STORERPD:
12600 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
12602 case IX86_BUILTIN_MFENCE:
12603 emit_insn (gen_sse2_mfence ());
12605 case IX86_BUILTIN_LFENCE:
12606 emit_insn (gen_sse2_lfence ());
12609 case IX86_BUILTIN_CLFLUSH:
12610 arg0 = TREE_VALUE (arglist);
12611 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12612 icode = CODE_FOR_sse2_clflush;
12613 mode0 = insn_data[icode].operand[0].mode;
12614 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12615 op0 = copy_to_mode_reg (mode0, op0);
12617 emit_insn (gen_sse2_clflush (op0));
12620 case IX86_BUILTIN_MOVNTPD:
12621 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
12622 case IX86_BUILTIN_MOVNTDQ:
12623 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
12624 case IX86_BUILTIN_MOVNTI:
12625 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
12631 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12632 if (d->code == fcode)
12634 /* Compares are treated specially. */
12635 if (d->icode == CODE_FOR_maskcmpv4sf3
12636 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12637 || d->icode == CODE_FOR_maskncmpv4sf3
12638 || d->icode == CODE_FOR_vmmaskncmpv4sf3
12639 || d->icode == CODE_FOR_maskcmpv2df3
12640 || d->icode == CODE_FOR_vmmaskcmpv2df3
12641 || d->icode == CODE_FOR_maskncmpv2df3
12642 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12643 return ix86_expand_sse_compare (d, arglist, target);
12645 return ix86_expand_binop_builtin (d->icode, arglist, target);
12648 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
12649 if (d->code == fcode)
12650 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
12652 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12653 if (d->code == fcode)
12654 return ix86_expand_sse_comi (d, arglist, target);
12656 /* @@@ Should really do something sensible here. */
12660 /* Store OPERAND to the memory after reload is completed. This means
12661 that we can't easily use assign_stack_local. */
12663 ix86_force_to_memory (mode, operand)
12664 enum machine_mode mode;
12668 if (!reload_completed)
12670 if (TARGET_64BIT && TARGET_RED_ZONE)
12672 result = gen_rtx_MEM (mode,
12673 gen_rtx_PLUS (Pmode,
12675 GEN_INT (-RED_ZONE_SIZE)));
12676 emit_move_insn (result, operand);
12678 else if (TARGET_64BIT && !TARGET_RED_ZONE)
12684 operand = gen_lowpart (DImode, operand);
12688 gen_rtx_SET (VOIDmode,
12689 gen_rtx_MEM (DImode,
12690 gen_rtx_PRE_DEC (DImode,
12691 stack_pointer_rtx)),
12697 result = gen_rtx_MEM (mode, stack_pointer_rtx);
12706 split_di (&operand, 1, operands, operands + 1);
12708 gen_rtx_SET (VOIDmode,
12709 gen_rtx_MEM (SImode,
12710 gen_rtx_PRE_DEC (Pmode,
12711 stack_pointer_rtx)),
12714 gen_rtx_SET (VOIDmode,
12715 gen_rtx_MEM (SImode,
12716 gen_rtx_PRE_DEC (Pmode,
12717 stack_pointer_rtx)),
12722 /* It is better to store HImodes as SImodes. */
12723 if (!TARGET_PARTIAL_REG_STALL)
12724 operand = gen_lowpart (SImode, operand);
12728 gen_rtx_SET (VOIDmode,
12729 gen_rtx_MEM (GET_MODE (operand),
12730 gen_rtx_PRE_DEC (SImode,
12731 stack_pointer_rtx)),
12737 result = gen_rtx_MEM (mode, stack_pointer_rtx);
12742 /* Free operand from the memory. */
12744 ix86_free_from_memory (mode)
12745 enum machine_mode mode;
12747 if (!TARGET_64BIT || !TARGET_RED_ZONE)
12751 if (mode == DImode || TARGET_64BIT)
12753 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
12757 /* Use LEA to deallocate stack space. In peephole2 it will be converted
12758 to pop or add instruction if registers are available. */
12759 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12760 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12765 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
12766 QImode must go into class Q_REGS.
12767 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
12768 movdf to do mem-to-mem moves through integer regs. */
12770 ix86_preferred_reload_class (x, class)
12772 enum reg_class class;
12774 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
12776 /* SSE can't load any constant directly yet. */
12777 if (SSE_CLASS_P (class))
12779 /* Floats can load 0 and 1. */
12780 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
12782 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
12783 if (MAYBE_SSE_CLASS_P (class))
12784 return (reg_class_subset_p (class, GENERAL_REGS)
12785 ? GENERAL_REGS : FLOAT_REGS);
12789 /* General regs can load everything. */
12790 if (reg_class_subset_p (class, GENERAL_REGS))
12791 return GENERAL_REGS;
12792 /* In case we haven't resolved FLOAT or SSE yet, give up. */
12793 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
12796 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
12798 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
12803 /* If we are copying between general and FP registers, we need a memory
12804 location. The same is true for SSE and MMX registers.
12806 The macro can't work reliably when one of the CLASSES is class containing
12807 registers from multiple units (SSE, MMX, integer). We avoid this by never
12808 combining those units in single alternative in the machine description.
12809 Ensure that this constraint holds to avoid unexpected surprises.
12811 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
12812 enforce these sanity checks. */
12814 ix86_secondary_memory_needed (class1, class2, mode, strict)
12815 enum reg_class class1, class2;
12816 enum machine_mode mode;
12819 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
12820 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
12821 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
12822 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
12823 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
12824 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
12831 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
12832 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
12833 && (mode) != SImode)
12834 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12835 && (mode) != SImode));
12837 /* Return the cost of moving data from a register in class CLASS1 to
12838 one in class CLASS2.
12840 It is not required that the cost always equal 2 when FROM is the same as TO;
12841 on some machines it is expensive to move between registers if they are not
12842 general registers. */
12844 ix86_register_move_cost (mode, class1, class2)
12845 enum machine_mode mode;
12846 enum reg_class class1, class2;
12848 /* In case we require secondary memory, compute cost of the store followed
12849 by load. In case of copying from general_purpose_register we may emit
12850 multiple stores followed by single load causing memory size mismatch
12851 stall. Count this as arbitarily high cost of 20. */
12852 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
12855 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
12857 return (MEMORY_MOVE_COST (mode, class1, 0)
12858 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
12860 /* Moves between SSE/MMX and integer unit are expensive. */
12861 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12862 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
12863 return ix86_cost->mmxsse_to_integer;
12864 if (MAYBE_FLOAT_CLASS_P (class1))
12865 return ix86_cost->fp_move;
12866 if (MAYBE_SSE_CLASS_P (class1))
12867 return ix86_cost->sse_move;
12868 if (MAYBE_MMX_CLASS_P (class1))
12869 return ix86_cost->mmx_move;
12873 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
12875 ix86_hard_regno_mode_ok (regno, mode)
12877 enum machine_mode mode;
12879 /* Flags and only flags can only hold CCmode values. */
12880 if (CC_REGNO_P (regno))
12881 return GET_MODE_CLASS (mode) == MODE_CC;
12882 if (GET_MODE_CLASS (mode) == MODE_CC
12883 || GET_MODE_CLASS (mode) == MODE_RANDOM
12884 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
12886 if (FP_REGNO_P (regno))
12887 return VALID_FP_MODE_P (mode);
12888 if (SSE_REGNO_P (regno))
12889 return VALID_SSE_REG_MODE (mode);
12890 if (MMX_REGNO_P (regno))
12891 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
12892 /* We handle both integer and floats in the general purpose registers.
12893 In future we should be able to handle vector modes as well. */
12894 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
12896 /* Take care for QImode values - they can be in non-QI regs, but then
12897 they do cause partial register stalls. */
12898 if (regno < 4 || mode != QImode || TARGET_64BIT)
12900 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
12903 /* Return the cost of moving data of mode M between a
12904 register and memory. A value of 2 is the default; this cost is
12905 relative to those in `REGISTER_MOVE_COST'.
12907 If moving between registers and memory is more expensive than
12908 between two registers, you should define this macro to express the
12911 Model also increased moving costs of QImode registers in non
12915 ix86_memory_move_cost (mode, class, in)
12916 enum machine_mode mode;
12917 enum reg_class class;
12920 if (FLOAT_CLASS_P (class))
12938 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
12940 if (SSE_CLASS_P (class))
12943 switch (GET_MODE_SIZE (mode))
12957 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
12959 if (MMX_CLASS_P (class))
12962 switch (GET_MODE_SIZE (mode))
12973 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
12975 switch (GET_MODE_SIZE (mode))
12979 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
12980 : ix86_cost->movzbl_load);
12982 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
12983 : ix86_cost->int_store[0] + 4);
12986 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
12988 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
12989 if (mode == TFmode)
12991 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
12992 * (int) GET_MODE_SIZE (mode) / 4);
12996 #ifdef DO_GLOBAL_CTORS_BODY
12998 ix86_svr3_asm_out_constructor (symbol, priority)
13000 int priority ATTRIBUTE_UNUSED;
13003 fputs ("\tpushl $", asm_out_file);
13004 assemble_name (asm_out_file, XSTR (symbol, 0));
13005 fputc ('\n', asm_out_file);
13009 /* Order the registers for register allocator. */
13012 x86_order_regs_for_local_alloc ()
13017 /* First allocate the local general purpose registers. */
13018 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13019 if (GENERAL_REGNO_P (i) && call_used_regs[i])
13020 reg_alloc_order [pos++] = i;
13022 /* Global general purpose registers. */
13023 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13024 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
13025 reg_alloc_order [pos++] = i;
13027 /* x87 registers come first in case we are doing FP math
13029 if (!TARGET_SSE_MATH)
13030 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13031 reg_alloc_order [pos++] = i;
13033 /* SSE registers. */
13034 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
13035 reg_alloc_order [pos++] = i;
13036 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
13037 reg_alloc_order [pos++] = i;
13039 /* x87 registerts. */
13040 if (TARGET_SSE_MATH)
13041 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13042 reg_alloc_order [pos++] = i;
13044 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
13045 reg_alloc_order [pos++] = i;
13047 /* Initialize the rest of array as we do not allocate some registers
13049 while (pos < FIRST_PSEUDO_REGISTER)
13050 reg_alloc_order [pos++] = 0;
13054 x86_output_mi_thunk (file, delta, function)
13062 if (ix86_regparm > 0)
13063 parm = TYPE_ARG_TYPES (TREE_TYPE (function));
13066 for (; parm; parm = TREE_CHAIN (parm))
13067 if (TREE_VALUE (parm) == void_type_node)
13070 xops[0] = GEN_INT (delta);
13073 int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
13074 xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
13075 output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
13078 fprintf (file, "\tjmp *");
13079 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13080 fprintf (file, "@GOTPCREL(%%rip)\n");
13084 fprintf (file, "\tjmp ");
13085 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13086 fprintf (file, "\n");
13092 xops[1] = gen_rtx_REG (SImode, 0);
13093 else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
13094 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
13096 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
13097 output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);
13101 xops[0] = pic_offset_table_rtx;
13102 xops[1] = gen_label_rtx ();
13103 xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
13105 if (ix86_regparm > 2)
13107 output_asm_insn ("push{l}\t%0", xops);
13108 output_asm_insn ("call\t%P1", xops);
13109 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
13110 output_asm_insn ("pop{l}\t%0", xops);
13112 ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
13113 xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
13115 ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
13116 asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
13117 asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
13121 fprintf (file, "\tjmp ");
13122 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13123 fprintf (file, "\n");