1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
28 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "basic-block.h"
44 #include "target-def.h"
45 #include "langhooks.h"
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
51 /* Processor costs (relative to an add) */
53 struct processor_costs size_cost = { /* costs for tunning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of loading integer registers */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
87 2, /* cost of FADD and FSUB insns. */
88 2, /* cost of FMUL instruction. */
89 2, /* cost of FDIV instruction. */
90 2, /* cost of FABS instruction. */
91 2, /* cost of FCHS instruction. */
92 2, /* cost of FSQRT instruction. */
95 /* Processor costs (relative to an add) */
97 struct processor_costs i386_cost = { /* 386 specific costs */
98 1, /* cost of an add instruction */
99 1, /* cost of a lea instruction */
100 3, /* variable shift costs */
101 2, /* constant shift costs */
102 6, /* cost of starting a multiply */
103 1, /* cost of multiply per each bit set */
104 23, /* cost of a divide/mod */
105 3, /* cost of movsx */
106 2, /* cost of movzx */
107 15, /* "large" insn */
109 4, /* cost for loading QImode using movzbl */
110 {2, 4, 2}, /* cost of loading integer registers
111 in QImode, HImode and SImode.
112 Relative to reg-reg move (2). */
113 {2, 4, 2}, /* cost of storing integer registers */
114 2, /* cost of reg,reg fld/fst */
115 {8, 8, 8}, /* cost of loading fp registers
116 in SFmode, DFmode and XFmode */
117 {8, 8, 8}, /* cost of loading integer registers */
118 2, /* cost of moving MMX register */
119 {4, 8}, /* cost of loading MMX registers
120 in SImode and DImode */
121 {4, 8}, /* cost of storing MMX registers
122 in SImode and DImode */
123 2, /* cost of moving SSE register */
124 {4, 8, 16}, /* cost of loading SSE registers
125 in SImode, DImode and TImode */
126 {4, 8, 16}, /* cost of storing SSE registers
127 in SImode, DImode and TImode */
128 3, /* MMX or SSE register to integer */
129 0, /* size of prefetch block */
130 0, /* number of parallel prefetches */
131 23, /* cost of FADD and FSUB insns. */
132 27, /* cost of FMUL instruction. */
133 88, /* cost of FDIV instruction. */
134 22, /* cost of FABS instruction. */
135 24, /* cost of FCHS instruction. */
136 122, /* cost of FSQRT instruction. */
140 struct processor_costs i486_cost = { /* 486 specific costs */
141 1, /* cost of an add instruction */
142 1, /* cost of a lea instruction */
143 3, /* variable shift costs */
144 2, /* constant shift costs */
145 12, /* cost of starting a multiply */
146 1, /* cost of multiply per each bit set */
147 40, /* cost of a divide/mod */
148 3, /* cost of movsx */
149 2, /* cost of movzx */
150 15, /* "large" insn */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of loading integer registers */
161 2, /* cost of moving MMX register */
162 {4, 8}, /* cost of loading MMX registers
163 in SImode and DImode */
164 {4, 8}, /* cost of storing MMX registers
165 in SImode and DImode */
166 2, /* cost of moving SSE register */
167 {4, 8, 16}, /* cost of loading SSE registers
168 in SImode, DImode and TImode */
169 {4, 8, 16}, /* cost of storing SSE registers
170 in SImode, DImode and TImode */
171 3, /* MMX or SSE register to integer */
172 0, /* size of prefetch block */
173 0, /* number of parallel prefetches */
174 8, /* cost of FADD and FSUB insns. */
175 16, /* cost of FMUL instruction. */
176 73, /* cost of FDIV instruction. */
177 3, /* cost of FABS instruction. */
178 3, /* cost of FCHS instruction. */
179 83, /* cost of FSQRT instruction. */
183 struct processor_costs pentium_cost = {
184 1, /* cost of an add instruction */
185 1, /* cost of a lea instruction */
186 4, /* variable shift costs */
187 1, /* constant shift costs */
188 11, /* cost of starting a multiply */
189 0, /* cost of multiply per each bit set */
190 25, /* cost of a divide/mod */
191 3, /* cost of movsx */
192 2, /* cost of movzx */
193 8, /* "large" insn */
195 6, /* cost for loading QImode using movzbl */
196 {2, 4, 2}, /* cost of loading integer registers
197 in QImode, HImode and SImode.
198 Relative to reg-reg move (2). */
199 {2, 4, 2}, /* cost of storing integer registers */
200 2, /* cost of reg,reg fld/fst */
201 {2, 2, 6}, /* cost of loading fp registers
202 in SFmode, DFmode and XFmode */
203 {4, 4, 6}, /* cost of loading integer registers */
204 8, /* cost of moving MMX register */
205 {8, 8}, /* cost of loading MMX registers
206 in SImode and DImode */
207 {8, 8}, /* cost of storing MMX registers
208 in SImode and DImode */
209 2, /* cost of moving SSE register */
210 {4, 8, 16}, /* cost of loading SSE registers
211 in SImode, DImode and TImode */
212 {4, 8, 16}, /* cost of storing SSE registers
213 in SImode, DImode and TImode */
214 3, /* MMX or SSE register to integer */
215 0, /* size of prefetch block */
216 0, /* number of parallel prefetches */
217 3, /* cost of FADD and FSUB insns. */
218 3, /* cost of FMUL instruction. */
219 39, /* cost of FDIV instruction. */
220 1, /* cost of FABS instruction. */
221 1, /* cost of FCHS instruction. */
222 70, /* cost of FSQRT instruction. */
226 struct processor_costs pentiumpro_cost = {
227 1, /* cost of an add instruction */
228 1, /* cost of a lea instruction */
229 1, /* variable shift costs */
230 1, /* constant shift costs */
231 4, /* cost of starting a multiply */
232 0, /* cost of multiply per each bit set */
233 17, /* cost of a divide/mod */
234 1, /* cost of movsx */
235 1, /* cost of movzx */
236 8, /* "large" insn */
238 2, /* cost for loading QImode using movzbl */
239 {4, 4, 4}, /* cost of loading integer registers
240 in QImode, HImode and SImode.
241 Relative to reg-reg move (2). */
242 {2, 2, 2}, /* cost of storing integer registers */
243 2, /* cost of reg,reg fld/fst */
244 {2, 2, 6}, /* cost of loading fp registers
245 in SFmode, DFmode and XFmode */
246 {4, 4, 6}, /* cost of loading integer registers */
247 2, /* cost of moving MMX register */
248 {2, 2}, /* cost of loading MMX registers
249 in SImode and DImode */
250 {2, 2}, /* cost of storing MMX registers
251 in SImode and DImode */
252 2, /* cost of moving SSE register */
253 {2, 2, 8}, /* cost of loading SSE registers
254 in SImode, DImode and TImode */
255 {2, 2, 8}, /* cost of storing SSE registers
256 in SImode, DImode and TImode */
257 3, /* MMX or SSE register to integer */
258 32, /* size of prefetch block */
259 6, /* number of parallel prefetches */
260 3, /* cost of FADD and FSUB insns. */
261 5, /* cost of FMUL instruction. */
262 56, /* cost of FDIV instruction. */
263 2, /* cost of FABS instruction. */
264 2, /* cost of FCHS instruction. */
265 56, /* cost of FSQRT instruction. */
269 struct processor_costs k6_cost = {
270 1, /* cost of an add instruction */
271 2, /* cost of a lea instruction */
272 1, /* variable shift costs */
273 1, /* constant shift costs */
274 3, /* cost of starting a multiply */
275 0, /* cost of multiply per each bit set */
276 18, /* cost of a divide/mod */
277 2, /* cost of movsx */
278 2, /* cost of movzx */
279 8, /* "large" insn */
281 3, /* cost for loading QImode using movzbl */
282 {4, 5, 4}, /* cost of loading integer registers
283 in QImode, HImode and SImode.
284 Relative to reg-reg move (2). */
285 {2, 3, 2}, /* cost of storing integer registers */
286 4, /* cost of reg,reg fld/fst */
287 {6, 6, 6}, /* cost of loading fp registers
288 in SFmode, DFmode and XFmode */
289 {4, 4, 4}, /* cost of loading integer registers */
290 2, /* cost of moving MMX register */
291 {2, 2}, /* cost of loading MMX registers
292 in SImode and DImode */
293 {2, 2}, /* cost of storing MMX registers
294 in SImode and DImode */
295 2, /* cost of moving SSE register */
296 {2, 2, 8}, /* cost of loading SSE registers
297 in SImode, DImode and TImode */
298 {2, 2, 8}, /* cost of storing SSE registers
299 in SImode, DImode and TImode */
300 6, /* MMX or SSE register to integer */
301 32, /* size of prefetch block */
302 1, /* number of parallel prefetches */
303 2, /* cost of FADD and FSUB insns. */
304 2, /* cost of FMUL instruction. */
305 56, /* cost of FDIV instruction. */
306 2, /* cost of FABS instruction. */
307 2, /* cost of FCHS instruction. */
308 56, /* cost of FSQRT instruction. */
312 struct processor_costs athlon_cost = {
313 1, /* cost of an add instruction */
314 2, /* cost of a lea instruction */
315 1, /* variable shift costs */
316 1, /* constant shift costs */
317 5, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 42, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 8, /* "large" insn */
324 4, /* cost for loading QImode using movzbl */
325 {3, 4, 3}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {3, 4, 3}, /* cost of storing integer registers */
329 4, /* cost of reg,reg fld/fst */
330 {4, 4, 12}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {6, 6, 8}, /* cost of loading integer registers */
333 2, /* cost of moving MMX register */
334 {4, 4}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {4, 4}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {4, 4, 6}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {4, 4, 5}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 5, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 4, /* cost of FADD and FSUB insns. */
347 4, /* cost of FMUL instruction. */
348 24, /* cost of FDIV instruction. */
349 2, /* cost of FABS instruction. */
350 2, /* cost of FCHS instruction. */
351 35, /* cost of FSQRT instruction. */
355 struct processor_costs pentium4_cost = {
356 1, /* cost of an add instruction */
357 1, /* cost of a lea instruction */
358 8, /* variable shift costs */
359 8, /* constant shift costs */
360 30, /* cost of starting a multiply */
361 0, /* cost of multiply per each bit set */
362 112, /* cost of a divide/mod */
363 1, /* cost of movsx */
364 1, /* cost of movzx */
365 16, /* "large" insn */
367 2, /* cost for loading QImode using movzbl */
368 {4, 5, 4}, /* cost of loading integer registers
369 in QImode, HImode and SImode.
370 Relative to reg-reg move (2). */
371 {2, 3, 2}, /* cost of storing integer registers */
372 2, /* cost of reg,reg fld/fst */
373 {2, 2, 6}, /* cost of loading fp registers
374 in SFmode, DFmode and XFmode */
375 {4, 4, 6}, /* cost of loading integer registers */
376 2, /* cost of moving MMX register */
377 {2, 2}, /* cost of loading MMX registers
378 in SImode and DImode */
379 {2, 2}, /* cost of storing MMX registers
380 in SImode and DImode */
381 12, /* cost of moving SSE register */
382 {12, 12, 12}, /* cost of loading SSE registers
383 in SImode, DImode and TImode */
384 {2, 2, 8}, /* cost of storing SSE registers
385 in SImode, DImode and TImode */
386 10, /* MMX or SSE register to integer */
387 64, /* size of prefetch block */
388 6, /* number of parallel prefetches */
389 5, /* cost of FADD and FSUB insns. */
390 7, /* cost of FMUL instruction. */
391 43, /* cost of FDIV instruction. */
392 2, /* cost of FABS instruction. */
393 2, /* cost of FCHS instruction. */
394 43, /* cost of FSQRT instruction. */
397 const struct processor_costs *ix86_cost = &pentium_cost;
399 /* Processor feature/optimization bitmasks. */
400 #define m_386 (1<<PROCESSOR_I386)
401 #define m_486 (1<<PROCESSOR_I486)
402 #define m_PENT (1<<PROCESSOR_PENTIUM)
403 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
404 #define m_K6 (1<<PROCESSOR_K6)
405 #define m_ATHLON (1<<PROCESSOR_ATHLON)
406 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
408 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
409 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
410 const int x86_zero_extend_with_and = m_486 | m_PENT;
411 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
412 const int x86_double_with_add = ~m_386;
413 const int x86_use_bit_test = m_386;
414 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
415 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
416 const int x86_3dnow_a = m_ATHLON;
417 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
418 const int x86_branch_hints = m_PENT4;
419 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
420 const int x86_partial_reg_stall = m_PPRO;
421 const int x86_use_loop = m_K6;
422 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
423 const int x86_use_mov0 = m_K6;
424 const int x86_use_cltd = ~(m_PENT | m_K6);
425 const int x86_read_modify_write = ~m_PENT;
426 const int x86_read_modify = ~(m_PENT | m_PPRO);
427 const int x86_split_long_moves = m_PPRO;
428 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
429 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
430 const int x86_single_stringop = m_386 | m_PENT4;
431 const int x86_qimode_math = ~(0);
432 const int x86_promote_qi_regs = 0;
433 const int x86_himode_math = ~(m_PPRO);
434 const int x86_promote_hi_regs = m_PPRO;
435 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
436 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
437 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
438 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
439 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
440 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
441 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
442 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
443 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
444 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
445 const int x86_decompose_lea = m_PENT4;
446 const int x86_shift1 = ~m_486;
447 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
449 /* In case the avreage insn count for single function invocation is
450 lower than this constant, emit fast (but longer) prologue and
452 #define FAST_PROLOGUE_INSN_COUNT 30
454 /* Set by prologue expander and used by epilogue expander to determine
456 static int use_fast_prologue_epilogue;
458 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
459 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
460 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
461 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
463 /* Array of the smallest class containing reg number REGNO, indexed by
464 REGNO. Used by REGNO_REG_CLASS in i386.h. */
466 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
469 AREG, DREG, CREG, BREG,
471 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
473 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
474 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
477 /* flags, fpsr, dirflag, frame */
478 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
479 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
481 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
483 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
484 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
485 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
489 /* The "default" register map used in 32bit mode. */
491 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
493 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
494 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
495 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
496 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
497 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
498 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
499 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
502 static int const x86_64_int_parameter_registers[6] =
504 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
505 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
508 static int const x86_64_int_return_registers[4] =
510 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
513 /* The "default" register map used in 64bit mode. */
514 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
516 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
517 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
518 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
519 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
520 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
521 8,9,10,11,12,13,14,15, /* extended integer registers */
522 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
525 /* Define the register numbers to be used in Dwarf debugging information.
526 The SVR4 reference port C compiler uses the following register numbers
527 in its Dwarf output code:
528 0 for %eax (gcc regno = 0)
529 1 for %ecx (gcc regno = 2)
530 2 for %edx (gcc regno = 1)
531 3 for %ebx (gcc regno = 3)
532 4 for %esp (gcc regno = 7)
533 5 for %ebp (gcc regno = 6)
534 6 for %esi (gcc regno = 4)
535 7 for %edi (gcc regno = 5)
536 The following three DWARF register numbers are never generated by
537 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
538 believes these numbers have these meanings.
539 8 for %eip (no gcc equivalent)
540 9 for %eflags (gcc regno = 17)
541 10 for %trapno (no gcc equivalent)
542 It is not at all clear how we should number the FP stack registers
543 for the x86 architecture. If the version of SDB on x86/svr4 were
544 a bit less brain dead with respect to floating-point then we would
545 have a precedent to follow with respect to DWARF register numbers
546 for x86 FP registers, but the SDB on x86/svr4 is so completely
547 broken with respect to FP registers that it is hardly worth thinking
548 of it as something to strive for compatibility with.
549 The version of x86/svr4 SDB I have at the moment does (partially)
550 seem to believe that DWARF register number 11 is associated with
551 the x86 register %st(0), but that's about all. Higher DWARF
552 register numbers don't seem to be associated with anything in
553 particular, and even for DWARF regno 11, SDB only seems to under-
554 stand that it should say that a variable lives in %st(0) (when
555 asked via an `=' command) if we said it was in DWARF regno 11,
556 but SDB still prints garbage when asked for the value of the
557 variable in question (via a `/' command).
558 (Also note that the labels SDB prints for various FP stack regs
559 when doing an `x' command are all wrong.)
560 Note that these problems generally don't affect the native SVR4
561 C compiler because it doesn't allow the use of -O with -g and
562 because when it is *not* optimizing, it allocates a memory
563 location for each floating-point variable, and the memory
564 location is what gets described in the DWARF AT_location
565 attribute for the variable in question.
566 Regardless of the severe mental illness of the x86/svr4 SDB, we
567 do something sensible here and we use the following DWARF
568 register numbers. Note that these are all stack-top-relative
570 11 for %st(0) (gcc regno = 8)
571 12 for %st(1) (gcc regno = 9)
572 13 for %st(2) (gcc regno = 10)
573 14 for %st(3) (gcc regno = 11)
574 15 for %st(4) (gcc regno = 12)
575 16 for %st(5) (gcc regno = 13)
576 17 for %st(6) (gcc regno = 14)
577 18 for %st(7) (gcc regno = 15)
579 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
581 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
582 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
583 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
584 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
585 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
586 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded integer registers */
587 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded SSE registers */
590 /* Test and compare insns in i386.md store the information needed to
591 generate branch and scc insns here. */
593 rtx ix86_compare_op0 = NULL_RTX;
594 rtx ix86_compare_op1 = NULL_RTX;
596 /* The encoding characters for the four TLS models present in ELF. */
598 static char const tls_model_chars[] = " GLil";
600 #define MAX_386_STACK_LOCALS 3
601 /* Size of the register save area. */
602 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
604 /* Define the structure for the machine field in struct function. */
605 struct machine_function GTY(())
607 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
608 const char *some_ld_name;
609 int save_varrargs_registers;
610 int accesses_prev_frame;
613 #define ix86_stack_locals (cfun->machine->stack_locals)
614 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
616 /* Structure describing stack frame layout.
617 Stack grows downward:
623 saved frame pointer if frame_pointer_needed
624 <- HARD_FRAME_POINTER
630 > to_allocate <- FRAME_POINTER
642 int outgoing_arguments_size;
645 HOST_WIDE_INT to_allocate;
646 /* The offsets relative to ARG_POINTER. */
647 HOST_WIDE_INT frame_pointer_offset;
648 HOST_WIDE_INT hard_frame_pointer_offset;
649 HOST_WIDE_INT stack_pointer_offset;
652 /* Used to enable/disable debugging features. */
653 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
654 /* Code model option as passed by user. */
655 const char *ix86_cmodel_string;
657 enum cmodel ix86_cmodel;
659 const char *ix86_asm_string;
660 enum asm_dialect ix86_asm_dialect = ASM_ATT;
662 const char *ix86_tls_dialect_string;
663 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
665 /* Which unit we are generating floating point math for. */
666 enum fpmath_unit ix86_fpmath;
668 /* Which cpu are we scheduling for. */
669 enum processor_type ix86_cpu;
670 /* Which instruction set architecture to use. */
671 enum processor_type ix86_arch;
673 /* Strings to hold which cpu and instruction set architecture to use. */
674 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
675 const char *ix86_arch_string; /* for -march=<xxx> */
676 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
678 /* # of registers to use to pass arguments. */
679 const char *ix86_regparm_string;
681 /* true if sse prefetch instruction is not NOOP. */
682 int x86_prefetch_sse;
684 /* ix86_regparm_string as a number */
687 /* Alignment to use for loops and jumps: */
689 /* Power of two alignment for loops. */
690 const char *ix86_align_loops_string;
692 /* Power of two alignment for non-loop jumps. */
693 const char *ix86_align_jumps_string;
695 /* Power of two alignment for stack boundary in bytes. */
696 const char *ix86_preferred_stack_boundary_string;
698 /* Preferred alignment for stack boundary in bits. */
699 int ix86_preferred_stack_boundary;
701 /* Values 1-5: see jump.c */
702 int ix86_branch_cost;
703 const char *ix86_branch_cost_string;
705 /* Power of two alignment for functions. */
706 const char *ix86_align_funcs_string;
708 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
709 static char internal_label_prefix[16];
710 static int internal_label_prefix_len;
712 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
713 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
714 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
715 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
717 static const char *get_some_local_dynamic_name PARAMS ((void));
718 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
719 static rtx maybe_get_pool_constant PARAMS ((rtx));
720 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
721 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
723 static rtx get_thread_pointer PARAMS ((void));
724 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
725 static rtx gen_push PARAMS ((rtx));
726 static int memory_address_length PARAMS ((rtx addr));
727 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
728 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
729 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
730 static void ix86_dump_ppro_packet PARAMS ((FILE *));
731 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
732 static struct machine_function * ix86_init_machine_status PARAMS ((void));
733 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
734 static int ix86_nsaved_regs PARAMS ((void));
735 static void ix86_emit_save_regs PARAMS ((void));
736 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
737 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
738 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
739 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
740 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
741 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
742 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
743 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
744 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
745 static int ix86_issue_rate PARAMS ((void));
746 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
747 static void ix86_sched_init PARAMS ((FILE *, int, int));
748 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
749 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
750 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
751 static int ia32_multipass_dfa_lookahead PARAMS ((void));
752 static void ix86_init_mmx_sse_builtins PARAMS ((void));
756 rtx base, index, disp;
760 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
762 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
763 static const char *ix86_strip_name_encoding PARAMS ((const char *))
766 struct builtin_description;
767 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
769 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
771 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
772 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
773 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
774 static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
776 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
777 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
778 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
779 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
783 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
785 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
786 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
787 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
788 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
789 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
790 static int ix86_save_reg PARAMS ((unsigned int, int));
791 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
792 static int ix86_comp_type_attributes PARAMS ((tree, tree));
793 const struct attribute_spec ix86_attribute_table[];
794 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
795 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
796 static int ix86_value_regno PARAMS ((enum machine_mode));
798 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
799 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
802 /* Register class used for passing given 64bit part of the argument.
803 These represent classes as documented by the PS ABI, with the exception
804 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
805 use SF or DFmode move instead of DImode to avoid reformating penalties.
807 Similary we play games with INTEGERSI_CLASS to use cheaper SImode moves
808 whenever possible (upper half does contain padding).
810 enum x86_64_reg_class
813 X86_64_INTEGER_CLASS,
814 X86_64_INTEGERSI_CLASS,
823 static const char * const x86_64_reg_class_name[] =
824 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
826 #define MAX_CLASSES 4
827 static int classify_argument PARAMS ((enum machine_mode, tree,
828 enum x86_64_reg_class [MAX_CLASSES],
830 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
832 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
834 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
835 enum x86_64_reg_class));
837 /* Initialize the GCC target structure. */
838 #undef TARGET_ATTRIBUTE_TABLE
839 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
840 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
841 # undef TARGET_MERGE_DECL_ATTRIBUTES
842 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
845 #undef TARGET_COMP_TYPE_ATTRIBUTES
846 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
848 #undef TARGET_INIT_BUILTINS
849 #define TARGET_INIT_BUILTINS ix86_init_builtins
851 #undef TARGET_EXPAND_BUILTIN
852 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
854 #undef TARGET_ASM_FUNCTION_EPILOGUE
855 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
857 #undef TARGET_ASM_OPEN_PAREN
858 #define TARGET_ASM_OPEN_PAREN ""
859 #undef TARGET_ASM_CLOSE_PAREN
860 #define TARGET_ASM_CLOSE_PAREN ""
862 #undef TARGET_ASM_ALIGNED_HI_OP
863 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
864 #undef TARGET_ASM_ALIGNED_SI_OP
865 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
867 #undef TARGET_ASM_ALIGNED_DI_OP
868 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
871 #undef TARGET_ASM_UNALIGNED_HI_OP
872 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
873 #undef TARGET_ASM_UNALIGNED_SI_OP
874 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
875 #undef TARGET_ASM_UNALIGNED_DI_OP
876 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
878 #undef TARGET_SCHED_ADJUST_COST
879 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
880 #undef TARGET_SCHED_ISSUE_RATE
881 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
882 #undef TARGET_SCHED_VARIABLE_ISSUE
883 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
884 #undef TARGET_SCHED_INIT
885 #define TARGET_SCHED_INIT ix86_sched_init
886 #undef TARGET_SCHED_REORDER
887 #define TARGET_SCHED_REORDER ix86_sched_reorder
888 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
889 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
890 ia32_use_dfa_pipeline_interface
891 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
892 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
893 ia32_multipass_dfa_lookahead
896 #undef TARGET_HAVE_TLS
897 #define TARGET_HAVE_TLS true
900 struct gcc_target targetm = TARGET_INITIALIZER;
902 /* Sometimes certain combinations of command options do not make
903 sense on a particular target machine. You can define a macro
904 `OVERRIDE_OPTIONS' to take account of this. This macro, if
905 defined, is executed once just after all the command options have
908 Don't use this macro to turn on various extra optimizations for
909 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
915 /* Comes from final.c -- no real reason to change it. */
916 #define MAX_CODE_ALIGN 16
920 const struct processor_costs *cost; /* Processor costs */
921 const int target_enable; /* Target flags to enable. */
922 const int target_disable; /* Target flags to disable. */
923 const int align_loop; /* Default alignments. */
924 const int align_loop_max_skip;
925 const int align_jump;
926 const int align_jump_max_skip;
927 const int align_func;
928 const int branch_cost;
930 const processor_target_table[PROCESSOR_max] =
932 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
933 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
934 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
935 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
936 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
937 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
938 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
941 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
944 const char *const name; /* processor name or nickname. */
945 const enum processor_type processor;
951 PTA_PREFETCH_SSE = 8,
956 const processor_alias_table[] =
958 {"i386", PROCESSOR_I386, 0},
959 {"i486", PROCESSOR_I486, 0},
960 {"i586", PROCESSOR_PENTIUM, 0},
961 {"pentium", PROCESSOR_PENTIUM, 0},
962 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
963 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
964 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
965 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
966 {"i686", PROCESSOR_PENTIUMPRO, 0},
967 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
968 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
969 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
970 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
971 PTA_MMX | PTA_PREFETCH_SSE},
972 {"k6", PROCESSOR_K6, PTA_MMX},
973 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
974 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
975 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
977 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
978 | PTA_3DNOW | PTA_3DNOW_A},
979 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
980 | PTA_3DNOW_A | PTA_SSE},
981 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
982 | PTA_3DNOW_A | PTA_SSE},
983 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
984 | PTA_3DNOW_A | PTA_SSE},
987 int const pta_size = ARRAY_SIZE (processor_alias_table);
989 /* By default our XFmode is the 80-bit extended format. If we have
990 use TFmode instead, it's also the 80-bit format, but with padding. */
991 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
992 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
994 #ifdef SUBTARGET_OVERRIDE_OPTIONS
995 SUBTARGET_OVERRIDE_OPTIONS;
998 if (!ix86_cpu_string && ix86_arch_string)
999 ix86_cpu_string = ix86_arch_string;
1000 if (!ix86_cpu_string)
1001 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
1002 if (!ix86_arch_string)
1003 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
1005 if (ix86_cmodel_string != 0)
1007 if (!strcmp (ix86_cmodel_string, "small"))
1008 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1010 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1011 else if (!strcmp (ix86_cmodel_string, "32"))
1012 ix86_cmodel = CM_32;
1013 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1014 ix86_cmodel = CM_KERNEL;
1015 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1016 ix86_cmodel = CM_MEDIUM;
1017 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1018 ix86_cmodel = CM_LARGE;
1020 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1024 ix86_cmodel = CM_32;
1026 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1028 if (ix86_asm_string != 0)
1030 if (!strcmp (ix86_asm_string, "intel"))
1031 ix86_asm_dialect = ASM_INTEL;
1032 else if (!strcmp (ix86_asm_string, "att"))
1033 ix86_asm_dialect = ASM_ATT;
1035 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1037 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1038 error ("code model `%s' not supported in the %s bit mode",
1039 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1040 if (ix86_cmodel == CM_LARGE)
1041 sorry ("code model `large' not supported yet");
1042 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1043 sorry ("%i-bit mode not compiled in",
1044 (target_flags & MASK_64BIT) ? 64 : 32);
1046 for (i = 0; i < pta_size; i++)
1047 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1049 ix86_arch = processor_alias_table[i].processor;
1050 /* Default cpu tuning to the architecture. */
1051 ix86_cpu = ix86_arch;
1052 if (processor_alias_table[i].flags & PTA_MMX
1053 && !(target_flags_explicit & MASK_MMX))
1054 target_flags |= MASK_MMX;
1055 if (processor_alias_table[i].flags & PTA_3DNOW
1056 && !(target_flags_explicit & MASK_3DNOW))
1057 target_flags |= MASK_3DNOW;
1058 if (processor_alias_table[i].flags & PTA_3DNOW_A
1059 && !(target_flags_explicit & MASK_3DNOW_A))
1060 target_flags |= MASK_3DNOW_A;
1061 if (processor_alias_table[i].flags & PTA_SSE
1062 && !(target_flags_explicit & MASK_SSE))
1063 target_flags |= MASK_SSE;
1064 if (processor_alias_table[i].flags & PTA_SSE2
1065 && !(target_flags_explicit & MASK_SSE2))
1066 target_flags |= MASK_SSE2;
1067 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1068 x86_prefetch_sse = true;
1073 error ("bad value (%s) for -march= switch", ix86_arch_string);
1075 for (i = 0; i < pta_size; i++)
1076 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1078 ix86_cpu = processor_alias_table[i].processor;
1081 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1082 x86_prefetch_sse = true;
1084 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1087 ix86_cost = &size_cost;
1089 ix86_cost = processor_target_table[ix86_cpu].cost;
1090 target_flags |= processor_target_table[ix86_cpu].target_enable;
1091 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1093 /* Arrange to set up i386_stack_locals for all functions. */
1094 init_machine_status = ix86_init_machine_status;
1096 /* Validate -mregparm= value. */
1097 if (ix86_regparm_string)
1099 i = atoi (ix86_regparm_string);
1100 if (i < 0 || i > REGPARM_MAX)
1101 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1107 ix86_regparm = REGPARM_MAX;
1109 /* If the user has provided any of the -malign-* options,
1110 warn and use that value only if -falign-* is not set.
1111 Remove this code in GCC 3.2 or later. */
1112 if (ix86_align_loops_string)
1114 warning ("-malign-loops is obsolete, use -falign-loops");
1115 if (align_loops == 0)
1117 i = atoi (ix86_align_loops_string);
1118 if (i < 0 || i > MAX_CODE_ALIGN)
1119 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1121 align_loops = 1 << i;
1125 if (ix86_align_jumps_string)
1127 warning ("-malign-jumps is obsolete, use -falign-jumps");
1128 if (align_jumps == 0)
1130 i = atoi (ix86_align_jumps_string);
1131 if (i < 0 || i > MAX_CODE_ALIGN)
1132 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1134 align_jumps = 1 << i;
1138 if (ix86_align_funcs_string)
1140 warning ("-malign-functions is obsolete, use -falign-functions");
1141 if (align_functions == 0)
1143 i = atoi (ix86_align_funcs_string);
1144 if (i < 0 || i > MAX_CODE_ALIGN)
1145 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1147 align_functions = 1 << i;
1151 /* Default align_* from the processor table. */
1152 if (align_loops == 0)
1154 align_loops = processor_target_table[ix86_cpu].align_loop;
1155 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1157 if (align_jumps == 0)
1159 align_jumps = processor_target_table[ix86_cpu].align_jump;
1160 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1162 if (align_functions == 0)
1164 align_functions = processor_target_table[ix86_cpu].align_func;
1167 /* Validate -mpreferred-stack-boundary= value, or provide default.
1168 The default of 128 bits is for Pentium III's SSE __m128, but we
1169 don't want additional code to keep the stack aligned when
1170 optimizing for code size. */
1171 ix86_preferred_stack_boundary = (optimize_size
1172 ? TARGET_64BIT ? 128 : 32
1174 if (ix86_preferred_stack_boundary_string)
1176 i = atoi (ix86_preferred_stack_boundary_string);
1177 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1178 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1179 TARGET_64BIT ? 4 : 2);
1181 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1184 /* Validate -mbranch-cost= value, or provide default. */
1185 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1186 if (ix86_branch_cost_string)
1188 i = atoi (ix86_branch_cost_string);
1190 error ("-mbranch-cost=%d is not between 0 and 5", i);
1192 ix86_branch_cost = i;
1195 if (ix86_tls_dialect_string)
1197 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1198 ix86_tls_dialect = TLS_DIALECT_GNU;
1199 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1200 ix86_tls_dialect = TLS_DIALECT_SUN;
1202 error ("bad value (%s) for -mtls-dialect= switch",
1203 ix86_tls_dialect_string);
1207 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1209 /* Keep nonleaf frame pointers. */
1210 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1211 flag_omit_frame_pointer = 1;
1213 /* If we're doing fast math, we don't care about comparison order
1214 wrt NaNs. This lets us use a shorter comparison sequence. */
1215 if (flag_unsafe_math_optimizations)
1216 target_flags &= ~MASK_IEEE_FP;
1218 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1219 since the insns won't need emulation. */
1220 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1221 target_flags &= ~MASK_NO_FANCY_MATH_387;
1225 if (TARGET_ALIGN_DOUBLE)
1226 error ("-malign-double makes no sense in the 64bit mode");
1228 error ("-mrtd calling convention not supported in the 64bit mode");
1229 /* Enable by default the SSE and MMX builtins. */
1230 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1231 ix86_fpmath = FPMATH_SSE;
1234 ix86_fpmath = FPMATH_387;
1236 if (ix86_fpmath_string != 0)
1238 if (! strcmp (ix86_fpmath_string, "387"))
1239 ix86_fpmath = FPMATH_387;
1240 else if (! strcmp (ix86_fpmath_string, "sse"))
1244 warning ("SSE instruction set disabled, using 387 arithmetics");
1245 ix86_fpmath = FPMATH_387;
1248 ix86_fpmath = FPMATH_SSE;
1250 else if (! strcmp (ix86_fpmath_string, "387,sse")
1251 || ! strcmp (ix86_fpmath_string, "sse,387"))
1255 warning ("SSE instruction set disabled, using 387 arithmetics");
1256 ix86_fpmath = FPMATH_387;
1258 else if (!TARGET_80387)
1260 warning ("387 instruction set disabled, using SSE arithmetics");
1261 ix86_fpmath = FPMATH_SSE;
1264 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1267 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1270 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1274 target_flags |= MASK_MMX;
1275 x86_prefetch_sse = true;
1278 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1281 target_flags |= MASK_MMX;
1282 /* If we are targetting the Athlon architecture, enable the 3Dnow/MMX
1283 extensions it adds. */
1284 if (x86_3dnow_a & (1 << ix86_arch))
1285 target_flags |= MASK_3DNOW_A;
1287 if ((x86_accumulate_outgoing_args & CPUMASK)
1288 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1290 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1292 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1295 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1296 p = strchr (internal_label_prefix, 'X');
1297 internal_label_prefix_len = p - internal_label_prefix;
1303 optimization_options (level, size)
1305 int size ATTRIBUTE_UNUSED;
1307 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1308 make the problem with not enough registers even worse. */
1309 #ifdef INSN_SCHEDULING
1311 flag_schedule_insns = 0;
1313 if (TARGET_64BIT && optimize >= 1)
1314 flag_omit_frame_pointer = 1;
1317 flag_pcc_struct_return = 0;
1318 flag_asynchronous_unwind_tables = 1;
1321 flag_omit_frame_pointer = 0;
1324 /* Table of valid machine attributes. */
1325 const struct attribute_spec ix86_attribute_table[] =
1327 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1328 /* Stdcall attribute says callee is responsible for popping arguments
1329 if they are not variable. */
1330 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1331 /* Cdecl attribute says the callee is a normal C declaration */
1332 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1333 /* Regparm attribute specifies how many integer arguments are to be
1334 passed in registers. */
1335 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1336 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1337 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1338 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1339 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1341 { NULL, 0, 0, false, false, false, NULL }
1344 /* Handle a "cdecl" or "stdcall" attribute;
1345 arguments as in struct attribute_spec.handler. */
1347 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1350 tree args ATTRIBUTE_UNUSED;
1351 int flags ATTRIBUTE_UNUSED;
1354 if (TREE_CODE (*node) != FUNCTION_TYPE
1355 && TREE_CODE (*node) != METHOD_TYPE
1356 && TREE_CODE (*node) != FIELD_DECL
1357 && TREE_CODE (*node) != TYPE_DECL)
1359 warning ("`%s' attribute only applies to functions",
1360 IDENTIFIER_POINTER (name));
1361 *no_add_attrs = true;
1366 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1367 *no_add_attrs = true;
1373 /* Handle a "regparm" attribute;
1374 arguments as in struct attribute_spec.handler. */
1376 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1380 int flags ATTRIBUTE_UNUSED;
1383 if (TREE_CODE (*node) != FUNCTION_TYPE
1384 && TREE_CODE (*node) != METHOD_TYPE
1385 && TREE_CODE (*node) != FIELD_DECL
1386 && TREE_CODE (*node) != TYPE_DECL)
1388 warning ("`%s' attribute only applies to functions",
1389 IDENTIFIER_POINTER (name));
1390 *no_add_attrs = true;
1396 cst = TREE_VALUE (args);
1397 if (TREE_CODE (cst) != INTEGER_CST)
1399 warning ("`%s' attribute requires an integer constant argument",
1400 IDENTIFIER_POINTER (name));
1401 *no_add_attrs = true;
1403 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1405 warning ("argument to `%s' attribute larger than %d",
1406 IDENTIFIER_POINTER (name), REGPARM_MAX);
1407 *no_add_attrs = true;
1414 /* Return 0 if the attributes for two types are incompatible, 1 if they
1415 are compatible, and 2 if they are nearly compatible (which causes a
1416 warning to be generated). */
1419 ix86_comp_type_attributes (type1, type2)
1423 /* Check for mismatch of non-default calling convention. */
1424 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1426 if (TREE_CODE (type1) != FUNCTION_TYPE)
1429 /* Check for mismatched return types (cdecl vs stdcall). */
1430 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1431 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1436 /* Value is the number of bytes of arguments automatically
1437 popped when returning from a subroutine call.
1438 FUNDECL is the declaration node of the function (as a tree),
1439 FUNTYPE is the data type of the function (as a tree),
1440 or for a library call it is an identifier node for the subroutine name.
1441 SIZE is the number of bytes of arguments passed on the stack.
1443 On the 80386, the RTD insn may be used to pop them if the number
1444 of args is fixed, but if the number is variable then the caller
1445 must pop them all. RTD can't be used for library calls now
1446 because the library is compiled with the Unix compiler.
1447 Use of RTD is a selectable option, since it is incompatible with
1448 standard Unix calling sequences. If the option is not selected,
1449 the caller must always pop the args.
1451 The attribute stdcall is equivalent to RTD on a per module basis. */
1454 ix86_return_pops_args (fundecl, funtype, size)
1459 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1461 /* Cdecl functions override -mrtd, and never pop the stack. */
1462 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1464 /* Stdcall functions will pop the stack if not variable args. */
1465 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1469 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1470 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1471 == void_type_node)))
1475 /* Lose any fake structure return argument if it is passed on the stack. */
1476 if (aggregate_value_p (TREE_TYPE (funtype))
1479 int nregs = ix86_regparm;
1483 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));
1486 nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1490 return GET_MODE_SIZE (Pmode);
1496 /* Argument support functions. */
1498 /* Return true when register may be used to pass function parameters. */
1500 ix86_function_arg_regno_p (regno)
1505 return (regno < REGPARM_MAX
1506 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1507 if (SSE_REGNO_P (regno) && TARGET_SSE)
1509 /* RAX is used as hidden argument to va_arg functions. */
1512 for (i = 0; i < REGPARM_MAX; i++)
1513 if (regno == x86_64_int_parameter_registers[i])
1518 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1519 for a call to a function whose data type is FNTYPE.
1520 For a library call, FNTYPE is 0. */
1523 init_cumulative_args (cum, fntype, libname)
1524 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1525 tree fntype; /* tree ptr for function decl */
1526 rtx libname; /* SYMBOL_REF of library name or 0 */
1528 static CUMULATIVE_ARGS zero_cum;
1529 tree param, next_param;
1531 if (TARGET_DEBUG_ARG)
1533 fprintf (stderr, "\ninit_cumulative_args (");
1535 fprintf (stderr, "fntype code = %s, ret code = %s",
1536 tree_code_name[(int) TREE_CODE (fntype)],
1537 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1539 fprintf (stderr, "no fntype");
1542 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1547 /* Set up the number of registers to use for passing arguments. */
1548 cum->nregs = ix86_regparm;
1549 cum->sse_nregs = SSE_REGPARM_MAX;
1550 if (fntype && !TARGET_64BIT)
1552 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1555 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1557 cum->maybe_vaarg = false;
1559 /* Determine if this function has variable arguments. This is
1560 indicated by the last argument being 'void_type_mode' if there
1561 are no variable arguments. If there are variable arguments, then
1562 we won't pass anything in registers */
1566 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1567 param != 0; param = next_param)
1569 next_param = TREE_CHAIN (param);
1570 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1574 cum->maybe_vaarg = true;
1578 if ((!fntype && !libname)
1579 || (fntype && !TYPE_ARG_TYPES (fntype)))
1580 cum->maybe_vaarg = 1;
1582 if (TARGET_DEBUG_ARG)
1583 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1588 /* x86-64 register passing impleemntation. See x86-64 ABI for details. Goal
1589 of this code is to classify each 8bytes of incoming argument by the register
1590 class and assign registers accordingly. */
1592 /* Return the union class of CLASS1 and CLASS2.
1593 See the x86-64 PS ABI for details. */
1595 static enum x86_64_reg_class
1596 merge_classes (class1, class2)
1597 enum x86_64_reg_class class1, class2;
1599 /* Rule #1: If both classes are equal, this is the resulting class. */
1600 if (class1 == class2)
1603 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1605 if (class1 == X86_64_NO_CLASS)
1607 if (class2 == X86_64_NO_CLASS)
1610 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1611 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1612 return X86_64_MEMORY_CLASS;
1614 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1615 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1616 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1617 return X86_64_INTEGERSI_CLASS;
1618 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1619 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1620 return X86_64_INTEGER_CLASS;
1622 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1623 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1624 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1625 return X86_64_MEMORY_CLASS;
1627 /* Rule #6: Otherwise class SSE is used. */
1628 return X86_64_SSE_CLASS;
1631 /* Classify the argument of type TYPE and mode MODE.
1632 CLASSES will be filled by the register class used to pass each word
1633 of the operand. The number of words is returned. In case the parameter
1634 should be passed in memory, 0 is returned. As a special case for zero
1635 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1637 BIT_OFFSET is used internally for handling records and specifies offset
1638 of the offset in bits modulo 256 to avoid overflow cases.
1640 See the x86-64 PS ABI for details.
1644 classify_argument (mode, type, classes, bit_offset)
1645 enum machine_mode mode;
1647 enum x86_64_reg_class classes[MAX_CLASSES];
1651 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1652 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1654 /* Variable sized entities are always passed/returned in memory. */
1658 if (type && AGGREGATE_TYPE_P (type))
1662 enum x86_64_reg_class subclasses[MAX_CLASSES];
1664 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1668 for (i = 0; i < words; i++)
1669 classes[i] = X86_64_NO_CLASS;
1671 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1672 signalize memory class, so handle it as special case. */
1675 classes[0] = X86_64_NO_CLASS;
1679 /* Classify each field of record and merge classes. */
1680 if (TREE_CODE (type) == RECORD_TYPE)
1682 /* For classes first merge in the field of the subclasses. */
1683 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1685 tree bases = TYPE_BINFO_BASETYPES (type);
1686 int n_bases = TREE_VEC_LENGTH (bases);
1689 for (i = 0; i < n_bases; ++i)
1691 tree binfo = TREE_VEC_ELT (bases, i);
1693 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1694 tree type = BINFO_TYPE (binfo);
1696 num = classify_argument (TYPE_MODE (type),
1698 (offset + bit_offset) % 256);
1701 for (i = 0; i < num; i++)
1703 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1705 merge_classes (subclasses[i], classes[i + pos]);
1709 /* And now merge the fields of structure. */
1710 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1712 if (TREE_CODE (field) == FIELD_DECL)
1716 /* Bitfields are always classified as integer. Handle them
1717 early, since later code would consider them to be
1718 misaligned integers. */
1719 if (DECL_BIT_FIELD (field))
1721 for (i = int_bit_position (field) / 8 / 8;
1722 i < (int_bit_position (field)
1723 + tree_low_cst (DECL_SIZE (field), 0)
1726 merge_classes (X86_64_INTEGER_CLASS,
1731 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1732 TREE_TYPE (field), subclasses,
1733 (int_bit_position (field)
1734 + bit_offset) % 256);
1737 for (i = 0; i < num; i++)
1740 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1742 merge_classes (subclasses[i], classes[i + pos]);
1748 /* Arrays are handled as small records. */
1749 else if (TREE_CODE (type) == ARRAY_TYPE)
1752 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1753 TREE_TYPE (type), subclasses, bit_offset);
1757 /* The partial classes are now full classes. */
1758 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1759 subclasses[0] = X86_64_SSE_CLASS;
1760 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1761 subclasses[0] = X86_64_INTEGER_CLASS;
1763 for (i = 0; i < words; i++)
1764 classes[i] = subclasses[i % num];
1766 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1767 else if (TREE_CODE (type) == UNION_TYPE
1768 || TREE_CODE (type) == QUAL_UNION_TYPE)
1770 /* For classes first merge in the field of the subclasses. */
1771 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1773 tree bases = TYPE_BINFO_BASETYPES (type);
1774 int n_bases = TREE_VEC_LENGTH (bases);
1777 for (i = 0; i < n_bases; ++i)
1779 tree binfo = TREE_VEC_ELT (bases, i);
1781 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1782 tree type = BINFO_TYPE (binfo);
1784 num = classify_argument (TYPE_MODE (type),
1786 (offset + (bit_offset % 64)) % 256);
1789 for (i = 0; i < num; i++)
1791 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1793 merge_classes (subclasses[i], classes[i + pos]);
1797 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1799 if (TREE_CODE (field) == FIELD_DECL)
1802 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1803 TREE_TYPE (field), subclasses,
1807 for (i = 0; i < num; i++)
1808 classes[i] = merge_classes (subclasses[i], classes[i]);
1815 /* Final merger cleanup. */
1816 for (i = 0; i < words; i++)
1818 /* If one class is MEMORY, everything should be passed in
1820 if (classes[i] == X86_64_MEMORY_CLASS)
1823 /* The X86_64_SSEUP_CLASS should be always preceded by
1824 X86_64_SSE_CLASS. */
1825 if (classes[i] == X86_64_SSEUP_CLASS
1826 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1827 classes[i] = X86_64_SSE_CLASS;
1829 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1830 if (classes[i] == X86_64_X87UP_CLASS
1831 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1832 classes[i] = X86_64_SSE_CLASS;
1837 /* Compute alignment needed. We align all types to natural boundaries with
1838 exception of XFmode that is aligned to 64bits. */
1839 if (mode != VOIDmode && mode != BLKmode)
1841 int mode_alignment = GET_MODE_BITSIZE (mode);
1844 mode_alignment = 128;
1845 else if (mode == XCmode)
1846 mode_alignment = 256;
1847 /* Misaligned fields are always returned in memory. */
1848 if (bit_offset % mode_alignment)
1852 /* Classification of atomic types. */
1862 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1863 classes[0] = X86_64_INTEGERSI_CLASS;
1865 classes[0] = X86_64_INTEGER_CLASS;
1869 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1872 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1873 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1876 if (!(bit_offset % 64))
1877 classes[0] = X86_64_SSESF_CLASS;
1879 classes[0] = X86_64_SSE_CLASS;
1882 classes[0] = X86_64_SSEDF_CLASS;
1885 classes[0] = X86_64_X87_CLASS;
1886 classes[1] = X86_64_X87UP_CLASS;
1889 classes[0] = X86_64_X87_CLASS;
1890 classes[1] = X86_64_X87UP_CLASS;
1891 classes[2] = X86_64_X87_CLASS;
1892 classes[3] = X86_64_X87UP_CLASS;
1895 classes[0] = X86_64_SSEDF_CLASS;
1896 classes[1] = X86_64_SSEDF_CLASS;
1899 classes[0] = X86_64_SSE_CLASS;
1907 classes[0] = X86_64_SSE_CLASS;
1908 classes[1] = X86_64_SSEUP_CLASS;
1914 classes[0] = X86_64_SSE_CLASS;
1924 /* Examine the argument and return set number of register required in each
1925 class. Return 0 iff parameter should be passed in memory. */
1927 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1928 enum machine_mode mode;
1930 int *int_nregs, *sse_nregs;
1933 enum x86_64_reg_class class[MAX_CLASSES];
1934 int n = classify_argument (mode, type, class, 0);
1940 for (n--; n >= 0; n--)
1943 case X86_64_INTEGER_CLASS:
1944 case X86_64_INTEGERSI_CLASS:
1947 case X86_64_SSE_CLASS:
1948 case X86_64_SSESF_CLASS:
1949 case X86_64_SSEDF_CLASS:
1952 case X86_64_NO_CLASS:
1953 case X86_64_SSEUP_CLASS:
1955 case X86_64_X87_CLASS:
1956 case X86_64_X87UP_CLASS:
1960 case X86_64_MEMORY_CLASS:
1965 /* Construct container for the argument used by GCC interface. See
1966 FUNCTION_ARG for the detailed description. */
1968 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1969 enum machine_mode mode;
1972 int nintregs, nsseregs;
1976 enum machine_mode tmpmode;
1978 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1979 enum x86_64_reg_class class[MAX_CLASSES];
1983 int needed_sseregs, needed_intregs;
1984 rtx exp[MAX_CLASSES];
1987 n = classify_argument (mode, type, class, 0);
1988 if (TARGET_DEBUG_ARG)
1991 fprintf (stderr, "Memory class\n");
1994 fprintf (stderr, "Classes:");
1995 for (i = 0; i < n; i++)
1997 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1999 fprintf (stderr, "\n");
2004 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2006 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2009 /* First construct simple cases. Avoid SCmode, since we want to use
2010 single register to pass this type. */
2011 if (n == 1 && mode != SCmode)
2014 case X86_64_INTEGER_CLASS:
2015 case X86_64_INTEGERSI_CLASS:
2016 return gen_rtx_REG (mode, intreg[0]);
2017 case X86_64_SSE_CLASS:
2018 case X86_64_SSESF_CLASS:
2019 case X86_64_SSEDF_CLASS:
2020 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2021 case X86_64_X87_CLASS:
2022 return gen_rtx_REG (mode, FIRST_STACK_REG);
2023 case X86_64_NO_CLASS:
2024 /* Zero sized array, struct or class. */
2029 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2030 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2032 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2033 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2034 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2035 && class[1] == X86_64_INTEGER_CLASS
2036 && (mode == CDImode || mode == TImode)
2037 && intreg[0] + 1 == intreg[1])
2038 return gen_rtx_REG (mode, intreg[0]);
2040 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2041 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2042 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2044 /* Otherwise figure out the entries of the PARALLEL. */
2045 for (i = 0; i < n; i++)
2049 case X86_64_NO_CLASS:
2051 case X86_64_INTEGER_CLASS:
2052 case X86_64_INTEGERSI_CLASS:
2053 /* Merge TImodes on aligned occassions here too. */
2054 if (i * 8 + 8 > bytes)
2055 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2056 else if (class[i] == X86_64_INTEGERSI_CLASS)
2060 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2061 if (tmpmode == BLKmode)
2063 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2064 gen_rtx_REG (tmpmode, *intreg),
2068 case X86_64_SSESF_CLASS:
2069 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2070 gen_rtx_REG (SFmode,
2071 SSE_REGNO (sse_regno)),
2075 case X86_64_SSEDF_CLASS:
2076 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2077 gen_rtx_REG (DFmode,
2078 SSE_REGNO (sse_regno)),
2082 case X86_64_SSE_CLASS:
2083 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
2084 tmpmode = TImode, i++;
2087 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2088 gen_rtx_REG (tmpmode,
2089 SSE_REGNO (sse_regno)),
2097 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2098 for (i = 0; i < nexps; i++)
2099 XVECEXP (ret, 0, i) = exp [i];
2103 /* Update the data in CUM to advance over an argument
2104 of mode MODE and data type TYPE.
2105 (TYPE is null for libcalls where that information may not be available.) */
2108 function_arg_advance (cum, mode, type, named)
2109 CUMULATIVE_ARGS *cum; /* current arg information */
2110 enum machine_mode mode; /* current arg mode */
2111 tree type; /* type of the argument or 0 if lib support */
2112 int named; /* whether or not the argument was named */
2115 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2116 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2118 if (TARGET_DEBUG_ARG)
2120 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2121 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2124 int int_nregs, sse_nregs;
2125 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2126 cum->words += words;
2127 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2129 cum->nregs -= int_nregs;
2130 cum->sse_nregs -= sse_nregs;
2131 cum->regno += int_nregs;
2132 cum->sse_regno += sse_nregs;
2135 cum->words += words;
2139 if (TARGET_SSE && mode == TImode)
2141 cum->sse_words += words;
2142 cum->sse_nregs -= 1;
2143 cum->sse_regno += 1;
2144 if (cum->sse_nregs <= 0)
2152 cum->words += words;
2153 cum->nregs -= words;
2154 cum->regno += words;
2156 if (cum->nregs <= 0)
2166 /* Define where to put the arguments to a function.
2167 Value is zero to push the argument on the stack,
2168 or a hard register in which to store the argument.
2170 MODE is the argument's machine mode.
2171 TYPE is the data type of the argument (as a tree).
2172 This is null for libcalls where that information may
2174 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2175 the preceding args and about the function being called.
2176 NAMED is nonzero if this argument is a named parameter
2177 (otherwise it is an extra parameter matching an ellipsis). */
2180 function_arg (cum, mode, type, named)
2181 CUMULATIVE_ARGS *cum; /* current arg information */
2182 enum machine_mode mode; /* current arg mode */
2183 tree type; /* type of the argument or 0 if lib support */
2184 int named; /* != 0 for normal args, == 0 for ... args */
2188 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2189 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2191 /* Handle an hidden AL argument containing number of registers for varargs
2192 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2194 if (mode == VOIDmode)
2197 return GEN_INT (cum->maybe_vaarg
2198 ? (cum->sse_nregs < 0
2206 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2207 &x86_64_int_parameter_registers [cum->regno],
2212 /* For now, pass fp/complex values on the stack. */
2221 if (words <= cum->nregs)
2222 ret = gen_rtx_REG (mode, cum->regno);
2226 ret = gen_rtx_REG (mode, cum->sse_regno);
2230 if (TARGET_DEBUG_ARG)
2233 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2234 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2237 print_simple_rtl (stderr, ret);
2239 fprintf (stderr, ", stack");
2241 fprintf (stderr, " )\n");
2247 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2251 ix86_function_arg_boundary (mode, type)
2252 enum machine_mode mode;
2257 return PARM_BOUNDARY;
2259 align = TYPE_ALIGN (type);
2261 align = GET_MODE_ALIGNMENT (mode);
2262 if (align < PARM_BOUNDARY)
2263 align = PARM_BOUNDARY;
2269 /* Return true if N is a possible register number of function value. */
2271 ix86_function_value_regno_p (regno)
2276 return ((regno) == 0
2277 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2278 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2280 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2281 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2282 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2285 /* Define how to find the value returned by a function.
2286 VALTYPE is the data type of the value (as a tree).
2287 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2288 otherwise, FUNC is 0. */
2290 ix86_function_value (valtype)
2295 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2296 REGPARM_MAX, SSE_REGPARM_MAX,
2297 x86_64_int_return_registers, 0);
2298 /* For zero sized structures, construct_continer return NULL, but we need
2299 to keep rest of compiler happy by returning meaningfull value. */
2301 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2305 return gen_rtx_REG (TYPE_MODE (valtype),
2306 ix86_value_regno (TYPE_MODE (valtype)));
2309 /* Return false iff type is returned in memory. */
2311 ix86_return_in_memory (type)
2314 int needed_intregs, needed_sseregs;
2317 return !examine_argument (TYPE_MODE (type), type, 1,
2318 &needed_intregs, &needed_sseregs);
2322 if (TYPE_MODE (type) == BLKmode
2323 || (VECTOR_MODE_P (TYPE_MODE (type))
2324 && int_size_in_bytes (type) == 8)
2325 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2326 && TYPE_MODE (type) != TFmode
2327 && !VECTOR_MODE_P (TYPE_MODE (type))))
2333 /* Define how to find the value returned by a library function
2334 assuming the value has mode MODE. */
2336 ix86_libcall_value (mode)
2337 enum machine_mode mode;
2347 return gen_rtx_REG (mode, FIRST_SSE_REG);
2350 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2352 return gen_rtx_REG (mode, 0);
2356 return gen_rtx_REG (mode, ix86_value_regno (mode));
2359 /* Given a mode, return the register to use for a return value. */
2362 ix86_value_regno (mode)
2363 enum machine_mode mode;
2365 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2366 return FIRST_FLOAT_REG;
2367 if (mode == TImode || VECTOR_MODE_P (mode))
2368 return FIRST_SSE_REG;
2372 /* Create the va_list data type. */
2375 ix86_build_va_list ()
2377 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2379 /* For i386 we use plain pointer to argument area. */
2381 return build_pointer_type (char_type_node);
2383 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2384 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2386 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2387 unsigned_type_node);
2388 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2389 unsigned_type_node);
2390 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2392 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2395 DECL_FIELD_CONTEXT (f_gpr) = record;
2396 DECL_FIELD_CONTEXT (f_fpr) = record;
2397 DECL_FIELD_CONTEXT (f_ovf) = record;
2398 DECL_FIELD_CONTEXT (f_sav) = record;
2400 TREE_CHAIN (record) = type_decl;
2401 TYPE_NAME (record) = type_decl;
2402 TYPE_FIELDS (record) = f_gpr;
2403 TREE_CHAIN (f_gpr) = f_fpr;
2404 TREE_CHAIN (f_fpr) = f_ovf;
2405 TREE_CHAIN (f_ovf) = f_sav;
2407 layout_type (record);
2409 /* The correct type is an array type of one element. */
2410 return build_array_type (record, build_index_type (size_zero_node));
2413 /* Perform any needed actions needed for a function that is receiving a
2414 variable number of arguments.
2418 MODE and TYPE are the mode and type of the current parameter.
2420 PRETEND_SIZE is a variable that should be set to the amount of stack
2421 that must be pushed by the prolog to pretend that our caller pushed
2424 Normally, this macro will push all remaining incoming registers on the
2425 stack and set PRETEND_SIZE to the length of the registers pushed. */
2428 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2429 CUMULATIVE_ARGS *cum;
2430 enum machine_mode mode;
2432 int *pretend_size ATTRIBUTE_UNUSED;
2436 CUMULATIVE_ARGS next_cum;
2437 rtx save_area = NULL_RTX, mem;
2450 /* Indicate to allocate space on the stack for varargs save area. */
2451 ix86_save_varrargs_registers = 1;
2453 fntype = TREE_TYPE (current_function_decl);
2454 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2455 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2456 != void_type_node));
2458 /* For varargs, we do not want to skip the dummy va_dcl argument.
2459 For stdargs, we do want to skip the last named argument. */
2462 function_arg_advance (&next_cum, mode, type, 1);
2465 save_area = frame_pointer_rtx;
2467 set = get_varargs_alias_set ();
2469 for (i = next_cum.regno; i < ix86_regparm; i++)
2471 mem = gen_rtx_MEM (Pmode,
2472 plus_constant (save_area, i * UNITS_PER_WORD));
2473 set_mem_alias_set (mem, set);
2474 emit_move_insn (mem, gen_rtx_REG (Pmode,
2475 x86_64_int_parameter_registers[i]));
2478 if (next_cum.sse_nregs)
2480 /* Now emit code to save SSE registers. The AX parameter contains number
2481 of SSE parameter regsiters used to call this function. We use
2482 sse_prologue_save insn template that produces computed jump across
2483 SSE saves. We need some preparation work to get this working. */
2485 label = gen_label_rtx ();
2486 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2488 /* Compute address to jump to :
2489 label - 5*eax + nnamed_sse_arguments*5 */
2490 tmp_reg = gen_reg_rtx (Pmode);
2491 nsse_reg = gen_reg_rtx (Pmode);
2492 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2493 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2494 gen_rtx_MULT (Pmode, nsse_reg,
2496 if (next_cum.sse_regno)
2499 gen_rtx_CONST (DImode,
2500 gen_rtx_PLUS (DImode,
2502 GEN_INT (next_cum.sse_regno * 4))));
2504 emit_move_insn (nsse_reg, label_ref);
2505 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2507 /* Compute address of memory block we save into. We always use pointer
2508 pointing 127 bytes after first byte to store - this is needed to keep
2509 instruction size limited by 4 bytes. */
2510 tmp_reg = gen_reg_rtx (Pmode);
2511 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2512 plus_constant (save_area,
2513 8 * REGPARM_MAX + 127)));
2514 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2515 set_mem_alias_set (mem, set);
2516 set_mem_align (mem, BITS_PER_WORD);
2518 /* And finally do the dirty job! */
2519 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2520 GEN_INT (next_cum.sse_regno), label));
2525 /* Implement va_start. */
2528 ix86_va_start (valist, nextarg)
2532 HOST_WIDE_INT words, n_gpr, n_fpr;
2533 tree f_gpr, f_fpr, f_ovf, f_sav;
2534 tree gpr, fpr, ovf, sav, t;
2536 /* Only 64bit target needs something special. */
2539 std_expand_builtin_va_start (valist, nextarg);
2543 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2544 f_fpr = TREE_CHAIN (f_gpr);
2545 f_ovf = TREE_CHAIN (f_fpr);
2546 f_sav = TREE_CHAIN (f_ovf);
2548 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2549 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2550 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2551 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2552 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2554 /* Count number of gp and fp argument registers used. */
2555 words = current_function_args_info.words;
2556 n_gpr = current_function_args_info.regno;
2557 n_fpr = current_function_args_info.sse_regno;
2559 if (TARGET_DEBUG_ARG)
2560 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2561 (int) words, (int) n_gpr, (int) n_fpr);
2563 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2564 build_int_2 (n_gpr * 8, 0));
2565 TREE_SIDE_EFFECTS (t) = 1;
2566 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2568 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2569 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2570 TREE_SIDE_EFFECTS (t) = 1;
2571 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2573 /* Find the overflow area. */
2574 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2576 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2577 build_int_2 (words * UNITS_PER_WORD, 0));
2578 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2579 TREE_SIDE_EFFECTS (t) = 1;
2580 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2582 /* Find the register save area.
2583 Prologue of the function save it right above stack frame. */
2584 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2585 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2586 TREE_SIDE_EFFECTS (t) = 1;
2587 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2590 /* Implement va_arg. */
2592 ix86_va_arg (valist, type)
2595 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2596 tree f_gpr, f_fpr, f_ovf, f_sav;
2597 tree gpr, fpr, ovf, sav, t;
2599 rtx lab_false, lab_over = NULL_RTX;
2603 /* Only 64bit target needs something special. */
2606 return std_expand_builtin_va_arg (valist, type);
2609 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2610 f_fpr = TREE_CHAIN (f_gpr);
2611 f_ovf = TREE_CHAIN (f_fpr);
2612 f_sav = TREE_CHAIN (f_ovf);
2614 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2615 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2616 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2617 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2618 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2620 size = int_size_in_bytes (type);
2621 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2623 container = construct_container (TYPE_MODE (type), type, 0,
2624 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2626 * Pull the value out of the saved registers ...
2629 addr_rtx = gen_reg_rtx (Pmode);
2633 rtx int_addr_rtx, sse_addr_rtx;
2634 int needed_intregs, needed_sseregs;
2637 lab_over = gen_label_rtx ();
2638 lab_false = gen_label_rtx ();
2640 examine_argument (TYPE_MODE (type), type, 0,
2641 &needed_intregs, &needed_sseregs);
2644 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2645 || TYPE_ALIGN (type) > 128);
2647 /* In case we are passing structure, verify that it is consetuctive block
2648 on the register save area. If not we need to do moves. */
2649 if (!need_temp && !REG_P (container))
2651 /* Verify that all registers are strictly consetuctive */
2652 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2656 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2658 rtx slot = XVECEXP (container, 0, i);
2659 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2660 || INTVAL (XEXP (slot, 1)) != i * 16)
2668 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2670 rtx slot = XVECEXP (container, 0, i);
2671 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2672 || INTVAL (XEXP (slot, 1)) != i * 8)
2679 int_addr_rtx = addr_rtx;
2680 sse_addr_rtx = addr_rtx;
2684 int_addr_rtx = gen_reg_rtx (Pmode);
2685 sse_addr_rtx = gen_reg_rtx (Pmode);
2687 /* First ensure that we fit completely in registers. */
2690 emit_cmp_and_jump_insns (expand_expr
2691 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2692 GEN_INT ((REGPARM_MAX - needed_intregs +
2693 1) * 8), GE, const1_rtx, SImode,
2698 emit_cmp_and_jump_insns (expand_expr
2699 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2700 GEN_INT ((SSE_REGPARM_MAX -
2701 needed_sseregs + 1) * 16 +
2702 REGPARM_MAX * 8), GE, const1_rtx,
2703 SImode, 1, lab_false);
2706 /* Compute index to start of area used for integer regs. */
2709 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2710 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2711 if (r != int_addr_rtx)
2712 emit_move_insn (int_addr_rtx, r);
2716 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2717 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2718 if (r != sse_addr_rtx)
2719 emit_move_insn (sse_addr_rtx, r);
2726 /* Never use the memory itself, as it has the alias set. */
2727 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2728 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2729 set_mem_alias_set (mem, get_varargs_alias_set ());
2730 set_mem_align (mem, BITS_PER_UNIT);
2732 for (i = 0; i < XVECLEN (container, 0); i++)
2734 rtx slot = XVECEXP (container, 0, i);
2735 rtx reg = XEXP (slot, 0);
2736 enum machine_mode mode = GET_MODE (reg);
2742 if (SSE_REGNO_P (REGNO (reg)))
2744 src_addr = sse_addr_rtx;
2745 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2749 src_addr = int_addr_rtx;
2750 src_offset = REGNO (reg) * 8;
2752 src_mem = gen_rtx_MEM (mode, src_addr);
2753 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2754 src_mem = adjust_address (src_mem, mode, src_offset);
2755 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2756 emit_move_insn (dest_mem, src_mem);
2763 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2764 build_int_2 (needed_intregs * 8, 0));
2765 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2766 TREE_SIDE_EFFECTS (t) = 1;
2767 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2772 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2773 build_int_2 (needed_sseregs * 16, 0));
2774 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2775 TREE_SIDE_EFFECTS (t) = 1;
2776 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2779 emit_jump_insn (gen_jump (lab_over));
2781 emit_label (lab_false);
2784 /* ... otherwise out of the overflow area. */
2786 /* Care for on-stack alignment if needed. */
2787 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2791 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2792 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2793 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2797 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2799 emit_move_insn (addr_rtx, r);
2802 build (PLUS_EXPR, TREE_TYPE (t), t,
2803 build_int_2 (rsize * UNITS_PER_WORD, 0));
2804 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2805 TREE_SIDE_EFFECTS (t) = 1;
2806 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2809 emit_label (lab_over);
2814 /* Return nonzero if OP is either a i387 or SSE fp register. */
2816 any_fp_register_operand (op, mode)
2818 enum machine_mode mode ATTRIBUTE_UNUSED;
2820 return ANY_FP_REG_P (op);
2823 /* Return nonzero if OP is an i387 fp register. */
2825 fp_register_operand (op, mode)
2827 enum machine_mode mode ATTRIBUTE_UNUSED;
2829 return FP_REG_P (op);
2832 /* Return nonzero if OP is a non-fp register_operand. */
2834 register_and_not_any_fp_reg_operand (op, mode)
2836 enum machine_mode mode;
2838 return register_operand (op, mode) && !ANY_FP_REG_P (op);
2841 /* Return nonzero of OP is a register operand other than an
2842 i387 fp register. */
2844 register_and_not_fp_reg_operand (op, mode)
2846 enum machine_mode mode;
2848 return register_operand (op, mode) && !FP_REG_P (op);
2851 /* Return nonzero if OP is general operand representable on x86_64. */
2854 x86_64_general_operand (op, mode)
2856 enum machine_mode mode;
2859 return general_operand (op, mode);
2860 if (nonimmediate_operand (op, mode))
2862 return x86_64_sign_extended_value (op);
2865 /* Return nonzero if OP is general operand representable on x86_64
2866 as either sign extended or zero extended constant. */
2869 x86_64_szext_general_operand (op, mode)
2871 enum machine_mode mode;
2874 return general_operand (op, mode);
2875 if (nonimmediate_operand (op, mode))
2877 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2880 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2883 x86_64_nonmemory_operand (op, mode)
2885 enum machine_mode mode;
2888 return nonmemory_operand (op, mode);
2889 if (register_operand (op, mode))
2891 return x86_64_sign_extended_value (op);
2894 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2897 x86_64_movabs_operand (op, mode)
2899 enum machine_mode mode;
2901 if (!TARGET_64BIT || !flag_pic)
2902 return nonmemory_operand (op, mode);
2903 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2905 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2910 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2913 x86_64_szext_nonmemory_operand (op, mode)
2915 enum machine_mode mode;
2918 return nonmemory_operand (op, mode);
2919 if (register_operand (op, mode))
2921 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2924 /* Return nonzero if OP is immediate operand representable on x86_64. */
2927 x86_64_immediate_operand (op, mode)
2929 enum machine_mode mode;
2932 return immediate_operand (op, mode);
2933 return x86_64_sign_extended_value (op);
2936 /* Return nonzero if OP is immediate operand representable on x86_64. */
2939 x86_64_zext_immediate_operand (op, mode)
2941 enum machine_mode mode ATTRIBUTE_UNUSED;
2943 return x86_64_zero_extended_value (op);
2946 /* Return nonzero if OP is (const_int 1), else return zero. */
2949 const_int_1_operand (op, mode)
2951 enum machine_mode mode ATTRIBUTE_UNUSED;
2953 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2956 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2957 for shift & compare patterns, as shifting by 0 does not change flags),
2958 else return zero. */
2961 const_int_1_31_operand (op, mode)
2963 enum machine_mode mode ATTRIBUTE_UNUSED;
2965 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
2968 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2969 reference and a constant. */
2972 symbolic_operand (op, mode)
2974 enum machine_mode mode ATTRIBUTE_UNUSED;
2976 switch (GET_CODE (op))
2984 if (GET_CODE (op) == SYMBOL_REF
2985 || GET_CODE (op) == LABEL_REF
2986 || (GET_CODE (op) == UNSPEC
2987 && (XINT (op, 1) == UNSPEC_GOT
2988 || XINT (op, 1) == UNSPEC_GOTOFF
2989 || XINT (op, 1) == UNSPEC_GOTPCREL)))
2991 if (GET_CODE (op) != PLUS
2992 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2996 if (GET_CODE (op) == SYMBOL_REF
2997 || GET_CODE (op) == LABEL_REF)
2999 /* Only @GOTOFF gets offsets. */
3000 if (GET_CODE (op) != UNSPEC
3001 || XINT (op, 1) != UNSPEC_GOTOFF)
3004 op = XVECEXP (op, 0, 0);
3005 if (GET_CODE (op) == SYMBOL_REF
3006 || GET_CODE (op) == LABEL_REF)
3015 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3018 pic_symbolic_operand (op, mode)
3020 enum machine_mode mode ATTRIBUTE_UNUSED;
3022 if (GET_CODE (op) != CONST)
3027 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3032 if (GET_CODE (op) == UNSPEC)
3034 if (GET_CODE (op) != PLUS
3035 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3038 if (GET_CODE (op) == UNSPEC)
3044 /* Return true if OP is a symbolic operand that resolves locally. */
3047 local_symbolic_operand (op, mode)
3049 enum machine_mode mode ATTRIBUTE_UNUSED;
3051 if (GET_CODE (op) == LABEL_REF)
3054 if (GET_CODE (op) == CONST
3055 && GET_CODE (XEXP (op, 0)) == PLUS
3056 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3057 op = XEXP (XEXP (op, 0), 0);
3059 if (GET_CODE (op) != SYMBOL_REF)
3062 /* These we've been told are local by varasm and encode_section_info
3064 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3067 /* There is, however, a not insubstantial body of code in the rest of
3068 the compiler that assumes it can just stick the results of
3069 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3070 /* ??? This is a hack. Should update the body of the compiler to
3071 always create a DECL an invoke targetm.encode_section_info. */
3072 if (strncmp (XSTR (op, 0), internal_label_prefix,
3073 internal_label_prefix_len) == 0)
3079 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3082 tls_symbolic_operand (op, mode)
3084 enum machine_mode mode ATTRIBUTE_UNUSED;
3086 const char *symbol_str;
3088 if (GET_CODE (op) != SYMBOL_REF)
3090 symbol_str = XSTR (op, 0);
3092 if (symbol_str[0] != '%')
3094 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3098 tls_symbolic_operand_1 (op, kind)
3100 enum tls_model kind;
3102 const char *symbol_str;
3104 if (GET_CODE (op) != SYMBOL_REF)
3106 symbol_str = XSTR (op, 0);
3108 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3112 global_dynamic_symbolic_operand (op, mode)
3114 enum machine_mode mode ATTRIBUTE_UNUSED;
3116 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3120 local_dynamic_symbolic_operand (op, mode)
3122 enum machine_mode mode ATTRIBUTE_UNUSED;
3124 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3128 initial_exec_symbolic_operand (op, mode)
3130 enum machine_mode mode ATTRIBUTE_UNUSED;
3132 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3136 local_exec_symbolic_operand (op, mode)
3138 enum machine_mode mode ATTRIBUTE_UNUSED;
3140 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3143 /* Test for a valid operand for a call instruction. Don't allow the
3144 arg pointer register or virtual regs since they may decay into
3145 reg + const, which the patterns can't handle. */
3148 call_insn_operand (op, mode)
3150 enum machine_mode mode ATTRIBUTE_UNUSED;
3152 /* Disallow indirect through a virtual register. This leads to
3153 compiler aborts when trying to eliminate them. */
3154 if (GET_CODE (op) == REG
3155 && (op == arg_pointer_rtx
3156 || op == frame_pointer_rtx
3157 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3158 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3161 /* Disallow `call 1234'. Due to varying assembler lameness this
3162 gets either rejected or translated to `call .+1234'. */
3163 if (GET_CODE (op) == CONST_INT)
3166 /* Explicitly allow SYMBOL_REF even if pic. */
3167 if (GET_CODE (op) == SYMBOL_REF)
3170 /* Otherwise we can allow any general_operand in the address. */
3171 return general_operand (op, Pmode);
3175 constant_call_address_operand (op, mode)
3177 enum machine_mode mode ATTRIBUTE_UNUSED;
3179 if (GET_CODE (op) == CONST
3180 && GET_CODE (XEXP (op, 0)) == PLUS
3181 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3182 op = XEXP (XEXP (op, 0), 0);
3183 return GET_CODE (op) == SYMBOL_REF;
3186 /* Match exactly zero and one. */
3189 const0_operand (op, mode)
3191 enum machine_mode mode;
3193 return op == CONST0_RTX (mode);
3197 const1_operand (op, mode)
3199 enum machine_mode mode ATTRIBUTE_UNUSED;
3201 return op == const1_rtx;
3204 /* Match 2, 4, or 8. Used for leal multiplicands. */
3207 const248_operand (op, mode)
3209 enum machine_mode mode ATTRIBUTE_UNUSED;
3211 return (GET_CODE (op) == CONST_INT
3212 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3215 /* True if this is a constant appropriate for an increment or decremenmt. */
3218 incdec_operand (op, mode)
3220 enum machine_mode mode ATTRIBUTE_UNUSED;
3222 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3223 registers, since carry flag is not set. */
3224 if (TARGET_PENTIUM4 && !optimize_size)
3226 return op == const1_rtx || op == constm1_rtx;
3229 /* Return nonzero if OP is acceptable as operand of DImode shift
3233 shiftdi_operand (op, mode)
3235 enum machine_mode mode ATTRIBUTE_UNUSED;
3238 return nonimmediate_operand (op, mode);
3240 return register_operand (op, mode);
3243 /* Return false if this is the stack pointer, or any other fake
3244 register eliminable to the stack pointer. Otherwise, this is
3247 This is used to prevent esp from being used as an index reg.
3248 Which would only happen in pathological cases. */
3251 reg_no_sp_operand (op, mode)
3253 enum machine_mode mode;
3256 if (GET_CODE (t) == SUBREG)
3258 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3261 return register_operand (op, mode);
3265 mmx_reg_operand (op, mode)
3267 enum machine_mode mode ATTRIBUTE_UNUSED;
3269 return MMX_REG_P (op);
3272 /* Return false if this is any eliminable register. Otherwise
3276 general_no_elim_operand (op, mode)
3278 enum machine_mode mode;
3281 if (GET_CODE (t) == SUBREG)
3283 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3284 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3285 || t == virtual_stack_dynamic_rtx)
3288 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3289 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3292 return general_operand (op, mode);
3295 /* Return false if this is any eliminable register. Otherwise
3296 register_operand or const_int. */
3299 nonmemory_no_elim_operand (op, mode)
3301 enum machine_mode mode;
3304 if (GET_CODE (t) == SUBREG)
3306 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3307 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3308 || t == virtual_stack_dynamic_rtx)
3311 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3314 /* Return false if this is any eliminable register or stack register,
3315 otherwise work like register_operand. */
3318 index_register_operand (op, mode)
3320 enum machine_mode mode;
3323 if (GET_CODE (t) == SUBREG)
3327 if (t == arg_pointer_rtx
3328 || t == frame_pointer_rtx
3329 || t == virtual_incoming_args_rtx
3330 || t == virtual_stack_vars_rtx
3331 || t == virtual_stack_dynamic_rtx
3332 || REGNO (t) == STACK_POINTER_REGNUM)
3335 return general_operand (op, mode);
3338 /* Return true if op is a Q_REGS class register. */
3341 q_regs_operand (op, mode)
3343 enum machine_mode mode;
3345 if (mode != VOIDmode && GET_MODE (op) != mode)
3347 if (GET_CODE (op) == SUBREG)
3348 op = SUBREG_REG (op);
3349 return ANY_QI_REG_P (op);
3352 /* Return true if op is a NON_Q_REGS class register. */
3355 non_q_regs_operand (op, mode)
3357 enum machine_mode mode;
3359 if (mode != VOIDmode && GET_MODE (op) != mode)
3361 if (GET_CODE (op) == SUBREG)
3362 op = SUBREG_REG (op);
3363 return NON_QI_REG_P (op);
3366 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3369 sse_comparison_operator (op, mode)
3371 enum machine_mode mode ATTRIBUTE_UNUSED;
3373 enum rtx_code code = GET_CODE (op);
3376 /* Operations supported directly. */
3386 /* These are equivalent to ones above in non-IEEE comparisons. */
3393 return !TARGET_IEEE_FP;
3398 /* Return 1 if OP is a valid comparison operator in valid mode. */
3400 ix86_comparison_operator (op, mode)
3402 enum machine_mode mode;
3404 enum machine_mode inmode;
3405 enum rtx_code code = GET_CODE (op);
3406 if (mode != VOIDmode && GET_MODE (op) != mode)
3408 if (GET_RTX_CLASS (code) != '<')
3410 inmode = GET_MODE (XEXP (op, 0));
3412 if (inmode == CCFPmode || inmode == CCFPUmode)
3414 enum rtx_code second_code, bypass_code;
3415 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3416 return (bypass_code == NIL && second_code == NIL);
3423 if (inmode == CCmode || inmode == CCGCmode
3424 || inmode == CCGOCmode || inmode == CCNOmode)
3427 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3428 if (inmode == CCmode)
3432 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3440 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3443 fcmov_comparison_operator (op, mode)
3445 enum machine_mode mode;
3447 enum machine_mode inmode;
3448 enum rtx_code code = GET_CODE (op);
3449 if (mode != VOIDmode && GET_MODE (op) != mode)
3451 if (GET_RTX_CLASS (code) != '<')
3453 inmode = GET_MODE (XEXP (op, 0));
3454 if (inmode == CCFPmode || inmode == CCFPUmode)
3456 enum rtx_code second_code, bypass_code;
3457 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3458 if (bypass_code != NIL || second_code != NIL)
3460 code = ix86_fp_compare_code_to_integer (code);
3462 /* i387 supports just limited amount of conditional codes. */
3465 case LTU: case GTU: case LEU: case GEU:
3466 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3469 case ORDERED: case UNORDERED:
3477 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3480 promotable_binary_operator (op, mode)
3482 enum machine_mode mode ATTRIBUTE_UNUSED;
3484 switch (GET_CODE (op))
3487 /* Modern CPUs have same latency for HImode and SImode multiply,
3488 but 386 and 486 do HImode multiply faster. */
3489 return ix86_cpu > PROCESSOR_I486;
3501 /* Nearly general operand, but accept any const_double, since we wish
3502 to be able to drop them into memory rather than have them get pulled
3506 cmp_fp_expander_operand (op, mode)
3508 enum machine_mode mode;
3510 if (mode != VOIDmode && mode != GET_MODE (op))
3512 if (GET_CODE (op) == CONST_DOUBLE)
3514 return general_operand (op, mode);
3517 /* Match an SI or HImode register for a zero_extract. */
3520 ext_register_operand (op, mode)
3522 enum machine_mode mode ATTRIBUTE_UNUSED;
3525 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3526 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3529 if (!register_operand (op, VOIDmode))
3532 /* Be curefull to accept only registers having upper parts. */
3533 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3534 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3537 /* Return 1 if this is a valid binary floating-point operation.
3538 OP is the expression matched, and MODE is its mode. */
3541 binary_fp_operator (op, mode)
3543 enum machine_mode mode;
3545 if (mode != VOIDmode && mode != GET_MODE (op))
3548 switch (GET_CODE (op))
3554 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3562 mult_operator (op, mode)
3564 enum machine_mode mode ATTRIBUTE_UNUSED;
3566 return GET_CODE (op) == MULT;
3570 div_operator (op, mode)
3572 enum machine_mode mode ATTRIBUTE_UNUSED;
3574 return GET_CODE (op) == DIV;
3578 arith_or_logical_operator (op, mode)
3580 enum machine_mode mode;
3582 return ((mode == VOIDmode || GET_MODE (op) == mode)
3583 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3584 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3587 /* Returns 1 if OP is memory operand with a displacement. */
3590 memory_displacement_operand (op, mode)
3592 enum machine_mode mode;
3594 struct ix86_address parts;
3596 if (! memory_operand (op, mode))
3599 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3602 return parts.disp != NULL_RTX;
3605 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3606 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3608 ??? It seems likely that this will only work because cmpsi is an
3609 expander, and no actual insns use this. */
3612 cmpsi_operand (op, mode)
3614 enum machine_mode mode;
3616 if (nonimmediate_operand (op, mode))
3619 if (GET_CODE (op) == AND
3620 && GET_MODE (op) == SImode
3621 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3622 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3623 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3624 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3625 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3626 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3632 /* Returns 1 if OP is memory operand that can not be represented by the
3636 long_memory_operand (op, mode)
3638 enum machine_mode mode;
3640 if (! memory_operand (op, mode))
3643 return memory_address_length (op) != 0;
3646 /* Return nonzero if the rtx is known aligned. */
3649 aligned_operand (op, mode)
3651 enum machine_mode mode;
3653 struct ix86_address parts;
3655 if (!general_operand (op, mode))
3658 /* Registers and immediate operands are always "aligned". */
3659 if (GET_CODE (op) != MEM)
3662 /* Don't even try to do any aligned optimizations with volatiles. */
3663 if (MEM_VOLATILE_P (op))
3668 /* Pushes and pops are only valid on the stack pointer. */
3669 if (GET_CODE (op) == PRE_DEC
3670 || GET_CODE (op) == POST_INC)
3673 /* Decode the address. */
3674 if (! ix86_decompose_address (op, &parts))
3677 if (parts.base && GET_CODE (parts.base) == SUBREG)
3678 parts.base = SUBREG_REG (parts.base);
3679 if (parts.index && GET_CODE (parts.index) == SUBREG)
3680 parts.index = SUBREG_REG (parts.index);
3682 /* Look for some component that isn't known to be aligned. */
3686 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3691 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3696 if (GET_CODE (parts.disp) != CONST_INT
3697 || (INTVAL (parts.disp) & 3) != 0)
3701 /* Didn't find one -- this must be an aligned address. */
3705 /* Return true if the constant is something that can be loaded with
3706 a special instruction. Only handle 0.0 and 1.0; others are less
3710 standard_80387_constant_p (x)
3713 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3715 /* Note that on the 80387, other constants, such as pi, that we should support
3716 too. On some machines, these are much slower to load as standard constant,
3717 than to load from doubles in memory. */
3718 if (x == CONST0_RTX (GET_MODE (x)))
3720 if (x == CONST1_RTX (GET_MODE (x)))
3725 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3728 standard_sse_constant_p (x)
3731 if (GET_CODE (x) != CONST_DOUBLE)
3733 return (x == CONST0_RTX (GET_MODE (x)));
3736 /* Returns 1 if OP contains a symbol reference */
3739 symbolic_reference_mentioned_p (op)
3742 register const char *fmt;
3745 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3748 fmt = GET_RTX_FORMAT (GET_CODE (op));
3749 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3755 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3756 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3760 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3767 /* Return 1 if it is appropriate to emit `ret' instructions in the
3768 body of a function. Do this only if the epilogue is simple, needing a
3769 couple of insns. Prior to reloading, we can't tell how many registers
3770 must be saved, so return 0 then. Return 0 if there is no frame
3771 marker to de-allocate.
3773 If NON_SAVING_SETJMP is defined and true, then it is not possible
3774 for the epilogue to be simple, so return 0. This is a special case
3775 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3776 until final, but jump_optimize may need to know sooner if a
3780 ix86_can_use_return_insn_p ()
3782 struct ix86_frame frame;
3784 #ifdef NON_SAVING_SETJMP
3785 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3789 if (! reload_completed || frame_pointer_needed)
3792 /* Don't allow more than 32 pop, since that's all we can do
3793 with one instruction. */
3794 if (current_function_pops_args
3795 && current_function_args_size >= 32768)
3798 ix86_compute_frame_layout (&frame);
3799 return frame.to_allocate == 0 && frame.nregs == 0;
3802 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3804 x86_64_sign_extended_value (value)
3807 switch (GET_CODE (value))
3809 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3810 to be at least 32 and this all acceptable constants are
3811 represented as CONST_INT. */
3813 if (HOST_BITS_PER_WIDE_INT == 32)
3817 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3818 return trunc_int_for_mode (val, SImode) == val;
3822 /* For certain code models, the symbolic references are known to fit. */
3824 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3826 /* For certain code models, the code is near as well. */
3828 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3830 /* We also may accept the offsetted memory references in certain special
3833 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3834 && XINT (XEXP (value, 0), 1) == UNSPEC_GOTPCREL)
3836 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3838 rtx op1 = XEXP (XEXP (value, 0), 0);
3839 rtx op2 = XEXP (XEXP (value, 0), 1);
3840 HOST_WIDE_INT offset;
3842 if (ix86_cmodel == CM_LARGE)
3844 if (GET_CODE (op2) != CONST_INT)
3846 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3847 switch (GET_CODE (op1))
3850 /* For CM_SMALL assume that latest object is 1MB before
3851 end of 31bits boundary. We may also accept pretty
3852 large negative constants knowing that all objects are
3853 in the positive half of address space. */
3854 if (ix86_cmodel == CM_SMALL
3855 && offset < 1024*1024*1024
3856 && trunc_int_for_mode (offset, SImode) == offset)
3858 /* For CM_KERNEL we know that all object resist in the
3859 negative half of 32bits address space. We may not
3860 accept negative offsets, since they may be just off
3861 and we may accept pretty large positive ones. */
3862 if (ix86_cmodel == CM_KERNEL
3864 && trunc_int_for_mode (offset, SImode) == offset)
3868 /* These conditions are similar to SYMBOL_REF ones, just the
3869 constraints for code models differ. */
3870 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3871 && offset < 1024*1024*1024
3872 && trunc_int_for_mode (offset, SImode) == offset)
3874 if (ix86_cmodel == CM_KERNEL
3876 && trunc_int_for_mode (offset, SImode) == offset)
3889 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3891 x86_64_zero_extended_value (value)
3894 switch (GET_CODE (value))
3897 if (HOST_BITS_PER_WIDE_INT == 32)
3898 return (GET_MODE (value) == VOIDmode
3899 && !CONST_DOUBLE_HIGH (value));
3903 if (HOST_BITS_PER_WIDE_INT == 32)
3904 return INTVAL (value) >= 0;
3906 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3909 /* For certain code models, the symbolic references are known to fit. */
3911 return ix86_cmodel == CM_SMALL;
3913 /* For certain code models, the code is near as well. */
3915 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3917 /* We also may accept the offsetted memory references in certain special
3920 if (GET_CODE (XEXP (value, 0)) == PLUS)
3922 rtx op1 = XEXP (XEXP (value, 0), 0);
3923 rtx op2 = XEXP (XEXP (value, 0), 1);
3925 if (ix86_cmodel == CM_LARGE)
3927 switch (GET_CODE (op1))
3931 /* For small code model we may accept pretty large positive
3932 offsets, since one bit is available for free. Negative
3933 offsets are limited by the size of NULL pointer area
3934 specified by the ABI. */
3935 if (ix86_cmodel == CM_SMALL
3936 && GET_CODE (op2) == CONST_INT
3937 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3938 && (trunc_int_for_mode (INTVAL (op2), SImode)
3941 /* ??? For the kernel, we may accept adjustment of
3942 -0x10000000, since we know that it will just convert
3943 negative address space to positive, but perhaps this
3944 is not worthwhile. */
3947 /* These conditions are similar to SYMBOL_REF ones, just the
3948 constraints for code models differ. */
3949 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3950 && GET_CODE (op2) == CONST_INT
3951 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3952 && (trunc_int_for_mode (INTVAL (op2), SImode)
3966 /* Value should be nonzero if functions must have frame pointers.
3967 Zero means the frame pointer need not be set up (and parms may
3968 be accessed via the stack pointer) in functions that seem suitable. */
3971 ix86_frame_pointer_required ()
3973 /* If we accessed previous frames, then the generated code expects
3974 to be able to access the saved ebp value in our frame. */
3975 if (cfun->machine->accesses_prev_frame)
3978 /* Several x86 os'es need a frame pointer for other reasons,
3979 usually pertaining to setjmp. */
3980 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3983 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3984 the frame pointer by default. Turn it back on now if we've not
3985 got a leaf function. */
3986 if (TARGET_OMIT_LEAF_FRAME_POINTER
3987 && (!current_function_is_leaf || current_function_profile))
3993 /* Record that the current function accesses previous call frames. */
3996 ix86_setup_frame_addresses ()
3998 cfun->machine->accesses_prev_frame = 1;
4001 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4002 # define USE_HIDDEN_LINKONCE 1
4004 # define USE_HIDDEN_LINKONCE 0
4007 static int pic_labels_used;
4009 /* Fills in the label name that should be used for a pc thunk for
4010 the given register. */
4013 get_pc_thunk_name (name, regno)
4017 if (USE_HIDDEN_LINKONCE)
4018 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4020 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4024 /* This function generates code for -fpic that loads %ebx with
4025 the return address of the caller and then returns. */
4028 ix86_asm_file_end (file)
4034 for (regno = 0; regno < 8; ++regno)
4038 if (! ((pic_labels_used >> regno) & 1))
4041 get_pc_thunk_name (name, regno);
4043 if (USE_HIDDEN_LINKONCE)
4047 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4049 TREE_PUBLIC (decl) = 1;
4050 TREE_STATIC (decl) = 1;
4051 DECL_ONE_ONLY (decl) = 1;
4053 (*targetm.asm_out.unique_section) (decl, 0);
4054 named_section (decl, NULL, 0);
4056 (*targetm.asm_out.globalize_label) (file, name);
4057 fputs ("\t.hidden\t", file);
4058 assemble_name (file, name);
4060 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4065 ASM_OUTPUT_LABEL (file, name);
4068 xops[0] = gen_rtx_REG (SImode, regno);
4069 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4070 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4071 output_asm_insn ("ret", xops);
4075 /* Emit code for the SET_GOT patterns. */
4078 output_set_got (dest)
4084 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4086 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4088 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4091 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4093 output_asm_insn ("call\t%a2", xops);
4096 /* Output the "canonical" label name ("Lxx$pb") here too. This
4097 is what will be referred to by the Mach-O PIC subsystem. */
4098 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4100 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4101 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4104 output_asm_insn ("pop{l}\t%0", xops);
4109 get_pc_thunk_name (name, REGNO (dest));
4110 pic_labels_used |= 1 << REGNO (dest);
4112 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4113 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4114 output_asm_insn ("call\t%X2", xops);
4117 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4118 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4119 else if (!TARGET_MACHO)
4120 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4125 /* Generate an "push" pattern for input ARG. */
4131 return gen_rtx_SET (VOIDmode,
4133 gen_rtx_PRE_DEC (Pmode,
4134 stack_pointer_rtx)),
4138 /* Return >= 0 if there is an unused call-clobbered register available
4139 for the entire function. */
4142 ix86_select_alt_pic_regnum ()
4144 if (current_function_is_leaf && !current_function_profile)
4147 for (i = 2; i >= 0; --i)
4148 if (!regs_ever_live[i])
4152 return INVALID_REGNUM;
4155 /* Return 1 if we need to save REGNO. */
4157 ix86_save_reg (regno, maybe_eh_return)
4159 int maybe_eh_return;
4161 if (pic_offset_table_rtx
4162 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4163 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4164 || current_function_profile
4165 || current_function_calls_eh_return))
4167 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4172 if (current_function_calls_eh_return && maybe_eh_return)
4177 unsigned test = EH_RETURN_DATA_REGNO (i);
4178 if (test == INVALID_REGNUM)
4185 return (regs_ever_live[regno]
4186 && !call_used_regs[regno]
4187 && !fixed_regs[regno]
4188 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4191 /* Return number of registers to be saved on the stack. */
4199 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4200 if (ix86_save_reg (regno, true))
4205 /* Return the offset between two registers, one to be eliminated, and the other
4206 its replacement, at the start of a routine. */
4209 ix86_initial_elimination_offset (from, to)
4213 struct ix86_frame frame;
4214 ix86_compute_frame_layout (&frame);
4216 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4217 return frame.hard_frame_pointer_offset;
4218 else if (from == FRAME_POINTER_REGNUM
4219 && to == HARD_FRAME_POINTER_REGNUM)
4220 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4223 if (to != STACK_POINTER_REGNUM)
4225 else if (from == ARG_POINTER_REGNUM)
4226 return frame.stack_pointer_offset;
4227 else if (from != FRAME_POINTER_REGNUM)
4230 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4234 /* Fill structure ix86_frame about frame of currently computed function. */
4237 ix86_compute_frame_layout (frame)
4238 struct ix86_frame *frame;
4240 HOST_WIDE_INT total_size;
4241 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4243 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4244 HOST_WIDE_INT size = get_frame_size ();
4246 frame->nregs = ix86_nsaved_regs ();
4249 /* Skip return address and saved base pointer. */
4250 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4252 frame->hard_frame_pointer_offset = offset;
4254 /* Do some sanity checking of stack_alignment_needed and
4255 preferred_alignment, since i386 port is the only using those features
4256 that may break easily. */
4258 if (size && !stack_alignment_needed)
4260 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4262 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4264 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4267 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4268 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4270 /* Register save area */
4271 offset += frame->nregs * UNITS_PER_WORD;
4274 if (ix86_save_varrargs_registers)
4276 offset += X86_64_VARARGS_SIZE;
4277 frame->va_arg_size = X86_64_VARARGS_SIZE;
4280 frame->va_arg_size = 0;
4282 /* Align start of frame for local function. */
4283 frame->padding1 = ((offset + stack_alignment_needed - 1)
4284 & -stack_alignment_needed) - offset;
4286 offset += frame->padding1;
4288 /* Frame pointer points here. */
4289 frame->frame_pointer_offset = offset;
4293 /* Add outgoing arguments area. Can be skipped if we eliminated
4294 all the function calls as dead code. */
4295 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4297 offset += current_function_outgoing_args_size;
4298 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4301 frame->outgoing_arguments_size = 0;
4303 /* Align stack boundary. Only needed if we're calling another function
4305 if (!current_function_is_leaf || current_function_calls_alloca)
4306 frame->padding2 = ((offset + preferred_alignment - 1)
4307 & -preferred_alignment) - offset;
4309 frame->padding2 = 0;
4311 offset += frame->padding2;
4313 /* We've reached end of stack frame. */
4314 frame->stack_pointer_offset = offset;
4316 /* Size prologue needs to allocate. */
4317 frame->to_allocate =
4318 (size + frame->padding1 + frame->padding2
4319 + frame->outgoing_arguments_size + frame->va_arg_size);
4321 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4322 && current_function_is_leaf)
4324 frame->red_zone_size = frame->to_allocate;
4325 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4326 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4329 frame->red_zone_size = 0;
4330 frame->to_allocate -= frame->red_zone_size;
4331 frame->stack_pointer_offset -= frame->red_zone_size;
4333 fprintf (stderr, "nregs: %i\n", frame->nregs);
4334 fprintf (stderr, "size: %i\n", size);
4335 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4336 fprintf (stderr, "padding1: %i\n", frame->padding1);
4337 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4338 fprintf (stderr, "padding2: %i\n", frame->padding2);
4339 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4340 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4341 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4342 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4343 frame->hard_frame_pointer_offset);
4344 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4348 /* Emit code to save registers in the prologue. */
4351 ix86_emit_save_regs ()
4356 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4357 if (ix86_save_reg (regno, true))
4359 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4360 RTX_FRAME_RELATED_P (insn) = 1;
4364 /* Emit code to save registers using MOV insns. First register
4365 is restored from POINTER + OFFSET. */
4367 ix86_emit_save_regs_using_mov (pointer, offset)
4369 HOST_WIDE_INT offset;
4374 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4375 if (ix86_save_reg (regno, true))
4377 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4379 gen_rtx_REG (Pmode, regno));
4380 RTX_FRAME_RELATED_P (insn) = 1;
4381 offset += UNITS_PER_WORD;
4385 /* Expand the prologue into a bunch of separate insns. */
4388 ix86_expand_prologue ()
4392 struct ix86_frame frame;
4394 HOST_WIDE_INT allocate;
4398 use_fast_prologue_epilogue
4399 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4400 if (TARGET_PROLOGUE_USING_MOVE)
4401 use_mov = use_fast_prologue_epilogue;
4403 ix86_compute_frame_layout (&frame);
4405 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4406 slower on all targets. Also sdb doesn't like it. */
4408 if (frame_pointer_needed)
4410 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4411 RTX_FRAME_RELATED_P (insn) = 1;
4413 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4414 RTX_FRAME_RELATED_P (insn) = 1;
4417 allocate = frame.to_allocate;
4418 /* In case we are dealing only with single register and empty frame,
4419 push is equivalent of the mov+add sequence. */
4420 if (allocate == 0 && frame.nregs <= 1)
4424 ix86_emit_save_regs ();
4426 allocate += frame.nregs * UNITS_PER_WORD;
4430 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4432 insn = emit_insn (gen_pro_epilogue_adjust_stack
4433 (stack_pointer_rtx, stack_pointer_rtx,
4434 GEN_INT (-allocate)));
4435 RTX_FRAME_RELATED_P (insn) = 1;
4439 /* ??? Is this only valid for Win32? */
4446 arg0 = gen_rtx_REG (SImode, 0);
4447 emit_move_insn (arg0, GEN_INT (allocate));
4449 sym = gen_rtx_MEM (FUNCTION_MODE,
4450 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4451 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4453 CALL_INSN_FUNCTION_USAGE (insn)
4454 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4455 CALL_INSN_FUNCTION_USAGE (insn));
4459 if (!frame_pointer_needed || !frame.to_allocate)
4460 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4462 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4463 -frame.nregs * UNITS_PER_WORD);
4466 #ifdef SUBTARGET_PROLOGUE
4470 pic_reg_used = false;
4471 if (pic_offset_table_rtx
4472 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4473 || current_function_profile))
4475 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4477 if (alt_pic_reg_used != INVALID_REGNUM)
4478 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4480 pic_reg_used = true;
4485 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4487 /* Even with accurate pre-reload life analysis, we can wind up
4488 deleting all references to the pic register after reload.
4489 Consider if cross-jumping unifies two sides of a branch
4490 controled by a comparison vs the only read from a global.
4491 In which case, allow the set_got to be deleted, though we're
4492 too late to do anything about the ebx save in the prologue. */
4493 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4496 /* Prevent function calls from be scheduled before the call to mcount.
4497 In the pic_reg_used case, make sure that the got load isn't deleted. */
4498 if (current_function_profile)
4499 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4502 /* Emit code to restore saved registers using MOV insns. First register
4503 is restored from POINTER + OFFSET. */
4505 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4508 int maybe_eh_return;
4512 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4513 if (ix86_save_reg (regno, maybe_eh_return))
4515 emit_move_insn (gen_rtx_REG (Pmode, regno),
4516 adjust_address (gen_rtx_MEM (Pmode, pointer),
4518 offset += UNITS_PER_WORD;
4522 /* Restore function stack, frame, and registers. */
4525 ix86_expand_epilogue (style)
4529 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4530 struct ix86_frame frame;
4531 HOST_WIDE_INT offset;
4533 ix86_compute_frame_layout (&frame);
4535 /* Calculate start of saved registers relative to ebp. Special care
4536 must be taken for the normal return case of a function using
4537 eh_return: the eax and edx registers are marked as saved, but not
4538 restored along this path. */
4539 offset = frame.nregs;
4540 if (current_function_calls_eh_return && style != 2)
4542 offset *= -UNITS_PER_WORD;
4544 /* If we're only restoring one register and sp is not valid then
4545 using a move instruction to restore the register since it's
4546 less work than reloading sp and popping the register.
4548 The default code result in stack adjustment using add/lea instruction,
4549 while this code results in LEAVE instruction (or discrete equivalent),
4550 so it is profitable in some other cases as well. Especially when there
4551 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4552 and there is exactly one register to pop. This heruistic may need some
4553 tuning in future. */
4554 if ((!sp_valid && frame.nregs <= 1)
4555 || (TARGET_EPILOGUE_USING_MOVE
4556 && use_fast_prologue_epilogue
4557 && (frame.nregs > 1 || frame.to_allocate))
4558 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4559 || (frame_pointer_needed && TARGET_USE_LEAVE
4560 && use_fast_prologue_epilogue && frame.nregs == 1)
4561 || current_function_calls_eh_return)
4563 /* Restore registers. We can use ebp or esp to address the memory
4564 locations. If both are available, default to ebp, since offsets
4565 are known to be small. Only exception is esp pointing directly to the
4566 end of block of saved registers, where we may simplify addressing
4569 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4570 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4571 frame.to_allocate, style == 2);
4573 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4574 offset, style == 2);
4576 /* eh_return epilogues need %ecx added to the stack pointer. */
4579 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4581 if (frame_pointer_needed)
4583 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4584 tmp = plus_constant (tmp, UNITS_PER_WORD);
4585 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4587 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4588 emit_move_insn (hard_frame_pointer_rtx, tmp);
4590 emit_insn (gen_pro_epilogue_adjust_stack
4591 (stack_pointer_rtx, sa, const0_rtx));
4595 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4596 tmp = plus_constant (tmp, (frame.to_allocate
4597 + frame.nregs * UNITS_PER_WORD));
4598 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4601 else if (!frame_pointer_needed)
4602 emit_insn (gen_pro_epilogue_adjust_stack
4603 (stack_pointer_rtx, stack_pointer_rtx,
4604 GEN_INT (frame.to_allocate
4605 + frame.nregs * UNITS_PER_WORD)));
4606 /* If not an i386, mov & pop is faster than "leave". */
4607 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4608 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4611 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4612 hard_frame_pointer_rtx,
4615 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4617 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4622 /* First step is to deallocate the stack frame so that we can
4623 pop the registers. */
4626 if (!frame_pointer_needed)
4628 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4629 hard_frame_pointer_rtx,
4632 else if (frame.to_allocate)
4633 emit_insn (gen_pro_epilogue_adjust_stack
4634 (stack_pointer_rtx, stack_pointer_rtx,
4635 GEN_INT (frame.to_allocate)));
4637 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4638 if (ix86_save_reg (regno, false))
4641 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4643 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4645 if (frame_pointer_needed)
4647 /* Leave results in shorter dependency chains on CPUs that are
4648 able to grok it fast. */
4649 if (TARGET_USE_LEAVE)
4650 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4651 else if (TARGET_64BIT)
4652 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4654 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4658 /* Sibcall epilogues don't want a return instruction. */
4662 if (current_function_pops_args && current_function_args_size)
4664 rtx popc = GEN_INT (current_function_pops_args);
4666 /* i386 can only pop 64K bytes. If asked to pop more, pop
4667 return address, do explicit add, and jump indirectly to the
4670 if (current_function_pops_args >= 65536)
4672 rtx ecx = gen_rtx_REG (SImode, 2);
4674 /* There are is no "pascal" calling convention in 64bit ABI. */
4678 emit_insn (gen_popsi1 (ecx));
4679 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4680 emit_jump_insn (gen_return_indirect_internal (ecx));
4683 emit_jump_insn (gen_return_pop_internal (popc));
4686 emit_jump_insn (gen_return_internal ());
4689 /* Reset from the function's potential modifications. */
4692 ix86_output_function_epilogue (file, size)
4693 FILE *file ATTRIBUTE_UNUSED;
4694 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
4696 if (pic_offset_table_rtx)
4697 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4700 /* Extract the parts of an RTL expression that is a valid memory address
4701 for an instruction. Return 0 if the structure of the address is
4702 grossly off. Return -1 if the address contains ASHIFT, so it is not
4703 strictly valid, but still used for computing length of lea instruction.
4707 ix86_decompose_address (addr, out)
4709 struct ix86_address *out;
4711 rtx base = NULL_RTX;
4712 rtx index = NULL_RTX;
4713 rtx disp = NULL_RTX;
4714 HOST_WIDE_INT scale = 1;
4715 rtx scale_rtx = NULL_RTX;
4718 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4720 else if (GET_CODE (addr) == PLUS)
4722 rtx op0 = XEXP (addr, 0);
4723 rtx op1 = XEXP (addr, 1);
4724 enum rtx_code code0 = GET_CODE (op0);
4725 enum rtx_code code1 = GET_CODE (op1);
4727 if (code0 == REG || code0 == SUBREG)
4729 if (code1 == REG || code1 == SUBREG)
4730 index = op0, base = op1; /* index + base */
4732 base = op0, disp = op1; /* base + displacement */
4734 else if (code0 == MULT)
4736 index = XEXP (op0, 0);
4737 scale_rtx = XEXP (op0, 1);
4738 if (code1 == REG || code1 == SUBREG)
4739 base = op1; /* index*scale + base */
4741 disp = op1; /* index*scale + disp */
4743 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4745 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4746 scale_rtx = XEXP (XEXP (op0, 0), 1);
4747 base = XEXP (op0, 1);
4750 else if (code0 == PLUS)
4752 index = XEXP (op0, 0); /* index + base + disp */
4753 base = XEXP (op0, 1);
4759 else if (GET_CODE (addr) == MULT)
4761 index = XEXP (addr, 0); /* index*scale */
4762 scale_rtx = XEXP (addr, 1);
4764 else if (GET_CODE (addr) == ASHIFT)
4768 /* We're called for lea too, which implements ashift on occasion. */
4769 index = XEXP (addr, 0);
4770 tmp = XEXP (addr, 1);
4771 if (GET_CODE (tmp) != CONST_INT)
4773 scale = INTVAL (tmp);
4774 if ((unsigned HOST_WIDE_INT) scale > 3)
4780 disp = addr; /* displacement */
4782 /* Extract the integral value of scale. */
4785 if (GET_CODE (scale_rtx) != CONST_INT)
4787 scale = INTVAL (scale_rtx);
4790 /* Allow arg pointer and stack pointer as index if there is not scaling */
4791 if (base && index && scale == 1
4792 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4793 || index == stack_pointer_rtx))
4800 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4801 if ((base == hard_frame_pointer_rtx
4802 || base == frame_pointer_rtx
4803 || base == arg_pointer_rtx) && !disp)
4806 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4807 Avoid this by transforming to [%esi+0]. */
4808 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4809 && base && !index && !disp
4811 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4814 /* Special case: encode reg+reg instead of reg*2. */
4815 if (!base && index && scale && scale == 2)
4816 base = index, scale = 1;
4818 /* Special case: scaling cannot be encoded without base or displacement. */
4819 if (!base && !disp && index && scale != 1)
4830 /* Return cost of the memory address x.
4831 For i386, it is better to use a complex address than let gcc copy
4832 the address into a reg and make a new pseudo. But not if the address
4833 requires to two regs - that would mean more pseudos with longer
4836 ix86_address_cost (x)
4839 struct ix86_address parts;
4842 if (!ix86_decompose_address (x, &parts))
4845 if (parts.base && GET_CODE (parts.base) == SUBREG)
4846 parts.base = SUBREG_REG (parts.base);
4847 if (parts.index && GET_CODE (parts.index) == SUBREG)
4848 parts.index = SUBREG_REG (parts.index);
4850 /* More complex memory references are better. */
4851 if (parts.disp && parts.disp != const0_rtx)
4854 /* Attempt to minimize number of registers in the address. */
4856 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4858 && (!REG_P (parts.index)
4859 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4863 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4865 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4866 && parts.base != parts.index)
4869 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
4870 since it's predecode logic can't detect the length of instructions
4871 and it degenerates to vector decoded. Increase cost of such
4872 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4873 to split such addresses or even refuse such addresses at all.
4875 Following addressing modes are affected:
4880 The first and last case may be avoidable by explicitly coding the zero in
4881 memory address, but I don't have AMD-K6 machine handy to check this
4885 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4886 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4887 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4893 /* If X is a machine specific address (i.e. a symbol or label being
4894 referenced as a displacement from the GOT implemented using an
4895 UNSPEC), then return the base term. Otherwise return X. */
4898 ix86_find_base_term (x)
4905 if (GET_CODE (x) != CONST)
4908 if (GET_CODE (term) == PLUS
4909 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4910 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4911 term = XEXP (term, 0);
4912 if (GET_CODE (term) != UNSPEC
4913 || XINT (term, 1) != UNSPEC_GOTPCREL)
4916 term = XVECEXP (term, 0, 0);
4918 if (GET_CODE (term) != SYMBOL_REF
4919 && GET_CODE (term) != LABEL_REF)
4925 if (GET_CODE (x) != PLUS
4926 || XEXP (x, 0) != pic_offset_table_rtx
4927 || GET_CODE (XEXP (x, 1)) != CONST)
4930 term = XEXP (XEXP (x, 1), 0);
4932 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4933 term = XEXP (term, 0);
4935 if (GET_CODE (term) != UNSPEC
4936 || XINT (term, 1) != UNSPEC_GOTOFF)
4939 term = XVECEXP (term, 0, 0);
4941 if (GET_CODE (term) != SYMBOL_REF
4942 && GET_CODE (term) != LABEL_REF)
4948 /* Determine if a given RTX is a valid constant. We already know this
4949 satisfies CONSTANT_P. */
4952 legitimate_constant_p (x)
4957 switch (GET_CODE (x))
4960 /* TLS symbols are not constant. */
4961 if (tls_symbolic_operand (x, Pmode))
4966 inner = XEXP (x, 0);
4968 /* Offsets of TLS symbols are never valid.
4969 Discourage CSE from creating them. */
4970 if (GET_CODE (inner) == PLUS
4971 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
4974 /* Only some unspecs are valid as "constants". */
4975 if (GET_CODE (inner) == UNSPEC)
4976 switch (XINT (inner, 1))
4979 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4989 /* Otherwise we handle everything else in the move patterns. */
4993 /* Determine if a given RTX is a valid constant address. */
4996 constant_address_p (x)
4999 switch (GET_CODE (x))
5006 return TARGET_64BIT;
5009 /* For Mach-O, really believe the CONST. */
5012 /* Otherwise fall through. */
5014 return !flag_pic && legitimate_constant_p (x);
5021 /* Nonzero if the constant value X is a legitimate general operand
5022 when generating PIC code. It is given that flag_pic is on and
5023 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5026 legitimate_pic_operand_p (x)
5031 switch (GET_CODE (x))
5034 inner = XEXP (x, 0);
5036 /* Only some unspecs are valid as "constants". */
5037 if (GET_CODE (inner) == UNSPEC)
5038 switch (XINT (inner, 1))
5041 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5049 return legitimate_pic_address_disp_p (x);
5056 /* Determine if a given CONST RTX is a valid memory displacement
5060 legitimate_pic_address_disp_p (disp)
5065 /* In 64bit mode we can allow direct addresses of symbols and labels
5066 when they are not dynamic symbols. */
5070 if (GET_CODE (disp) == CONST)
5072 /* ??? Handle PIC code models */
5073 if (GET_CODE (x) == PLUS
5074 && (GET_CODE (XEXP (x, 1)) == CONST_INT
5075 && ix86_cmodel == CM_SMALL_PIC
5076 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
5077 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
5079 if (local_symbolic_operand (x, Pmode))
5082 if (GET_CODE (disp) != CONST)
5084 disp = XEXP (disp, 0);
5088 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5089 of GOT tables. We should not need these anyway. */
5090 if (GET_CODE (disp) != UNSPEC
5091 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5094 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5095 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5101 if (GET_CODE (disp) == PLUS)
5103 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5105 disp = XEXP (disp, 0);
5109 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5110 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5112 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5113 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5114 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5116 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5117 if (strstr (sym_name, "$pb") != 0)
5122 if (GET_CODE (disp) != UNSPEC)
5125 switch (XINT (disp, 1))
5130 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5132 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5133 case UNSPEC_GOTTPOFF:
5134 case UNSPEC_GOTNTPOFF:
5135 case UNSPEC_INDNTPOFF:
5138 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5140 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5142 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5148 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5149 memory address for an instruction. The MODE argument is the machine mode
5150 for the MEM expression that wants to use this address.
5152 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5153 convert common non-canonical forms to canonical form so that they will
5157 legitimate_address_p (mode, addr, strict)
5158 enum machine_mode mode;
5162 struct ix86_address parts;
5163 rtx base, index, disp;
5164 HOST_WIDE_INT scale;
5165 const char *reason = NULL;
5166 rtx reason_rtx = NULL_RTX;
5168 if (TARGET_DEBUG_ADDR)
5171 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5172 GET_MODE_NAME (mode), strict);
5176 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5178 if (TARGET_DEBUG_ADDR)
5179 fprintf (stderr, "Success.\n");
5183 if (ix86_decompose_address (addr, &parts) <= 0)
5185 reason = "decomposition failed";
5190 index = parts.index;
5192 scale = parts.scale;
5194 /* Validate base register.
5196 Don't allow SUBREG's here, it can lead to spill failures when the base
5197 is one word out of a two word structure, which is represented internally
5205 if (GET_CODE (base) == SUBREG)
5206 reg = SUBREG_REG (base);
5210 if (GET_CODE (reg) != REG)
5212 reason = "base is not a register";
5216 if (GET_MODE (base) != Pmode)
5218 reason = "base is not in Pmode";
5222 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5223 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5225 reason = "base is not valid";
5230 /* Validate index register.
5232 Don't allow SUBREG's here, it can lead to spill failures when the index
5233 is one word out of a two word structure, which is represented internally
5241 if (GET_CODE (index) == SUBREG)
5242 reg = SUBREG_REG (index);
5246 if (GET_CODE (reg) != REG)
5248 reason = "index is not a register";
5252 if (GET_MODE (index) != Pmode)
5254 reason = "index is not in Pmode";
5258 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5259 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5261 reason = "index is not valid";
5266 /* Validate scale factor. */
5269 reason_rtx = GEN_INT (scale);
5272 reason = "scale without index";
5276 if (scale != 2 && scale != 4 && scale != 8)
5278 reason = "scale is not a valid multiplier";
5283 /* Validate displacement. */
5290 if (!x86_64_sign_extended_value (disp))
5292 reason = "displacement is out of range";
5298 if (GET_CODE (disp) == CONST_DOUBLE)
5300 reason = "displacement is a const_double";
5305 if (GET_CODE (disp) == CONST
5306 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5307 switch (XINT (XEXP (disp, 0), 1))
5311 case UNSPEC_GOTPCREL:
5314 goto is_legitimate_pic;
5316 case UNSPEC_GOTTPOFF:
5317 case UNSPEC_GOTNTPOFF:
5318 case UNSPEC_INDNTPOFF:
5324 reason = "invalid address unspec";
5328 else if (flag_pic && (SYMBOLIC_CONST (disp)
5330 && !machopic_operand_p (disp)
5335 if (TARGET_64BIT && (index || base))
5337 reason = "non-constant pic memory reference";
5340 if (! legitimate_pic_address_disp_p (disp))
5342 reason = "displacement is an invalid pic construct";
5346 /* This code used to verify that a symbolic pic displacement
5347 includes the pic_offset_table_rtx register.
5349 While this is good idea, unfortunately these constructs may
5350 be created by "adds using lea" optimization for incorrect
5359 This code is nonsensical, but results in addressing
5360 GOT table with pic_offset_table_rtx base. We can't
5361 just refuse it easily, since it gets matched by
5362 "addsi3" pattern, that later gets split to lea in the
5363 case output register differs from input. While this
5364 can be handled by separate addsi pattern for this case
5365 that never results in lea, this seems to be easier and
5366 correct fix for crash to disable this test. */
5368 else if (!CONSTANT_ADDRESS_P (disp))
5370 reason = "displacement is not constant";
5375 /* Everything looks valid. */
5376 if (TARGET_DEBUG_ADDR)
5377 fprintf (stderr, "Success.\n");
5381 if (TARGET_DEBUG_ADDR)
5383 fprintf (stderr, "Error: %s\n", reason);
5384 debug_rtx (reason_rtx);
5389 /* Return an unique alias set for the GOT. */
5391 static HOST_WIDE_INT
5392 ix86_GOT_alias_set ()
5394 static HOST_WIDE_INT set = -1;
5396 set = new_alias_set ();
5400 /* Return a legitimate reference for ORIG (an address) using the
5401 register REG. If REG is 0, a new pseudo is generated.
5403 There are two types of references that must be handled:
5405 1. Global data references must load the address from the GOT, via
5406 the PIC reg. An insn is emitted to do this load, and the reg is
5409 2. Static data references, constant pool addresses, and code labels
5410 compute the address as an offset from the GOT, whose base is in
5411 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5412 differentiate them from global data objects. The returned
5413 address is the PIC reg + an unspec constant.
5415 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5416 reg also appears in the address. */
5419 legitimize_pic_address (orig, reg)
5429 reg = gen_reg_rtx (Pmode);
5430 /* Use the generic Mach-O PIC machinery. */
5431 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5434 if (local_symbolic_operand (addr, Pmode))
5436 /* In 64bit mode we can address such objects directly. */
5441 /* This symbol may be referenced via a displacement from the PIC
5442 base address (@GOTOFF). */
5444 if (reload_in_progress)
5445 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5446 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5447 new = gen_rtx_CONST (Pmode, new);
5448 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5452 emit_move_insn (reg, new);
5457 else if (GET_CODE (addr) == SYMBOL_REF)
5461 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5462 new = gen_rtx_CONST (Pmode, new);
5463 new = gen_rtx_MEM (Pmode, new);
5464 RTX_UNCHANGING_P (new) = 1;
5465 set_mem_alias_set (new, ix86_GOT_alias_set ());
5468 reg = gen_reg_rtx (Pmode);
5469 /* Use directly gen_movsi, otherwise the address is loaded
5470 into register for CSE. We don't want to CSE this addresses,
5471 instead we CSE addresses from the GOT table, so skip this. */
5472 emit_insn (gen_movsi (reg, new));
5477 /* This symbol must be referenced via a load from the
5478 Global Offset Table (@GOT). */
5480 if (reload_in_progress)
5481 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5482 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5483 new = gen_rtx_CONST (Pmode, new);
5484 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5485 new = gen_rtx_MEM (Pmode, new);
5486 RTX_UNCHANGING_P (new) = 1;
5487 set_mem_alias_set (new, ix86_GOT_alias_set ());
5490 reg = gen_reg_rtx (Pmode);
5491 emit_move_insn (reg, new);
5497 if (GET_CODE (addr) == CONST)
5499 addr = XEXP (addr, 0);
5501 /* We must match stuff we generate before. Assume the only
5502 unspecs that can get here are ours. Not that we could do
5503 anything with them anyway... */
5504 if (GET_CODE (addr) == UNSPEC
5505 || (GET_CODE (addr) == PLUS
5506 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5508 if (GET_CODE (addr) != PLUS)
5511 if (GET_CODE (addr) == PLUS)
5513 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5515 /* Check first to see if this is a constant offset from a @GOTOFF
5516 symbol reference. */
5517 if (local_symbolic_operand (op0, Pmode)
5518 && GET_CODE (op1) == CONST_INT)
5522 if (reload_in_progress)
5523 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5524 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5526 new = gen_rtx_PLUS (Pmode, new, op1);
5527 new = gen_rtx_CONST (Pmode, new);
5528 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5532 emit_move_insn (reg, new);
5538 /* ??? We need to limit offsets here. */
5543 base = legitimize_pic_address (XEXP (addr, 0), reg);
5544 new = legitimize_pic_address (XEXP (addr, 1),
5545 base == reg ? NULL_RTX : reg);
5547 if (GET_CODE (new) == CONST_INT)
5548 new = plus_constant (base, INTVAL (new));
5551 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5553 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5554 new = XEXP (new, 1);
5556 new = gen_rtx_PLUS (Pmode, base, new);
5565 ix86_encode_section_info (decl, first)
5567 int first ATTRIBUTE_UNUSED;
5569 bool local_p = (*targetm.binds_local_p) (decl);
5572 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5573 if (GET_CODE (rtl) != MEM)
5575 symbol = XEXP (rtl, 0);
5576 if (GET_CODE (symbol) != SYMBOL_REF)
5579 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5580 symbol so that we may access it directly in the GOT. */
5583 SYMBOL_REF_FLAG (symbol) = local_p;
5585 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5586 "local dynamic", "initial exec" or "local exec" TLS models
5589 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5591 const char *symbol_str;
5594 enum tls_model kind = decl_tls_model (decl);
5596 symbol_str = XSTR (symbol, 0);
5598 if (symbol_str[0] == '%')
5600 if (symbol_str[1] == tls_model_chars[kind])
5604 len = strlen (symbol_str) + 1;
5605 newstr = alloca (len + 2);
5608 newstr[1] = tls_model_chars[kind];
5609 memcpy (newstr + 2, symbol_str, len);
5611 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
5615 /* Undo the above when printing symbol names. */
5618 ix86_strip_name_encoding (str)
5628 /* Load the thread pointer into a register. */
5631 get_thread_pointer ()
5635 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5636 tp = gen_rtx_MEM (Pmode, tp);
5637 RTX_UNCHANGING_P (tp) = 1;
5638 set_mem_alias_set (tp, ix86_GOT_alias_set ());
5639 tp = force_reg (Pmode, tp);
5644 /* Try machine-dependent ways of modifying an illegitimate address
5645 to be legitimate. If we find one, return the new, valid address.
5646 This macro is used in only one place: `memory_address' in explow.c.
5648 OLDX is the address as it was before break_out_memory_refs was called.
5649 In some cases it is useful to look at this to decide what needs to be done.
5651 MODE and WIN are passed so that this macro can use
5652 GO_IF_LEGITIMATE_ADDRESS.
5654 It is always safe for this macro to do nothing. It exists to recognize
5655 opportunities to optimize the output.
5657 For the 80386, we handle X+REG by loading X into a register R and
5658 using R+REG. R will go in a general reg and indexing will be used.
5659 However, if REG is a broken-out memory address or multiplication,
5660 nothing needs to be done because REG can certainly go in a general reg.
5662 When -fpic is used, special handling is needed for symbolic references.
5663 See comments by legitimize_pic_address in i386.c for details. */
5666 legitimize_address (x, oldx, mode)
5668 register rtx oldx ATTRIBUTE_UNUSED;
5669 enum machine_mode mode;
5674 if (TARGET_DEBUG_ADDR)
5676 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5677 GET_MODE_NAME (mode));
5681 log = tls_symbolic_operand (x, mode);
5684 rtx dest, base, off, pic;
5688 case TLS_MODEL_GLOBAL_DYNAMIC:
5689 dest = gen_reg_rtx (Pmode);
5690 emit_insn (gen_tls_global_dynamic (dest, x));
5693 case TLS_MODEL_LOCAL_DYNAMIC:
5694 base = gen_reg_rtx (Pmode);
5695 emit_insn (gen_tls_local_dynamic_base (base));
5697 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5698 off = gen_rtx_CONST (Pmode, off);
5700 return gen_rtx_PLUS (Pmode, base, off);
5702 case TLS_MODEL_INITIAL_EXEC:
5705 if (reload_in_progress)
5706 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5707 pic = pic_offset_table_rtx;
5709 else if (!TARGET_GNU_TLS)
5711 pic = gen_reg_rtx (Pmode);
5712 emit_insn (gen_set_got (pic));
5717 base = get_thread_pointer ();
5719 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5722 : flag_pic ? UNSPEC_GOTNTPOFF
5723 : UNSPEC_INDNTPOFF);
5724 off = gen_rtx_CONST (Pmode, off);
5725 if (flag_pic || !TARGET_GNU_TLS)
5726 off = gen_rtx_PLUS (Pmode, pic, off);
5727 off = gen_rtx_MEM (Pmode, off);
5728 RTX_UNCHANGING_P (off) = 1;
5729 set_mem_alias_set (off, ix86_GOT_alias_set ());
5730 dest = gen_reg_rtx (Pmode);
5734 emit_move_insn (dest, off);
5735 return gen_rtx_PLUS (Pmode, base, dest);
5738 emit_insn (gen_subsi3 (dest, base, off));
5741 case TLS_MODEL_LOCAL_EXEC:
5742 base = get_thread_pointer ();
5744 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5745 TARGET_GNU_TLS ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5746 off = gen_rtx_CONST (Pmode, off);
5749 return gen_rtx_PLUS (Pmode, base, off);
5752 dest = gen_reg_rtx (Pmode);
5753 emit_insn (gen_subsi3 (dest, base, off));
5764 if (flag_pic && SYMBOLIC_CONST (x))
5765 return legitimize_pic_address (x, 0);
5767 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5768 if (GET_CODE (x) == ASHIFT
5769 && GET_CODE (XEXP (x, 1)) == CONST_INT
5770 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5773 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5774 GEN_INT (1 << log));
5777 if (GET_CODE (x) == PLUS)
5779 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5781 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5782 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5783 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5786 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5787 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5788 GEN_INT (1 << log));
5791 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5792 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5793 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5796 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5797 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5798 GEN_INT (1 << log));
5801 /* Put multiply first if it isn't already. */
5802 if (GET_CODE (XEXP (x, 1)) == MULT)
5804 rtx tmp = XEXP (x, 0);
5805 XEXP (x, 0) = XEXP (x, 1);
5810 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5811 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5812 created by virtual register instantiation, register elimination, and
5813 similar optimizations. */
5814 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5817 x = gen_rtx_PLUS (Pmode,
5818 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5819 XEXP (XEXP (x, 1), 0)),
5820 XEXP (XEXP (x, 1), 1));
5824 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5825 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5826 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5827 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5828 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5829 && CONSTANT_P (XEXP (x, 1)))
5832 rtx other = NULL_RTX;
5834 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5836 constant = XEXP (x, 1);
5837 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5839 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5841 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5842 other = XEXP (x, 1);
5850 x = gen_rtx_PLUS (Pmode,
5851 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5852 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5853 plus_constant (other, INTVAL (constant)));
5857 if (changed && legitimate_address_p (mode, x, FALSE))
5860 if (GET_CODE (XEXP (x, 0)) == MULT)
5863 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5866 if (GET_CODE (XEXP (x, 1)) == MULT)
5869 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5873 && GET_CODE (XEXP (x, 1)) == REG
5874 && GET_CODE (XEXP (x, 0)) == REG)
5877 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5880 x = legitimize_pic_address (x, 0);
5883 if (changed && legitimate_address_p (mode, x, FALSE))
5886 if (GET_CODE (XEXP (x, 0)) == REG)
5888 register rtx temp = gen_reg_rtx (Pmode);
5889 register rtx val = force_operand (XEXP (x, 1), temp);
5891 emit_move_insn (temp, val);
5897 else if (GET_CODE (XEXP (x, 1)) == REG)
5899 register rtx temp = gen_reg_rtx (Pmode);
5900 register rtx val = force_operand (XEXP (x, 0), temp);
5902 emit_move_insn (temp, val);
5912 /* Print an integer constant expression in assembler syntax. Addition
5913 and subtraction are the only arithmetic that may appear in these
5914 expressions. FILE is the stdio stream to write to, X is the rtx, and
5915 CODE is the operand print code from the output string. */
5918 output_pic_addr_const (file, x, code)
5925 switch (GET_CODE (x))
5935 assemble_name (file, XSTR (x, 0));
5936 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
5937 fputs ("@PLT", file);
5944 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5945 assemble_name (asm_out_file, buf);
5949 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5953 /* This used to output parentheses around the expression,
5954 but that does not work on the 386 (either ATT or BSD assembler). */
5955 output_pic_addr_const (file, XEXP (x, 0), code);
5959 if (GET_MODE (x) == VOIDmode)
5961 /* We can use %d if the number is <32 bits and positive. */
5962 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5963 fprintf (file, "0x%lx%08lx",
5964 (unsigned long) CONST_DOUBLE_HIGH (x),
5965 (unsigned long) CONST_DOUBLE_LOW (x));
5967 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5970 /* We can't handle floating point constants;
5971 PRINT_OPERAND must handle them. */
5972 output_operand_lossage ("floating constant misused");
5976 /* Some assemblers need integer constants to appear first. */
5977 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5979 output_pic_addr_const (file, XEXP (x, 0), code);
5981 output_pic_addr_const (file, XEXP (x, 1), code);
5983 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5985 output_pic_addr_const (file, XEXP (x, 1), code);
5987 output_pic_addr_const (file, XEXP (x, 0), code);
5995 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5996 output_pic_addr_const (file, XEXP (x, 0), code);
5998 output_pic_addr_const (file, XEXP (x, 1), code);
6000 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6004 if (XVECLEN (x, 0) != 1)
6006 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6007 switch (XINT (x, 1))
6010 fputs ("@GOT", file);
6013 fputs ("@GOTOFF", file);
6015 case UNSPEC_GOTPCREL:
6016 fputs ("@GOTPCREL(%rip)", file);
6018 case UNSPEC_GOTTPOFF:
6019 /* FIXME: This might be @TPOFF in Sun ld too. */
6020 fputs ("@GOTTPOFF", file);
6023 fputs ("@TPOFF", file);
6026 fputs ("@NTPOFF", file);
6029 fputs ("@DTPOFF", file);
6031 case UNSPEC_GOTNTPOFF:
6032 fputs ("@GOTNTPOFF", file);
6034 case UNSPEC_INDNTPOFF:
6035 fputs ("@INDNTPOFF", file);
6038 output_operand_lossage ("invalid UNSPEC as operand");
6044 output_operand_lossage ("invalid expression as operand");
6048 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6049 We need to handle our special PIC relocations. */
6052 i386_dwarf_output_addr_const (file, x)
6057 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6061 fprintf (file, "%s", ASM_LONG);
6064 output_pic_addr_const (file, x, '\0');
6066 output_addr_const (file, x);
6070 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6071 We need to emit DTP-relative relocations. */
6074 i386_output_dwarf_dtprel (file, size, x)
6082 fputs (ASM_LONG, file);
6086 fputs (ASM_QUAD, file);
6093 output_addr_const (file, x);
6094 fputs ("@DTPOFF", file);
6097 /* In the name of slightly smaller debug output, and to cater to
6098 general assembler losage, recognize PIC+GOTOFF and turn it back
6099 into a direct symbol reference. */
6102 i386_simplify_dwarf_addr (orig_x)
6107 if (GET_CODE (x) == MEM)
6112 if (GET_CODE (x) != CONST
6113 || GET_CODE (XEXP (x, 0)) != UNSPEC
6114 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6115 || GET_CODE (orig_x) != MEM)
6117 return XVECEXP (XEXP (x, 0), 0, 0);
6120 if (GET_CODE (x) != PLUS
6121 || GET_CODE (XEXP (x, 1)) != CONST)
6124 if (GET_CODE (XEXP (x, 0)) == REG
6125 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6126 /* %ebx + GOT/GOTOFF */
6128 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6130 /* %ebx + %reg * scale + GOT/GOTOFF */
6132 if (GET_CODE (XEXP (y, 0)) == REG
6133 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6135 else if (GET_CODE (XEXP (y, 1)) == REG
6136 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6140 if (GET_CODE (y) != REG
6141 && GET_CODE (y) != MULT
6142 && GET_CODE (y) != ASHIFT)
6148 x = XEXP (XEXP (x, 1), 0);
6149 if (GET_CODE (x) == UNSPEC
6150 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6151 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6154 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6155 return XVECEXP (x, 0, 0);
6158 if (GET_CODE (x) == PLUS
6159 && GET_CODE (XEXP (x, 0)) == UNSPEC
6160 && GET_CODE (XEXP (x, 1)) == CONST_INT
6161 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6162 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6163 && GET_CODE (orig_x) != MEM)))
6165 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6167 return gen_rtx_PLUS (Pmode, y, x);
6175 put_condition_code (code, mode, reverse, fp, file)
6177 enum machine_mode mode;
6183 if (mode == CCFPmode || mode == CCFPUmode)
6185 enum rtx_code second_code, bypass_code;
6186 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6187 if (bypass_code != NIL || second_code != NIL)
6189 code = ix86_fp_compare_code_to_integer (code);
6193 code = reverse_condition (code);
6204 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6209 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6210 Those same assemblers have the same but opposite losage on cmov. */
6213 suffix = fp ? "nbe" : "a";
6216 if (mode == CCNOmode || mode == CCGOCmode)
6218 else if (mode == CCmode || mode == CCGCmode)
6229 if (mode == CCNOmode || mode == CCGOCmode)
6231 else if (mode == CCmode || mode == CCGCmode)
6240 suffix = fp ? "nb" : "ae";
6243 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6253 suffix = fp ? "u" : "p";
6256 suffix = fp ? "nu" : "np";
6261 fputs (suffix, file);
6265 print_reg (x, code, file)
6270 if (REGNO (x) == ARG_POINTER_REGNUM
6271 || REGNO (x) == FRAME_POINTER_REGNUM
6272 || REGNO (x) == FLAGS_REG
6273 || REGNO (x) == FPSR_REG)
6276 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6279 if (code == 'w' || MMX_REG_P (x))
6281 else if (code == 'b')
6283 else if (code == 'k')
6285 else if (code == 'q')
6287 else if (code == 'y')
6289 else if (code == 'h')
6292 code = GET_MODE_SIZE (GET_MODE (x));
6294 /* Irritatingly, AMD extended registers use different naming convention
6295 from the normal registers. */
6296 if (REX_INT_REG_P (x))
6303 error ("extended registers have no high halves");
6306 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6309 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6312 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6315 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6318 error ("unsupported operand size for extended register");
6326 if (STACK_TOP_P (x))
6328 fputs ("st(0)", file);
6335 if (! ANY_FP_REG_P (x))
6336 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6340 fputs (hi_reg_name[REGNO (x)], file);
6343 fputs (qi_reg_name[REGNO (x)], file);
6346 fputs (qi_high_reg_name[REGNO (x)], file);
6353 /* Locate some local-dynamic symbol still in use by this function
6354 so that we can print its name in some tls_local_dynamic_base
6358 get_some_local_dynamic_name ()
6362 if (cfun->machine->some_ld_name)
6363 return cfun->machine->some_ld_name;
6365 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6367 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6368 return cfun->machine->some_ld_name;
6374 get_some_local_dynamic_name_1 (px, data)
6376 void *data ATTRIBUTE_UNUSED;
6380 if (GET_CODE (x) == SYMBOL_REF
6381 && local_dynamic_symbolic_operand (x, Pmode))
6383 cfun->machine->some_ld_name = XSTR (x, 0);
6391 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6392 C -- print opcode suffix for set/cmov insn.
6393 c -- like C, but print reversed condition
6394 F,f -- likewise, but for floating-point.
6395 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6397 R -- print the prefix for register names.
6398 z -- print the opcode suffix for the size of the current operand.
6399 * -- print a star (in certain assembler syntax)
6400 A -- print an absolute memory reference.
6401 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6402 s -- print a shift double count, followed by the assemblers argument
6404 b -- print the QImode name of the register for the indicated operand.
6405 %b0 would print %al if operands[0] is reg 0.
6406 w -- likewise, print the HImode name of the register.
6407 k -- likewise, print the SImode name of the register.
6408 q -- likewise, print the DImode name of the register.
6409 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6410 y -- print "st(0)" instead of "st" as a register.
6411 D -- print condition for SSE cmp instruction.
6412 P -- if PIC, print an @PLT suffix.
6413 X -- don't print any sort of PIC '@' suffix for a symbol.
6414 & -- print some in-use local-dynamic symbol name.
6418 print_operand (file, x, code)
6428 if (ASSEMBLER_DIALECT == ASM_ATT)
6433 assemble_name (file, get_some_local_dynamic_name ());
6437 if (ASSEMBLER_DIALECT == ASM_ATT)
6439 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6441 /* Intel syntax. For absolute addresses, registers should not
6442 be surrounded by braces. */
6443 if (GET_CODE (x) != REG)
6446 PRINT_OPERAND (file, x, 0);
6454 PRINT_OPERAND (file, x, 0);
6459 if (ASSEMBLER_DIALECT == ASM_ATT)
6464 if (ASSEMBLER_DIALECT == ASM_ATT)
6469 if (ASSEMBLER_DIALECT == ASM_ATT)
6474 if (ASSEMBLER_DIALECT == ASM_ATT)
6479 if (ASSEMBLER_DIALECT == ASM_ATT)
6484 if (ASSEMBLER_DIALECT == ASM_ATT)
6489 /* 387 opcodes don't get size suffixes if the operands are
6491 if (STACK_REG_P (x))
6494 /* Likewise if using Intel opcodes. */
6495 if (ASSEMBLER_DIALECT == ASM_INTEL)
6498 /* This is the size of op from size of operand. */
6499 switch (GET_MODE_SIZE (GET_MODE (x)))
6502 #ifdef HAVE_GAS_FILDS_FISTS
6508 if (GET_MODE (x) == SFmode)
6523 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6525 #ifdef GAS_MNEMONICS
6551 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6553 PRINT_OPERAND (file, x, 0);
6559 /* Little bit of braindamage here. The SSE compare instructions
6560 does use completely different names for the comparisons that the
6561 fp conditional moves. */
6562 switch (GET_CODE (x))
6577 fputs ("unord", file);
6581 fputs ("neq", file);
6585 fputs ("nlt", file);
6589 fputs ("nle", file);
6592 fputs ("ord", file);
6600 #ifdef CMOV_SUN_AS_SYNTAX
6601 if (ASSEMBLER_DIALECT == ASM_ATT)
6603 switch (GET_MODE (x))
6605 case HImode: putc ('w', file); break;
6607 case SFmode: putc ('l', file); break;
6609 case DFmode: putc ('q', file); break;
6617 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6620 #ifdef CMOV_SUN_AS_SYNTAX
6621 if (ASSEMBLER_DIALECT == ASM_ATT)
6624 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6627 /* Like above, but reverse condition */
6629 /* Check to see if argument to %c is really a constant
6630 and not a condition code which needs to be reversed. */
6631 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
6633 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6636 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6639 #ifdef CMOV_SUN_AS_SYNTAX
6640 if (ASSEMBLER_DIALECT == ASM_ATT)
6643 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6649 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6652 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6655 int pred_val = INTVAL (XEXP (x, 0));
6657 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6658 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6660 int taken = pred_val > REG_BR_PROB_BASE / 2;
6661 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6663 /* Emit hints only in the case default branch prediction
6664 heruistics would fail. */
6665 if (taken != cputaken)
6667 /* We use 3e (DS) prefix for taken branches and
6668 2e (CS) prefix for not taken branches. */
6670 fputs ("ds ; ", file);
6672 fputs ("cs ; ", file);
6679 output_operand_lossage ("invalid operand code `%c'", code);
6683 if (GET_CODE (x) == REG)
6685 PRINT_REG (x, code, file);
6688 else if (GET_CODE (x) == MEM)
6690 /* No `byte ptr' prefix for call instructions. */
6691 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6694 switch (GET_MODE_SIZE (GET_MODE (x)))
6696 case 1: size = "BYTE"; break;
6697 case 2: size = "WORD"; break;
6698 case 4: size = "DWORD"; break;
6699 case 8: size = "QWORD"; break;
6700 case 12: size = "XWORD"; break;
6701 case 16: size = "XMMWORD"; break;
6706 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6709 else if (code == 'w')
6711 else if (code == 'k')
6715 fputs (" PTR ", file);
6719 if (flag_pic && CONSTANT_ADDRESS_P (x))
6720 output_pic_addr_const (file, x, code);
6721 /* Avoid (%rip) for call operands. */
6722 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
6723 && GET_CODE (x) != CONST_INT)
6724 output_addr_const (file, x);
6725 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6726 output_operand_lossage ("invalid constraints for operand");
6731 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6736 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6737 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6739 if (ASSEMBLER_DIALECT == ASM_ATT)
6741 fprintf (file, "0x%lx", l);
6744 /* These float cases don't actually occur as immediate operands. */
6745 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6750 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6751 REAL_VALUE_TO_DECIMAL (r, dstr, -1);
6752 fprintf (file, "%s", dstr);
6755 else if (GET_CODE (x) == CONST_DOUBLE
6756 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
6761 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6762 REAL_VALUE_TO_DECIMAL (r, dstr, -1);
6763 fprintf (file, "%s", dstr);
6770 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6772 if (ASSEMBLER_DIALECT == ASM_ATT)
6775 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6776 || GET_CODE (x) == LABEL_REF)
6778 if (ASSEMBLER_DIALECT == ASM_ATT)
6781 fputs ("OFFSET FLAT:", file);
6784 if (GET_CODE (x) == CONST_INT)
6785 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6787 output_pic_addr_const (file, x, code);
6789 output_addr_const (file, x);
6793 /* Print a memory operand whose address is ADDR. */
6796 print_operand_address (file, addr)
6800 struct ix86_address parts;
6801 rtx base, index, disp;
6804 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
6806 if (ASSEMBLER_DIALECT == ASM_INTEL)
6807 fputs ("DWORD PTR ", file);
6808 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6810 fputs ("gs:0", file);
6814 if (! ix86_decompose_address (addr, &parts))
6818 index = parts.index;
6820 scale = parts.scale;
6822 if (!base && !index)
6824 /* Displacement only requires special attention. */
6826 if (GET_CODE (disp) == CONST_INT)
6828 if (ASSEMBLER_DIALECT == ASM_INTEL)
6830 if (USER_LABEL_PREFIX[0] == 0)
6832 fputs ("ds:", file);
6834 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6837 output_pic_addr_const (file, addr, 0);
6839 output_addr_const (file, addr);
6841 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6843 && (GET_CODE (addr) == SYMBOL_REF
6844 || GET_CODE (addr) == LABEL_REF
6845 || (GET_CODE (addr) == CONST
6846 && GET_CODE (XEXP (addr, 0)) == PLUS
6847 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
6848 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
6849 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
6850 fputs ("(%rip)", file);
6854 if (ASSEMBLER_DIALECT == ASM_ATT)
6859 output_pic_addr_const (file, disp, 0);
6860 else if (GET_CODE (disp) == LABEL_REF)
6861 output_asm_label (disp);
6863 output_addr_const (file, disp);
6868 PRINT_REG (base, 0, file);
6872 PRINT_REG (index, 0, file);
6874 fprintf (file, ",%d", scale);
6880 rtx offset = NULL_RTX;
6884 /* Pull out the offset of a symbol; print any symbol itself. */
6885 if (GET_CODE (disp) == CONST
6886 && GET_CODE (XEXP (disp, 0)) == PLUS
6887 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6889 offset = XEXP (XEXP (disp, 0), 1);
6890 disp = gen_rtx_CONST (VOIDmode,
6891 XEXP (XEXP (disp, 0), 0));
6895 output_pic_addr_const (file, disp, 0);
6896 else if (GET_CODE (disp) == LABEL_REF)
6897 output_asm_label (disp);
6898 else if (GET_CODE (disp) == CONST_INT)
6901 output_addr_const (file, disp);
6907 PRINT_REG (base, 0, file);
6910 if (INTVAL (offset) >= 0)
6912 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6916 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6923 PRINT_REG (index, 0, file);
6925 fprintf (file, "*%d", scale);
6933 output_addr_const_extra (file, x)
6939 if (GET_CODE (x) != UNSPEC)
6942 op = XVECEXP (x, 0, 0);
6943 switch (XINT (x, 1))
6945 case UNSPEC_GOTTPOFF:
6946 output_addr_const (file, op);
6947 /* FIXME: This might be @TPOFF in Sun ld. */
6948 fputs ("@GOTTPOFF", file);
6951 output_addr_const (file, op);
6952 fputs ("@TPOFF", file);
6955 output_addr_const (file, op);
6956 fputs ("@NTPOFF", file);
6959 output_addr_const (file, op);
6960 fputs ("@DTPOFF", file);
6962 case UNSPEC_GOTNTPOFF:
6963 output_addr_const (file, op);
6964 fputs ("@GOTNTPOFF", file);
6966 case UNSPEC_INDNTPOFF:
6967 output_addr_const (file, op);
6968 fputs ("@INDNTPOFF", file);
6978 /* Split one or more DImode RTL references into pairs of SImode
6979 references. The RTL can be REG, offsettable MEM, integer constant, or
6980 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6981 split and "num" is its length. lo_half and hi_half are output arrays
6982 that parallel "operands". */
6985 split_di (operands, num, lo_half, hi_half)
6988 rtx lo_half[], hi_half[];
6992 rtx op = operands[num];
6994 /* simplify_subreg refuse to split volatile memory addresses,
6995 but we still have to handle it. */
6996 if (GET_CODE (op) == MEM)
6998 lo_half[num] = adjust_address (op, SImode, 0);
6999 hi_half[num] = adjust_address (op, SImode, 4);
7003 lo_half[num] = simplify_gen_subreg (SImode, op,
7004 GET_MODE (op) == VOIDmode
7005 ? DImode : GET_MODE (op), 0);
7006 hi_half[num] = simplify_gen_subreg (SImode, op,
7007 GET_MODE (op) == VOIDmode
7008 ? DImode : GET_MODE (op), 4);
7012 /* Split one or more TImode RTL references into pairs of SImode
7013 references. The RTL can be REG, offsettable MEM, integer constant, or
7014 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7015 split and "num" is its length. lo_half and hi_half are output arrays
7016 that parallel "operands". */
7019 split_ti (operands, num, lo_half, hi_half)
7022 rtx lo_half[], hi_half[];
7026 rtx op = operands[num];
7028 /* simplify_subreg refuse to split volatile memory addresses, but we
7029 still have to handle it. */
7030 if (GET_CODE (op) == MEM)
7032 lo_half[num] = adjust_address (op, DImode, 0);
7033 hi_half[num] = adjust_address (op, DImode, 8);
7037 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7038 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7043 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7044 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7045 is the expression of the binary operation. The output may either be
7046 emitted here, or returned to the caller, like all output_* functions.
7048 There is no guarantee that the operands are the same mode, as they
7049 might be within FLOAT or FLOAT_EXTEND expressions. */
7051 #ifndef SYSV386_COMPAT
7052 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7053 wants to fix the assemblers because that causes incompatibility
7054 with gcc. No-one wants to fix gcc because that causes
7055 incompatibility with assemblers... You can use the option of
7056 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7057 #define SYSV386_COMPAT 1
7061 output_387_binary_op (insn, operands)
7065 static char buf[30];
7068 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7070 #ifdef ENABLE_CHECKING
7071 /* Even if we do not want to check the inputs, this documents input
7072 constraints. Which helps in understanding the following code. */
7073 if (STACK_REG_P (operands[0])
7074 && ((REG_P (operands[1])
7075 && REGNO (operands[0]) == REGNO (operands[1])
7076 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7077 || (REG_P (operands[2])
7078 && REGNO (operands[0]) == REGNO (operands[2])
7079 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7080 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7086 switch (GET_CODE (operands[3]))
7089 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7090 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7098 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7099 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7107 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7108 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7116 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7117 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7131 if (GET_MODE (operands[0]) == SFmode)
7132 strcat (buf, "ss\t{%2, %0|%0, %2}");
7134 strcat (buf, "sd\t{%2, %0|%0, %2}");
7139 switch (GET_CODE (operands[3]))
7143 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7145 rtx temp = operands[2];
7146 operands[2] = operands[1];
7150 /* know operands[0] == operands[1]. */
7152 if (GET_CODE (operands[2]) == MEM)
7158 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7160 if (STACK_TOP_P (operands[0]))
7161 /* How is it that we are storing to a dead operand[2]?
7162 Well, presumably operands[1] is dead too. We can't
7163 store the result to st(0) as st(0) gets popped on this
7164 instruction. Instead store to operands[2] (which I
7165 think has to be st(1)). st(1) will be popped later.
7166 gcc <= 2.8.1 didn't have this check and generated
7167 assembly code that the Unixware assembler rejected. */
7168 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7170 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7174 if (STACK_TOP_P (operands[0]))
7175 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7177 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7182 if (GET_CODE (operands[1]) == MEM)
7188 if (GET_CODE (operands[2]) == MEM)
7194 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7197 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7198 derived assemblers, confusingly reverse the direction of
7199 the operation for fsub{r} and fdiv{r} when the
7200 destination register is not st(0). The Intel assembler
7201 doesn't have this brain damage. Read !SYSV386_COMPAT to
7202 figure out what the hardware really does. */
7203 if (STACK_TOP_P (operands[0]))
7204 p = "{p\t%0, %2|rp\t%2, %0}";
7206 p = "{rp\t%2, %0|p\t%0, %2}";
7208 if (STACK_TOP_P (operands[0]))
7209 /* As above for fmul/fadd, we can't store to st(0). */
7210 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7212 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7217 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7220 if (STACK_TOP_P (operands[0]))
7221 p = "{rp\t%0, %1|p\t%1, %0}";
7223 p = "{p\t%1, %0|rp\t%0, %1}";
7225 if (STACK_TOP_P (operands[0]))
7226 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7228 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7233 if (STACK_TOP_P (operands[0]))
7235 if (STACK_TOP_P (operands[1]))
7236 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7238 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7241 else if (STACK_TOP_P (operands[1]))
7244 p = "{\t%1, %0|r\t%0, %1}";
7246 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7252 p = "{r\t%2, %0|\t%0, %2}";
7254 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7267 /* Output code to initialize control word copies used by
7268 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7269 is set to control word rounding downwards. */
7271 emit_i387_cw_initialization (normal, round_down)
7272 rtx normal, round_down;
7274 rtx reg = gen_reg_rtx (HImode);
7276 emit_insn (gen_x86_fnstcw_1 (normal));
7277 emit_move_insn (reg, normal);
7278 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7280 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7282 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7283 emit_move_insn (round_down, reg);
7286 /* Output code for INSN to convert a float to a signed int. OPERANDS
7287 are the insn operands. The output may be [HSD]Imode and the input
7288 operand may be [SDX]Fmode. */
7291 output_fix_trunc (insn, operands)
7295 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7296 int dimode_p = GET_MODE (operands[0]) == DImode;
7298 /* Jump through a hoop or two for DImode, since the hardware has no
7299 non-popping instruction. We used to do this a different way, but
7300 that was somewhat fragile and broke with post-reload splitters. */
7301 if (dimode_p && !stack_top_dies)
7302 output_asm_insn ("fld\t%y1", operands);
7304 if (!STACK_TOP_P (operands[1]))
7307 if (GET_CODE (operands[0]) != MEM)
7310 output_asm_insn ("fldcw\t%3", operands);
7311 if (stack_top_dies || dimode_p)
7312 output_asm_insn ("fistp%z0\t%0", operands);
7314 output_asm_insn ("fist%z0\t%0", operands);
7315 output_asm_insn ("fldcw\t%2", operands);
7320 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7321 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7322 when fucom should be used. */
7325 output_fp_compare (insn, operands, eflags_p, unordered_p)
7328 int eflags_p, unordered_p;
7331 rtx cmp_op0 = operands[0];
7332 rtx cmp_op1 = operands[1];
7333 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7338 cmp_op1 = operands[2];
7342 if (GET_MODE (operands[0]) == SFmode)
7344 return "ucomiss\t{%1, %0|%0, %1}";
7346 return "comiss\t{%1, %0|%0, %y}";
7349 return "ucomisd\t{%1, %0|%0, %1}";
7351 return "comisd\t{%1, %0|%0, %y}";
7354 if (! STACK_TOP_P (cmp_op0))
7357 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7359 if (STACK_REG_P (cmp_op1)
7361 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7362 && REGNO (cmp_op1) != FIRST_STACK_REG)
7364 /* If both the top of the 387 stack dies, and the other operand
7365 is also a stack register that dies, then this must be a
7366 `fcompp' float compare */
7370 /* There is no double popping fcomi variant. Fortunately,
7371 eflags is immune from the fstp's cc clobbering. */
7373 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7375 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7383 return "fucompp\n\tfnstsw\t%0";
7385 return "fcompp\n\tfnstsw\t%0";
7398 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7400 static const char * const alt[24] =
7412 "fcomi\t{%y1, %0|%0, %y1}",
7413 "fcomip\t{%y1, %0|%0, %y1}",
7414 "fucomi\t{%y1, %0|%0, %y1}",
7415 "fucomip\t{%y1, %0|%0, %y1}",
7422 "fcom%z2\t%y2\n\tfnstsw\t%0",
7423 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7424 "fucom%z2\t%y2\n\tfnstsw\t%0",
7425 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7427 "ficom%z2\t%y2\n\tfnstsw\t%0",
7428 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7436 mask = eflags_p << 3;
7437 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7438 mask |= unordered_p << 1;
7439 mask |= stack_top_dies;
7452 ix86_output_addr_vec_elt (file, value)
7456 const char *directive = ASM_LONG;
7461 directive = ASM_QUAD;
7467 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7471 ix86_output_addr_diff_elt (file, value, rel)
7476 fprintf (file, "%s%s%d-%s%d\n",
7477 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7478 else if (HAVE_AS_GOTOFF_IN_DATA)
7479 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7481 else if (TARGET_MACHO)
7482 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7483 machopic_function_base_name () + 1);
7486 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7487 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7490 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7494 ix86_expand_clear (dest)
7499 /* We play register width games, which are only valid after reload. */
7500 if (!reload_completed)
7503 /* Avoid HImode and its attendant prefix byte. */
7504 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7505 dest = gen_rtx_REG (SImode, REGNO (dest));
7507 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7509 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7510 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7512 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7513 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7519 /* X is an unchanging MEM. If it is a constant pool reference, return
7520 the constant pool rtx, else NULL. */
7523 maybe_get_pool_constant (x)
7530 if (GET_CODE (x) != PLUS)
7532 if (XEXP (x, 0) != pic_offset_table_rtx)
7535 if (GET_CODE (x) != CONST)
7538 if (GET_CODE (x) != UNSPEC)
7540 if (XINT (x, 1) != UNSPEC_GOTOFF)
7542 x = XVECEXP (x, 0, 0);
7545 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7546 return get_pool_constant (x);
7552 ix86_expand_move (mode, operands)
7553 enum machine_mode mode;
7556 int strict = (reload_in_progress || reload_completed);
7557 rtx insn, op0, op1, tmp;
7562 /* ??? We have a slight problem. We need to say that tls symbols are
7563 not legitimate constants so that reload does not helpfully reload
7564 these constants from a REG_EQUIV, which we cannot handle. (Recall
7565 that general- and local-dynamic address resolution requires a
7568 However, if we say that tls symbols are not legitimate constants,
7569 then emit_move_insn helpfully drop them into the constant pool.
7571 It is far easier to work around emit_move_insn than reload. Recognize
7572 the MEM that we would have created and extract the symbol_ref. */
7575 && GET_CODE (op1) == MEM
7576 && RTX_UNCHANGING_P (op1))
7578 tmp = maybe_get_pool_constant (op1);
7579 /* Note that we only care about symbolic constants here, which
7580 unlike CONST_INT will always have a proper mode. */
7581 if (tmp && GET_MODE (tmp) == Pmode)
7585 if (tls_symbolic_operand (op1, Pmode))
7587 op1 = legitimize_address (op1, op1, VOIDmode);
7588 if (GET_CODE (op0) == MEM)
7590 tmp = gen_reg_rtx (mode);
7591 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7595 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7600 rtx temp = ((reload_in_progress
7601 || ((op0 && GET_CODE (op0) == REG)
7603 ? op0 : gen_reg_rtx (Pmode));
7604 op1 = machopic_indirect_data_reference (op1, temp);
7605 op1 = machopic_legitimize_pic_address (op1, mode,
7606 temp == op1 ? 0 : temp);
7610 if (MACHOPIC_INDIRECT)
7611 op1 = machopic_indirect_data_reference (op1, 0);
7615 insn = gen_rtx_SET (VOIDmode, op0, op1);
7619 #endif /* TARGET_MACHO */
7620 if (GET_CODE (op0) == MEM)
7621 op1 = force_reg (Pmode, op1);
7625 if (GET_CODE (temp) != REG)
7626 temp = gen_reg_rtx (Pmode);
7627 temp = legitimize_pic_address (op1, temp);
7635 if (GET_CODE (op0) == MEM
7636 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7637 || !push_operand (op0, mode))
7638 && GET_CODE (op1) == MEM)
7639 op1 = force_reg (mode, op1);
7641 if (push_operand (op0, mode)
7642 && ! general_no_elim_operand (op1, mode))
7643 op1 = copy_to_mode_reg (mode, op1);
7645 /* Force large constants in 64bit compilation into register
7646 to get them CSEed. */
7647 if (TARGET_64BIT && mode == DImode
7648 && immediate_operand (op1, mode)
7649 && !x86_64_zero_extended_value (op1)
7650 && !register_operand (op0, mode)
7651 && optimize && !reload_completed && !reload_in_progress)
7652 op1 = copy_to_mode_reg (mode, op1);
7654 if (FLOAT_MODE_P (mode))
7656 /* If we are loading a floating point constant to a register,
7657 force the value to memory now, since we'll get better code
7658 out the back end. */
7662 else if (GET_CODE (op1) == CONST_DOUBLE
7663 && register_operand (op0, mode))
7664 op1 = validize_mem (force_const_mem (mode, op1));
7668 insn = gen_rtx_SET (VOIDmode, op0, op1);
7674 ix86_expand_vector_move (mode, operands)
7675 enum machine_mode mode;
7678 /* Force constants other than zero into memory. We do not know how
7679 the instructions used to build constants modify the upper 64 bits
7680 of the register, once we have that information we may be able
7681 to handle some of them more efficiently. */
7682 if ((reload_in_progress | reload_completed) == 0
7683 && register_operand (operands[0], mode)
7684 && CONSTANT_P (operands[1]))
7686 rtx addr = gen_reg_rtx (Pmode);
7687 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
7688 operands[1] = gen_rtx_MEM (mode, addr);
7691 /* Make operand1 a register if it isn't already. */
7692 if ((reload_in_progress | reload_completed) == 0
7693 && !register_operand (operands[0], mode)
7694 && !register_operand (operands[1], mode))
7696 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
7697 emit_move_insn (operands[0], temp);
7701 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
7704 /* Attempt to expand a binary operator. Make the expansion closer to the
7705 actual machine, then just general_operand, which will allow 3 separate
7706 memory references (one output, two input) in a single insn. */
7709 ix86_expand_binary_operator (code, mode, operands)
7711 enum machine_mode mode;
7714 int matching_memory;
7715 rtx src1, src2, dst, op, clob;
7721 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7722 if (GET_RTX_CLASS (code) == 'c'
7723 && (rtx_equal_p (dst, src2)
7724 || immediate_operand (src1, mode)))
7731 /* If the destination is memory, and we do not have matching source
7732 operands, do things in registers. */
7733 matching_memory = 0;
7734 if (GET_CODE (dst) == MEM)
7736 if (rtx_equal_p (dst, src1))
7737 matching_memory = 1;
7738 else if (GET_RTX_CLASS (code) == 'c'
7739 && rtx_equal_p (dst, src2))
7740 matching_memory = 2;
7742 dst = gen_reg_rtx (mode);
7745 /* Both source operands cannot be in memory. */
7746 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7748 if (matching_memory != 2)
7749 src2 = force_reg (mode, src2);
7751 src1 = force_reg (mode, src1);
7754 /* If the operation is not commutable, source 1 cannot be a constant
7755 or non-matching memory. */
7756 if ((CONSTANT_P (src1)
7757 || (!matching_memory && GET_CODE (src1) == MEM))
7758 && GET_RTX_CLASS (code) != 'c')
7759 src1 = force_reg (mode, src1);
7761 /* If optimizing, copy to regs to improve CSE */
7762 if (optimize && ! no_new_pseudos)
7764 if (GET_CODE (dst) == MEM)
7765 dst = gen_reg_rtx (mode);
7766 if (GET_CODE (src1) == MEM)
7767 src1 = force_reg (mode, src1);
7768 if (GET_CODE (src2) == MEM)
7769 src2 = force_reg (mode, src2);
7772 /* Emit the instruction. */
7774 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7775 if (reload_in_progress)
7777 /* Reload doesn't know about the flags register, and doesn't know that
7778 it doesn't want to clobber it. We can only do this with PLUS. */
7785 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7786 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7789 /* Fix up the destination if needed. */
7790 if (dst != operands[0])
7791 emit_move_insn (operands[0], dst);
7794 /* Return TRUE or FALSE depending on whether the binary operator meets the
7795 appropriate constraints. */
7798 ix86_binary_operator_ok (code, mode, operands)
7800 enum machine_mode mode ATTRIBUTE_UNUSED;
7803 /* Both source operands cannot be in memory. */
7804 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7806 /* If the operation is not commutable, source 1 cannot be a constant. */
7807 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
7809 /* If the destination is memory, we must have a matching source operand. */
7810 if (GET_CODE (operands[0]) == MEM
7811 && ! (rtx_equal_p (operands[0], operands[1])
7812 || (GET_RTX_CLASS (code) == 'c'
7813 && rtx_equal_p (operands[0], operands[2]))))
7815 /* If the operation is not commutable and the source 1 is memory, we must
7816 have a matching destination. */
7817 if (GET_CODE (operands[1]) == MEM
7818 && GET_RTX_CLASS (code) != 'c'
7819 && ! rtx_equal_p (operands[0], operands[1]))
7824 /* Attempt to expand a unary operator. Make the expansion closer to the
7825 actual machine, then just general_operand, which will allow 2 separate
7826 memory references (one output, one input) in a single insn. */
7829 ix86_expand_unary_operator (code, mode, operands)
7831 enum machine_mode mode;
7834 int matching_memory;
7835 rtx src, dst, op, clob;
7840 /* If the destination is memory, and we do not have matching source
7841 operands, do things in registers. */
7842 matching_memory = 0;
7843 if (GET_CODE (dst) == MEM)
7845 if (rtx_equal_p (dst, src))
7846 matching_memory = 1;
7848 dst = gen_reg_rtx (mode);
7851 /* When source operand is memory, destination must match. */
7852 if (!matching_memory && GET_CODE (src) == MEM)
7853 src = force_reg (mode, src);
7855 /* If optimizing, copy to regs to improve CSE */
7856 if (optimize && ! no_new_pseudos)
7858 if (GET_CODE (dst) == MEM)
7859 dst = gen_reg_rtx (mode);
7860 if (GET_CODE (src) == MEM)
7861 src = force_reg (mode, src);
7864 /* Emit the instruction. */
7866 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7867 if (reload_in_progress || code == NOT)
7869 /* Reload doesn't know about the flags register, and doesn't know that
7870 it doesn't want to clobber it. */
7877 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7878 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7881 /* Fix up the destination if needed. */
7882 if (dst != operands[0])
7883 emit_move_insn (operands[0], dst);
7886 /* Return TRUE or FALSE depending on whether the unary operator meets the
7887 appropriate constraints. */
7890 ix86_unary_operator_ok (code, mode, operands)
7891 enum rtx_code code ATTRIBUTE_UNUSED;
7892 enum machine_mode mode ATTRIBUTE_UNUSED;
7893 rtx operands[2] ATTRIBUTE_UNUSED;
7895 /* If one of operands is memory, source and destination must match. */
7896 if ((GET_CODE (operands[0]) == MEM
7897 || GET_CODE (operands[1]) == MEM)
7898 && ! rtx_equal_p (operands[0], operands[1]))
7903 /* Return TRUE or FALSE depending on whether the first SET in INSN
7904 has source and destination with matching CC modes, and that the
7905 CC mode is at least as constrained as REQ_MODE. */
7908 ix86_match_ccmode (insn, req_mode)
7910 enum machine_mode req_mode;
7913 enum machine_mode set_mode;
7915 set = PATTERN (insn);
7916 if (GET_CODE (set) == PARALLEL)
7917 set = XVECEXP (set, 0, 0);
7918 if (GET_CODE (set) != SET)
7920 if (GET_CODE (SET_SRC (set)) != COMPARE)
7923 set_mode = GET_MODE (SET_DEST (set));
7927 if (req_mode != CCNOmode
7928 && (req_mode != CCmode
7929 || XEXP (SET_SRC (set), 1) != const0_rtx))
7933 if (req_mode == CCGCmode)
7937 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7941 if (req_mode == CCZmode)
7951 return (GET_MODE (SET_SRC (set)) == set_mode);
7954 /* Generate insn patterns to do an integer compare of OPERANDS. */
7957 ix86_expand_int_compare (code, op0, op1)
7961 enum machine_mode cmpmode;
7964 cmpmode = SELECT_CC_MODE (code, op0, op1);
7965 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7967 /* This is very simple, but making the interface the same as in the
7968 FP case makes the rest of the code easier. */
7969 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7970 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7972 /* Return the test that should be put into the flags user, i.e.
7973 the bcc, scc, or cmov instruction. */
7974 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7977 /* Figure out whether to use ordered or unordered fp comparisons.
7978 Return the appropriate mode to use. */
7981 ix86_fp_compare_mode (code)
7982 enum rtx_code code ATTRIBUTE_UNUSED;
7984 /* ??? In order to make all comparisons reversible, we do all comparisons
7985 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7986 all forms trapping and nontrapping comparisons, we can make inequality
7987 comparisons trapping again, since it results in better code when using
7988 FCOM based compares. */
7989 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7993 ix86_cc_mode (code, op0, op1)
7997 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7998 return ix86_fp_compare_mode (code);
8001 /* Only zero flag is needed. */
8003 case NE: /* ZF!=0 */
8005 /* Codes needing carry flag. */
8006 case GEU: /* CF=0 */
8007 case GTU: /* CF=0 & ZF=0 */
8008 case LTU: /* CF=1 */
8009 case LEU: /* CF=1 | ZF=1 */
8011 /* Codes possibly doable only with sign flag when
8012 comparing against zero. */
8013 case GE: /* SF=OF or SF=0 */
8014 case LT: /* SF<>OF or SF=1 */
8015 if (op1 == const0_rtx)
8018 /* For other cases Carry flag is not required. */
8020 /* Codes doable only with sign flag when comparing
8021 against zero, but we miss jump instruction for it
8022 so we need to use relational tests agains overflow
8023 that thus needs to be zero. */
8024 case GT: /* ZF=0 & SF=OF */
8025 case LE: /* ZF=1 | SF<>OF */
8026 if (op1 == const0_rtx)
8030 /* strcmp pattern do (use flags) and combine may ask us for proper
8039 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8042 ix86_use_fcomi_compare (code)
8043 enum rtx_code code ATTRIBUTE_UNUSED;
8045 enum rtx_code swapped_code = swap_condition (code);
8046 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8047 || (ix86_fp_comparison_cost (swapped_code)
8048 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8051 /* Swap, force into registers, or otherwise massage the two operands
8052 to a fp comparison. The operands are updated in place; the new
8053 comparsion code is returned. */
8055 static enum rtx_code
8056 ix86_prepare_fp_compare_args (code, pop0, pop1)
8060 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8061 rtx op0 = *pop0, op1 = *pop1;
8062 enum machine_mode op_mode = GET_MODE (op0);
8063 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8065 /* All of the unordered compare instructions only work on registers.
8066 The same is true of the XFmode compare instructions. The same is
8067 true of the fcomi compare instructions. */
8070 && (fpcmp_mode == CCFPUmode
8071 || op_mode == XFmode
8072 || op_mode == TFmode
8073 || ix86_use_fcomi_compare (code)))
8075 op0 = force_reg (op_mode, op0);
8076 op1 = force_reg (op_mode, op1);
8080 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8081 things around if they appear profitable, otherwise force op0
8084 if (standard_80387_constant_p (op0) == 0
8085 || (GET_CODE (op0) == MEM
8086 && ! (standard_80387_constant_p (op1) == 0
8087 || GET_CODE (op1) == MEM)))
8090 tmp = op0, op0 = op1, op1 = tmp;
8091 code = swap_condition (code);
8094 if (GET_CODE (op0) != REG)
8095 op0 = force_reg (op_mode, op0);
8097 if (CONSTANT_P (op1))
8099 if (standard_80387_constant_p (op1))
8100 op1 = force_reg (op_mode, op1);
8102 op1 = validize_mem (force_const_mem (op_mode, op1));
8106 /* Try to rearrange the comparison to make it cheaper. */
8107 if (ix86_fp_comparison_cost (code)
8108 > ix86_fp_comparison_cost (swap_condition (code))
8109 && (GET_CODE (op1) == REG || !no_new_pseudos))
8112 tmp = op0, op0 = op1, op1 = tmp;
8113 code = swap_condition (code);
8114 if (GET_CODE (op0) != REG)
8115 op0 = force_reg (op_mode, op0);
8123 /* Convert comparison codes we use to represent FP comparison to integer
8124 code that will result in proper branch. Return UNKNOWN if no such code
8126 static enum rtx_code
8127 ix86_fp_compare_code_to_integer (code)
8157 /* Split comparison code CODE into comparisons we can do using branch
8158 instructions. BYPASS_CODE is comparison code for branch that will
8159 branch around FIRST_CODE and SECOND_CODE. If some of branches
8160 is not required, set value to NIL.
8161 We never require more than two branches. */
8163 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8164 enum rtx_code code, *bypass_code, *first_code, *second_code;
8170 /* The fcomi comparison sets flags as follows:
8180 case GT: /* GTU - CF=0 & ZF=0 */
8181 case GE: /* GEU - CF=0 */
8182 case ORDERED: /* PF=0 */
8183 case UNORDERED: /* PF=1 */
8184 case UNEQ: /* EQ - ZF=1 */
8185 case UNLT: /* LTU - CF=1 */
8186 case UNLE: /* LEU - CF=1 | ZF=1 */
8187 case LTGT: /* EQ - ZF=0 */
8189 case LT: /* LTU - CF=1 - fails on unordered */
8191 *bypass_code = UNORDERED;
8193 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8195 *bypass_code = UNORDERED;
8197 case EQ: /* EQ - ZF=1 - fails on unordered */
8199 *bypass_code = UNORDERED;
8201 case NE: /* NE - ZF=0 - fails on unordered */
8203 *second_code = UNORDERED;
8205 case UNGE: /* GEU - CF=0 - fails on unordered */
8207 *second_code = UNORDERED;
8209 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8211 *second_code = UNORDERED;
8216 if (!TARGET_IEEE_FP)
8223 /* Return cost of comparison done fcom + arithmetics operations on AX.
8224 All following functions do use number of instructions as an cost metrics.
8225 In future this should be tweaked to compute bytes for optimize_size and
8226 take into account performance of various instructions on various CPUs. */
8228 ix86_fp_comparison_arithmetics_cost (code)
8231 if (!TARGET_IEEE_FP)
8233 /* The cost of code output by ix86_expand_fp_compare. */
8261 /* Return cost of comparison done using fcomi operation.
8262 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8264 ix86_fp_comparison_fcomi_cost (code)
8267 enum rtx_code bypass_code, first_code, second_code;
8268 /* Return arbitarily high cost when instruction is not supported - this
8269 prevents gcc from using it. */
8272 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8273 return (bypass_code != NIL || second_code != NIL) + 2;
8276 /* Return cost of comparison done using sahf operation.
8277 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8279 ix86_fp_comparison_sahf_cost (code)
8282 enum rtx_code bypass_code, first_code, second_code;
8283 /* Return arbitarily high cost when instruction is not preferred - this
8284 avoids gcc from using it. */
8285 if (!TARGET_USE_SAHF && !optimize_size)
8287 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8288 return (bypass_code != NIL || second_code != NIL) + 3;
8291 /* Compute cost of the comparison done using any method.
8292 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8294 ix86_fp_comparison_cost (code)
8297 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8300 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8301 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8303 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8304 if (min > sahf_cost)
8306 if (min > fcomi_cost)
8311 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8314 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8316 rtx op0, op1, scratch;
8320 enum machine_mode fpcmp_mode, intcmp_mode;
8322 int cost = ix86_fp_comparison_cost (code);
8323 enum rtx_code bypass_code, first_code, second_code;
8325 fpcmp_mode = ix86_fp_compare_mode (code);
8326 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8329 *second_test = NULL_RTX;
8331 *bypass_test = NULL_RTX;
8333 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8335 /* Do fcomi/sahf based test when profitable. */
8336 if ((bypass_code == NIL || bypass_test)
8337 && (second_code == NIL || second_test)
8338 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8342 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8343 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8349 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8350 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8352 scratch = gen_reg_rtx (HImode);
8353 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8354 emit_insn (gen_x86_sahf_1 (scratch));
8357 /* The FP codes work out to act like unsigned. */
8358 intcmp_mode = fpcmp_mode;
8360 if (bypass_code != NIL)
8361 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8362 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8364 if (second_code != NIL)
8365 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8366 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8371 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8372 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8373 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8375 scratch = gen_reg_rtx (HImode);
8376 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8378 /* In the unordered case, we have to check C2 for NaN's, which
8379 doesn't happen to work out to anything nice combination-wise.
8380 So do some bit twiddling on the value we've got in AH to come
8381 up with an appropriate set of condition codes. */
8383 intcmp_mode = CCNOmode;
8388 if (code == GT || !TARGET_IEEE_FP)
8390 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8395 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8396 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8397 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8398 intcmp_mode = CCmode;
8404 if (code == LT && TARGET_IEEE_FP)
8406 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8407 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8408 intcmp_mode = CCmode;
8413 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8419 if (code == GE || !TARGET_IEEE_FP)
8421 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8426 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8427 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8434 if (code == LE && TARGET_IEEE_FP)
8436 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8437 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8438 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8439 intcmp_mode = CCmode;
8444 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8450 if (code == EQ && TARGET_IEEE_FP)
8452 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8453 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8454 intcmp_mode = CCmode;
8459 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8466 if (code == NE && TARGET_IEEE_FP)
8468 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8469 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8475 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8481 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8485 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8494 /* Return the test that should be put into the flags user, i.e.
8495 the bcc, scc, or cmov instruction. */
8496 return gen_rtx_fmt_ee (code, VOIDmode,
8497 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8502 ix86_expand_compare (code, second_test, bypass_test)
8504 rtx *second_test, *bypass_test;
8507 op0 = ix86_compare_op0;
8508 op1 = ix86_compare_op1;
8511 *second_test = NULL_RTX;
8513 *bypass_test = NULL_RTX;
8515 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8516 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8517 second_test, bypass_test);
8519 ret = ix86_expand_int_compare (code, op0, op1);
8524 /* Return true if the CODE will result in nontrivial jump sequence. */
8526 ix86_fp_jump_nontrivial_p (code)
8529 enum rtx_code bypass_code, first_code, second_code;
8532 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8533 return bypass_code != NIL || second_code != NIL;
8537 ix86_expand_branch (code, label)
8543 switch (GET_MODE (ix86_compare_op0))
8549 tmp = ix86_expand_compare (code, NULL, NULL);
8550 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8551 gen_rtx_LABEL_REF (VOIDmode, label),
8553 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8563 enum rtx_code bypass_code, first_code, second_code;
8565 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8568 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8570 /* Check whether we will use the natural sequence with one jump. If
8571 so, we can expand jump early. Otherwise delay expansion by
8572 creating compound insn to not confuse optimizers. */
8573 if (bypass_code == NIL && second_code == NIL
8576 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8577 gen_rtx_LABEL_REF (VOIDmode, label),
8582 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8583 ix86_compare_op0, ix86_compare_op1);
8584 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8585 gen_rtx_LABEL_REF (VOIDmode, label),
8587 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8589 use_fcomi = ix86_use_fcomi_compare (code);
8590 vec = rtvec_alloc (3 + !use_fcomi);
8591 RTVEC_ELT (vec, 0) = tmp;
8593 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8595 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8598 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8600 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8608 /* Expand DImode branch into multiple compare+branch. */
8610 rtx lo[2], hi[2], label2;
8611 enum rtx_code code1, code2, code3;
8613 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8615 tmp = ix86_compare_op0;
8616 ix86_compare_op0 = ix86_compare_op1;
8617 ix86_compare_op1 = tmp;
8618 code = swap_condition (code);
8620 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8621 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8623 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8624 avoid two branches. This costs one extra insn, so disable when
8625 optimizing for size. */
8627 if ((code == EQ || code == NE)
8629 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8634 if (hi[1] != const0_rtx)
8635 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8636 NULL_RTX, 0, OPTAB_WIDEN);
8639 if (lo[1] != const0_rtx)
8640 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8641 NULL_RTX, 0, OPTAB_WIDEN);
8643 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8644 NULL_RTX, 0, OPTAB_WIDEN);
8646 ix86_compare_op0 = tmp;
8647 ix86_compare_op1 = const0_rtx;
8648 ix86_expand_branch (code, label);
8652 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8653 op1 is a constant and the low word is zero, then we can just
8654 examine the high word. */
8656 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8659 case LT: case LTU: case GE: case GEU:
8660 ix86_compare_op0 = hi[0];
8661 ix86_compare_op1 = hi[1];
8662 ix86_expand_branch (code, label);
8668 /* Otherwise, we need two or three jumps. */
8670 label2 = gen_label_rtx ();
8673 code2 = swap_condition (code);
8674 code3 = unsigned_condition (code);
8678 case LT: case GT: case LTU: case GTU:
8681 case LE: code1 = LT; code2 = GT; break;
8682 case GE: code1 = GT; code2 = LT; break;
8683 case LEU: code1 = LTU; code2 = GTU; break;
8684 case GEU: code1 = GTU; code2 = LTU; break;
8686 case EQ: code1 = NIL; code2 = NE; break;
8687 case NE: code2 = NIL; break;
8695 * if (hi(a) < hi(b)) goto true;
8696 * if (hi(a) > hi(b)) goto false;
8697 * if (lo(a) < lo(b)) goto true;
8701 ix86_compare_op0 = hi[0];
8702 ix86_compare_op1 = hi[1];
8705 ix86_expand_branch (code1, label);
8707 ix86_expand_branch (code2, label2);
8709 ix86_compare_op0 = lo[0];
8710 ix86_compare_op1 = lo[1];
8711 ix86_expand_branch (code3, label);
8714 emit_label (label2);
8723 /* Split branch based on floating point condition. */
8725 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
8727 rtx op1, op2, target1, target2, tmp;
8730 rtx label = NULL_RTX;
8732 int bypass_probability = -1, second_probability = -1, probability = -1;
8735 if (target2 != pc_rtx)
8738 code = reverse_condition_maybe_unordered (code);
8743 condition = ix86_expand_fp_compare (code, op1, op2,
8744 tmp, &second, &bypass);
8746 if (split_branch_probability >= 0)
8748 /* Distribute the probabilities across the jumps.
8749 Assume the BYPASS and SECOND to be always test
8751 probability = split_branch_probability;
8753 /* Value of 1 is low enough to make no need for probability
8754 to be updated. Later we may run some experiments and see
8755 if unordered values are more frequent in practice. */
8757 bypass_probability = 1;
8759 second_probability = 1;
8761 if (bypass != NULL_RTX)
8763 label = gen_label_rtx ();
8764 i = emit_jump_insn (gen_rtx_SET
8766 gen_rtx_IF_THEN_ELSE (VOIDmode,
8768 gen_rtx_LABEL_REF (VOIDmode,
8771 if (bypass_probability >= 0)
8773 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8774 GEN_INT (bypass_probability),
8777 i = emit_jump_insn (gen_rtx_SET
8779 gen_rtx_IF_THEN_ELSE (VOIDmode,
8780 condition, target1, target2)));
8781 if (probability >= 0)
8783 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8784 GEN_INT (probability),
8786 if (second != NULL_RTX)
8788 i = emit_jump_insn (gen_rtx_SET
8790 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8792 if (second_probability >= 0)
8794 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8795 GEN_INT (second_probability),
8798 if (label != NULL_RTX)
8803 ix86_expand_setcc (code, dest)
8807 rtx ret, tmp, tmpreg;
8808 rtx second_test, bypass_test;
8810 if (GET_MODE (ix86_compare_op0) == DImode
8812 return 0; /* FAIL */
8814 if (GET_MODE (dest) != QImode)
8817 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8818 PUT_MODE (ret, QImode);
8823 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
8824 if (bypass_test || second_test)
8826 rtx test = second_test;
8828 rtx tmp2 = gen_reg_rtx (QImode);
8835 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8837 PUT_MODE (test, QImode);
8838 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8841 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8843 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8846 return 1; /* DONE */
8850 ix86_expand_int_movcc (operands)
8853 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8854 rtx compare_seq, compare_op;
8855 rtx second_test, bypass_test;
8856 enum machine_mode mode = GET_MODE (operands[0]);
8858 /* When the compare code is not LTU or GEU, we can not use sbbl case.
8859 In case comparsion is done with immediate, we can convert it to LTU or
8860 GEU by altering the integer. */
8862 if ((code == LEU || code == GTU)
8863 && GET_CODE (ix86_compare_op1) == CONST_INT
8865 && INTVAL (ix86_compare_op1) != -1
8866 /* For x86-64, the immediate field in the instruction is 32-bit
8867 signed, so we can't increment a DImode value above 0x7fffffff. */
8869 || GET_MODE (ix86_compare_op0) != DImode
8870 || INTVAL (ix86_compare_op1) != 0x7fffffff)
8871 && GET_CODE (operands[2]) == CONST_INT
8872 && GET_CODE (operands[3]) == CONST_INT)
8878 ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8879 GET_MODE (ix86_compare_op0));
8883 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8884 compare_seq = get_insns ();
8887 compare_code = GET_CODE (compare_op);
8889 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8890 HImode insns, we'd be swallowed in word prefix ops. */
8893 && (mode != DImode || TARGET_64BIT)
8894 && GET_CODE (operands[2]) == CONST_INT
8895 && GET_CODE (operands[3]) == CONST_INT)
8897 rtx out = operands[0];
8898 HOST_WIDE_INT ct = INTVAL (operands[2]);
8899 HOST_WIDE_INT cf = INTVAL (operands[3]);
8902 if ((compare_code == LTU || compare_code == GEU)
8903 && !second_test && !bypass_test)
8905 /* Detect overlap between destination and compare sources. */
8908 /* To simplify rest of code, restrict to the GEU case. */
8909 if (compare_code == LTU)
8914 compare_code = reverse_condition (compare_code);
8915 code = reverse_condition (code);
8919 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8920 || reg_overlap_mentioned_p (out, ix86_compare_op1))
8921 tmp = gen_reg_rtx (mode);
8923 emit_insn (compare_seq);
8925 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8927 emit_insn (gen_x86_movsicc_0_m1 (tmp));
8939 tmp = expand_simple_binop (mode, PLUS,
8941 tmp, 1, OPTAB_DIRECT);
8952 tmp = expand_simple_binop (mode, IOR,
8954 tmp, 1, OPTAB_DIRECT);
8956 else if (diff == -1 && ct)
8966 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8968 tmp = expand_simple_binop (mode, PLUS,
8970 tmp, 1, OPTAB_DIRECT);
8978 * andl cf - ct, dest
8988 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8991 tmp = expand_simple_binop (mode, AND,
8993 gen_int_mode (cf - ct, mode),
8994 tmp, 1, OPTAB_DIRECT);
8996 tmp = expand_simple_binop (mode, PLUS,
8998 tmp, 1, OPTAB_DIRECT);
9002 emit_move_insn (out, tmp);
9004 return 1; /* DONE */
9011 tmp = ct, ct = cf, cf = tmp;
9013 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9015 /* We may be reversing unordered compare to normal compare, that
9016 is not valid in general (we may convert non-trapping condition
9017 to trapping one), however on i386 we currently emit all
9018 comparisons unordered. */
9019 compare_code = reverse_condition_maybe_unordered (compare_code);
9020 code = reverse_condition_maybe_unordered (code);
9024 compare_code = reverse_condition (compare_code);
9025 code = reverse_condition (code);
9030 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9031 && GET_CODE (ix86_compare_op1) == CONST_INT)
9033 if (ix86_compare_op1 == const0_rtx
9034 && (code == LT || code == GE))
9035 compare_code = code;
9036 else if (ix86_compare_op1 == constm1_rtx)
9040 else if (code == GT)
9045 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9046 if (compare_code != NIL
9047 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9048 && (cf == -1 || ct == -1))
9050 /* If lea code below could be used, only optimize
9051 if it results in a 2 insn sequence. */
9053 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9054 || diff == 3 || diff == 5 || diff == 9)
9055 || (compare_code == LT && ct == -1)
9056 || (compare_code == GE && cf == -1))
9059 * notl op1 (if necessary)
9067 code = reverse_condition (code);
9070 out = emit_store_flag (out, code, ix86_compare_op0,
9071 ix86_compare_op1, VOIDmode, 0, -1);
9073 out = expand_simple_binop (mode, IOR,
9075 out, 1, OPTAB_DIRECT);
9076 if (out != operands[0])
9077 emit_move_insn (operands[0], out);
9079 return 1; /* DONE */
9083 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9084 || diff == 3 || diff == 5 || diff == 9)
9085 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9091 * lea cf(dest*(ct-cf)),dest
9095 * This also catches the degenerate setcc-only case.
9101 out = emit_store_flag (out, code, ix86_compare_op0,
9102 ix86_compare_op1, VOIDmode, 0, 1);
9105 /* On x86_64 the lea instruction operates on Pmode, so we need
9106 to get arithmetics done in proper mode to match. */
9113 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9117 tmp = gen_rtx_PLUS (mode, tmp, out1);
9123 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9127 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
9133 clob = gen_rtx_REG (CCmode, FLAGS_REG);
9134 clob = gen_rtx_CLOBBER (VOIDmode, clob);
9136 tmp = gen_rtx_SET (VOIDmode, out, tmp);
9137 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9141 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
9143 if (out != operands[0])
9144 emit_move_insn (operands[0], copy_rtx (out));
9146 return 1; /* DONE */
9150 * General case: Jumpful:
9151 * xorl dest,dest cmpl op1, op2
9152 * cmpl op1, op2 movl ct, dest
9154 * decl dest movl cf, dest
9155 * andl (cf-ct),dest 1:
9160 * This is reasonably steep, but branch mispredict costs are
9161 * high on modern cpus, so consider failing only if optimizing
9164 * %%% Parameterize branch_cost on the tuning architecture, then
9165 * use that. The 80386 couldn't care less about mispredicts.
9168 if (!optimize_size && !TARGET_CMOVE)
9174 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9175 /* We may be reversing unordered compare to normal compare,
9176 that is not valid in general (we may convert non-trapping
9177 condition to trapping one), however on i386 we currently
9178 emit all comparisons unordered. */
9179 code = reverse_condition_maybe_unordered (code);
9182 code = reverse_condition (code);
9183 if (compare_code != NIL)
9184 compare_code = reverse_condition (compare_code);
9188 if (compare_code != NIL)
9190 /* notl op1 (if needed)
9195 For x < 0 (resp. x <= -1) there will be no notl,
9196 so if possible swap the constants to get rid of the
9198 True/false will be -1/0 while code below (store flag
9199 followed by decrement) is 0/-1, so the constants need
9200 to be exchanged once more. */
9202 if (compare_code == GE || !cf)
9204 code = reverse_condition (code);
9209 HOST_WIDE_INT tmp = cf;
9214 out = emit_store_flag (out, code, ix86_compare_op0,
9215 ix86_compare_op1, VOIDmode, 0, -1);
9219 out = emit_store_flag (out, code, ix86_compare_op0,
9220 ix86_compare_op1, VOIDmode, 0, 1);
9222 out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
9223 out, 1, OPTAB_DIRECT);
9226 out = expand_simple_binop (mode, AND, out,
9227 gen_int_mode (cf - ct, mode),
9228 out, 1, OPTAB_DIRECT);
9230 out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
9231 out, 1, OPTAB_DIRECT);
9232 if (out != operands[0])
9233 emit_move_insn (operands[0], out);
9235 return 1; /* DONE */
9241 /* Try a few things more with specific constants and a variable. */
9244 rtx var, orig_out, out, tmp;
9247 return 0; /* FAIL */
9249 /* If one of the two operands is an interesting constant, load a
9250 constant with the above and mask it in with a logical operation. */
9252 if (GET_CODE (operands[2]) == CONST_INT)
9255 if (INTVAL (operands[2]) == 0)
9256 operands[3] = constm1_rtx, op = and_optab;
9257 else if (INTVAL (operands[2]) == -1)
9258 operands[3] = const0_rtx, op = ior_optab;
9260 return 0; /* FAIL */
9262 else if (GET_CODE (operands[3]) == CONST_INT)
9265 if (INTVAL (operands[3]) == 0)
9266 operands[2] = constm1_rtx, op = and_optab;
9267 else if (INTVAL (operands[3]) == -1)
9268 operands[2] = const0_rtx, op = ior_optab;
9270 return 0; /* FAIL */
9273 return 0; /* FAIL */
9275 orig_out = operands[0];
9276 tmp = gen_reg_rtx (mode);
9279 /* Recurse to get the constant loaded. */
9280 if (ix86_expand_int_movcc (operands) == 0)
9281 return 0; /* FAIL */
9283 /* Mask in the interesting variable. */
9284 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9286 if (out != orig_out)
9287 emit_move_insn (orig_out, out);
9289 return 1; /* DONE */
9293 * For comparison with above,
9303 if (! nonimmediate_operand (operands[2], mode))
9304 operands[2] = force_reg (mode, operands[2]);
9305 if (! nonimmediate_operand (operands[3], mode))
9306 operands[3] = force_reg (mode, operands[3]);
9308 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9310 rtx tmp = gen_reg_rtx (mode);
9311 emit_move_insn (tmp, operands[3]);
9314 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9316 rtx tmp = gen_reg_rtx (mode);
9317 emit_move_insn (tmp, operands[2]);
9320 if (! register_operand (operands[2], VOIDmode)
9321 && ! register_operand (operands[3], VOIDmode))
9322 operands[2] = force_reg (mode, operands[2]);
9324 emit_insn (compare_seq);
9325 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9326 gen_rtx_IF_THEN_ELSE (mode,
9327 compare_op, operands[2],
9330 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9331 gen_rtx_IF_THEN_ELSE (mode,
9336 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9337 gen_rtx_IF_THEN_ELSE (mode,
9342 return 1; /* DONE */
9346 ix86_expand_fp_movcc (operands)
9351 rtx compare_op, second_test, bypass_test;
9353 /* For SF/DFmode conditional moves based on comparisons
9354 in same mode, we may want to use SSE min/max instructions. */
9355 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9356 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9357 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9358 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
9360 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9361 /* We may be called from the post-reload splitter. */
9362 && (!REG_P (operands[0])
9363 || SSE_REG_P (operands[0])
9364 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9366 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9367 code = GET_CODE (operands[1]);
9369 /* See if we have (cross) match between comparison operands and
9370 conditional move operands. */
9371 if (rtx_equal_p (operands[2], op1))
9376 code = reverse_condition_maybe_unordered (code);
9378 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9380 /* Check for min operation. */
9383 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9384 if (memory_operand (op0, VOIDmode))
9385 op0 = force_reg (GET_MODE (operands[0]), op0);
9386 if (GET_MODE (operands[0]) == SFmode)
9387 emit_insn (gen_minsf3 (operands[0], op0, op1));
9389 emit_insn (gen_mindf3 (operands[0], op0, op1));
9392 /* Check for max operation. */
9395 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9396 if (memory_operand (op0, VOIDmode))
9397 op0 = force_reg (GET_MODE (operands[0]), op0);
9398 if (GET_MODE (operands[0]) == SFmode)
9399 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9401 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9405 /* Manage condition to be sse_comparison_operator. In case we are
9406 in non-ieee mode, try to canonicalize the destination operand
9407 to be first in the comparison - this helps reload to avoid extra
9409 if (!sse_comparison_operator (operands[1], VOIDmode)
9410 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9412 rtx tmp = ix86_compare_op0;
9413 ix86_compare_op0 = ix86_compare_op1;
9414 ix86_compare_op1 = tmp;
9415 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9416 VOIDmode, ix86_compare_op0,
9419 /* Similary try to manage result to be first operand of conditional
9420 move. We also don't support the NE comparison on SSE, so try to
9422 if ((rtx_equal_p (operands[0], operands[3])
9423 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9424 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9426 rtx tmp = operands[2];
9427 operands[2] = operands[3];
9429 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9430 (GET_CODE (operands[1])),
9431 VOIDmode, ix86_compare_op0,
9434 if (GET_MODE (operands[0]) == SFmode)
9435 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9436 operands[2], operands[3],
9437 ix86_compare_op0, ix86_compare_op1));
9439 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9440 operands[2], operands[3],
9441 ix86_compare_op0, ix86_compare_op1));
9445 /* The floating point conditional move instructions don't directly
9446 support conditions resulting from a signed integer comparison. */
9448 code = GET_CODE (operands[1]);
9449 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9451 /* The floating point conditional move instructions don't directly
9452 support signed integer comparisons. */
9454 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9456 if (second_test != NULL || bypass_test != NULL)
9458 tmp = gen_reg_rtx (QImode);
9459 ix86_expand_setcc (code, tmp);
9461 ix86_compare_op0 = tmp;
9462 ix86_compare_op1 = const0_rtx;
9463 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9465 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9467 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9468 emit_move_insn (tmp, operands[3]);
9471 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9473 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9474 emit_move_insn (tmp, operands[2]);
9478 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9479 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9484 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9485 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9490 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9491 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9499 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9500 works for floating pointer parameters and nonoffsetable memories.
9501 For pushes, it returns just stack offsets; the values will be saved
9502 in the right order. Maximally three parts are generated. */
9505 ix86_split_to_parts (operand, parts, mode)
9508 enum machine_mode mode;
9513 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9515 size = (GET_MODE_SIZE (mode) + 4) / 8;
9517 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9519 if (size < 2 || size > 3)
9522 /* Optimize constant pool reference to immediates. This is used by fp
9523 moves, that force all constants to memory to allow combining. */
9524 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9526 rtx tmp = maybe_get_pool_constant (operand);
9531 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9533 /* The only non-offsetable memories we handle are pushes. */
9534 if (! push_operand (operand, VOIDmode))
9537 operand = copy_rtx (operand);
9538 PUT_MODE (operand, Pmode);
9539 parts[0] = parts[1] = parts[2] = operand;
9541 else if (!TARGET_64BIT)
9544 split_di (&operand, 1, &parts[0], &parts[1]);
9547 if (REG_P (operand))
9549 if (!reload_completed)
9551 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9552 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9554 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9556 else if (offsettable_memref_p (operand))
9558 operand = adjust_address (operand, SImode, 0);
9560 parts[1] = adjust_address (operand, SImode, 4);
9562 parts[2] = adjust_address (operand, SImode, 8);
9564 else if (GET_CODE (operand) == CONST_DOUBLE)
9569 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9574 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9575 parts[2] = gen_int_mode (l[2], SImode);
9578 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9583 parts[1] = gen_int_mode (l[1], SImode);
9584 parts[0] = gen_int_mode (l[0], SImode);
9593 split_ti (&operand, 1, &parts[0], &parts[1]);
9594 if (mode == XFmode || mode == TFmode)
9596 if (REG_P (operand))
9598 if (!reload_completed)
9600 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9601 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9603 else if (offsettable_memref_p (operand))
9605 operand = adjust_address (operand, DImode, 0);
9607 parts[1] = adjust_address (operand, SImode, 8);
9609 else if (GET_CODE (operand) == CONST_DOUBLE)
9614 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9615 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9616 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9617 if (HOST_BITS_PER_WIDE_INT >= 64)
9620 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9621 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9624 parts[0] = immed_double_const (l[0], l[1], DImode);
9625 parts[1] = gen_int_mode (l[2], SImode);
9635 /* Emit insns to perform a move or push of DI, DF, and XF values.
9636 Return false when normal moves are needed; true when all required
9637 insns have been emitted. Operands 2-4 contain the input values
9638 int the correct order; operands 5-7 contain the output values. */
9641 ix86_split_long_move (operands)
9648 enum machine_mode mode = GET_MODE (operands[0]);
9650 /* The DFmode expanders may ask us to move double.
9651 For 64bit target this is single move. By hiding the fact
9652 here we simplify i386.md splitters. */
9653 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9655 /* Optimize constant pool reference to immediates. This is used by
9656 fp moves, that force all constants to memory to allow combining. */
9658 if (GET_CODE (operands[1]) == MEM
9659 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9660 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9661 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9662 if (push_operand (operands[0], VOIDmode))
9664 operands[0] = copy_rtx (operands[0]);
9665 PUT_MODE (operands[0], Pmode);
9668 operands[0] = gen_lowpart (DImode, operands[0]);
9669 operands[1] = gen_lowpart (DImode, operands[1]);
9670 emit_move_insn (operands[0], operands[1]);
9674 /* The only non-offsettable memory we handle is push. */
9675 if (push_operand (operands[0], VOIDmode))
9677 else if (GET_CODE (operands[0]) == MEM
9678 && ! offsettable_memref_p (operands[0]))
9681 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9682 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9684 /* When emitting push, take care for source operands on the stack. */
9685 if (push && GET_CODE (operands[1]) == MEM
9686 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9689 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9690 XEXP (part[1][2], 0));
9691 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9692 XEXP (part[1][1], 0));
9695 /* We need to do copy in the right order in case an address register
9696 of the source overlaps the destination. */
9697 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9699 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9701 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9704 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9707 /* Collision in the middle part can be handled by reordering. */
9708 if (collisions == 1 && nparts == 3
9709 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9712 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9713 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9716 /* If there are more collisions, we can't handle it by reordering.
9717 Do an lea to the last part and use only one colliding move. */
9718 else if (collisions > 1)
9721 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
9722 XEXP (part[1][0], 0)));
9723 part[1][0] = change_address (part[1][0],
9724 TARGET_64BIT ? DImode : SImode,
9725 part[0][nparts - 1]);
9726 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
9728 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
9738 /* We use only first 12 bytes of TFmode value, but for pushing we
9739 are required to adjust stack as if we were pushing real 16byte
9741 if (mode == TFmode && !TARGET_64BIT)
9742 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
9744 emit_move_insn (part[0][2], part[1][2]);
9749 /* In 64bit mode we don't have 32bit push available. In case this is
9750 register, it is OK - we will just use larger counterpart. We also
9751 retype memory - these comes from attempt to avoid REX prefix on
9752 moving of second half of TFmode value. */
9753 if (GET_MODE (part[1][1]) == SImode)
9755 if (GET_CODE (part[1][1]) == MEM)
9756 part[1][1] = adjust_address (part[1][1], DImode, 0);
9757 else if (REG_P (part[1][1]))
9758 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9761 if (GET_MODE (part[1][0]) == SImode)
9762 part[1][0] = part[1][1];
9765 emit_move_insn (part[0][1], part[1][1]);
9766 emit_move_insn (part[0][0], part[1][0]);
9770 /* Choose correct order to not overwrite the source before it is copied. */
9771 if ((REG_P (part[0][0])
9772 && REG_P (part[1][1])
9773 && (REGNO (part[0][0]) == REGNO (part[1][1])
9775 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9777 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9781 operands[2] = part[0][2];
9782 operands[3] = part[0][1];
9783 operands[4] = part[0][0];
9784 operands[5] = part[1][2];
9785 operands[6] = part[1][1];
9786 operands[7] = part[1][0];
9790 operands[2] = part[0][1];
9791 operands[3] = part[0][0];
9792 operands[5] = part[1][1];
9793 operands[6] = part[1][0];
9800 operands[2] = part[0][0];
9801 operands[3] = part[0][1];
9802 operands[4] = part[0][2];
9803 operands[5] = part[1][0];
9804 operands[6] = part[1][1];
9805 operands[7] = part[1][2];
9809 operands[2] = part[0][0];
9810 operands[3] = part[0][1];
9811 operands[5] = part[1][0];
9812 operands[6] = part[1][1];
9815 emit_move_insn (operands[2], operands[5]);
9816 emit_move_insn (operands[3], operands[6]);
9818 emit_move_insn (operands[4], operands[7]);
9824 ix86_split_ashldi (operands, scratch)
9825 rtx *operands, scratch;
9827 rtx low[2], high[2];
9830 if (GET_CODE (operands[2]) == CONST_INT)
9832 split_di (operands, 2, low, high);
9833 count = INTVAL (operands[2]) & 63;
9837 emit_move_insn (high[0], low[1]);
9838 emit_move_insn (low[0], const0_rtx);
9841 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
9845 if (!rtx_equal_p (operands[0], operands[1]))
9846 emit_move_insn (operands[0], operands[1]);
9847 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9848 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
9853 if (!rtx_equal_p (operands[0], operands[1]))
9854 emit_move_insn (operands[0], operands[1]);
9856 split_di (operands, 1, low, high);
9858 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9859 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
9861 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9863 if (! no_new_pseudos)
9864 scratch = force_reg (SImode, const0_rtx);
9866 emit_move_insn (scratch, const0_rtx);
9868 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9872 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
9877 ix86_split_ashrdi (operands, scratch)
9878 rtx *operands, scratch;
9880 rtx low[2], high[2];
9883 if (GET_CODE (operands[2]) == CONST_INT)
9885 split_di (operands, 2, low, high);
9886 count = INTVAL (operands[2]) & 63;
9890 emit_move_insn (low[0], high[1]);
9892 if (! reload_completed)
9893 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
9896 emit_move_insn (high[0], low[0]);
9897 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9901 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
9905 if (!rtx_equal_p (operands[0], operands[1]))
9906 emit_move_insn (operands[0], operands[1]);
9907 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9908 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
9913 if (!rtx_equal_p (operands[0], operands[1]))
9914 emit_move_insn (operands[0], operands[1]);
9916 split_di (operands, 1, low, high);
9918 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9919 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9921 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9923 if (! no_new_pseudos)
9924 scratch = gen_reg_rtx (SImode);
9925 emit_move_insn (scratch, high[0]);
9926 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9927 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9931 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
9936 ix86_split_lshrdi (operands, scratch)
9937 rtx *operands, scratch;
9939 rtx low[2], high[2];
9942 if (GET_CODE (operands[2]) == CONST_INT)
9944 split_di (operands, 2, low, high);
9945 count = INTVAL (operands[2]) & 63;
9949 emit_move_insn (low[0], high[1]);
9950 emit_move_insn (high[0], const0_rtx);
9953 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
9957 if (!rtx_equal_p (operands[0], operands[1]))
9958 emit_move_insn (operands[0], operands[1]);
9959 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9960 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
9965 if (!rtx_equal_p (operands[0], operands[1]))
9966 emit_move_insn (operands[0], operands[1]);
9968 split_di (operands, 1, low, high);
9970 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9971 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
9973 /* Heh. By reversing the arguments, we can reuse this pattern. */
9974 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9976 if (! no_new_pseudos)
9977 scratch = force_reg (SImode, const0_rtx);
9979 emit_move_insn (scratch, const0_rtx);
9981 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9985 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
9989 /* Helper function for the string operations below. Dest VARIABLE whether
9990 it is aligned to VALUE bytes. If true, jump to the label. */
9992 ix86_expand_aligntest (variable, value)
9996 rtx label = gen_label_rtx ();
9997 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
9998 if (GET_MODE (variable) == DImode)
9999 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10001 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10002 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10007 /* Adjust COUNTER by the VALUE. */
10009 ix86_adjust_counter (countreg, value)
10011 HOST_WIDE_INT value;
10013 if (GET_MODE (countreg) == DImode)
10014 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10016 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10019 /* Zero extend possibly SImode EXP to Pmode register. */
10021 ix86_zero_extend_to_Pmode (exp)
10025 if (GET_MODE (exp) == VOIDmode)
10026 return force_reg (Pmode, exp);
10027 if (GET_MODE (exp) == Pmode)
10028 return copy_to_mode_reg (Pmode, exp);
10029 r = gen_reg_rtx (Pmode);
10030 emit_insn (gen_zero_extendsidi2 (r, exp));
10034 /* Expand string move (memcpy) operation. Use i386 string operations when
10035 profitable. expand_clrstr contains similar code. */
10037 ix86_expand_movstr (dst, src, count_exp, align_exp)
10038 rtx dst, src, count_exp, align_exp;
10040 rtx srcreg, destreg, countreg;
10041 enum machine_mode counter_mode;
10042 HOST_WIDE_INT align = 0;
10043 unsigned HOST_WIDE_INT count = 0;
10048 if (GET_CODE (align_exp) == CONST_INT)
10049 align = INTVAL (align_exp);
10051 /* This simple hack avoids all inlining code and simplifies code below. */
10052 if (!TARGET_ALIGN_STRINGOPS)
10055 if (GET_CODE (count_exp) == CONST_INT)
10056 count = INTVAL (count_exp);
10058 /* Figure out proper mode for counter. For 32bits it is always SImode,
10059 for 64bits use SImode when possible, otherwise DImode.
10060 Set count to number of bytes copied when known at compile time. */
10061 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10062 || x86_64_zero_extended_value (count_exp))
10063 counter_mode = SImode;
10065 counter_mode = DImode;
10067 if (counter_mode != SImode && counter_mode != DImode)
10070 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10071 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10073 emit_insn (gen_cld ());
10075 /* When optimizing for size emit simple rep ; movsb instruction for
10076 counts not divisible by 4. */
10078 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10080 countreg = ix86_zero_extend_to_Pmode (count_exp);
10082 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10083 destreg, srcreg, countreg));
10085 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10086 destreg, srcreg, countreg));
10089 /* For constant aligned (or small unaligned) copies use rep movsl
10090 followed by code copying the rest. For PentiumPro ensure 8 byte
10091 alignment to allow rep movsl acceleration. */
10093 else if (count != 0
10095 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10096 || optimize_size || count < (unsigned int) 64))
10098 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10099 if (count & ~(size - 1))
10101 countreg = copy_to_mode_reg (counter_mode,
10102 GEN_INT ((count >> (size == 4 ? 2 : 3))
10103 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10104 countreg = ix86_zero_extend_to_Pmode (countreg);
10108 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10109 destreg, srcreg, countreg));
10111 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10112 destreg, srcreg, countreg));
10115 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10116 destreg, srcreg, countreg));
10118 if (size == 8 && (count & 0x04))
10119 emit_insn (gen_strmovsi (destreg, srcreg));
10121 emit_insn (gen_strmovhi (destreg, srcreg));
10123 emit_insn (gen_strmovqi (destreg, srcreg));
10125 /* The generic code based on the glibc implementation:
10126 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10127 allowing accelerated copying there)
10128 - copy the data using rep movsl
10129 - copy the rest. */
10134 int desired_alignment = (TARGET_PENTIUMPRO
10135 && (count == 0 || count >= (unsigned int) 260)
10136 ? 8 : UNITS_PER_WORD);
10138 /* In case we don't know anything about the alignment, default to
10139 library version, since it is usually equally fast and result in
10141 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10147 if (TARGET_SINGLE_STRINGOP)
10148 emit_insn (gen_cld ());
10150 countreg2 = gen_reg_rtx (Pmode);
10151 countreg = copy_to_mode_reg (counter_mode, count_exp);
10153 /* We don't use loops to align destination and to copy parts smaller
10154 than 4 bytes, because gcc is able to optimize such code better (in
10155 the case the destination or the count really is aligned, gcc is often
10156 able to predict the branches) and also it is friendlier to the
10157 hardware branch prediction.
10159 Using loops is benefical for generic case, because we can
10160 handle small counts using the loops. Many CPUs (such as Athlon)
10161 have large REP prefix setup costs.
10163 This is quite costy. Maybe we can revisit this decision later or
10164 add some customizability to this code. */
10166 if (count == 0 && align < desired_alignment)
10168 label = gen_label_rtx ();
10169 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10170 LEU, 0, counter_mode, 1, label);
10174 rtx label = ix86_expand_aligntest (destreg, 1);
10175 emit_insn (gen_strmovqi (destreg, srcreg));
10176 ix86_adjust_counter (countreg, 1);
10177 emit_label (label);
10178 LABEL_NUSES (label) = 1;
10182 rtx label = ix86_expand_aligntest (destreg, 2);
10183 emit_insn (gen_strmovhi (destreg, srcreg));
10184 ix86_adjust_counter (countreg, 2);
10185 emit_label (label);
10186 LABEL_NUSES (label) = 1;
10188 if (align <= 4 && desired_alignment > 4)
10190 rtx label = ix86_expand_aligntest (destreg, 4);
10191 emit_insn (gen_strmovsi (destreg, srcreg));
10192 ix86_adjust_counter (countreg, 4);
10193 emit_label (label);
10194 LABEL_NUSES (label) = 1;
10197 if (label && desired_alignment > 4 && !TARGET_64BIT)
10199 emit_label (label);
10200 LABEL_NUSES (label) = 1;
10203 if (!TARGET_SINGLE_STRINGOP)
10204 emit_insn (gen_cld ());
10207 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10209 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10210 destreg, srcreg, countreg2));
10214 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10215 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10216 destreg, srcreg, countreg2));
10221 emit_label (label);
10222 LABEL_NUSES (label) = 1;
10224 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10225 emit_insn (gen_strmovsi (destreg, srcreg));
10226 if ((align <= 4 || count == 0) && TARGET_64BIT)
10228 rtx label = ix86_expand_aligntest (countreg, 4);
10229 emit_insn (gen_strmovsi (destreg, srcreg));
10230 emit_label (label);
10231 LABEL_NUSES (label) = 1;
10233 if (align > 2 && count != 0 && (count & 2))
10234 emit_insn (gen_strmovhi (destreg, srcreg));
10235 if (align <= 2 || count == 0)
10237 rtx label = ix86_expand_aligntest (countreg, 2);
10238 emit_insn (gen_strmovhi (destreg, srcreg));
10239 emit_label (label);
10240 LABEL_NUSES (label) = 1;
10242 if (align > 1 && count != 0 && (count & 1))
10243 emit_insn (gen_strmovqi (destreg, srcreg));
10244 if (align <= 1 || count == 0)
10246 rtx label = ix86_expand_aligntest (countreg, 1);
10247 emit_insn (gen_strmovqi (destreg, srcreg));
10248 emit_label (label);
10249 LABEL_NUSES (label) = 1;
10253 insns = get_insns ();
10256 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10261 /* Expand string clear operation (bzero). Use i386 string operations when
10262 profitable. expand_movstr contains similar code. */
10264 ix86_expand_clrstr (src, count_exp, align_exp)
10265 rtx src, count_exp, align_exp;
10267 rtx destreg, zeroreg, countreg;
10268 enum machine_mode counter_mode;
10269 HOST_WIDE_INT align = 0;
10270 unsigned HOST_WIDE_INT count = 0;
10272 if (GET_CODE (align_exp) == CONST_INT)
10273 align = INTVAL (align_exp);
10275 /* This simple hack avoids all inlining code and simplifies code below. */
10276 if (!TARGET_ALIGN_STRINGOPS)
10279 if (GET_CODE (count_exp) == CONST_INT)
10280 count = INTVAL (count_exp);
10281 /* Figure out proper mode for counter. For 32bits it is always SImode,
10282 for 64bits use SImode when possible, otherwise DImode.
10283 Set count to number of bytes copied when known at compile time. */
10284 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10285 || x86_64_zero_extended_value (count_exp))
10286 counter_mode = SImode;
10288 counter_mode = DImode;
10290 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10292 emit_insn (gen_cld ());
10294 /* When optimizing for size emit simple rep ; movsb instruction for
10295 counts not divisible by 4. */
10297 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10299 countreg = ix86_zero_extend_to_Pmode (count_exp);
10300 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10302 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10303 destreg, countreg));
10305 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10306 destreg, countreg));
10308 else if (count != 0
10310 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10311 || optimize_size || count < (unsigned int) 64))
10313 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10314 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10315 if (count & ~(size - 1))
10317 countreg = copy_to_mode_reg (counter_mode,
10318 GEN_INT ((count >> (size == 4 ? 2 : 3))
10319 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10320 countreg = ix86_zero_extend_to_Pmode (countreg);
10324 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10325 destreg, countreg));
10327 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10328 destreg, countreg));
10331 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10332 destreg, countreg));
10334 if (size == 8 && (count & 0x04))
10335 emit_insn (gen_strsetsi (destreg,
10336 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10338 emit_insn (gen_strsethi (destreg,
10339 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10341 emit_insn (gen_strsetqi (destreg,
10342 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10348 /* Compute desired alignment of the string operation. */
10349 int desired_alignment = (TARGET_PENTIUMPRO
10350 && (count == 0 || count >= (unsigned int) 260)
10351 ? 8 : UNITS_PER_WORD);
10353 /* In case we don't know anything about the alignment, default to
10354 library version, since it is usually equally fast and result in
10356 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10359 if (TARGET_SINGLE_STRINGOP)
10360 emit_insn (gen_cld ());
10362 countreg2 = gen_reg_rtx (Pmode);
10363 countreg = copy_to_mode_reg (counter_mode, count_exp);
10364 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10366 if (count == 0 && align < desired_alignment)
10368 label = gen_label_rtx ();
10369 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10370 LEU, 0, counter_mode, 1, label);
10374 rtx label = ix86_expand_aligntest (destreg, 1);
10375 emit_insn (gen_strsetqi (destreg,
10376 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10377 ix86_adjust_counter (countreg, 1);
10378 emit_label (label);
10379 LABEL_NUSES (label) = 1;
10383 rtx label = ix86_expand_aligntest (destreg, 2);
10384 emit_insn (gen_strsethi (destreg,
10385 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10386 ix86_adjust_counter (countreg, 2);
10387 emit_label (label);
10388 LABEL_NUSES (label) = 1;
10390 if (align <= 4 && desired_alignment > 4)
10392 rtx label = ix86_expand_aligntest (destreg, 4);
10393 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10394 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10396 ix86_adjust_counter (countreg, 4);
10397 emit_label (label);
10398 LABEL_NUSES (label) = 1;
10401 if (label && desired_alignment > 4 && !TARGET_64BIT)
10403 emit_label (label);
10404 LABEL_NUSES (label) = 1;
10408 if (!TARGET_SINGLE_STRINGOP)
10409 emit_insn (gen_cld ());
10412 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10414 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10415 destreg, countreg2));
10419 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10420 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10421 destreg, countreg2));
10425 emit_label (label);
10426 LABEL_NUSES (label) = 1;
10429 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10430 emit_insn (gen_strsetsi (destreg,
10431 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10432 if (TARGET_64BIT && (align <= 4 || count == 0))
10434 rtx label = ix86_expand_aligntest (countreg, 4);
10435 emit_insn (gen_strsetsi (destreg,
10436 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10437 emit_label (label);
10438 LABEL_NUSES (label) = 1;
10440 if (align > 2 && count != 0 && (count & 2))
10441 emit_insn (gen_strsethi (destreg,
10442 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10443 if (align <= 2 || count == 0)
10445 rtx label = ix86_expand_aligntest (countreg, 2);
10446 emit_insn (gen_strsethi (destreg,
10447 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10448 emit_label (label);
10449 LABEL_NUSES (label) = 1;
10451 if (align > 1 && count != 0 && (count & 1))
10452 emit_insn (gen_strsetqi (destreg,
10453 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10454 if (align <= 1 || count == 0)
10456 rtx label = ix86_expand_aligntest (countreg, 1);
10457 emit_insn (gen_strsetqi (destreg,
10458 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10459 emit_label (label);
10460 LABEL_NUSES (label) = 1;
10465 /* Expand strlen. */
10467 ix86_expand_strlen (out, src, eoschar, align)
10468 rtx out, src, eoschar, align;
10470 rtx addr, scratch1, scratch2, scratch3, scratch4;
10472 /* The generic case of strlen expander is long. Avoid it's
10473 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
10475 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10476 && !TARGET_INLINE_ALL_STRINGOPS
10478 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10481 addr = force_reg (Pmode, XEXP (src, 0));
10482 scratch1 = gen_reg_rtx (Pmode);
10484 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10487 /* Well it seems that some optimizer does not combine a call like
10488 foo(strlen(bar), strlen(bar));
10489 when the move and the subtraction is done here. It does calculate
10490 the length just once when these instructions are done inside of
10491 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10492 often used and I use one fewer register for the lifetime of
10493 output_strlen_unroll() this is better. */
10495 emit_move_insn (out, addr);
10497 ix86_expand_strlensi_unroll_1 (out, align);
10499 /* strlensi_unroll_1 returns the address of the zero at the end of
10500 the string, like memchr(), so compute the length by subtracting
10501 the start address. */
10503 emit_insn (gen_subdi3 (out, out, addr));
10505 emit_insn (gen_subsi3 (out, out, addr));
10509 scratch2 = gen_reg_rtx (Pmode);
10510 scratch3 = gen_reg_rtx (Pmode);
10511 scratch4 = force_reg (Pmode, constm1_rtx);
10513 emit_move_insn (scratch3, addr);
10514 eoschar = force_reg (QImode, eoschar);
10516 emit_insn (gen_cld ());
10519 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10520 align, scratch4, scratch3));
10521 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10522 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10526 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10527 align, scratch4, scratch3));
10528 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10529 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10535 /* Expand the appropriate insns for doing strlen if not just doing
10538 out = result, initialized with the start address
10539 align_rtx = alignment of the address.
10540 scratch = scratch register, initialized with the startaddress when
10541 not aligned, otherwise undefined
10543 This is just the body. It needs the initialisations mentioned above and
10544 some address computing at the end. These things are done in i386.md. */
10547 ix86_expand_strlensi_unroll_1 (out, align_rtx)
10548 rtx out, align_rtx;
10552 rtx align_2_label = NULL_RTX;
10553 rtx align_3_label = NULL_RTX;
10554 rtx align_4_label = gen_label_rtx ();
10555 rtx end_0_label = gen_label_rtx ();
10557 rtx tmpreg = gen_reg_rtx (SImode);
10558 rtx scratch = gen_reg_rtx (SImode);
10561 if (GET_CODE (align_rtx) == CONST_INT)
10562 align = INTVAL (align_rtx);
10564 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10566 /* Is there a known alignment and is it less than 4? */
10569 rtx scratch1 = gen_reg_rtx (Pmode);
10570 emit_move_insn (scratch1, out);
10571 /* Is there a known alignment and is it not 2? */
10574 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10575 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10577 /* Leave just the 3 lower bits. */
10578 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10579 NULL_RTX, 0, OPTAB_WIDEN);
10581 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10582 Pmode, 1, align_4_label);
10583 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
10584 Pmode, 1, align_2_label);
10585 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
10586 Pmode, 1, align_3_label);
10590 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10591 check if is aligned to 4 - byte. */
10593 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
10594 NULL_RTX, 0, OPTAB_WIDEN);
10596 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10597 Pmode, 1, align_4_label);
10600 mem = gen_rtx_MEM (QImode, out);
10602 /* Now compare the bytes. */
10604 /* Compare the first n unaligned byte on a byte per byte basis. */
10605 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
10606 QImode, 1, end_0_label);
10608 /* Increment the address. */
10610 emit_insn (gen_adddi3 (out, out, const1_rtx));
10612 emit_insn (gen_addsi3 (out, out, const1_rtx));
10614 /* Not needed with an alignment of 2 */
10617 emit_label (align_2_label);
10619 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10623 emit_insn (gen_adddi3 (out, out, const1_rtx));
10625 emit_insn (gen_addsi3 (out, out, const1_rtx));
10627 emit_label (align_3_label);
10630 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10634 emit_insn (gen_adddi3 (out, out, const1_rtx));
10636 emit_insn (gen_addsi3 (out, out, const1_rtx));
10639 /* Generate loop to check 4 bytes at a time. It is not a good idea to
10640 align this loop. It gives only huge programs, but does not help to
10642 emit_label (align_4_label);
10644 mem = gen_rtx_MEM (SImode, out);
10645 emit_move_insn (scratch, mem);
10647 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10649 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
10651 /* This formula yields a nonzero result iff one of the bytes is zero.
10652 This saves three branches inside loop and many cycles. */
10654 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10655 emit_insn (gen_one_cmplsi2 (scratch, scratch));
10656 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
10657 emit_insn (gen_andsi3 (tmpreg, tmpreg,
10658 gen_int_mode (0x80808080, SImode)));
10659 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
10664 rtx reg = gen_reg_rtx (SImode);
10665 rtx reg2 = gen_reg_rtx (Pmode);
10666 emit_move_insn (reg, tmpreg);
10667 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10669 /* If zero is not in the first two bytes, move two bytes forward. */
10670 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10671 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10672 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10673 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10674 gen_rtx_IF_THEN_ELSE (SImode, tmp,
10677 /* Emit lea manually to avoid clobbering of flags. */
10678 emit_insn (gen_rtx_SET (SImode, reg2,
10679 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
10681 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10682 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10683 emit_insn (gen_rtx_SET (VOIDmode, out,
10684 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
10691 rtx end_2_label = gen_label_rtx ();
10692 /* Is zero in the first two bytes? */
10694 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10695 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10696 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10697 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10698 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10700 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10701 JUMP_LABEL (tmp) = end_2_label;
10703 /* Not in the first two. Move two bytes forward. */
10704 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
10706 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
10708 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
10710 emit_label (end_2_label);
10714 /* Avoid branch in fixing the byte. */
10715 tmpreg = gen_lowpart (QImode, tmpreg);
10716 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
10718 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
10720 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
10722 emit_label (end_0_label);
10726 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
10727 rtx retval, fnaddr, callarg1, callarg2, pop;
10729 rtx use = NULL, call;
10731 if (pop == const0_rtx)
10733 if (TARGET_64BIT && pop)
10737 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
10738 fnaddr = machopic_indirect_call_target (fnaddr);
10740 /* Static functions and indirect calls don't need the pic register. */
10741 if (! TARGET_64BIT && flag_pic
10742 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
10743 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
10744 use_reg (&use, pic_offset_table_rtx);
10746 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
10748 rtx al = gen_rtx_REG (QImode, 0);
10749 emit_move_insn (al, callarg2);
10750 use_reg (&use, al);
10752 #endif /* TARGET_MACHO */
10754 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
10756 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10757 fnaddr = gen_rtx_MEM (QImode, fnaddr);
10760 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
10762 call = gen_rtx_SET (VOIDmode, retval, call);
10765 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
10766 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
10767 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
10770 call = emit_call_insn (call);
10772 CALL_INSN_FUNCTION_USAGE (call) = use;
10776 /* Clear stack slot assignments remembered from previous functions.
10777 This is called from INIT_EXPANDERS once before RTL is emitted for each
10780 static struct machine_function *
10781 ix86_init_machine_status ()
10783 return ggc_alloc_cleared (sizeof (struct machine_function));
10786 /* Return a MEM corresponding to a stack slot with mode MODE.
10787 Allocate a new slot if necessary.
10789 The RTL for a function can have several slots available: N is
10790 which slot to use. */
10793 assign_386_stack_local (mode, n)
10794 enum machine_mode mode;
10797 if (n < 0 || n >= MAX_386_STACK_LOCALS)
10800 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
10801 ix86_stack_locals[(int) mode][n]
10802 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10804 return ix86_stack_locals[(int) mode][n];
10807 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10809 static GTY(()) rtx ix86_tls_symbol;
10811 ix86_tls_get_addr ()
10814 if (!ix86_tls_symbol)
10816 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, (TARGET_GNU_TLS
10817 ? "___tls_get_addr"
10818 : "__tls_get_addr"));
10821 return ix86_tls_symbol;
10824 /* Calculate the length of the memory address in the instruction
10825 encoding. Does not include the one-byte modrm, opcode, or prefix. */
10828 memory_address_length (addr)
10831 struct ix86_address parts;
10832 rtx base, index, disp;
10835 if (GET_CODE (addr) == PRE_DEC
10836 || GET_CODE (addr) == POST_INC
10837 || GET_CODE (addr) == PRE_MODIFY
10838 || GET_CODE (addr) == POST_MODIFY)
10841 if (! ix86_decompose_address (addr, &parts))
10845 index = parts.index;
10849 /* Register Indirect. */
10850 if (base && !index && !disp)
10852 /* Special cases: ebp and esp need the two-byte modrm form. */
10853 if (addr == stack_pointer_rtx
10854 || addr == arg_pointer_rtx
10855 || addr == frame_pointer_rtx
10856 || addr == hard_frame_pointer_rtx)
10860 /* Direct Addressing. */
10861 else if (disp && !base && !index)
10866 /* Find the length of the displacement constant. */
10869 if (GET_CODE (disp) == CONST_INT
10870 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
10876 /* An index requires the two-byte modrm form. */
10884 /* Compute default value for "length_immediate" attribute. When SHORTFORM
10885 is set, expect that insn have 8bit immediate alternative. */
10887 ix86_attr_length_immediate_default (insn, shortform)
10893 extract_insn_cached (insn);
10894 for (i = recog_data.n_operands - 1; i >= 0; --i)
10895 if (CONSTANT_P (recog_data.operand[i]))
10900 && GET_CODE (recog_data.operand[i]) == CONST_INT
10901 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
10905 switch (get_attr_mode (insn))
10916 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
10921 fatal_insn ("unknown insn mode", insn);
10927 /* Compute default value for "length_address" attribute. */
10929 ix86_attr_length_address_default (insn)
10933 extract_insn_cached (insn);
10934 for (i = recog_data.n_operands - 1; i >= 0; --i)
10935 if (GET_CODE (recog_data.operand[i]) == MEM)
10937 return memory_address_length (XEXP (recog_data.operand[i], 0));
10943 /* Return the maximum number of instructions a cpu can issue. */
10950 case PROCESSOR_PENTIUM:
10954 case PROCESSOR_PENTIUMPRO:
10955 case PROCESSOR_PENTIUM4:
10956 case PROCESSOR_ATHLON:
10964 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
10965 by DEP_INSN and nothing set by DEP_INSN. */
10968 ix86_flags_dependant (insn, dep_insn, insn_type)
10969 rtx insn, dep_insn;
10970 enum attr_type insn_type;
10974 /* Simplify the test for uninteresting insns. */
10975 if (insn_type != TYPE_SETCC
10976 && insn_type != TYPE_ICMOV
10977 && insn_type != TYPE_FCMOV
10978 && insn_type != TYPE_IBR)
10981 if ((set = single_set (dep_insn)) != 0)
10983 set = SET_DEST (set);
10986 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
10987 && XVECLEN (PATTERN (dep_insn), 0) == 2
10988 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
10989 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
10991 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10992 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10997 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11000 /* This test is true if the dependent insn reads the flags but
11001 not any other potentially set register. */
11002 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11005 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11011 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11012 address with operands set by DEP_INSN. */
11015 ix86_agi_dependant (insn, dep_insn, insn_type)
11016 rtx insn, dep_insn;
11017 enum attr_type insn_type;
11021 if (insn_type == TYPE_LEA
11024 addr = PATTERN (insn);
11025 if (GET_CODE (addr) == SET)
11027 else if (GET_CODE (addr) == PARALLEL
11028 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11029 addr = XVECEXP (addr, 0, 0);
11032 addr = SET_SRC (addr);
11037 extract_insn_cached (insn);
11038 for (i = recog_data.n_operands - 1; i >= 0; --i)
11039 if (GET_CODE (recog_data.operand[i]) == MEM)
11041 addr = XEXP (recog_data.operand[i], 0);
11048 return modified_in_p (addr, dep_insn);
11052 ix86_adjust_cost (insn, link, dep_insn, cost)
11053 rtx insn, link, dep_insn;
11056 enum attr_type insn_type, dep_insn_type;
11057 enum attr_memory memory, dep_memory;
11059 int dep_insn_code_number;
11061 /* Anti and output depenancies have zero cost on all CPUs. */
11062 if (REG_NOTE_KIND (link) != 0)
11065 dep_insn_code_number = recog_memoized (dep_insn);
11067 /* If we can't recognize the insns, we can't really do anything. */
11068 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11071 insn_type = get_attr_type (insn);
11072 dep_insn_type = get_attr_type (dep_insn);
11076 case PROCESSOR_PENTIUM:
11077 /* Address Generation Interlock adds a cycle of latency. */
11078 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11081 /* ??? Compares pair with jump/setcc. */
11082 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11085 /* Floating point stores require value to be ready one cycle ealier. */
11086 if (insn_type == TYPE_FMOV
11087 && get_attr_memory (insn) == MEMORY_STORE
11088 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11092 case PROCESSOR_PENTIUMPRO:
11093 memory = get_attr_memory (insn);
11094 dep_memory = get_attr_memory (dep_insn);
11096 /* Since we can't represent delayed latencies of load+operation,
11097 increase the cost here for non-imov insns. */
11098 if (dep_insn_type != TYPE_IMOV
11099 && dep_insn_type != TYPE_FMOV
11100 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11103 /* INT->FP conversion is expensive. */
11104 if (get_attr_fp_int_src (dep_insn))
11107 /* There is one cycle extra latency between an FP op and a store. */
11108 if (insn_type == TYPE_FMOV
11109 && (set = single_set (dep_insn)) != NULL_RTX
11110 && (set2 = single_set (insn)) != NULL_RTX
11111 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11112 && GET_CODE (SET_DEST (set2)) == MEM)
11115 /* Show ability of reorder buffer to hide latency of load by executing
11116 in parallel with previous instruction in case
11117 previous instruction is not needed to compute the address. */
11118 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11119 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11121 /* Claim moves to take one cycle, as core can issue one load
11122 at time and the next load can start cycle later. */
11123 if (dep_insn_type == TYPE_IMOV
11124 || dep_insn_type == TYPE_FMOV)
11132 memory = get_attr_memory (insn);
11133 dep_memory = get_attr_memory (dep_insn);
11134 /* The esp dependency is resolved before the instruction is really
11136 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11137 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11140 /* Since we can't represent delayed latencies of load+operation,
11141 increase the cost here for non-imov insns. */
11142 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11143 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11145 /* INT->FP conversion is expensive. */
11146 if (get_attr_fp_int_src (dep_insn))
11149 /* Show ability of reorder buffer to hide latency of load by executing
11150 in parallel with previous instruction in case
11151 previous instruction is not needed to compute the address. */
11152 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11153 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11155 /* Claim moves to take one cycle, as core can issue one load
11156 at time and the next load can start cycle later. */
11157 if (dep_insn_type == TYPE_IMOV
11158 || dep_insn_type == TYPE_FMOV)
11167 case PROCESSOR_ATHLON:
11168 memory = get_attr_memory (insn);
11169 dep_memory = get_attr_memory (dep_insn);
11171 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11173 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
11178 /* Show ability of reorder buffer to hide latency of load by executing
11179 in parallel with previous instruction in case
11180 previous instruction is not needed to compute the address. */
11181 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11182 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11184 /* Claim moves to take one cycle, as core can issue one load
11185 at time and the next load can start cycle later. */
11186 if (dep_insn_type == TYPE_IMOV
11187 || dep_insn_type == TYPE_FMOV)
11189 else if (cost >= 3)
11204 struct ppro_sched_data
11207 int issued_this_cycle;
11211 static enum attr_ppro_uops
11212 ix86_safe_ppro_uops (insn)
11215 if (recog_memoized (insn) >= 0)
11216 return get_attr_ppro_uops (insn);
11218 return PPRO_UOPS_MANY;
11222 ix86_dump_ppro_packet (dump)
11225 if (ix86_sched_data.ppro.decode[0])
11227 fprintf (dump, "PPRO packet: %d",
11228 INSN_UID (ix86_sched_data.ppro.decode[0]));
11229 if (ix86_sched_data.ppro.decode[1])
11230 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11231 if (ix86_sched_data.ppro.decode[2])
11232 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11233 fputc ('\n', dump);
11237 /* We're beginning a new block. Initialize data structures as necessary. */
11240 ix86_sched_init (dump, sched_verbose, veclen)
11241 FILE *dump ATTRIBUTE_UNUSED;
11242 int sched_verbose ATTRIBUTE_UNUSED;
11243 int veclen ATTRIBUTE_UNUSED;
11245 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11248 /* Shift INSN to SLOT, and shift everything else down. */
11251 ix86_reorder_insn (insnp, slot)
11258 insnp[0] = insnp[1];
11259 while (++insnp != slot);
11265 ix86_sched_reorder_ppro (ready, e_ready)
11270 enum attr_ppro_uops cur_uops;
11271 int issued_this_cycle;
11275 /* At this point .ppro.decode contains the state of the three
11276 decoders from last "cycle". That is, those insns that were
11277 actually independent. But here we're scheduling for the
11278 decoder, and we may find things that are decodable in the
11281 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11282 issued_this_cycle = 0;
11285 cur_uops = ix86_safe_ppro_uops (*insnp);
11287 /* If the decoders are empty, and we've a complex insn at the
11288 head of the priority queue, let it issue without complaint. */
11289 if (decode[0] == NULL)
11291 if (cur_uops == PPRO_UOPS_MANY)
11293 decode[0] = *insnp;
11297 /* Otherwise, search for a 2-4 uop unsn to issue. */
11298 while (cur_uops != PPRO_UOPS_FEW)
11300 if (insnp == ready)
11302 cur_uops = ix86_safe_ppro_uops (*--insnp);
11305 /* If so, move it to the head of the line. */
11306 if (cur_uops == PPRO_UOPS_FEW)
11307 ix86_reorder_insn (insnp, e_ready);
11309 /* Issue the head of the queue. */
11310 issued_this_cycle = 1;
11311 decode[0] = *e_ready--;
11314 /* Look for simple insns to fill in the other two slots. */
11315 for (i = 1; i < 3; ++i)
11316 if (decode[i] == NULL)
11318 if (ready > e_ready)
11322 cur_uops = ix86_safe_ppro_uops (*insnp);
11323 while (cur_uops != PPRO_UOPS_ONE)
11325 if (insnp == ready)
11327 cur_uops = ix86_safe_ppro_uops (*--insnp);
11330 /* Found one. Move it to the head of the queue and issue it. */
11331 if (cur_uops == PPRO_UOPS_ONE)
11333 ix86_reorder_insn (insnp, e_ready);
11334 decode[i] = *e_ready--;
11335 issued_this_cycle++;
11339 /* ??? Didn't find one. Ideally, here we would do a lazy split
11340 of 2-uop insns, issue one and queue the other. */
11344 if (issued_this_cycle == 0)
11345 issued_this_cycle = 1;
11346 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11349 /* We are about to being issuing insns for this clock cycle.
11350 Override the default sort algorithm to better slot instructions. */
11352 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11353 FILE *dump ATTRIBUTE_UNUSED;
11354 int sched_verbose ATTRIBUTE_UNUSED;
11357 int clock_var ATTRIBUTE_UNUSED;
11359 int n_ready = *n_readyp;
11360 rtx *e_ready = ready + n_ready - 1;
11362 /* Make sure to go ahead and initialize key items in
11363 ix86_sched_data if we are not going to bother trying to
11364 reorder the ready queue. */
11367 ix86_sched_data.ppro.issued_this_cycle = 1;
11376 case PROCESSOR_PENTIUMPRO:
11377 ix86_sched_reorder_ppro (ready, e_ready);
11382 return ix86_issue_rate ();
11385 /* We are about to issue INSN. Return the number of insns left on the
11386 ready queue that can be issued this cycle. */
11389 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11393 int can_issue_more;
11399 return can_issue_more - 1;
11401 case PROCESSOR_PENTIUMPRO:
11403 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
11405 if (uops == PPRO_UOPS_MANY)
11408 ix86_dump_ppro_packet (dump);
11409 ix86_sched_data.ppro.decode[0] = insn;
11410 ix86_sched_data.ppro.decode[1] = NULL;
11411 ix86_sched_data.ppro.decode[2] = NULL;
11413 ix86_dump_ppro_packet (dump);
11414 ix86_sched_data.ppro.decode[0] = NULL;
11416 else if (uops == PPRO_UOPS_FEW)
11419 ix86_dump_ppro_packet (dump);
11420 ix86_sched_data.ppro.decode[0] = insn;
11421 ix86_sched_data.ppro.decode[1] = NULL;
11422 ix86_sched_data.ppro.decode[2] = NULL;
11426 for (i = 0; i < 3; ++i)
11427 if (ix86_sched_data.ppro.decode[i] == NULL)
11429 ix86_sched_data.ppro.decode[i] = insn;
11437 ix86_dump_ppro_packet (dump);
11438 ix86_sched_data.ppro.decode[0] = NULL;
11439 ix86_sched_data.ppro.decode[1] = NULL;
11440 ix86_sched_data.ppro.decode[2] = NULL;
11444 return --ix86_sched_data.ppro.issued_this_cycle;
11449 ia32_use_dfa_pipeline_interface ()
11451 if (ix86_cpu == PROCESSOR_PENTIUM)
11456 /* How many alternative schedules to try. This should be as wide as the
11457 scheduling freedom in the DFA, but no wider. Making this value too
11458 large results extra work for the scheduler. */
11461 ia32_multipass_dfa_lookahead ()
11463 if (ix86_cpu == PROCESSOR_PENTIUM)
11470 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11471 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
11475 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11477 rtx dstref, srcref, dstreg, srcreg;
11481 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11483 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11487 /* Subroutine of above to actually do the updating by recursively walking
11491 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11493 rtx dstref, srcref, dstreg, srcreg;
11495 enum rtx_code code = GET_CODE (x);
11496 const char *format_ptr = GET_RTX_FORMAT (code);
11499 if (code == MEM && XEXP (x, 0) == dstreg)
11500 MEM_COPY_ATTRIBUTES (x, dstref);
11501 else if (code == MEM && XEXP (x, 0) == srcreg)
11502 MEM_COPY_ATTRIBUTES (x, srcref);
11504 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11506 if (*format_ptr == 'e')
11507 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11509 else if (*format_ptr == 'E')
11510 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11511 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
11516 /* Compute the alignment given to a constant that is being placed in memory.
11517 EXP is the constant and ALIGN is the alignment that the object would
11519 The value of this function is used instead of that alignment to align
11523 ix86_constant_alignment (exp, align)
11527 if (TREE_CODE (exp) == REAL_CST)
11529 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11531 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11534 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
11541 /* Compute the alignment for a static variable.
11542 TYPE is the data type, and ALIGN is the alignment that
11543 the object would ordinarily have. The value of this function is used
11544 instead of that alignment to align the object. */
11547 ix86_data_alignment (type, align)
11551 if (AGGREGATE_TYPE_P (type)
11552 && TYPE_SIZE (type)
11553 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11554 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11555 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11558 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11559 to 16byte boundary. */
11562 if (AGGREGATE_TYPE_P (type)
11563 && TYPE_SIZE (type)
11564 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11565 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11566 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11570 if (TREE_CODE (type) == ARRAY_TYPE)
11572 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11574 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11577 else if (TREE_CODE (type) == COMPLEX_TYPE)
11580 if (TYPE_MODE (type) == DCmode && align < 64)
11582 if (TYPE_MODE (type) == XCmode && align < 128)
11585 else if ((TREE_CODE (type) == RECORD_TYPE
11586 || TREE_CODE (type) == UNION_TYPE
11587 || TREE_CODE (type) == QUAL_UNION_TYPE)
11588 && TYPE_FIELDS (type))
11590 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11592 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11595 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11596 || TREE_CODE (type) == INTEGER_TYPE)
11598 if (TYPE_MODE (type) == DFmode && align < 64)
11600 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11607 /* Compute the alignment for a local variable.
11608 TYPE is the data type, and ALIGN is the alignment that
11609 the object would ordinarily have. The value of this macro is used
11610 instead of that alignment to align the object. */
11613 ix86_local_alignment (type, align)
11617 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11618 to 16byte boundary. */
11621 if (AGGREGATE_TYPE_P (type)
11622 && TYPE_SIZE (type)
11623 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11624 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11625 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11628 if (TREE_CODE (type) == ARRAY_TYPE)
11630 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11632 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11635 else if (TREE_CODE (type) == COMPLEX_TYPE)
11637 if (TYPE_MODE (type) == DCmode && align < 64)
11639 if (TYPE_MODE (type) == XCmode && align < 128)
11642 else if ((TREE_CODE (type) == RECORD_TYPE
11643 || TREE_CODE (type) == UNION_TYPE
11644 || TREE_CODE (type) == QUAL_UNION_TYPE)
11645 && TYPE_FIELDS (type))
11647 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11649 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11652 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11653 || TREE_CODE (type) == INTEGER_TYPE)
11656 if (TYPE_MODE (type) == DFmode && align < 64)
11658 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11664 /* Emit RTL insns to initialize the variable parts of a trampoline.
11665 FNADDR is an RTX for the address of the function's pure code.
11666 CXT is an RTX for the static chain value for the function. */
11668 x86_initialize_trampoline (tramp, fnaddr, cxt)
11669 rtx tramp, fnaddr, cxt;
11673 /* Compute offset from the end of the jmp to the target function. */
11674 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11675 plus_constant (tramp, 10),
11676 NULL_RTX, 1, OPTAB_DIRECT);
11677 emit_move_insn (gen_rtx_MEM (QImode, tramp),
11678 gen_int_mode (0xb9, QImode));
11679 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11680 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
11681 gen_int_mode (0xe9, QImode));
11682 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11687 /* Try to load address using shorter movl instead of movabs.
11688 We may want to support movq for kernel mode, but kernel does not use
11689 trampolines at the moment. */
11690 if (x86_64_zero_extended_value (fnaddr))
11692 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11693 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11694 gen_int_mode (0xbb41, HImode));
11695 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11696 gen_lowpart (SImode, fnaddr));
11701 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11702 gen_int_mode (0xbb49, HImode));
11703 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11707 /* Load static chain using movabs to r10. */
11708 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11709 gen_int_mode (0xba49, HImode));
11710 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11713 /* Jump to the r11 */
11714 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11715 gen_int_mode (0xff49, HImode));
11716 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
11717 gen_int_mode (0xe3, QImode));
11719 if (offset > TRAMPOLINE_SIZE)
11724 #define def_builtin(MASK, NAME, TYPE, CODE) \
11726 if ((MASK) & target_flags) \
11727 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11728 NULL, NULL_TREE); \
11731 struct builtin_description
11733 const unsigned int mask;
11734 const enum insn_code icode;
11735 const char *const name;
11736 const enum ix86_builtins code;
11737 const enum rtx_code comparison;
11738 const unsigned int flag;
11741 /* Used for builtins that are enabled both by -msse and -msse2. */
11742 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
11744 static const struct builtin_description bdesc_comi[] =
11746 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
11747 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
11748 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
11749 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
11750 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
11751 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
11752 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
11753 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
11754 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
11755 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
11756 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
11757 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
11758 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
11759 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
11760 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
11761 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
11762 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
11763 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
11764 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
11765 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
11766 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
11767 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
11768 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
11769 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
11772 static const struct builtin_description bdesc_2arg[] =
11775 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11776 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11777 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11778 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11779 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11780 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11781 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11782 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11784 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11785 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11786 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11787 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11788 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11789 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11790 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11791 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11792 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11793 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11794 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11795 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11796 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11797 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11798 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11799 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
11800 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
11801 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11802 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11803 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11804 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11805 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
11806 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
11807 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11809 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11810 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11811 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11812 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11814 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11815 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11816 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11817 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11818 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
11821 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11822 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11823 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11824 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11825 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11826 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11828 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11829 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11830 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11831 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11832 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11833 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11834 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11835 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11837 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11838 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
11839 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
11841 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11842 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11843 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11844 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11846 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11847 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
11849 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11850 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11851 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11852 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11853 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11854 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11856 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11857 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11858 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11859 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
11861 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11862 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11863 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11864 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11865 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11866 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
11869 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11870 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11871 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11873 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11874 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
11876 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11877 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11878 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11879 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11880 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
11881 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
11883 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
11884 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
11885 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
11886 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
11887 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
11888 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
11890 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
11891 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
11892 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
11893 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
11895 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
11896 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
11899 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
11900 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
11901 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
11902 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
11903 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
11904 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
11905 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
11906 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
11908 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
11909 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
11910 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
11911 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
11912 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
11913 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
11914 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
11915 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
11916 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
11917 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
11918 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
11919 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
11920 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
11921 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
11922 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
11923 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 },
11924 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 },
11925 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
11926 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
11927 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
11928 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
11929 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 },
11930 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 },
11931 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
11933 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
11934 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
11935 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
11936 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
11938 { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
11939 { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
11940 { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
11941 { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
11943 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
11944 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
11945 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
11948 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
11949 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
11950 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
11951 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
11952 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
11953 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
11954 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
11955 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
11957 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
11958 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
11959 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
11960 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
11961 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
11962 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
11963 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
11964 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
11966 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
11967 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
11968 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
11969 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
11971 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
11972 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
11973 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
11974 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
11976 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
11977 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
11979 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
11980 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
11981 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
11982 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
11983 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
11984 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
11986 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
11987 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
11988 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
11989 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
11991 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
11992 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
11993 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
11994 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
11995 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
11996 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
11998 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
11999 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12000 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12002 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12003 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12005 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12006 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12007 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12008 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12009 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12010 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12012 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12013 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12014 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12015 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12016 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12017 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12019 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12020 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12021 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12022 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12024 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12026 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12027 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12028 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
12031 static const struct builtin_description bdesc_1arg[] =
12033 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12034 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12036 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12037 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12038 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12040 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12041 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12042 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12043 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12045 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12046 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12047 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12049 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12051 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12052 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12054 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12055 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12056 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12057 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12058 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12060 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12062 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12063 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12065 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12066 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12067 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
12071 ix86_init_builtins ()
12074 ix86_init_mmx_sse_builtins ();
12077 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12078 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12081 ix86_init_mmx_sse_builtins ()
12083 const struct builtin_description * d;
12086 tree pchar_type_node = build_pointer_type (char_type_node);
12087 tree pfloat_type_node = build_pointer_type (float_type_node);
12088 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12089 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12090 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12093 tree int_ftype_v4sf_v4sf
12094 = build_function_type_list (integer_type_node,
12095 V4SF_type_node, V4SF_type_node, NULL_TREE);
12096 tree v4si_ftype_v4sf_v4sf
12097 = build_function_type_list (V4SI_type_node,
12098 V4SF_type_node, V4SF_type_node, NULL_TREE);
12099 /* MMX/SSE/integer conversions. */
12100 tree int_ftype_v4sf
12101 = build_function_type_list (integer_type_node,
12102 V4SF_type_node, NULL_TREE);
12103 tree int_ftype_v8qi
12104 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12105 tree v4sf_ftype_v4sf_int
12106 = build_function_type_list (V4SF_type_node,
12107 V4SF_type_node, integer_type_node, NULL_TREE);
12108 tree v4sf_ftype_v4sf_v2si
12109 = build_function_type_list (V4SF_type_node,
12110 V4SF_type_node, V2SI_type_node, NULL_TREE);
12111 tree int_ftype_v4hi_int
12112 = build_function_type_list (integer_type_node,
12113 V4HI_type_node, integer_type_node, NULL_TREE);
12114 tree v4hi_ftype_v4hi_int_int
12115 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12116 integer_type_node, integer_type_node,
12118 /* Miscellaneous. */
12119 tree v8qi_ftype_v4hi_v4hi
12120 = build_function_type_list (V8QI_type_node,
12121 V4HI_type_node, V4HI_type_node, NULL_TREE);
12122 tree v4hi_ftype_v2si_v2si
12123 = build_function_type_list (V4HI_type_node,
12124 V2SI_type_node, V2SI_type_node, NULL_TREE);
12125 tree v4sf_ftype_v4sf_v4sf_int
12126 = build_function_type_list (V4SF_type_node,
12127 V4SF_type_node, V4SF_type_node,
12128 integer_type_node, NULL_TREE);
12129 tree v2si_ftype_v4hi_v4hi
12130 = build_function_type_list (V2SI_type_node,
12131 V4HI_type_node, V4HI_type_node, NULL_TREE);
12132 tree v4hi_ftype_v4hi_int
12133 = build_function_type_list (V4HI_type_node,
12134 V4HI_type_node, integer_type_node, NULL_TREE);
12135 tree v4hi_ftype_v4hi_di
12136 = build_function_type_list (V4HI_type_node,
12137 V4HI_type_node, long_long_unsigned_type_node,
12139 tree v2si_ftype_v2si_di
12140 = build_function_type_list (V2SI_type_node,
12141 V2SI_type_node, long_long_unsigned_type_node,
12143 tree void_ftype_void
12144 = build_function_type (void_type_node, void_list_node);
12145 tree void_ftype_unsigned
12146 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12147 tree unsigned_ftype_void
12148 = build_function_type (unsigned_type_node, void_list_node);
12150 = build_function_type (long_long_unsigned_type_node, void_list_node);
12151 tree v4sf_ftype_void
12152 = build_function_type (V4SF_type_node, void_list_node);
12153 tree v2si_ftype_v4sf
12154 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12155 /* Loads/stores. */
12156 tree void_ftype_v8qi_v8qi_pchar
12157 = build_function_type_list (void_type_node,
12158 V8QI_type_node, V8QI_type_node,
12159 pchar_type_node, NULL_TREE);
12160 tree v4sf_ftype_pfloat
12161 = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
12162 /* @@@ the type is bogus */
12163 tree v4sf_ftype_v4sf_pv2si
12164 = build_function_type_list (V4SF_type_node,
12165 V4SF_type_node, pv2di_type_node, NULL_TREE);
12166 tree void_ftype_pv2si_v4sf
12167 = build_function_type_list (void_type_node,
12168 pv2di_type_node, V4SF_type_node, NULL_TREE);
12169 tree void_ftype_pfloat_v4sf
12170 = build_function_type_list (void_type_node,
12171 pfloat_type_node, V4SF_type_node, NULL_TREE);
12172 tree void_ftype_pdi_di
12173 = build_function_type_list (void_type_node,
12174 pdi_type_node, long_long_unsigned_type_node,
12176 tree void_ftype_pv2di_v2di
12177 = build_function_type_list (void_type_node,
12178 pv2di_type_node, V2DI_type_node, NULL_TREE);
12179 /* Normal vector unops. */
12180 tree v4sf_ftype_v4sf
12181 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12183 /* Normal vector binops. */
12184 tree v4sf_ftype_v4sf_v4sf
12185 = build_function_type_list (V4SF_type_node,
12186 V4SF_type_node, V4SF_type_node, NULL_TREE);
12187 tree v8qi_ftype_v8qi_v8qi
12188 = build_function_type_list (V8QI_type_node,
12189 V8QI_type_node, V8QI_type_node, NULL_TREE);
12190 tree v4hi_ftype_v4hi_v4hi
12191 = build_function_type_list (V4HI_type_node,
12192 V4HI_type_node, V4HI_type_node, NULL_TREE);
12193 tree v2si_ftype_v2si_v2si
12194 = build_function_type_list (V2SI_type_node,
12195 V2SI_type_node, V2SI_type_node, NULL_TREE);
12196 tree di_ftype_di_di
12197 = build_function_type_list (long_long_unsigned_type_node,
12198 long_long_unsigned_type_node,
12199 long_long_unsigned_type_node, NULL_TREE);
12201 tree v2si_ftype_v2sf
12202 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12203 tree v2sf_ftype_v2si
12204 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12205 tree v2si_ftype_v2si
12206 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12207 tree v2sf_ftype_v2sf
12208 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12209 tree v2sf_ftype_v2sf_v2sf
12210 = build_function_type_list (V2SF_type_node,
12211 V2SF_type_node, V2SF_type_node, NULL_TREE);
12212 tree v2si_ftype_v2sf_v2sf
12213 = build_function_type_list (V2SI_type_node,
12214 V2SF_type_node, V2SF_type_node, NULL_TREE);
12215 tree pint_type_node = build_pointer_type (integer_type_node);
12216 tree pdouble_type_node = build_pointer_type (double_type_node);
12217 tree int_ftype_v2df_v2df
12218 = build_function_type_list (integer_type_node,
12219 V2DF_type_node, V2DF_type_node, NULL_TREE);
12222 = build_function_type (intTI_type_node, void_list_node);
12223 tree ti_ftype_ti_ti
12224 = build_function_type_list (intTI_type_node,
12225 intTI_type_node, intTI_type_node, NULL_TREE);
12226 tree void_ftype_pvoid
12227 = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
12229 = build_function_type_list (V2DI_type_node,
12230 long_long_unsigned_type_node, NULL_TREE);
12231 tree v4sf_ftype_v4si
12232 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12233 tree v4si_ftype_v4sf
12234 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12235 tree v2df_ftype_v4si
12236 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12237 tree v4si_ftype_v2df
12238 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12239 tree v2si_ftype_v2df
12240 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12241 tree v4sf_ftype_v2df
12242 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12243 tree v2df_ftype_v2si
12244 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12245 tree v2df_ftype_v4sf
12246 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12247 tree int_ftype_v2df
12248 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12249 tree v2df_ftype_v2df_int
12250 = build_function_type_list (V2DF_type_node,
12251 V2DF_type_node, integer_type_node, NULL_TREE);
12252 tree v4sf_ftype_v4sf_v2df
12253 = build_function_type_list (V4SF_type_node,
12254 V4SF_type_node, V2DF_type_node, NULL_TREE);
12255 tree v2df_ftype_v2df_v4sf
12256 = build_function_type_list (V2DF_type_node,
12257 V2DF_type_node, V4SF_type_node, NULL_TREE);
12258 tree v2df_ftype_v2df_v2df_int
12259 = build_function_type_list (V2DF_type_node,
12260 V2DF_type_node, V2DF_type_node,
12263 tree v2df_ftype_v2df_pv2si
12264 = build_function_type_list (V2DF_type_node,
12265 V2DF_type_node, pv2si_type_node, NULL_TREE);
12266 tree void_ftype_pv2si_v2df
12267 = build_function_type_list (void_type_node,
12268 pv2si_type_node, V2DF_type_node, NULL_TREE);
12269 tree void_ftype_pdouble_v2df
12270 = build_function_type_list (void_type_node,
12271 pdouble_type_node, V2DF_type_node, NULL_TREE);
12272 tree void_ftype_pint_int
12273 = build_function_type_list (void_type_node,
12274 pint_type_node, integer_type_node, NULL_TREE);
12275 tree void_ftype_v16qi_v16qi_pchar
12276 = build_function_type_list (void_type_node,
12277 V16QI_type_node, V16QI_type_node,
12278 pchar_type_node, NULL_TREE);
12279 tree v2df_ftype_pdouble
12280 = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
12281 tree v2df_ftype_v2df_v2df
12282 = build_function_type_list (V2DF_type_node,
12283 V2DF_type_node, V2DF_type_node, NULL_TREE);
12284 tree v16qi_ftype_v16qi_v16qi
12285 = build_function_type_list (V16QI_type_node,
12286 V16QI_type_node, V16QI_type_node, NULL_TREE);
12287 tree v8hi_ftype_v8hi_v8hi
12288 = build_function_type_list (V8HI_type_node,
12289 V8HI_type_node, V8HI_type_node, NULL_TREE);
12290 tree v4si_ftype_v4si_v4si
12291 = build_function_type_list (V4SI_type_node,
12292 V4SI_type_node, V4SI_type_node, NULL_TREE);
12293 tree v2di_ftype_v2di_v2di
12294 = build_function_type_list (V2DI_type_node,
12295 V2DI_type_node, V2DI_type_node, NULL_TREE);
12296 tree v2di_ftype_v2df_v2df
12297 = build_function_type_list (V2DI_type_node,
12298 V2DF_type_node, V2DF_type_node, NULL_TREE);
12299 tree v2df_ftype_v2df
12300 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12301 tree v2df_ftype_double
12302 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12303 tree v2df_ftype_double_double
12304 = build_function_type_list (V2DF_type_node,
12305 double_type_node, double_type_node, NULL_TREE);
12306 tree int_ftype_v8hi_int
12307 = build_function_type_list (integer_type_node,
12308 V8HI_type_node, integer_type_node, NULL_TREE);
12309 tree v8hi_ftype_v8hi_int_int
12310 = build_function_type_list (V8HI_type_node,
12311 V8HI_type_node, integer_type_node,
12312 integer_type_node, NULL_TREE);
12313 tree v2di_ftype_v2di_int
12314 = build_function_type_list (V2DI_type_node,
12315 V2DI_type_node, integer_type_node, NULL_TREE);
12316 tree v4si_ftype_v4si_int
12317 = build_function_type_list (V4SI_type_node,
12318 V4SI_type_node, integer_type_node, NULL_TREE);
12319 tree v8hi_ftype_v8hi_int
12320 = build_function_type_list (V8HI_type_node,
12321 V8HI_type_node, integer_type_node, NULL_TREE);
12322 tree v8hi_ftype_v8hi_v2di
12323 = build_function_type_list (V8HI_type_node,
12324 V8HI_type_node, V2DI_type_node, NULL_TREE);
12325 tree v4si_ftype_v4si_v2di
12326 = build_function_type_list (V4SI_type_node,
12327 V4SI_type_node, V2DI_type_node, NULL_TREE);
12328 tree v4si_ftype_v8hi_v8hi
12329 = build_function_type_list (V4SI_type_node,
12330 V8HI_type_node, V8HI_type_node, NULL_TREE);
12331 tree di_ftype_v8qi_v8qi
12332 = build_function_type_list (long_long_unsigned_type_node,
12333 V8QI_type_node, V8QI_type_node, NULL_TREE);
12334 tree v2di_ftype_v16qi_v16qi
12335 = build_function_type_list (V2DI_type_node,
12336 V16QI_type_node, V16QI_type_node, NULL_TREE);
12337 tree int_ftype_v16qi
12338 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12340 /* Add all builtins that are more or less simple operations on two
12342 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12344 /* Use one of the operands; the target can have a different mode for
12345 mask-generating compares. */
12346 enum machine_mode mode;
12351 mode = insn_data[d->icode].operand[1].mode;
12356 type = v16qi_ftype_v16qi_v16qi;
12359 type = v8hi_ftype_v8hi_v8hi;
12362 type = v4si_ftype_v4si_v4si;
12365 type = v2di_ftype_v2di_v2di;
12368 type = v2df_ftype_v2df_v2df;
12371 type = ti_ftype_ti_ti;
12374 type = v4sf_ftype_v4sf_v4sf;
12377 type = v8qi_ftype_v8qi_v8qi;
12380 type = v4hi_ftype_v4hi_v4hi;
12383 type = v2si_ftype_v2si_v2si;
12386 type = di_ftype_di_di;
12393 /* Override for comparisons. */
12394 if (d->icode == CODE_FOR_maskcmpv4sf3
12395 || d->icode == CODE_FOR_maskncmpv4sf3
12396 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12397 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12398 type = v4si_ftype_v4sf_v4sf;
12400 if (d->icode == CODE_FOR_maskcmpv2df3
12401 || d->icode == CODE_FOR_maskncmpv2df3
12402 || d->icode == CODE_FOR_vmmaskcmpv2df3
12403 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12404 type = v2di_ftype_v2df_v2df;
12406 def_builtin (d->mask, d->name, type, d->code);
12409 /* Add the remaining MMX insns with somewhat more complicated types. */
12410 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12411 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12412 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12413 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12414 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12415 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12416 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12418 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12419 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12420 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12422 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12423 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12425 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12426 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12428 /* comi/ucomi insns. */
12429 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12430 if (d->mask == MASK_SSE2)
12431 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12433 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12435 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12436 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12437 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12439 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12440 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12441 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12442 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12443 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12444 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12446 def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
12447 def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
12448 def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
12449 def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
12451 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12452 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12454 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12456 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12457 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12458 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12459 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12460 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12461 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12463 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12464 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12465 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12466 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12468 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12469 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12470 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12471 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12473 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12475 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12477 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12478 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12479 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12480 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12481 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12482 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12484 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12486 /* Original 3DNow! */
12487 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12488 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12489 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12490 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12491 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12492 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12493 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12494 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12495 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12496 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12497 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12498 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12499 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12500 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12501 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12502 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12503 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12504 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12505 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12506 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12508 /* 3DNow! extension as used in the Athlon CPU. */
12509 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12510 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12511 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12512 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12513 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12514 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12516 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12519 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12520 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12522 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12523 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12525 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
12526 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
12527 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
12528 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12529 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12530 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12532 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12533 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12534 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12535 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12537 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12538 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12539 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12540 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12541 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12543 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12544 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12545 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12546 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12548 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12549 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12551 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12553 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12554 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12556 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12557 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12558 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12559 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12560 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12562 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12564 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12565 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12567 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12568 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12569 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12571 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12572 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12573 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12575 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12576 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12577 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12578 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
12579 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
12580 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12581 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12583 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
12584 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12585 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
12587 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12588 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12589 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12591 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12592 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12593 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12595 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12596 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12598 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12599 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12600 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12602 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12603 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12604 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12606 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12607 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12609 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
12612 /* Errors in the source file can cause expand_expr to return const0_rtx
12613 where we expect a vector. To avoid crashing, use one of the vector
12614 clear instructions. */
12616 safe_vector_operand (x, mode)
12618 enum machine_mode mode;
12620 if (x != const0_rtx)
12622 x = gen_reg_rtx (mode);
12624 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
12625 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12626 : gen_rtx_SUBREG (DImode, x, 0)));
12628 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12629 : gen_rtx_SUBREG (V4SFmode, x, 0)));
12633 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
12636 ix86_expand_binop_builtin (icode, arglist, target)
12637 enum insn_code icode;
12642 tree arg0 = TREE_VALUE (arglist);
12643 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12644 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12645 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12646 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12647 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12648 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12650 if (VECTOR_MODE_P (mode0))
12651 op0 = safe_vector_operand (op0, mode0);
12652 if (VECTOR_MODE_P (mode1))
12653 op1 = safe_vector_operand (op1, mode1);
12656 || GET_MODE (target) != tmode
12657 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12658 target = gen_reg_rtx (tmode);
12660 /* In case the insn wants input operands in modes different from
12661 the result, abort. */
12662 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
12665 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12666 op0 = copy_to_mode_reg (mode0, op0);
12667 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12668 op1 = copy_to_mode_reg (mode1, op1);
12670 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12671 yet one of the two must not be a memory. This is normally enforced
12672 by expanders, but we didn't bother to create one here. */
12673 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12674 op0 = copy_to_mode_reg (mode0, op0);
12676 pat = GEN_FCN (icode) (target, op0, op1);
12683 /* In type_for_mode we restrict the ability to create TImode types
12684 to hosts with 64-bit H_W_I. So we've defined the SSE logicals
12685 to have a V4SFmode signature. Convert them in-place to TImode. */
12688 ix86_expand_timode_binop_builtin (icode, arglist, target)
12689 enum insn_code icode;
12694 tree arg0 = TREE_VALUE (arglist);
12695 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12696 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12697 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12699 op0 = gen_lowpart (TImode, op0);
12700 op1 = gen_lowpart (TImode, op1);
12701 target = gen_reg_rtx (TImode);
12703 if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
12704 op0 = copy_to_mode_reg (TImode, op0);
12705 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
12706 op1 = copy_to_mode_reg (TImode, op1);
12708 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12709 yet one of the two must not be a memory. This is normally enforced
12710 by expanders, but we didn't bother to create one here. */
12711 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12712 op0 = copy_to_mode_reg (TImode, op0);
12714 pat = GEN_FCN (icode) (target, op0, op1);
12719 return gen_lowpart (V4SFmode, target);
12722 /* Subroutine of ix86_expand_builtin to take care of stores. */
12725 ix86_expand_store_builtin (icode, arglist)
12726 enum insn_code icode;
12730 tree arg0 = TREE_VALUE (arglist);
12731 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12732 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12733 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12734 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12735 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12737 if (VECTOR_MODE_P (mode1))
12738 op1 = safe_vector_operand (op1, mode1);
12740 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12742 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12743 op1 = copy_to_mode_reg (mode1, op1);
12745 pat = GEN_FCN (icode) (op0, op1);
12751 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12754 ix86_expand_unop_builtin (icode, arglist, target, do_load)
12755 enum insn_code icode;
12761 tree arg0 = TREE_VALUE (arglist);
12762 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12763 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12764 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12767 || GET_MODE (target) != tmode
12768 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12769 target = gen_reg_rtx (tmode);
12771 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12774 if (VECTOR_MODE_P (mode0))
12775 op0 = safe_vector_operand (op0, mode0);
12777 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12778 op0 = copy_to_mode_reg (mode0, op0);
12781 pat = GEN_FCN (icode) (target, op0);
12788 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12789 sqrtss, rsqrtss, rcpss. */
12792 ix86_expand_unop1_builtin (icode, arglist, target)
12793 enum insn_code icode;
12798 tree arg0 = TREE_VALUE (arglist);
12799 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12800 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12801 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12804 || GET_MODE (target) != tmode
12805 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12806 target = gen_reg_rtx (tmode);
12808 if (VECTOR_MODE_P (mode0))
12809 op0 = safe_vector_operand (op0, mode0);
12811 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12812 op0 = copy_to_mode_reg (mode0, op0);
12815 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12816 op1 = copy_to_mode_reg (mode0, op1);
12818 pat = GEN_FCN (icode) (target, op0, op1);
12825 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12828 ix86_expand_sse_compare (d, arglist, target)
12829 const struct builtin_description *d;
12834 tree arg0 = TREE_VALUE (arglist);
12835 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12836 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12837 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12839 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12840 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12841 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12842 enum rtx_code comparison = d->comparison;
12844 if (VECTOR_MODE_P (mode0))
12845 op0 = safe_vector_operand (op0, mode0);
12846 if (VECTOR_MODE_P (mode1))
12847 op1 = safe_vector_operand (op1, mode1);
12849 /* Swap operands if we have a comparison that isn't available in
12853 rtx tmp = gen_reg_rtx (mode1);
12854 emit_move_insn (tmp, op1);
12860 || GET_MODE (target) != tmode
12861 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
12862 target = gen_reg_rtx (tmode);
12864 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12865 op0 = copy_to_mode_reg (mode0, op0);
12866 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12867 op1 = copy_to_mode_reg (mode1, op1);
12869 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12870 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12877 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
12880 ix86_expand_sse_comi (d, arglist, target)
12881 const struct builtin_description *d;
12886 tree arg0 = TREE_VALUE (arglist);
12887 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12888 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12889 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12891 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
12892 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
12893 enum rtx_code comparison = d->comparison;
12895 if (VECTOR_MODE_P (mode0))
12896 op0 = safe_vector_operand (op0, mode0);
12897 if (VECTOR_MODE_P (mode1))
12898 op1 = safe_vector_operand (op1, mode1);
12900 /* Swap operands if we have a comparison that isn't available in
12909 target = gen_reg_rtx (SImode);
12910 emit_move_insn (target, const0_rtx);
12911 target = gen_rtx_SUBREG (QImode, target, 0);
12913 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12914 op0 = copy_to_mode_reg (mode0, op0);
12915 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12916 op1 = copy_to_mode_reg (mode1, op1);
12918 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12919 pat = GEN_FCN (d->icode) (op0, op1, op2);
12923 emit_insn (gen_rtx_SET (VOIDmode,
12924 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
12925 gen_rtx_fmt_ee (comparison, QImode,
12926 gen_rtx_REG (CCmode, FLAGS_REG),
12929 return SUBREG_REG (target);
12932 /* Expand an expression EXP that calls a built-in function,
12933 with result going to TARGET if that's convenient
12934 (and in mode MODE if that's convenient).
12935 SUBTARGET may be used as the target for computing one of EXP's operands.
12936 IGNORE is nonzero if the value is to be ignored. */
12939 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
12942 rtx subtarget ATTRIBUTE_UNUSED;
12943 enum machine_mode mode ATTRIBUTE_UNUSED;
12944 int ignore ATTRIBUTE_UNUSED;
12946 const struct builtin_description *d;
12948 enum insn_code icode;
12949 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12950 tree arglist = TREE_OPERAND (exp, 1);
12951 tree arg0, arg1, arg2;
12952 rtx op0, op1, op2, pat;
12953 enum machine_mode tmode, mode0, mode1, mode2;
12954 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12958 case IX86_BUILTIN_EMMS:
12959 emit_insn (gen_emms ());
12962 case IX86_BUILTIN_SFENCE:
12963 emit_insn (gen_sfence ());
12966 case IX86_BUILTIN_PEXTRW:
12967 case IX86_BUILTIN_PEXTRW128:
12968 icode = (fcode == IX86_BUILTIN_PEXTRW
12969 ? CODE_FOR_mmx_pextrw
12970 : CODE_FOR_sse2_pextrw);
12971 arg0 = TREE_VALUE (arglist);
12972 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12973 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12974 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12975 tmode = insn_data[icode].operand[0].mode;
12976 mode0 = insn_data[icode].operand[1].mode;
12977 mode1 = insn_data[icode].operand[2].mode;
12979 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12980 op0 = copy_to_mode_reg (mode0, op0);
12981 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12983 /* @@@ better error message */
12984 error ("selector must be an immediate");
12985 return gen_reg_rtx (tmode);
12988 || GET_MODE (target) != tmode
12989 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12990 target = gen_reg_rtx (tmode);
12991 pat = GEN_FCN (icode) (target, op0, op1);
12997 case IX86_BUILTIN_PINSRW:
12998 case IX86_BUILTIN_PINSRW128:
12999 icode = (fcode == IX86_BUILTIN_PINSRW
13000 ? CODE_FOR_mmx_pinsrw
13001 : CODE_FOR_sse2_pinsrw);
13002 arg0 = TREE_VALUE (arglist);
13003 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13004 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13005 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13006 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13007 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13008 tmode = insn_data[icode].operand[0].mode;
13009 mode0 = insn_data[icode].operand[1].mode;
13010 mode1 = insn_data[icode].operand[2].mode;
13011 mode2 = insn_data[icode].operand[3].mode;
13013 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13014 op0 = copy_to_mode_reg (mode0, op0);
13015 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13016 op1 = copy_to_mode_reg (mode1, op1);
13017 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13019 /* @@@ better error message */
13020 error ("selector must be an immediate");
13024 || GET_MODE (target) != tmode
13025 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13026 target = gen_reg_rtx (tmode);
13027 pat = GEN_FCN (icode) (target, op0, op1, op2);
13033 case IX86_BUILTIN_MASKMOVQ:
13034 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13035 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13036 : CODE_FOR_sse2_maskmovdqu);
13037 /* Note the arg order is different from the operand order. */
13038 arg1 = TREE_VALUE (arglist);
13039 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13040 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13041 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13042 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13043 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13044 mode0 = insn_data[icode].operand[0].mode;
13045 mode1 = insn_data[icode].operand[1].mode;
13046 mode2 = insn_data[icode].operand[2].mode;
13048 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13049 op0 = copy_to_mode_reg (mode0, op0);
13050 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13051 op1 = copy_to_mode_reg (mode1, op1);
13052 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13053 op2 = copy_to_mode_reg (mode2, op2);
13054 pat = GEN_FCN (icode) (op0, op1, op2);
13060 case IX86_BUILTIN_SQRTSS:
13061 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13062 case IX86_BUILTIN_RSQRTSS:
13063 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13064 case IX86_BUILTIN_RCPSS:
13065 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13067 case IX86_BUILTIN_ANDPS:
13068 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
13070 case IX86_BUILTIN_ANDNPS:
13071 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
13073 case IX86_BUILTIN_ORPS:
13074 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
13076 case IX86_BUILTIN_XORPS:
13077 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
13080 case IX86_BUILTIN_LOADAPS:
13081 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13083 case IX86_BUILTIN_LOADUPS:
13084 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13086 case IX86_BUILTIN_STOREAPS:
13087 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13088 case IX86_BUILTIN_STOREUPS:
13089 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13091 case IX86_BUILTIN_LOADSS:
13092 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13094 case IX86_BUILTIN_STORESS:
13095 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13097 case IX86_BUILTIN_LOADHPS:
13098 case IX86_BUILTIN_LOADLPS:
13099 case IX86_BUILTIN_LOADHPD:
13100 case IX86_BUILTIN_LOADLPD:
13101 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13102 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13103 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13104 : CODE_FOR_sse2_movlpd);
13105 arg0 = TREE_VALUE (arglist);
13106 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13107 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13108 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13109 tmode = insn_data[icode].operand[0].mode;
13110 mode0 = insn_data[icode].operand[1].mode;
13111 mode1 = insn_data[icode].operand[2].mode;
13113 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13114 op0 = copy_to_mode_reg (mode0, op0);
13115 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13117 || GET_MODE (target) != tmode
13118 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13119 target = gen_reg_rtx (tmode);
13120 pat = GEN_FCN (icode) (target, op0, op1);
13126 case IX86_BUILTIN_STOREHPS:
13127 case IX86_BUILTIN_STORELPS:
13128 case IX86_BUILTIN_STOREHPD:
13129 case IX86_BUILTIN_STORELPD:
13130 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13131 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13132 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13133 : CODE_FOR_sse2_movlpd);
13134 arg0 = TREE_VALUE (arglist);
13135 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13136 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13137 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13138 mode0 = insn_data[icode].operand[1].mode;
13139 mode1 = insn_data[icode].operand[2].mode;
13141 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13142 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13143 op1 = copy_to_mode_reg (mode1, op1);
13145 pat = GEN_FCN (icode) (op0, op0, op1);
13151 case IX86_BUILTIN_MOVNTPS:
13152 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13153 case IX86_BUILTIN_MOVNTQ:
13154 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13156 case IX86_BUILTIN_LDMXCSR:
13157 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13158 target = assign_386_stack_local (SImode, 0);
13159 emit_move_insn (target, op0);
13160 emit_insn (gen_ldmxcsr (target));
13163 case IX86_BUILTIN_STMXCSR:
13164 target = assign_386_stack_local (SImode, 0);
13165 emit_insn (gen_stmxcsr (target));
13166 return copy_to_mode_reg (SImode, target);
13168 case IX86_BUILTIN_SHUFPS:
13169 case IX86_BUILTIN_SHUFPD:
13170 icode = (fcode == IX86_BUILTIN_SHUFPS
13171 ? CODE_FOR_sse_shufps
13172 : CODE_FOR_sse2_shufpd);
13173 arg0 = TREE_VALUE (arglist);
13174 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13175 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13176 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13177 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13178 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13179 tmode = insn_data[icode].operand[0].mode;
13180 mode0 = insn_data[icode].operand[1].mode;
13181 mode1 = insn_data[icode].operand[2].mode;
13182 mode2 = insn_data[icode].operand[3].mode;
13184 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13185 op0 = copy_to_mode_reg (mode0, op0);
13186 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13187 op1 = copy_to_mode_reg (mode1, op1);
13188 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13190 /* @@@ better error message */
13191 error ("mask must be an immediate");
13192 return gen_reg_rtx (tmode);
13195 || GET_MODE (target) != tmode
13196 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13197 target = gen_reg_rtx (tmode);
13198 pat = GEN_FCN (icode) (target, op0, op1, op2);
13204 case IX86_BUILTIN_PSHUFW:
13205 case IX86_BUILTIN_PSHUFD:
13206 case IX86_BUILTIN_PSHUFHW:
13207 case IX86_BUILTIN_PSHUFLW:
13208 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13209 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13210 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13211 : CODE_FOR_mmx_pshufw);
13212 arg0 = TREE_VALUE (arglist);
13213 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13214 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13215 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13216 tmode = insn_data[icode].operand[0].mode;
13217 mode1 = insn_data[icode].operand[1].mode;
13218 mode2 = insn_data[icode].operand[2].mode;
13220 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13221 op0 = copy_to_mode_reg (mode1, op0);
13222 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13224 /* @@@ better error message */
13225 error ("mask must be an immediate");
13229 || GET_MODE (target) != tmode
13230 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13231 target = gen_reg_rtx (tmode);
13232 pat = GEN_FCN (icode) (target, op0, op1);
13238 case IX86_BUILTIN_FEMMS:
13239 emit_insn (gen_femms ());
13242 case IX86_BUILTIN_PAVGUSB:
13243 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13245 case IX86_BUILTIN_PF2ID:
13246 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13248 case IX86_BUILTIN_PFACC:
13249 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13251 case IX86_BUILTIN_PFADD:
13252 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13254 case IX86_BUILTIN_PFCMPEQ:
13255 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13257 case IX86_BUILTIN_PFCMPGE:
13258 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13260 case IX86_BUILTIN_PFCMPGT:
13261 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13263 case IX86_BUILTIN_PFMAX:
13264 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13266 case IX86_BUILTIN_PFMIN:
13267 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13269 case IX86_BUILTIN_PFMUL:
13270 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13272 case IX86_BUILTIN_PFRCP:
13273 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13275 case IX86_BUILTIN_PFRCPIT1:
13276 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13278 case IX86_BUILTIN_PFRCPIT2:
13279 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13281 case IX86_BUILTIN_PFRSQIT1:
13282 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13284 case IX86_BUILTIN_PFRSQRT:
13285 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13287 case IX86_BUILTIN_PFSUB:
13288 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13290 case IX86_BUILTIN_PFSUBR:
13291 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13293 case IX86_BUILTIN_PI2FD:
13294 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13296 case IX86_BUILTIN_PMULHRW:
13297 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13299 case IX86_BUILTIN_PF2IW:
13300 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13302 case IX86_BUILTIN_PFNACC:
13303 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13305 case IX86_BUILTIN_PFPNACC:
13306 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13308 case IX86_BUILTIN_PI2FW:
13309 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13311 case IX86_BUILTIN_PSWAPDSI:
13312 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13314 case IX86_BUILTIN_PSWAPDSF:
13315 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13317 case IX86_BUILTIN_SSE_ZERO:
13318 target = gen_reg_rtx (V4SFmode);
13319 emit_insn (gen_sse_clrv4sf (target));
13322 case IX86_BUILTIN_MMX_ZERO:
13323 target = gen_reg_rtx (DImode);
13324 emit_insn (gen_mmx_clrdi (target));
13327 case IX86_BUILTIN_SQRTSD:
13328 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13329 case IX86_BUILTIN_LOADAPD:
13330 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13331 case IX86_BUILTIN_LOADUPD:
13332 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13334 case IX86_BUILTIN_STOREAPD:
13335 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13336 case IX86_BUILTIN_STOREUPD:
13337 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13339 case IX86_BUILTIN_LOADSD:
13340 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13342 case IX86_BUILTIN_STORESD:
13343 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13345 case IX86_BUILTIN_SETPD1:
13346 target = assign_386_stack_local (DFmode, 0);
13347 arg0 = TREE_VALUE (arglist);
13348 emit_move_insn (adjust_address (target, DFmode, 0),
13349 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13350 op0 = gen_reg_rtx (V2DFmode);
13351 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13352 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13355 case IX86_BUILTIN_SETPD:
13356 target = assign_386_stack_local (V2DFmode, 0);
13357 arg0 = TREE_VALUE (arglist);
13358 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13359 emit_move_insn (adjust_address (target, DFmode, 0),
13360 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13361 emit_move_insn (adjust_address (target, DFmode, 8),
13362 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13363 op0 = gen_reg_rtx (V2DFmode);
13364 emit_insn (gen_sse2_movapd (op0, target));
13367 case IX86_BUILTIN_LOADRPD:
13368 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13369 gen_reg_rtx (V2DFmode), 1);
13370 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13373 case IX86_BUILTIN_LOADPD1:
13374 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13375 gen_reg_rtx (V2DFmode), 1);
13376 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13379 case IX86_BUILTIN_STOREPD1:
13380 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13381 case IX86_BUILTIN_STORERPD:
13382 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13384 case IX86_BUILTIN_MFENCE:
13385 emit_insn (gen_sse2_mfence ());
13387 case IX86_BUILTIN_LFENCE:
13388 emit_insn (gen_sse2_lfence ());
13391 case IX86_BUILTIN_CLFLUSH:
13392 arg0 = TREE_VALUE (arglist);
13393 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13394 icode = CODE_FOR_sse2_clflush;
13395 mode0 = insn_data[icode].operand[0].mode;
13396 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13397 op0 = copy_to_mode_reg (mode0, op0);
13399 emit_insn (gen_sse2_clflush (op0));
13402 case IX86_BUILTIN_MOVNTPD:
13403 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13404 case IX86_BUILTIN_MOVNTDQ:
13405 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13406 case IX86_BUILTIN_MOVNTI:
13407 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13413 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13414 if (d->code == fcode)
13416 /* Compares are treated specially. */
13417 if (d->icode == CODE_FOR_maskcmpv4sf3
13418 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13419 || d->icode == CODE_FOR_maskncmpv4sf3
13420 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13421 || d->icode == CODE_FOR_maskcmpv2df3
13422 || d->icode == CODE_FOR_vmmaskcmpv2df3
13423 || d->icode == CODE_FOR_maskncmpv2df3
13424 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13425 return ix86_expand_sse_compare (d, arglist, target);
13427 return ix86_expand_binop_builtin (d->icode, arglist, target);
13430 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13431 if (d->code == fcode)
13432 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13434 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13435 if (d->code == fcode)
13436 return ix86_expand_sse_comi (d, arglist, target);
13438 /* @@@ Should really do something sensible here. */
13442 /* Store OPERAND to the memory after reload is completed. This means
13443 that we can't easily use assign_stack_local. */
13445 ix86_force_to_memory (mode, operand)
13446 enum machine_mode mode;
13450 if (!reload_completed)
13452 if (TARGET_64BIT && TARGET_RED_ZONE)
13454 result = gen_rtx_MEM (mode,
13455 gen_rtx_PLUS (Pmode,
13457 GEN_INT (-RED_ZONE_SIZE)));
13458 emit_move_insn (result, operand);
13460 else if (TARGET_64BIT && !TARGET_RED_ZONE)
13466 operand = gen_lowpart (DImode, operand);
13470 gen_rtx_SET (VOIDmode,
13471 gen_rtx_MEM (DImode,
13472 gen_rtx_PRE_DEC (DImode,
13473 stack_pointer_rtx)),
13479 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13488 split_di (&operand, 1, operands, operands + 1);
13490 gen_rtx_SET (VOIDmode,
13491 gen_rtx_MEM (SImode,
13492 gen_rtx_PRE_DEC (Pmode,
13493 stack_pointer_rtx)),
13496 gen_rtx_SET (VOIDmode,
13497 gen_rtx_MEM (SImode,
13498 gen_rtx_PRE_DEC (Pmode,
13499 stack_pointer_rtx)),
13504 /* It is better to store HImodes as SImodes. */
13505 if (!TARGET_PARTIAL_REG_STALL)
13506 operand = gen_lowpart (SImode, operand);
13510 gen_rtx_SET (VOIDmode,
13511 gen_rtx_MEM (GET_MODE (operand),
13512 gen_rtx_PRE_DEC (SImode,
13513 stack_pointer_rtx)),
13519 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13524 /* Free operand from the memory. */
13526 ix86_free_from_memory (mode)
13527 enum machine_mode mode;
13529 if (!TARGET_64BIT || !TARGET_RED_ZONE)
13533 if (mode == DImode || TARGET_64BIT)
13535 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13539 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13540 to pop or add instruction if registers are available. */
13541 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13542 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13547 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13548 QImode must go into class Q_REGS.
13549 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
13550 movdf to do mem-to-mem moves through integer regs. */
13552 ix86_preferred_reload_class (x, class)
13554 enum reg_class class;
13556 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13558 /* SSE can't load any constant directly yet. */
13559 if (SSE_CLASS_P (class))
13561 /* Floats can load 0 and 1. */
13562 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13564 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13565 if (MAYBE_SSE_CLASS_P (class))
13566 return (reg_class_subset_p (class, GENERAL_REGS)
13567 ? GENERAL_REGS : FLOAT_REGS);
13571 /* General regs can load everything. */
13572 if (reg_class_subset_p (class, GENERAL_REGS))
13573 return GENERAL_REGS;
13574 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13575 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13578 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13580 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13585 /* If we are copying between general and FP registers, we need a memory
13586 location. The same is true for SSE and MMX registers.
13588 The macro can't work reliably when one of the CLASSES is class containing
13589 registers from multiple units (SSE, MMX, integer). We avoid this by never
13590 combining those units in single alternative in the machine description.
13591 Ensure that this constraint holds to avoid unexpected surprises.
13593 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13594 enforce these sanity checks. */
13596 ix86_secondary_memory_needed (class1, class2, mode, strict)
13597 enum reg_class class1, class2;
13598 enum machine_mode mode;
13601 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13602 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13603 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13604 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13605 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13606 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
13613 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13614 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13615 && (mode) != SImode)
13616 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13617 && (mode) != SImode));
13619 /* Return the cost of moving data from a register in class CLASS1 to
13620 one in class CLASS2.
13622 It is not required that the cost always equal 2 when FROM is the same as TO;
13623 on some machines it is expensive to move between registers if they are not
13624 general registers. */
13626 ix86_register_move_cost (mode, class1, class2)
13627 enum machine_mode mode;
13628 enum reg_class class1, class2;
13630 /* In case we require secondary memory, compute cost of the store followed
13631 by load. In order to avoid bad register allocation choices, we need
13632 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
13634 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
13638 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
13639 MEMORY_MOVE_COST (mode, class1, 1));
13640 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
13641 MEMORY_MOVE_COST (mode, class2, 1));
13643 /* In case of copying from general_purpose_register we may emit multiple
13644 stores followed by single load causing memory size mismatch stall.
13645 Count this as arbitarily high cost of 20. */
13646 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
13649 /* In the case of FP/MMX moves, the registers actually overlap, and we
13650 have to switch modes in order to treat them differently. */
13651 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
13652 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
13658 /* Moves between SSE/MMX and integer unit are expensive. */
13659 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13660 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
13661 return ix86_cost->mmxsse_to_integer;
13662 if (MAYBE_FLOAT_CLASS_P (class1))
13663 return ix86_cost->fp_move;
13664 if (MAYBE_SSE_CLASS_P (class1))
13665 return ix86_cost->sse_move;
13666 if (MAYBE_MMX_CLASS_P (class1))
13667 return ix86_cost->mmx_move;
13671 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13673 ix86_hard_regno_mode_ok (regno, mode)
13675 enum machine_mode mode;
13677 /* Flags and only flags can only hold CCmode values. */
13678 if (CC_REGNO_P (regno))
13679 return GET_MODE_CLASS (mode) == MODE_CC;
13680 if (GET_MODE_CLASS (mode) == MODE_CC
13681 || GET_MODE_CLASS (mode) == MODE_RANDOM
13682 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
13684 if (FP_REGNO_P (regno))
13685 return VALID_FP_MODE_P (mode);
13686 if (SSE_REGNO_P (regno))
13687 return VALID_SSE_REG_MODE (mode);
13688 if (MMX_REGNO_P (regno))
13689 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
13690 /* We handle both integer and floats in the general purpose registers.
13691 In future we should be able to handle vector modes as well. */
13692 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13694 /* Take care for QImode values - they can be in non-QI regs, but then
13695 they do cause partial register stalls. */
13696 if (regno < 4 || mode != QImode || TARGET_64BIT)
13698 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13701 /* Return the cost of moving data of mode M between a
13702 register and memory. A value of 2 is the default; this cost is
13703 relative to those in `REGISTER_MOVE_COST'.
13705 If moving between registers and memory is more expensive than
13706 between two registers, you should define this macro to express the
13709 Model also increased moving costs of QImode registers in non
13713 ix86_memory_move_cost (mode, class, in)
13714 enum machine_mode mode;
13715 enum reg_class class;
13718 if (FLOAT_CLASS_P (class))
13736 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
13738 if (SSE_CLASS_P (class))
13741 switch (GET_MODE_SIZE (mode))
13755 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
13757 if (MMX_CLASS_P (class))
13760 switch (GET_MODE_SIZE (mode))
13771 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
13773 switch (GET_MODE_SIZE (mode))
13777 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13778 : ix86_cost->movzbl_load);
13780 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13781 : ix86_cost->int_store[0] + 4);
13784 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13786 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
13787 if (mode == TFmode)
13789 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
13790 * (int) GET_MODE_SIZE (mode) / 4);
13794 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
13796 ix86_svr3_asm_out_constructor (symbol, priority)
13798 int priority ATTRIBUTE_UNUSED;
13801 fputs ("\tpushl $", asm_out_file);
13802 assemble_name (asm_out_file, XSTR (symbol, 0));
13803 fputc ('\n', asm_out_file);
13809 static int current_machopic_label_num;
13811 /* Given a symbol name and its associated stub, write out the
13812 definition of the stub. */
13815 machopic_output_stub (file, symb, stub)
13817 const char *symb, *stub;
13819 unsigned int length;
13820 char *binder_name, *symbol_name, lazy_ptr_name[32];
13821 int label = ++current_machopic_label_num;
13823 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
13824 symb = (*targetm.strip_name_encoding) (symb);
13826 length = strlen (stub);
13827 binder_name = alloca (length + 32);
13828 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
13830 length = strlen (symb);
13831 symbol_name = alloca (length + 32);
13832 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
13834 sprintf (lazy_ptr_name, "L%d$lz", label);
13837 machopic_picsymbol_stub_section ();
13839 machopic_symbol_stub_section ();
13841 fprintf (file, "%s:\n", stub);
13842 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
13846 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
13847 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
13848 fprintf (file, "\tjmp %%edx\n");
13851 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
13853 fprintf (file, "%s:\n", binder_name);
13857 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
13858 fprintf (file, "\tpushl %%eax\n");
13861 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
13863 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
13865 machopic_lazy_symbol_ptr_section ();
13866 fprintf (file, "%s:\n", lazy_ptr_name);
13867 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
13868 fprintf (file, "\t.long %s\n", binder_name);
13870 #endif /* TARGET_MACHO */
13872 /* Order the registers for register allocator. */
13875 x86_order_regs_for_local_alloc ()
13880 /* First allocate the local general purpose registers. */
13881 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13882 if (GENERAL_REGNO_P (i) && call_used_regs[i])
13883 reg_alloc_order [pos++] = i;
13885 /* Global general purpose registers. */
13886 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13887 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
13888 reg_alloc_order [pos++] = i;
13890 /* x87 registers come first in case we are doing FP math
13892 if (!TARGET_SSE_MATH)
13893 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13894 reg_alloc_order [pos++] = i;
13896 /* SSE registers. */
13897 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
13898 reg_alloc_order [pos++] = i;
13899 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
13900 reg_alloc_order [pos++] = i;
13902 /* x87 registerts. */
13903 if (TARGET_SSE_MATH)
13904 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13905 reg_alloc_order [pos++] = i;
13907 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
13908 reg_alloc_order [pos++] = i;
13910 /* Initialize the rest of array as we do not allocate some registers
13912 while (pos < FIRST_PSEUDO_REGISTER)
13913 reg_alloc_order [pos++] = 0;
13917 x86_output_mi_thunk (file, delta, function)
13925 if (ix86_regparm > 0)
13926 parm = TYPE_ARG_TYPES (TREE_TYPE (function));
13929 for (; parm; parm = TREE_CHAIN (parm))
13930 if (TREE_VALUE (parm) == void_type_node)
13933 xops[0] = GEN_INT (delta);
13936 int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
13937 xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
13938 output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
13941 fprintf (file, "\tjmp *");
13942 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13943 fprintf (file, "@GOTPCREL(%%rip)\n");
13947 fprintf (file, "\tjmp ");
13948 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13949 fprintf (file, "\n");
13955 xops[1] = gen_rtx_REG (SImode, 0);
13956 else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
13957 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
13959 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
13960 output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);
13964 xops[0] = pic_offset_table_rtx;
13965 xops[1] = gen_label_rtx ();
13966 xops[2] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
13968 if (ix86_regparm > 2)
13970 output_asm_insn ("push{l}\t%0", xops);
13971 output_asm_insn ("call\t%P1", xops);
13972 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
13973 output_asm_insn ("pop{l}\t%0", xops);
13975 ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
13976 xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
13978 ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
13979 asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
13980 asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
13984 fprintf (file, "\tjmp ");
13985 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13986 fprintf (file, "\n");
13992 x86_field_alignment (field, computed)
13996 enum machine_mode mode;
13997 tree type = TREE_TYPE (field);
13999 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
14001 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
14002 ? get_inner_array_type (type) : type);
14003 if (mode == DFmode || mode == DCmode
14004 || GET_MODE_CLASS (mode) == MODE_INT
14005 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
14006 return MIN (32, computed);
14010 /* Implement machine specific optimizations.
14011 At the moment we implement single transformation: AMD Athlon works faster
14012 when RET is not destination of conditional jump or directly preceeded
14013 by other jump instruction. We avoid the penalty by inserting NOP just
14014 before the RET instructions in such cases. */
14016 x86_machine_dependent_reorg (first)
14017 rtx first ATTRIBUTE_UNUSED;
14021 if (!TARGET_ATHLON || !optimize || optimize_size)
14023 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
14025 basic_block bb = e->src;
14028 bool insert = false;
14030 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
14032 prev = prev_nonnote_insn (ret);
14033 if (prev && GET_CODE (prev) == CODE_LABEL)
14036 for (e = bb->pred; e; e = e->pred_next)
14037 if (EDGE_FREQUENCY (e) && e->src->index > 0
14038 && !(e->flags & EDGE_FALLTHRU))
14043 prev = prev_real_insn (ret);
14044 if (prev && GET_CODE (prev) == JUMP_INSN
14045 && any_condjump_p (prev))
14049 emit_insn_before (gen_nop (), ret);
14053 #include "gt-i386.h"