1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
28 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "basic-block.h"
44 #include "target-def.h"
45 #include "langhooks.h"
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
51 /* Processor costs (relative to an add) */
53 struct processor_costs size_cost = { /* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
87 2, /* cost of FADD and FSUB insns. */
88 2, /* cost of FMUL instruction. */
89 2, /* cost of FDIV instruction. */
90 2, /* cost of FABS instruction. */
91 2, /* cost of FCHS instruction. */
92 2, /* cost of FSQRT instruction. */
95 /* Processor costs (relative to an add) */
97 struct processor_costs i386_cost = { /* 386 specific costs */
98 1, /* cost of an add instruction */
99 1, /* cost of a lea instruction */
100 3, /* variable shift costs */
101 2, /* constant shift costs */
102 6, /* cost of starting a multiply */
103 1, /* cost of multiply per each bit set */
104 23, /* cost of a divide/mod */
105 3, /* cost of movsx */
106 2, /* cost of movzx */
107 15, /* "large" insn */
109 4, /* cost for loading QImode using movzbl */
110 {2, 4, 2}, /* cost of loading integer registers
111 in QImode, HImode and SImode.
112 Relative to reg-reg move (2). */
113 {2, 4, 2}, /* cost of storing integer registers */
114 2, /* cost of reg,reg fld/fst */
115 {8, 8, 8}, /* cost of loading fp registers
116 in SFmode, DFmode and XFmode */
117 {8, 8, 8}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
118 2, /* cost of moving MMX register */
119 {4, 8}, /* cost of loading MMX registers
120 in SImode and DImode */
121 {4, 8}, /* cost of storing MMX registers
122 in SImode and DImode */
123 2, /* cost of moving SSE register */
124 {4, 8, 16}, /* cost of loading SSE registers
125 in SImode, DImode and TImode */
126 {4, 8, 16}, /* cost of storing SSE registers
127 in SImode, DImode and TImode */
128 3, /* MMX or SSE register to integer */
129 0, /* size of prefetch block */
130 0, /* number of parallel prefetches */
131 23, /* cost of FADD and FSUB insns. */
132 27, /* cost of FMUL instruction. */
133 88, /* cost of FDIV instruction. */
134 22, /* cost of FABS instruction. */
135 24, /* cost of FCHS instruction. */
136 122, /* cost of FSQRT instruction. */
140 struct processor_costs i486_cost = { /* 486 specific costs */
141 1, /* cost of an add instruction */
142 1, /* cost of a lea instruction */
143 3, /* variable shift costs */
144 2, /* constant shift costs */
145 12, /* cost of starting a multiply */
146 1, /* cost of multiply per each bit set */
147 40, /* cost of a divide/mod */
148 3, /* cost of movsx */
149 2, /* cost of movzx */
150 15, /* "large" insn */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
161 2, /* cost of moving MMX register */
162 {4, 8}, /* cost of loading MMX registers
163 in SImode and DImode */
164 {4, 8}, /* cost of storing MMX registers
165 in SImode and DImode */
166 2, /* cost of moving SSE register */
167 {4, 8, 16}, /* cost of loading SSE registers
168 in SImode, DImode and TImode */
169 {4, 8, 16}, /* cost of storing SSE registers
170 in SImode, DImode and TImode */
171 3, /* MMX or SSE register to integer */
172 0, /* size of prefetch block */
173 0, /* number of parallel prefetches */
174 8, /* cost of FADD and FSUB insns. */
175 16, /* cost of FMUL instruction. */
176 73, /* cost of FDIV instruction. */
177 3, /* cost of FABS instruction. */
178 3, /* cost of FCHS instruction. */
179 83, /* cost of FSQRT instruction. */
183 struct processor_costs pentium_cost = {
184 1, /* cost of an add instruction */
185 1, /* cost of a lea instruction */
186 4, /* variable shift costs */
187 1, /* constant shift costs */
188 11, /* cost of starting a multiply */
189 0, /* cost of multiply per each bit set */
190 25, /* cost of a divide/mod */
191 3, /* cost of movsx */
192 2, /* cost of movzx */
193 8, /* "large" insn */
195 6, /* cost for loading QImode using movzbl */
196 {2, 4, 2}, /* cost of loading integer registers
197 in QImode, HImode and SImode.
198 Relative to reg-reg move (2). */
199 {2, 4, 2}, /* cost of storing integer registers */
200 2, /* cost of reg,reg fld/fst */
201 {2, 2, 6}, /* cost of loading fp registers
202 in SFmode, DFmode and XFmode */
203 {4, 4, 6}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
204 8, /* cost of moving MMX register */
205 {8, 8}, /* cost of loading MMX registers
206 in SImode and DImode */
207 {8, 8}, /* cost of storing MMX registers
208 in SImode and DImode */
209 2, /* cost of moving SSE register */
210 {4, 8, 16}, /* cost of loading SSE registers
211 in SImode, DImode and TImode */
212 {4, 8, 16}, /* cost of storing SSE registers
213 in SImode, DImode and TImode */
214 3, /* MMX or SSE register to integer */
215 0, /* size of prefetch block */
216 0, /* number of parallel prefetches */
217 3, /* cost of FADD and FSUB insns. */
218 3, /* cost of FMUL instruction. */
219 39, /* cost of FDIV instruction. */
220 1, /* cost of FABS instruction. */
221 1, /* cost of FCHS instruction. */
222 70, /* cost of FSQRT instruction. */
226 struct processor_costs pentiumpro_cost = {
227 1, /* cost of an add instruction */
228 1, /* cost of a lea instruction */
229 1, /* variable shift costs */
230 1, /* constant shift costs */
231 4, /* cost of starting a multiply */
232 0, /* cost of multiply per each bit set */
233 17, /* cost of a divide/mod */
234 1, /* cost of movsx */
235 1, /* cost of movzx */
236 8, /* "large" insn */
238 2, /* cost for loading QImode using movzbl */
239 {4, 4, 4}, /* cost of loading integer registers
240 in QImode, HImode and SImode.
241 Relative to reg-reg move (2). */
242 {2, 2, 2}, /* cost of storing integer registers */
243 2, /* cost of reg,reg fld/fst */
244 {2, 2, 6}, /* cost of loading fp registers
245 in SFmode, DFmode and XFmode */
246 {4, 4, 6}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
247 2, /* cost of moving MMX register */
248 {2, 2}, /* cost of loading MMX registers
249 in SImode and DImode */
250 {2, 2}, /* cost of storing MMX registers
251 in SImode and DImode */
252 2, /* cost of moving SSE register */
253 {2, 2, 8}, /* cost of loading SSE registers
254 in SImode, DImode and TImode */
255 {2, 2, 8}, /* cost of storing SSE registers
256 in SImode, DImode and TImode */
257 3, /* MMX or SSE register to integer */
258 32, /* size of prefetch block */
259 6, /* number of parallel prefetches */
260 3, /* cost of FADD and FSUB insns. */
261 5, /* cost of FMUL instruction. */
262 56, /* cost of FDIV instruction. */
263 2, /* cost of FABS instruction. */
264 2, /* cost of FCHS instruction. */
265 56, /* cost of FSQRT instruction. */
269 struct processor_costs k6_cost = {
270 1, /* cost of an add instruction */
271 2, /* cost of a lea instruction */
272 1, /* variable shift costs */
273 1, /* constant shift costs */
274 3, /* cost of starting a multiply */
275 0, /* cost of multiply per each bit set */
276 18, /* cost of a divide/mod */
277 2, /* cost of movsx */
278 2, /* cost of movzx */
279 8, /* "large" insn */
281 3, /* cost for loading QImode using movzbl */
282 {4, 5, 4}, /* cost of loading integer registers
283 in QImode, HImode and SImode.
284 Relative to reg-reg move (2). */
285 {2, 3, 2}, /* cost of storing integer registers */
286 4, /* cost of reg,reg fld/fst */
287 {6, 6, 6}, /* cost of loading fp registers
288 in SFmode, DFmode and XFmode */
289 {4, 4, 4}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
290 2, /* cost of moving MMX register */
291 {2, 2}, /* cost of loading MMX registers
292 in SImode and DImode */
293 {2, 2}, /* cost of storing MMX registers
294 in SImode and DImode */
295 2, /* cost of moving SSE register */
296 {2, 2, 8}, /* cost of loading SSE registers
297 in SImode, DImode and TImode */
298 {2, 2, 8}, /* cost of storing SSE registers
299 in SImode, DImode and TImode */
300 6, /* MMX or SSE register to integer */
301 32, /* size of prefetch block */
302 1, /* number of parallel prefetches */
303 2, /* cost of FADD and FSUB insns. */
304 2, /* cost of FMUL instruction. */
305 56, /* cost of FDIV instruction. */
306 2, /* cost of FABS instruction. */
307 2, /* cost of FCHS instruction. */
308 56, /* cost of FSQRT instruction. */
312 struct processor_costs athlon_cost = {
313 1, /* cost of an add instruction */
314 2, /* cost of a lea instruction */
315 1, /* variable shift costs */
316 1, /* constant shift costs */
317 5, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 42, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 8, /* "large" insn */
324 4, /* cost for loading QImode using movzbl */
325 {3, 4, 3}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {3, 4, 3}, /* cost of storing integer registers */
329 4, /* cost of reg,reg fld/fst */
330 {4, 4, 12}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {6, 6, 8}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {4, 4}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {4, 4}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {4, 4, 6}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {4, 4, 5}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 5, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 4, /* cost of FADD and FSUB insns. */
347 4, /* cost of FMUL instruction. */
348 24, /* cost of FDIV instruction. */
349 2, /* cost of FABS instruction. */
350 2, /* cost of FCHS instruction. */
351 35, /* cost of FSQRT instruction. */
355 struct processor_costs pentium4_cost = {
356 1, /* cost of an add instruction */
357 1, /* cost of a lea instruction */
358 8, /* variable shift costs */
359 8, /* constant shift costs */
360 30, /* cost of starting a multiply */
361 0, /* cost of multiply per each bit set */
362 112, /* cost of a divide/mod */
363 1, /* cost of movsx */
364 1, /* cost of movzx */
365 16, /* "large" insn */
367 2, /* cost for loading QImode using movzbl */
368 {4, 5, 4}, /* cost of loading integer registers
369 in QImode, HImode and SImode.
370 Relative to reg-reg move (2). */
371 {2, 3, 2}, /* cost of storing integer registers */
372 2, /* cost of reg,reg fld/fst */
373 {2, 2, 6}, /* cost of loading fp registers
374 in SFmode, DFmode and XFmode */
375 {4, 4, 6}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
376 2, /* cost of moving MMX register */
377 {2, 2}, /* cost of loading MMX registers
378 in SImode and DImode */
379 {2, 2}, /* cost of storing MMX registers
380 in SImode and DImode */
381 12, /* cost of moving SSE register */
382 {12, 12, 12}, /* cost of loading SSE registers
383 in SImode, DImode and TImode */
384 {2, 2, 8}, /* cost of storing SSE registers
385 in SImode, DImode and TImode */
386 10, /* MMX or SSE register to integer */
387 64, /* size of prefetch block */
388 6, /* number of parallel prefetches */
389 5, /* cost of FADD and FSUB insns. */
390 7, /* cost of FMUL instruction. */
391 43, /* cost of FDIV instruction. */
392 2, /* cost of FABS instruction. */
393 2, /* cost of FCHS instruction. */
394 43, /* cost of FSQRT instruction. */
397 const struct processor_costs *ix86_cost = &pentium_cost; /* active cost table; presumably re-pointed per -mcpu= in option override code — confirm */
399 /* Processor feature/optimization bitmasks.  One m_* bit per processor;
   each x86_* tuning flag below is enabled for exactly the processors
   whose m_* bits are set in its mask.  */
400 #define m_386 (1<<PROCESSOR_I386)
401 #define m_486 (1<<PROCESSOR_I486)
402 #define m_PENT (1<<PROCESSOR_PENTIUM)
403 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
404 #define m_K6 (1<<PROCESSOR_K6)
405 #define m_ATHLON (1<<PROCESSOR_ATHLON)
406 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
408 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
409 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
410 const int x86_zero_extend_with_and = m_486 | m_PENT;
411 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
412 const int x86_double_with_add = ~m_386; /* complemented masks: enabled everywhere EXCEPT the listed CPUs */
413 const int x86_use_bit_test = m_386;
414 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
415 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
416 const int x86_3dnow_a = m_ATHLON;
417 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
418 const int x86_branch_hints = m_PENT4;
419 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
420 const int x86_partial_reg_stall = m_PPRO;
421 const int x86_use_loop = m_K6;
422 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
423 const int x86_use_mov0 = m_K6;
424 const int x86_use_cltd = ~(m_PENT | m_K6);
425 const int x86_read_modify_write = ~m_PENT;
426 const int x86_read_modify = ~(m_PENT | m_PPRO);
427 const int x86_split_long_moves = m_PPRO;
428 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
429 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
430 const int x86_single_stringop = m_386 | m_PENT4;
431 const int x86_qimode_math = ~(0); /* all processors */
432 const int x86_promote_qi_regs = 0; /* no processors */
433 const int x86_himode_math = ~(m_PPRO);
434 const int x86_promote_hi_regs = m_PPRO;
435 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
436 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
437 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
438 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
439 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
440 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
441 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
442 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
443 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
444 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
445 const int x86_decompose_lea = m_PENT4;
446 const int x86_shift1 = ~m_486;
447 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
449 /* In case the average insn count for single function invocation is
450 lower than this constant, emit fast (but longer) prologue and
452 #define FAST_PROLOGUE_INSN_COUNT 30
454 /* Set by prologue expander and used by epilogue expander to determine
456 static int use_fast_prologue_epilogue;
458 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
459 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
460 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
461 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
463 /* Array of the smallest class containing reg number REGNO, indexed by
464 REGNO. Used by REGNO_REG_CLASS in i386.h. */
466 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
469 AREG, DREG, CREG, BREG,
471 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
473 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
474 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
477 /* flags, fpsr, dirflag, frame */
478 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
479 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
481 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
483 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
484 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
485 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
489 /* The "default" register map used in 32bit mode. */
491 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
493 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
494 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
495 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
496 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
497 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
498 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
499 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* GCC register numbers of the integer argument registers, in the
   x86-64 psABI argument-passing order (RDI, RSI, RDX, RCX, R8, R9).  */
502 static int const x86_64_int_parameter_registers[6] =
504 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
505 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
/* GCC register numbers used for 64-bit return values.  Note that gcc
   regno 1 is DREG, i.e. RDX — the previous /*RDI*/ label was wrong.  */
508 static int const x86_64_int_return_registers[4] =
510 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
513 /* The "default" register map used in 64bit mode. */
514 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
516 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
517 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
518 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
519 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
520 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
521 8,9,10,11,12,13,14,15, /* extended integer registers */
522 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
525 /* Define the register numbers to be used in Dwarf debugging information.
526 The SVR4 reference port C compiler uses the following register numbers
527 in its Dwarf output code:
528 0 for %eax (gcc regno = 0)
529 1 for %ecx (gcc regno = 2)
530 2 for %edx (gcc regno = 1)
531 3 for %ebx (gcc regno = 3)
532 4 for %esp (gcc regno = 7)
533 5 for %ebp (gcc regno = 6)
534 6 for %esi (gcc regno = 4)
535 7 for %edi (gcc regno = 5)
536 The following three DWARF register numbers are never generated by
537 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
538 believes these numbers have these meanings.
539 8 for %eip (no gcc equivalent)
540 9 for %eflags (gcc regno = 17)
541 10 for %trapno (no gcc equivalent)
542 It is not at all clear how we should number the FP stack registers
543 for the x86 architecture. If the version of SDB on x86/svr4 were
544 a bit less brain dead with respect to floating-point then we would
545 have a precedent to follow with respect to DWARF register numbers
546 for x86 FP registers, but the SDB on x86/svr4 is so completely
547 broken with respect to FP registers that it is hardly worth thinking
548 of it as something to strive for compatibility with.
549 The version of x86/svr4 SDB I have at the moment does (partially)
550 seem to believe that DWARF register number 11 is associated with
551 the x86 register %st(0), but that's about all. Higher DWARF
552 register numbers don't seem to be associated with anything in
553 particular, and even for DWARF regno 11, SDB only seems to under-
554 stand that it should say that a variable lives in %st(0) (when
555 asked via an `=' command) if we said it was in DWARF regno 11,
556 but SDB still prints garbage when asked for the value of the
557 variable in question (via a `/' command).
558 (Also note that the labels SDB prints for various FP stack regs
559 when doing an `x' command are all wrong.)
560 Note that these problems generally don't affect the native SVR4
561 C compiler because it doesn't allow the use of -O with -g and
562 because when it is *not* optimizing, it allocates a memory
563 location for each floating-point variable, and the memory
564 location is what gets described in the DWARF AT_location
565 attribute for the variable in question.
566 Regardless of the severe mental illness of the x86/svr4 SDB, we
567 do something sensible here and we use the following DWARF
568 register numbers. Note that these are all stack-top-relative
570 11 for %st(0) (gcc regno = 8)
571 12 for %st(1) (gcc regno = 9)
572 13 for %st(2) (gcc regno = 10)
573 14 for %st(3) (gcc regno = 11)
574 15 for %st(4) (gcc regno = 12)
575 16 for %st(5) (gcc regno = 13)
576 17 for %st(6) (gcc regno = 14)
577 18 for %st(7) (gcc regno = 15)
579 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
581 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
582 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
583 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
584 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
585 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
586 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
587 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
590 /* Test and compare insns in i386.md store the information needed to
591 generate branch and scc insns here. */
593 rtx ix86_compare_op0 = NULL_RTX;
594 rtx ix86_compare_op1 = NULL_RTX;
596 /* The encoding characters for the four TLS models present in ELF. */
598 static char const tls_model_chars[] = " GLil";
600 #define MAX_386_STACK_LOCALS 3
601 /* Size of the register save area. */
602 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
604 /* Define the structure for the machine field in struct function. */
605 struct machine_function GTY(())
607 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
608 const char *some_ld_name;
609 int save_varrargs_registers;
610 int accesses_prev_frame;
613 #define ix86_stack_locals (cfun->machine->stack_locals)
614 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
616 /* Structure describing stack frame layout.
617 Stack grows downward:
623 saved frame pointer if frame_pointer_needed
624 <- HARD_FRAME_POINTER
630 > to_allocate <- FRAME_POINTER
642 int outgoing_arguments_size;
645 HOST_WIDE_INT to_allocate;
646 /* The offsets relative to ARG_POINTER. */
647 HOST_WIDE_INT frame_pointer_offset;
648 HOST_WIDE_INT hard_frame_pointer_offset;
649 HOST_WIDE_INT stack_pointer_offset;
652 /* Used to enable/disable debugging features. */
653 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
654 /* Code model option as passed by user. */
655 const char *ix86_cmodel_string;
657 enum cmodel ix86_cmodel;
659 const char *ix86_asm_string;
660 enum asm_dialect ix86_asm_dialect = ASM_ATT;
662 const char *ix86_tls_dialect_string;
663 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
665 /* Which unit we are generating floating point math for. */
666 enum fpmath_unit ix86_fpmath;
668 /* Which cpu are we scheduling for. */
669 enum processor_type ix86_cpu;
670 /* Which instruction set architecture to use. */
671 enum processor_type ix86_arch;
673 /* Strings to hold which cpu and instruction set architecture to use. */
674 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
675 const char *ix86_arch_string; /* for -march=<xxx> */
676 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
678 /* # of registers to use to pass arguments. */
679 const char *ix86_regparm_string;
681 /* true if sse prefetch instruction is not NOOP. */
682 int x86_prefetch_sse;
684 /* ix86_regparm_string as a number */
687 /* Alignment to use for loops and jumps: */
689 /* Power of two alignment for loops. */
690 const char *ix86_align_loops_string;
692 /* Power of two alignment for non-loop jumps. */
693 const char *ix86_align_jumps_string;
695 /* Power of two alignment for stack boundary in bytes. */
696 const char *ix86_preferred_stack_boundary_string;
698 /* Preferred alignment for stack boundary in bits. */
699 int ix86_preferred_stack_boundary;
701 /* Values 1-5: see jump.c */
702 int ix86_branch_cost;
703 const char *ix86_branch_cost_string;
705 /* Power of two alignment for functions. */
706 const char *ix86_align_funcs_string;
708 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
709 static char internal_label_prefix[16];
710 static int internal_label_prefix_len;
712 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
713 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
714 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
715 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
717 static const char *get_some_local_dynamic_name PARAMS ((void));
718 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
719 static rtx maybe_get_pool_constant PARAMS ((rtx));
720 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
721 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
723 static rtx get_thread_pointer PARAMS ((void));
724 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
725 static rtx gen_push PARAMS ((rtx));
726 static int memory_address_length PARAMS ((rtx addr));
727 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
728 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
729 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
730 static void ix86_dump_ppro_packet PARAMS ((FILE *));
731 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
732 static struct machine_function * ix86_init_machine_status PARAMS ((void));
733 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
734 static int ix86_nsaved_regs PARAMS ((void));
735 static void ix86_emit_save_regs PARAMS ((void));
736 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
737 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
738 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
739 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
740 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
741 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
742 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
743 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
744 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
745 static int ix86_issue_rate PARAMS ((void));
746 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
747 static void ix86_sched_init PARAMS ((FILE *, int, int));
748 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
749 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
750 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
751 static int ia32_multipass_dfa_lookahead PARAMS ((void));
752 static void ix86_init_mmx_sse_builtins PARAMS ((void));
753 static rtx x86_this_parameter PARAMS ((tree));
754 static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
755 HOST_WIDE_INT, tree));
756 static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
757 HOST_WIDE_INT, tree));
761 rtx base, index, disp;
765 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
767 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
768 static const char *ix86_strip_name_encoding PARAMS ((const char *))
771 struct builtin_description;
772 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
774 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
776 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
777 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
778 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
779 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
780 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
781 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
782 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
786 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
788 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
789 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
790 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
791 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
792 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
793 static int ix86_save_reg PARAMS ((unsigned int, int));
794 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
795 static int ix86_comp_type_attributes PARAMS ((tree, tree));
796 static int ix86_fntype_regparm PARAMS ((tree));
797 const struct attribute_spec ix86_attribute_table[];
798 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
799 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
800 static int ix86_value_regno PARAMS ((enum machine_mode));
802 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
803 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
806 /* Register class used for passing given 64bit part of the argument.
807 These represent classes as documented by the PS ABI, with the exception
808 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
809 use SF or DFmode move instead of DImode to avoid reformatting penalties.
811 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
812 whenever possible (upper half does contain padding).
814 enum x86_64_reg_class
817 X86_64_INTEGER_CLASS,
818 X86_64_INTEGERSI_CLASS,
827 static const char * const x86_64_reg_class_name[] =
828 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
830 #define MAX_CLASSES 4
831 static int classify_argument PARAMS ((enum machine_mode, tree,
832 enum x86_64_reg_class [MAX_CLASSES],
834 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
836 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
838 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
839 enum x86_64_reg_class));
/* Initialization of the target-hook vector: each #undef/#define pair
   overrides one slot of TARGET_INITIALIZER with this backend's handler,
   and the `targetm' definition at the end instantiates the vector.
   NOTE(review): some lines are missing from this copy (e.g. the #endif
   closing the TARGET_DLLIMPORT_DECL_ATTRIBUTES block at original line
   847, and the guards around the TLS and MI-thunk sections).  */
841 /* Initialize the GCC target structure. */
842 #undef TARGET_ATTRIBUTE_TABLE
843 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
844 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
845 # undef TARGET_MERGE_DECL_ATTRIBUTES
846 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
849 #undef TARGET_COMP_TYPE_ATTRIBUTES
850 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
852 #undef TARGET_INIT_BUILTINS
853 #define TARGET_INIT_BUILTINS ix86_init_builtins
855 #undef TARGET_EXPAND_BUILTIN
856 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
858 #undef TARGET_ASM_FUNCTION_EPILOGUE
859 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
861 #undef TARGET_ASM_OPEN_PAREN
862 #define TARGET_ASM_OPEN_PAREN ""
863 #undef TARGET_ASM_CLOSE_PAREN
864 #define TARGET_ASM_CLOSE_PAREN ""
866 #undef TARGET_ASM_ALIGNED_HI_OP
867 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
868 #undef TARGET_ASM_ALIGNED_SI_OP
869 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
871 #undef TARGET_ASM_ALIGNED_DI_OP
872 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
875 #undef TARGET_ASM_UNALIGNED_HI_OP
876 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
877 #undef TARGET_ASM_UNALIGNED_SI_OP
878 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
879 #undef TARGET_ASM_UNALIGNED_DI_OP
880 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
882 #undef TARGET_SCHED_ADJUST_COST
883 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
884 #undef TARGET_SCHED_ISSUE_RATE
885 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
886 #undef TARGET_SCHED_VARIABLE_ISSUE
887 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
888 #undef TARGET_SCHED_INIT
889 #define TARGET_SCHED_INIT ix86_sched_init
890 #undef TARGET_SCHED_REORDER
891 #define TARGET_SCHED_REORDER ix86_sched_reorder
892 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
893 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
894 ia32_use_dfa_pipeline_interface
895 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
896 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
897 ia32_multipass_dfa_lookahead
900 #undef TARGET_HAVE_TLS
901 #define TARGET_HAVE_TLS true
904 #undef TARGET_ASM_OUTPUT_MI_THUNK
905 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
906 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
907 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
/* The one and only target-hook vector for this backend.  */
909 struct gcc_target targetm = TARGET_INITIALIZER;
/* Interior of `override_options': validates the -mcpu/-march/-mcmodel/
   -masm/-mregparm/-malign-*/-mpreferred-stack-boundary/-mbranch-cost/
   -mtls-dialect/-mfpmath option strings and derives target_flags and
   alignment defaults from the selected processor_target_table entry.
   NOTE(review): the residual line numbers jump (918 -> 924, 1029 -> 1033,
   ...), so the function's own declaration lines, the struct headers for
   processor_target_table / processor_alias_table, many `else' branches
   and closing braces are missing from this copy.  Do not compile as-is.  */
911 /* Sometimes certain combinations of command options do not make
912 sense on a particular target machine. You can define a macro
913 `OVERRIDE_OPTIONS' to take account of this. This macro, if
914 defined, is executed once just after all the command options have
917 Don't use this macro to turn on various extra optimizations for
918 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
924 /* Comes from final.c -- no real reason to change it. */
925 #define MAX_CODE_ALIGN 16
/* Per-processor tuning table: costs, flag masks and default alignments,
   indexed by enum processor_type.  The struct header lines are missing
   from this copy.  */
929 const struct processor_costs *cost; /* Processor costs */
930 const int target_enable; /* Target flags to enable. */
931 const int target_disable; /* Target flags to disable. */
932 const int align_loop; /* Default alignments. */
933 const int align_loop_max_skip;
934 const int align_jump;
935 const int align_jump_max_skip;
936 const int align_func;
937 const int branch_cost;
939 const processor_target_table[PROCESSOR_max] =
941 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
942 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
943 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
944 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
945 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
946 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
947 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
950 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
/* -mcpu=/-march= alias table: maps a name to a processor and a mask of
   PTA_* capability bits.  */
953 const char *const name; /* processor name or nickname. */
954 const enum processor_type processor;
960 PTA_PREFETCH_SSE = 8,
965 const processor_alias_table[] =
967 {"i386", PROCESSOR_I386, 0},
968 {"i486", PROCESSOR_I486, 0},
969 {"i586", PROCESSOR_PENTIUM, 0},
970 {"pentium", PROCESSOR_PENTIUM, 0},
971 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
972 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
973 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
974 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
975 {"i686", PROCESSOR_PENTIUMPRO, 0},
976 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
977 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
978 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
979 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
980 PTA_MMX | PTA_PREFETCH_SSE},
981 {"k6", PROCESSOR_K6, PTA_MMX},
982 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
983 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
984 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
986 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
987 | PTA_3DNOW | PTA_3DNOW_A},
988 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
989 | PTA_3DNOW_A | PTA_SSE},
990 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
991 | PTA_3DNOW_A | PTA_SSE},
992 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
993 | PTA_3DNOW_A | PTA_SSE},
996 int const pta_size = ARRAY_SIZE (processor_alias_table);
998 /* By default our XFmode is the 80-bit extended format. If we have
999 use TFmode instead, it's also the 80-bit format, but with padding. */
1000 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1001 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1003 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1004 SUBTARGET_OVERRIDE_OPTIONS;
/* Fill in default option strings: -mcpu falls back to -march, then to
   the configure-time default; -march defaults per bitness.  */
1007 if (!ix86_cpu_string && ix86_arch_string)
1008 ix86_cpu_string = ix86_arch_string;
1009 if (!ix86_cpu_string)
1010 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
1011 if (!ix86_arch_string)
1012 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
1014 if (ix86_cmodel_string != 0)
1016 if (!strcmp (ix86_cmodel_string, "small"))
1017 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1019 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1020 else if (!strcmp (ix86_cmodel_string, "32"))
1021 ix86_cmodel = CM_32;
1022 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1023 ix86_cmodel = CM_KERNEL;
1024 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1025 ix86_cmodel = CM_MEDIUM;
1026 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1027 ix86_cmodel = CM_LARGE;
1029 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1033 ix86_cmodel = CM_32;
1035 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1037 if (ix86_asm_string != 0)
1039 if (!strcmp (ix86_asm_string, "intel"))
1040 ix86_asm_dialect = ASM_INTEL;
1041 else if (!strcmp (ix86_asm_string, "att"))
1042 ix86_asm_dialect = ASM_ATT;
1044 error ("bad value (%s) for -masm= switch", ix86_asm_string);
/* Cross-check the chosen code model against the compiled-in bitness.  */
1046 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1047 error ("code model `%s' not supported in the %s bit mode",
1048 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1049 if (ix86_cmodel == CM_LARGE)
1050 sorry ("code model `large' not supported yet");
1051 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1052 sorry ("%i-bit mode not compiled in",
1053 (target_flags & MASK_64BIT) ? 64 : 32);
/* Resolve -march: pick the alias entry and turn on the capability
   flags it implies, unless the user set them explicitly.  */
1055 for (i = 0; i < pta_size; i++)
1056 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1058 ix86_arch = processor_alias_table[i].processor;
1059 /* Default cpu tuning to the architecture. */
1060 ix86_cpu = ix86_arch;
1061 if (processor_alias_table[i].flags & PTA_MMX
1062 && !(target_flags_explicit & MASK_MMX))
1063 target_flags |= MASK_MMX;
1064 if (processor_alias_table[i].flags & PTA_3DNOW
1065 && !(target_flags_explicit & MASK_3DNOW))
1066 target_flags |= MASK_3DNOW;
1067 if (processor_alias_table[i].flags & PTA_3DNOW_A
1068 && !(target_flags_explicit & MASK_3DNOW_A))
1069 target_flags |= MASK_3DNOW_A;
1070 if (processor_alias_table[i].flags & PTA_SSE
1071 && !(target_flags_explicit & MASK_SSE))
1072 target_flags |= MASK_SSE;
1073 if (processor_alias_table[i].flags & PTA_SSE2
1074 && !(target_flags_explicit & MASK_SSE2))
1075 target_flags |= MASK_SSE2;
1076 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1077 x86_prefetch_sse = true;
1082 error ("bad value (%s) for -march= switch", ix86_arch_string);
/* Resolve -mcpu (tuning) the same way.  */
1084 for (i = 0; i < pta_size; i++)
1085 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1087 ix86_cpu = processor_alias_table[i].processor;
1090 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1091 x86_prefetch_sse = true;
1093 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1096 ix86_cost = &size_cost;
1098 ix86_cost = processor_target_table[ix86_cpu].cost;
1099 target_flags |= processor_target_table[ix86_cpu].target_enable;
1100 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1102 /* Arrange to set up i386_stack_locals for all functions. */
1103 init_machine_status = ix86_init_machine_status;
1105 /* Validate -mregparm= value. */
1106 if (ix86_regparm_string)
1108 i = atoi (ix86_regparm_string);
1109 if (i < 0 || i > REGPARM_MAX)
1110 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1116 ix86_regparm = REGPARM_MAX;
1118 /* If the user has provided any of the -malign-* options,
1119 warn and use that value only if -falign-* is not set.
1120 Remove this code in GCC 3.2 or later. */
1121 if (ix86_align_loops_string)
1123 warning ("-malign-loops is obsolete, use -falign-loops");
1124 if (align_loops == 0)
1126 i = atoi (ix86_align_loops_string);
1127 if (i < 0 || i > MAX_CODE_ALIGN)
1128 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1130 align_loops = 1 << i;
1134 if (ix86_align_jumps_string)
1136 warning ("-malign-jumps is obsolete, use -falign-jumps");
1137 if (align_jumps == 0)
1139 i = atoi (ix86_align_jumps_string);
1140 if (i < 0 || i > MAX_CODE_ALIGN)
/* NOTE(review): this diagnostic says -malign-loops but validates
   -malign-jumps; same copy/paste in the -malign-functions case below.  */
1141 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1143 align_jumps = 1 << i;
1147 if (ix86_align_funcs_string)
1149 warning ("-malign-functions is obsolete, use -falign-functions");
1150 if (align_functions == 0)
1152 i = atoi (ix86_align_funcs_string);
1153 if (i < 0 || i > MAX_CODE_ALIGN)
1154 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1156 align_functions = 1 << i;
1160 /* Default align_* from the processor table. */
1161 if (align_loops == 0)
1163 align_loops = processor_target_table[ix86_cpu].align_loop;
1164 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1166 if (align_jumps == 0)
1168 align_jumps = processor_target_table[ix86_cpu].align_jump;
1169 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1171 if (align_functions == 0)
1173 align_functions = processor_target_table[ix86_cpu].align_func;
1176 /* Validate -mpreferred-stack-boundary= value, or provide default.
1177 The default of 128 bits is for Pentium III's SSE __m128, but we
1178 don't want additional code to keep the stack aligned when
1179 optimizing for code size. */
1180 ix86_preferred_stack_boundary = (optimize_size
1181 ? TARGET_64BIT ? 128 : 32
1183 if (ix86_preferred_stack_boundary_string)
1185 i = atoi (ix86_preferred_stack_boundary_string);
1186 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1187 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1188 TARGET_64BIT ? 4 : 2);
1190 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1193 /* Validate -mbranch-cost= value, or provide default. */
1194 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1195 if (ix86_branch_cost_string)
1197 i = atoi (ix86_branch_cost_string);
1199 error ("-mbranch-cost=%d is not between 0 and 5", i);
1201 ix86_branch_cost = i;
1204 if (ix86_tls_dialect_string)
1206 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1207 ix86_tls_dialect = TLS_DIALECT_GNU;
1208 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1209 ix86_tls_dialect = TLS_DIALECT_SUN;
1211 error ("bad value (%s) for -mtls-dialect= switch",
1212 ix86_tls_dialect_string);
1216 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1218 /* Keep nonleaf frame pointers. */
1219 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1220 flag_omit_frame_pointer = 1;
1222 /* If we're doing fast math, we don't care about comparison order
1223 wrt NaNs. This lets us use a shorter comparison sequence. */
1224 if (flag_unsafe_math_optimizations)
1225 target_flags &= ~MASK_IEEE_FP;
1227 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1228 since the insns won't need emulation. */
1229 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1230 target_flags &= ~MASK_NO_FANCY_MATH_387;
1234 if (TARGET_ALIGN_DOUBLE)
1235 error ("-malign-double makes no sense in the 64bit mode");
1237 error ("-mrtd calling convention not supported in the 64bit mode");
1238 /* Enable by default the SSE and MMX builtins. */
1239 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1240 ix86_fpmath = FPMATH_SSE;
1243 ix86_fpmath = FPMATH_387;
1245 if (ix86_fpmath_string != 0)
1247 if (! strcmp (ix86_fpmath_string, "387"))
1248 ix86_fpmath = FPMATH_387;
1249 else if (! strcmp (ix86_fpmath_string, "sse"))
1253 warning ("SSE instruction set disabled, using 387 arithmetics");
1254 ix86_fpmath = FPMATH_387;
1257 ix86_fpmath = FPMATH_SSE;
1259 else if (! strcmp (ix86_fpmath_string, "387,sse")
1260 || ! strcmp (ix86_fpmath_string, "sse,387"))
1264 warning ("SSE instruction set disabled, using 387 arithmetics");
1265 ix86_fpmath = FPMATH_387;
1267 else if (!TARGET_80387)
1269 warning ("387 instruction set disabled, using SSE arithmetics");
1270 ix86_fpmath = FPMATH_SSE;
1273 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1276 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1279 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1283 target_flags |= MASK_MMX;
1284 x86_prefetch_sse = true;
1287 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1290 target_flags |= MASK_MMX;
1291 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1292 extensions it adds. */
1293 if (x86_3dnow_a & (1 << ix86_arch))
1294 target_flags |= MASK_3DNOW_A;
1296 if ((x86_accumulate_outgoing_args & CPUMASK)
1297 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1299 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1301 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1304 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1305 p = strchr (internal_label_prefix, 'X');
1306 internal_label_prefix_len = p - internal_label_prefix;
/* OPTIMIZATION_OPTIONS hook: adjust optimization flags for -O LEVEL.
   NOTE(review): the return-type line, the LEVEL parameter declaration,
   braces and several guarding conditions (the `if (level > 1)' around
   flag_schedule_insns, the TARGET_64BIT block) are missing from this
   copy.  */
1312 optimization_options (level, size)
1314 int size ATTRIBUTE_UNUSED;
1316 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1317 make the problem with not enough registers even worse. */
1318 #ifdef INSN_SCHEDULING
1320 flag_schedule_insns = 0;
1322 if (TARGET_64BIT && optimize >= 1)
1323 flag_omit_frame_pointer = 1;
1326 flag_pcc_struct_return = 0;
1327 flag_asynchronous_unwind_tables = 1;
1330 flag_omit_frame_pointer = 0;
/* Attribute table consumed through TARGET_ATTRIBUTE_TABLE; each row is
   a struct attribute_spec (see the field-name key comment below).
   NOTE(review): the initializer's opening brace (original line 1335),
   the #endif closing the DLL-attribute section (1349) and the closing
   `};' are missing from this copy.  */
1333 /* Table of valid machine attributes. */
1334 const struct attribute_spec ix86_attribute_table[] =
1336 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1337 /* Stdcall attribute says callee is responsible for popping arguments
1338 if they are not variable. */
1339 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1340 /* Cdecl attribute says the callee is a normal C declaration */
1341 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1342 /* Regparm attribute specifies how many integer arguments are to be
1343 passed in registers. */
1344 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1345 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1346 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1347 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1348 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
/* Terminator row required by the attribute-table walker.  */
1350 { NULL, 0, 0, false, false, false, NULL }
1353 /* Handle a "cdecl" or "stdcall" attribute;
1354 arguments as in struct attribute_spec.handler. */
1356 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1359 tree args ATTRIBUTE_UNUSED;
1360 int flags ATTRIBUTE_UNUSED;
1363 if (TREE_CODE (*node) != FUNCTION_TYPE
1364 && TREE_CODE (*node) != METHOD_TYPE
1365 && TREE_CODE (*node) != FIELD_DECL
1366 && TREE_CODE (*node) != TYPE_DECL)
1368 warning ("`%s' attribute only applies to functions",
1369 IDENTIFIER_POINTER (name));
1370 *no_add_attrs = true;
1375 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1376 *no_add_attrs = true;
1382 /* Handle a "regparm" attribute;
1383 arguments as in struct attribute_spec.handler. */
1385 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1389 int flags ATTRIBUTE_UNUSED;
1392 if (TREE_CODE (*node) != FUNCTION_TYPE
1393 && TREE_CODE (*node) != METHOD_TYPE
1394 && TREE_CODE (*node) != FIELD_DECL
1395 && TREE_CODE (*node) != TYPE_DECL)
1397 warning ("`%s' attribute only applies to functions",
1398 IDENTIFIER_POINTER (name));
1399 *no_add_attrs = true;
1405 cst = TREE_VALUE (args);
1406 if (TREE_CODE (cst) != INTEGER_CST)
1408 warning ("`%s' attribute requires an integer constant argument",
1409 IDENTIFIER_POINTER (name));
1410 *no_add_attrs = true;
1412 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1414 warning ("argument to `%s' attribute larger than %d",
1415 IDENTIFIER_POINTER (name), REGPARM_MAX);
1416 *no_add_attrs = true;
1423 /* Return 0 if the attributes for two types are incompatible, 1 if they
1424 are compatible, and 2 if they are nearly compatible (which causes a
1425 warning to be generated). */
1428 ix86_comp_type_attributes (type1, type2)
1432 /* Check for mismatch of non-default calling convention. */
1433 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1435 if (TREE_CODE (type1) != FUNCTION_TYPE)
1438 /* Check for mismatched return types (cdecl vs stdcall). */
1439 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1440 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1445 /* Return the regparm value for a fuctio with the indicated TYPE. */
1448 ix86_fntype_regparm (type)
1453 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1455 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1457 return ix86_regparm;
1460 /* Value is the number of bytes of arguments automatically
1461 popped when returning from a subroutine call.
1462 FUNDECL is the declaration node of the function (as a tree),
1463 FUNTYPE is the data type of the function (as a tree),
1464 or for a library call it is an identifier node for the subroutine name.
1465 SIZE is the number of bytes of arguments passed on the stack.
1467 On the 80386, the RTD insn may be used to pop them if the number
1468 of args is fixed, but if the number is variable then the caller
1469 must pop them all. RTD can't be used for library calls now
1470 because the library is compiled with the Unix compiler.
1471 Use of RTD is a selectable option, since it is incompatible with
1472 standard Unix calling sequences. If the option is not selected,
1473 the caller must always pop the args.
1475 The attribute stdcall is equivalent to RTD on a per module basis. */
1478 ix86_return_pops_args (fundecl, funtype, size)
1483 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1485 /* Cdecl functions override -mrtd, and never pop the stack. */
1486 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1488 /* Stdcall functions will pop the stack if not variable args. */
1489 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1493 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1494 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1495 == void_type_node)))
1499 /* Lose any fake structure return argument if it is passed on the stack. */
1500 if (aggregate_value_p (TREE_TYPE (funtype))
1503 int nregs = ix86_fntype_regparm (funtype);
1506 return GET_MODE_SIZE (Pmode);
1512 /* Argument support functions. */
1514 /* Return true when register may be used to pass function parameters. */
1516 ix86_function_arg_regno_p (regno)
1521 return (regno < REGPARM_MAX
1522 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1523 if (SSE_REGNO_P (regno) && TARGET_SSE)
1525 /* RAX is used as hidden argument to va_arg functions. */
1528 for (i = 0; i < REGPARM_MAX; i++)
1529 if (regno == x86_64_int_parameter_registers[i])
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.
   NOTE(review): the function's declaration line, several braces, the
   `*cum = zero_cum;' reset and parts of the debug-print block are
   missing from this copy (residual line numbers jump 1545 -> 1547,
   1578 -> 1582, ...).  */
1539 init_cumulative_args (cum, fntype, libname)
1540 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1541 tree fntype; /* tree ptr for function decl */
1542 rtx libname; /* SYMBOL_REF of library name or 0 */
1544 static CUMULATIVE_ARGS zero_cum;
1545 tree param, next_param;
1547 if (TARGET_DEBUG_ARG)
1549 fprintf (stderr, "\ninit_cumulative_args (");
1551 fprintf (stderr, "fntype code = %s, ret code = %s",
1552 tree_code_name[(int) TREE_CODE (fntype)],
1553 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1555 fprintf (stderr, "no fntype");
1558 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1563 /* Set up the number of registers to use for passing arguments. */
1564 cum->nregs = ix86_regparm;
1565 cum->sse_nregs = SSE_REGPARM_MAX;
/* A "regparm" attribute on the called type overrides -mregparm,
   32-bit only.  */
1566 if (fntype && !TARGET_64BIT)
1568 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1571 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1573 cum->maybe_vaarg = false;
1575 /* Determine if this function has variable arguments. This is
1576 indicated by the last argument being 'void_type_mode' if there
1577 are no variable arguments. If there are variable arguments, then
1578 we won't pass anything in registers */
1582 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1583 param != 0; param = next_param)
1585 next_param = TREE_CHAIN (param);
1586 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1590 cum->maybe_vaarg = true;
/* Prototype-less calls and libcalls without type info may be
   varargs.  */
1594 if ((!fntype && !libname)
1595 || (fntype && !TYPE_ARG_TYPES (fntype)))
1596 cum->maybe_vaarg = 1;
1598 if (TARGET_DEBUG_ARG)
1599 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1604 /* x86-64 register passing impleemntation. See x86-64 ABI for details. Goal
1605 of this code is to classify each 8bytes of incoming argument by the register
1606 class and assign registers accordingly. */
1608 /* Return the union class of CLASS1 and CLASS2.
1609 See the x86-64 PS ABI for details. */
1611 static enum x86_64_reg_class
1612 merge_classes (class1, class2)
1613 enum x86_64_reg_class class1, class2;
1615 /* Rule #1: If both classes are equal, this is the resulting class. */
1616 if (class1 == class2)
1619 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1621 if (class1 == X86_64_NO_CLASS)
1623 if (class2 == X86_64_NO_CLASS)
1626 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1627 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1628 return X86_64_MEMORY_CLASS;
1630 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1631 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1632 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1633 return X86_64_INTEGERSI_CLASS;
1634 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1635 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1636 return X86_64_INTEGER_CLASS;
1638 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1639 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1640 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1641 return X86_64_MEMORY_CLASS;
1643 /* Rule #6: Otherwise class SSE is used. */
1644 return X86_64_SSE_CLASS;
/* Recursive classifier for x86-64 argument passing (see the block
   comment below retained from the original).
   NOTE(review): this copy is heavily damaged -- the residual line
   numbers jump constantly, so the function declaration tail, many
   braces, `return 0;'/`return words;' statements and most of the
   atomic-type switch's case labels are missing.  Do not attempt to
   compile without restoring from a pristine i386.c.  */
1647 /* Classify the argument of type TYPE and mode MODE.
1648 CLASSES will be filled by the register class used to pass each word
1649 of the operand. The number of words is returned. In case the parameter
1650 should be passed in memory, 0 is returned. As a special case for zero
1651 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1653 BIT_OFFSET is used internally for handling records and specifies offset
1654 of the offset in bits modulo 256 to avoid overflow cases.
1656 See the x86-64 PS ABI for details.
1660 classify_argument (mode, type, classes, bit_offset)
1661 enum machine_mode mode;
1663 enum x86_64_reg_class classes[MAX_CLASSES];
1667 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1668 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1670 /* Variable sized entities are always passed/returned in memory. */
1674 if (type && AGGREGATE_TYPE_P (type))
1678 enum x86_64_reg_class subclasses[MAX_CLASSES];
1680 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1684 for (i = 0; i < words; i++)
1685 classes[i] = X86_64_NO_CLASS;
1687 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1688 signal memory class, so handle it as special case. */
1691 classes[0] = X86_64_NO_CLASS;
1695 /* Classify each field of record and merge classes. */
1696 if (TREE_CODE (type) == RECORD_TYPE)
1698 /* For classes first merge in the field of the subclasses. */
1699 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1701 tree bases = TYPE_BINFO_BASETYPES (type);
1702 int n_bases = TREE_VEC_LENGTH (bases);
1705 for (i = 0; i < n_bases; ++i)
1707 tree binfo = TREE_VEC_ELT (bases, i);
1709 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1710 tree type = BINFO_TYPE (binfo);
1712 num = classify_argument (TYPE_MODE (type),
1714 (offset + bit_offset) % 256);
1717 for (i = 0; i < num; i++)
1719 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1721 merge_classes (subclasses[i], classes[i + pos]);
1725 /* And now merge the fields of structure. */
1726 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1728 if (TREE_CODE (field) == FIELD_DECL)
1732 /* Bitfields are always classified as integer. Handle them
1733 early, since later code would consider them to be
1734 misaligned integers. */
1735 if (DECL_BIT_FIELD (field))
1737 for (i = int_bit_position (field) / 8 / 8;
1738 i < (int_bit_position (field)
1739 + tree_low_cst (DECL_SIZE (field), 0)
1742 merge_classes (X86_64_INTEGER_CLASS,
1747 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1748 TREE_TYPE (field), subclasses,
1749 (int_bit_position (field)
1750 + bit_offset) % 256);
1753 for (i = 0; i < num; i++)
1756 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1758 merge_classes (subclasses[i], classes[i + pos]);
1764 /* Arrays are handled as small records. */
1765 else if (TREE_CODE (type) == ARRAY_TYPE)
1768 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1769 TREE_TYPE (type), subclasses, bit_offset);
1773 /* The partial classes are now full classes. */
1774 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1775 subclasses[0] = X86_64_SSE_CLASS;
1776 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1777 subclasses[0] = X86_64_INTEGER_CLASS;
1779 for (i = 0; i < words; i++)
1780 classes[i] = subclasses[i % num];
1782 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1783 else if (TREE_CODE (type) == UNION_TYPE
1784 || TREE_CODE (type) == QUAL_UNION_TYPE)
1786 /* For classes first merge in the field of the subclasses. */
1787 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1789 tree bases = TYPE_BINFO_BASETYPES (type);
1790 int n_bases = TREE_VEC_LENGTH (bases);
1793 for (i = 0; i < n_bases; ++i)
1795 tree binfo = TREE_VEC_ELT (bases, i);
1797 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1798 tree type = BINFO_TYPE (binfo);
1800 num = classify_argument (TYPE_MODE (type),
1802 (offset + (bit_offset % 64)) % 256);
1805 for (i = 0; i < num; i++)
1807 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1809 merge_classes (subclasses[i], classes[i + pos]);
1813 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1815 if (TREE_CODE (field) == FIELD_DECL)
1818 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1819 TREE_TYPE (field), subclasses,
1823 for (i = 0; i < num; i++)
1824 classes[i] = merge_classes (subclasses[i], classes[i]);
1831 /* Final merger cleanup. */
1832 for (i = 0; i < words; i++)
1834 /* If one class is MEMORY, everything should be passed in
1836 if (classes[i] == X86_64_MEMORY_CLASS)
1839 /* The X86_64_SSEUP_CLASS should be always preceded by
1840 X86_64_SSE_CLASS. */
1841 if (classes[i] == X86_64_SSEUP_CLASS
1842 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1843 classes[i] = X86_64_SSE_CLASS;
1845 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1846 if (classes[i] == X86_64_X87UP_CLASS
1847 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1848 classes[i] = X86_64_SSE_CLASS;
1853 /* Compute alignment needed. We align all types to natural boundaries with
1854 exception of XFmode that is aligned to 64bits. */
1855 if (mode != VOIDmode && mode != BLKmode)
1857 int mode_alignment = GET_MODE_BITSIZE (mode);
1860 mode_alignment = 128;
1861 else if (mode == XCmode)
1862 mode_alignment = 256;
1863 /* Misaligned fields are always returned in memory. */
1864 if (bit_offset % mode_alignment)
1868 /* Classification of atomic types.
   NOTE(review): the switch head and most case labels of this section
   are missing from this copy; only scattered class assignments
   survive below.  */
1878 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1879 classes[0] = X86_64_INTEGERSI_CLASS;
1881 classes[0] = X86_64_INTEGER_CLASS;
1885 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1888 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1889 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1892 if (!(bit_offset % 64))
1893 classes[0] = X86_64_SSESF_CLASS;
1895 classes[0] = X86_64_SSE_CLASS;
1898 classes[0] = X86_64_SSEDF_CLASS;
1901 classes[0] = X86_64_X87_CLASS;
1902 classes[1] = X86_64_X87UP_CLASS;
1905 classes[0] = X86_64_X87_CLASS;
1906 classes[1] = X86_64_X87UP_CLASS;
1907 classes[2] = X86_64_X87_CLASS;
1908 classes[3] = X86_64_X87UP_CLASS;
1911 classes[0] = X86_64_SSEDF_CLASS;
1912 classes[1] = X86_64_SSEDF_CLASS;
1915 classes[0] = X86_64_SSE_CLASS;
1923 classes[0] = X86_64_SSE_CLASS;
1924 classes[1] = X86_64_SSEUP_CLASS;
1939 /* Examine the argument and return set number of register required in each
1940 class. Return 0 iff parameter should be passed in memory. */
1942 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1943 enum machine_mode mode;
1945 int *int_nregs, *sse_nregs;
1948 enum x86_64_reg_class class[MAX_CLASSES];
1949 int n = classify_argument (mode, type, class, 0);
1955 for (n--; n >= 0; n--)
1958 case X86_64_INTEGER_CLASS:
1959 case X86_64_INTEGERSI_CLASS:
1962 case X86_64_SSE_CLASS:
1963 case X86_64_SSESF_CLASS:
1964 case X86_64_SSEDF_CLASS:
1967 case X86_64_NO_CLASS:
1968 case X86_64_SSEUP_CLASS:
1970 case X86_64_X87_CLASS:
1971 case X86_64_X87UP_CLASS:
1975 case X86_64_MEMORY_CLASS:
/* Build the rtx (REG or PARALLEL) describing where an argument of MODE
   and TYPE lives, given the classification from classify_argument.
   NOTE(review): many structural lines are missing from this copy --
   parameter declarations, the `switch (class[0])' heads, `return NULL;'
   statements, braces and parts of the debug-print block (residual line
   numbers jump 1999 -> 2002, 2044 -> 2047, ...).  Restore from a
   pristine i386.c before compiling.  */
1980 /* Construct container for the argument used by GCC interface. See
1981 FUNCTION_ARG for the detailed description. */
1983 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1984 enum machine_mode mode;
1987 int nintregs, nsseregs;
1991 enum machine_mode tmpmode;
1993 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1994 enum x86_64_reg_class class[MAX_CLASSES];
1998 int needed_sseregs, needed_intregs;
1999 rtx exp[MAX_CLASSES];
2002 n = classify_argument (mode, type, class, 0);
2003 if (TARGET_DEBUG_ARG)
2006 fprintf (stderr, "Memory class\n");
2009 fprintf (stderr, "Classes:");
2010 for (i = 0; i < n; i++)
2012 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2014 fprintf (stderr, "\n");
/* Fall back to memory when the argument does not fit the remaining
   registers.  */
2019 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2021 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2024 /* First construct simple cases. Avoid SCmode, since we want to use
2025 single register to pass this type. */
2026 if (n == 1 && mode != SCmode)
2029 case X86_64_INTEGER_CLASS:
2030 case X86_64_INTEGERSI_CLASS:
2031 return gen_rtx_REG (mode, intreg[0]);
2032 case X86_64_SSE_CLASS:
2033 case X86_64_SSESF_CLASS:
2034 case X86_64_SSEDF_CLASS:
2035 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2036 case X86_64_X87_CLASS:
2037 return gen_rtx_REG (mode, FIRST_STACK_REG);
2038 case X86_64_NO_CLASS:
2039 /* Zero sized array, struct or class. */
2044 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2045 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2047 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2048 return gen_rtx_REG (TFmode, FIRST_STACK_REG)
2049 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2050 && class[1] == X86_64_INTEGER_CLASS
2051 && (mode == CDImode || mode == TImode)
2052 && intreg[0] + 1 == intreg[1])
2053 return gen_rtx_REG (mode, intreg[0]);
2055 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2056 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
2057 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2059 /* Otherwise figure out the entries of the PARALLEL. */
2060 for (i = 0; i < n; i++)
2064 case X86_64_NO_CLASS:
2066 case X86_64_INTEGER_CLASS:
2067 case X86_64_INTEGERSI_CLASS:
2068 /* Merge TImodes on aligned occasions here too. */
2069 if (i * 8 + 8 > bytes)
2070 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2071 else if (class[i] == X86_64_INTEGERSI_CLASS)
2075 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2076 if (tmpmode == BLKmode)
2078 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2079 gen_rtx_REG (tmpmode, *intreg),
2083 case X86_64_SSESF_CLASS:
2084 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2085 gen_rtx_REG (SFmode,
2086 SSE_REGNO (sse_regno)),
2090 case X86_64_SSEDF_CLASS:
2091 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2092 gen_rtx_REG (DFmode,
2093 SSE_REGNO (sse_regno)),
2097 case X86_64_SSE_CLASS:
2098 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
2099 tmpmode = TImode, i++;
2102 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2103 gen_rtx_REG (tmpmode,
2104 SSE_REGNO (sse_regno)),
/* Wrap the collected EXPR_LIST entries in a PARALLEL.  */
2112 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2113 for (i = 0; i < nexps; i++)
2114 XVECEXP (ret, 0, i) = exp [i];
2118 /* Update the data in CUM to advance over an argument
2119 of mode MODE and data type TYPE.
2120 (TYPE is null for libcalls where that information may not be available.) */
/* NOTE(review): elided excerpt -- intermediate original lines are missing
   from this view; only the visible bookkeeping is documented.  */
2123 function_arg_advance (cum, mode, type, named)
2124 CUMULATIVE_ARGS *cum; /* current arg information */
2125 enum machine_mode mode; /* current arg mode */
2126 tree type; /* type of the argument or 0 if lib support */
2127 int named; /* whether or not the argument was named */
/* Argument size in bytes, rounded up to whole words below.  */
2130 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2131 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2133 if (TARGET_DEBUG_ARG)
2135 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2136 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
/* x86-64 path (presumably -- TODO confirm against elided guard): either the
   argument goes on the stack (advance words) or it consumes the int/SSE
   registers reported by examine_argument.  */
2139 int int_nregs, sse_nregs;
2140 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2141 cum->words += words;
2142 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2144 cum->nregs -= int_nregs;
2145 cum->sse_nregs -= sse_nregs;
2146 cum->regno += int_nregs;
2147 cum->sse_regno += sse_nregs;
2150 cum->words += words;
/* i386 path: TImode vectors consume one SSE register when SSE is on.  */
2154 if (TARGET_SSE && mode == TImode)
2156 cum->sse_words += words;
2157 cum->sse_nregs -= 1;
2158 cum->sse_regno += 1;
2159 if (cum->sse_nregs <= 0)
/* Otherwise consume general registers word by word.  */
2167 cum->words += words;
2168 cum->nregs -= words;
2169 cum->regno += words;
2171 if (cum->nregs <= 0)
2181 /* Define where to put the arguments to a function.
2182 Value is zero to push the argument on the stack,
2183 or a hard register in which to store the argument.
2185 MODE is the argument's machine mode.
2186 TYPE is the data type of the argument (as a tree).
2187 This is null for libcalls where that information may
2189 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2190 the preceding args and about the function being called.
2191 NAMED is nonzero if this argument is a named parameter
2192 (otherwise it is an extra parameter matching an ellipsis). */
/* NOTE(review): elided excerpt -- intermediate original lines are missing
   from this view; only the visible code paths are documented.  */
2195 function_arg (cum, mode, type, named)
2196 CUMULATIVE_ARGS *cum; /* current arg information */
2197 enum machine_mode mode; /* current arg mode */
2198 tree type; /* type of the argument or 0 if lib support */
2199 int named; /* != 0 for normal args, == 0 for ... args */
2203 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2204 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2206 /* Handle an hidden AL argument containing number of registers for varargs
2207 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2209 if (mode == VOIDmode)
2212 return GEN_INT (cum->maybe_vaarg
2213 ? (cum->sse_nregs < 0
/* x86-64: delegate register assignment to construct_container.  */
2221 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2222 &x86_64_int_parameter_registers [cum->regno],
2227 /* For now, pass fp/complex values on the stack. */
/* i386 regparm path: use the next general (or SSE) register if it fits.  */
2236 if (words <= cum->nregs)
2237 ret = gen_rtx_REG (mode, cum->regno);
2241 ret = gen_rtx_REG (mode, cum->sse_regno);
2245 if (TARGET_DEBUG_ARG)
2248 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2249 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2252 print_simple_rtl (stderr, ret);
2254 fprintf (stderr, ", stack");
2256 fprintf (stderr, " )\n");
2262 /* Gives the alignment boundary, in bits, of an argument with the specified mode
/* NOTE(review): elided excerpt -- intermediate original lines are missing
   from this view.  Visible logic: type alignment (or mode alignment when no
   type), clamped to at least PARM_BOUNDARY.  */
2266 ix86_function_arg_boundary (mode, type)
2267 enum machine_mode mode;
2272 return PARM_BOUNDARY;
2274 align = TYPE_ALIGN (type);
2276 align = GET_MODE_ALIGNMENT (mode);
2277 if (align < PARM_BOUNDARY)
2278 align = PARM_BOUNDARY;
2284 /* Return true if N is a possible register number of function value. */
/* Two variants are visible: presumably one per ABI (64-bit vs 32-bit) --
   TODO confirm against the elided TARGET_64BIT guard.  */
2286 ix86_function_value_regno_p (regno)
2291 return ((regno) == 0
2292 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2293 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2295 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2296 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2297 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2300 /* Define how to find the value returned by a function.
2301 VALTYPE is the data type of the value (as a tree).
2302 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2303 otherwise, FUNC is 0. */
2305 ix86_function_value (valtype)
/* x86-64: reuse the argument classifier (in_return = 1) with the full
   register budget and the return-register table.  */
2310 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2311 REGPARM_MAX, SSE_REGPARM_MAX,
2312 x86_64_int_return_registers, 0);
2313 /* For zero sized structures, construct_continer return NULL, but we need
2314 to keep rest of compiler happy by returning meaningfull value. */
2316 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2320 return gen_rtx_REG (TYPE_MODE (valtype),
2321 ix86_value_regno (TYPE_MODE (valtype)));
2324 /* Return false iff type is returned in memory. */
2326 ix86_return_in_memory (type)
2329 int needed_intregs, needed_sseregs;
/* x86-64: in memory exactly when the psABI classifier says so.  */
2332 return !examine_argument (TYPE_MODE (type), type, 1,
2333 &needed_intregs, &needed_sseregs);
/* i386 heuristics: BLKmode aggregates, 8-byte vectors, and large
   non-vector types (except TImode/TFmode) go in memory.  */
2337 if (TYPE_MODE (type) == BLKmode
2338 || (VECTOR_MODE_P (TYPE_MODE (type))
2339 && int_size_in_bytes (type) == 8)
2340 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2341 && TYPE_MODE (type) != TFmode
2342 && !VECTOR_MODE_P (TYPE_MODE (type))))
2348 /* Define how to find the value returned by a library function
2349 assuming the value has mode MODE. */
2351 ix86_libcall_value (mode)
2352 enum machine_mode mode;
/* Visible returns: SSE reg, x87 reg, or general reg 0, chosen by elided
   mode dispatch; otherwise defer to ix86_value_regno.  */
2362 return gen_rtx_REG (mode, FIRST_SSE_REG);
2365 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2367 return gen_rtx_REG (mode, 0);
2371 return gen_rtx_REG (mode, ix86_value_regno (mode));
2374 /* Given a mode, return the register to use for a return value. */
2377 ix86_value_regno (mode)
2378 enum machine_mode mode;
2380 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2381 return FIRST_FLOAT_REG;
2382 if (mode == TImode || VECTOR_MODE_P (mode))
2383 return FIRST_SSE_REG;
2387 /* Create the va_list data type. */
/* NOTE(review): elided excerpt -- some original lines are missing here.
   Visible behavior: i386 gets a plain char*, x86-64 builds the psABI
   four-field __va_list_tag record wrapped in a one-element array type.  */
2390 ix86_build_va_list ()
2392 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2394 /* For i386 we use plain pointer to argument area. */
2396 return build_pointer_type (char_type_node);
2398 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2399 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
/* The four psABI va_list fields: gp/fp save-area offsets, overflow
   (stack) area pointer, and register save area pointer.  */
2401 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2402 unsigned_type_node);
2403 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2404 unsigned_type_node);
2405 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2407 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2410 DECL_FIELD_CONTEXT (f_gpr) = record;
2411 DECL_FIELD_CONTEXT (f_fpr) = record;
2412 DECL_FIELD_CONTEXT (f_ovf) = record;
2413 DECL_FIELD_CONTEXT (f_sav) = record;
/* Chain fields onto the record and lay it out.  */
2415 TREE_CHAIN (record) = type_decl;
2416 TYPE_NAME (record) = type_decl;
2417 TYPE_FIELDS (record) = f_gpr;
2418 TREE_CHAIN (f_gpr) = f_fpr;
2419 TREE_CHAIN (f_fpr) = f_ovf;
2420 TREE_CHAIN (f_ovf) = f_sav;
2422 layout_type (record);
2424 /* The correct type is an array type of one element. */
2425 return build_array_type (record, build_index_type (size_zero_node));
2428 /* Perform any needed actions needed for a function that is receiving a
2429 variable number of arguments.
2433 MODE and TYPE are the mode and type of the current parameter.
2435 PRETEND_SIZE is a variable that should be set to the amount of stack
2436 that must be pushed by the prolog to pretend that our caller pushed
2439 Normally, this macro will push all remaining incoming registers on the
2440 stack and set PRETEND_SIZE to the length of the registers pushed. */
/* NOTE(review): elided excerpt -- intermediate original lines are missing
   from this view; only the visible code is documented.  */
2443 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2444 CUMULATIVE_ARGS *cum;
2445 enum machine_mode mode;
2447 int *pretend_size ATTRIBUTE_UNUSED;
2451 CUMULATIVE_ARGS next_cum;
2452 rtx save_area = NULL_RTX, mem;
2465 /* Indicate to allocate space on the stack for varargs save area. */
2466 ix86_save_varrargs_registers = 1;
2468 fntype = TREE_TYPE (current_function_decl);
/* stdarg_p: the prototype ends in "..." after at least one named arg
   (last arg type is not void).  */
2469 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2470 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2471 != void_type_node));
2473 /* For varargs, we do not want to skip the dummy va_dcl argument.
2474 For stdargs, we do want to skip the last named argument. */
2477 function_arg_advance (&next_cum, mode, type, 1);
2480 save_area = frame_pointer_rtx;
2482 set = get_varargs_alias_set ();
/* Spill the remaining unnamed integer argument registers into the
   register save area.  */
2484 for (i = next_cum.regno; i < ix86_regparm; i++)
2486 mem = gen_rtx_MEM (Pmode,
2487 plus_constant (save_area, i * UNITS_PER_WORD));
2488 set_mem_alias_set (mem, set);
2489 emit_move_insn (mem, gen_rtx_REG (Pmode,
2490 x86_64_int_parameter_registers[i]));
2493 if (next_cum.sse_nregs)
2495 /* Now emit code to save SSE registers. The AX parameter contains number
2496 of SSE parameter regsiters used to call this function. We use
2497 sse_prologue_save insn template that produces computed jump across
2498 SSE saves. We need some preparation work to get this working. */
2500 label = gen_label_rtx ();
2501 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2503 /* Compute address to jump to :
2504 label - 5*eax + nnamed_sse_arguments*5 */
2505 tmp_reg = gen_reg_rtx (Pmode);
2506 nsse_reg = gen_reg_rtx (Pmode);
/* AL (QImode reg 0) holds the number of SSE registers actually used by
   the caller; zero-extend it for the address arithmetic.  */
2507 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2508 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2509 gen_rtx_MULT (Pmode, nsse_reg,
2511 if (next_cum.sse_regno)
2514 gen_rtx_CONST (DImode,
2515 gen_rtx_PLUS (DImode,
2517 GEN_INT (next_cum.sse_regno * 4))));
2519 emit_move_insn (nsse_reg, label_ref);
2520 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2522 /* Compute address of memory block we save into. We always use pointer
2523 pointing 127 bytes after first byte to store - this is needed to keep
2524 instruction size limited by 4 bytes. */
2525 tmp_reg = gen_reg_rtx (Pmode);
2526 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2527 plus_constant (save_area,
2528 8 * REGPARM_MAX + 127)));
2529 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2530 set_mem_alias_set (mem, set);
2531 set_mem_align (mem, BITS_PER_WORD);
2533 /* And finally do the dirty job! */
2534 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2535 GEN_INT (next_cum.sse_regno), label));
2540 /* Implement va_start. */
/* NOTE(review): elided excerpt -- intermediate original lines are missing.
   Visible behavior: i386 delegates to the generic expander; x86-64 fills
   the four __va_list_tag fields (gp_offset, fp_offset, overflow_arg_area,
   reg_save_area) from the current function's argument bookkeeping.  */
2543 ix86_va_start (valist, nextarg)
2547 HOST_WIDE_INT words, n_gpr, n_fpr;
2548 tree f_gpr, f_fpr, f_ovf, f_sav;
2549 tree gpr, fpr, ovf, sav, t;
2551 /* Only 64bit target needs something special. */
2554 std_expand_builtin_va_start (valist, nextarg);
/* Locate the four fields of __va_list_tag in declaration order.  */
2558 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2559 f_fpr = TREE_CHAIN (f_gpr);
2560 f_ovf = TREE_CHAIN (f_fpr);
2561 f_sav = TREE_CHAIN (f_ovf);
2563 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2564 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2565 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2566 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2567 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2569 /* Count number of gp and fp argument registers used. */
2570 words = current_function_args_info.words;
2571 n_gpr = current_function_args_info.regno;
2572 n_fpr = current_function_args_info.sse_regno;
2574 if (TARGET_DEBUG_ARG)
2575 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2576 (int) words, (int) n_gpr, (int) n_fpr);
/* gp_offset = 8 bytes per consumed integer register.  */
2578 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2579 build_int_2 (n_gpr * 8, 0));
2580 TREE_SIDE_EFFECTS (t) = 1;
2581 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset = 16 bytes per SSE register, past the 8*REGPARM_MAX int area.
   (The "+ 8" here looks suspicious vs. n_fpr*16 + 8*REGPARM_MAX exactly;
   elided context prevents confirming -- NOTE(review).)  */
2583 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2584 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2585 TREE_SIDE_EFFECTS (t) = 1;
2586 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2588 /* Find the overflow area. */
2589 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2591 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2592 build_int_2 (words * UNITS_PER_WORD, 0));
2593 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2594 TREE_SIDE_EFFECTS (t) = 1;
2595 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2597 /* Find the register save area.
2598 Prologue of the function save it right above stack frame. */
2599 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2600 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2601 TREE_SIDE_EFFECTS (t) = 1;
2602 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2605 /* Implement va_arg. */
/* NOTE(review): elided excerpt -- intermediate original lines are missing
   from this view; only the visible code is documented.  Visible flow:
   i386 uses the generic expander; x86-64 fetches the argument either from
   the register save area (with runtime bounds checks on gp/fp offsets)
   or from the stack overflow area.  */
2607 ix86_va_arg (valist, type)
2610 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2611 tree f_gpr, f_fpr, f_ovf, f_sav;
2612 tree gpr, fpr, ovf, sav, t;
2614 rtx lab_false, lab_over = NULL_RTX;
2618 /* Only 64bit target needs something special. */
2621 return std_expand_builtin_va_arg (valist, type);
/* Locate the four __va_list_tag fields, as in ix86_va_start.  */
2624 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2625 f_fpr = TREE_CHAIN (f_gpr);
2626 f_ovf = TREE_CHAIN (f_fpr);
2627 f_sav = TREE_CHAIN (f_ovf);
2629 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2630 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2631 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2632 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2633 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2635 size = int_size_in_bytes (type);
2636 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
/* Classify the type as if it were a (non-return) argument.  */
2638 container = construct_container (TYPE_MODE (type), type, 0,
2639 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2641 * Pull the value out of the saved registers ...
2644 addr_rtx = gen_reg_rtx (Pmode);
2648 rtx int_addr_rtx, sse_addr_rtx;
2649 int needed_intregs, needed_sseregs;
2652 lab_over = gen_label_rtx ();
2653 lab_false = gen_label_rtx ();
2655 examine_argument (TYPE_MODE (type), type, 0,
2656 &needed_intregs, &needed_sseregs);
/* Over-aligned types cannot be read directly out of the save area.  */
2659 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2660 || TYPE_ALIGN (type) > 128);
2662 /* In case we are passing structure, verify that it is consetuctive block
2663 on the register save area. If not we need to do moves. */
2664 if (!need_temp && !REG_P (container))
2666 /* Verify that all registers are strictly consetuctive */
2667 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2671 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2673 rtx slot = XVECEXP (container, 0, i);
2674 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2675 || INTVAL (XEXP (slot, 1)) != i * 16)
2683 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2685 rtx slot = XVECEXP (container, 0, i);
2686 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2687 || INTVAL (XEXP (slot, 1)) != i * 8)
/* Without a temporary, int and SSE pieces share the result address.  */
2694 int_addr_rtx = addr_rtx;
2695 sse_addr_rtx = addr_rtx;
2699 int_addr_rtx = gen_reg_rtx (Pmode);
2700 sse_addr_rtx = gen_reg_rtx (Pmode);
2702 /* First ensure that we fit completely in registers. */
2705 emit_cmp_and_jump_insns (expand_expr
2706 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2707 GEN_INT ((REGPARM_MAX - needed_intregs +
2708 1) * 8), GE, const1_rtx, SImode,
2713 emit_cmp_and_jump_insns (expand_expr
2714 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2715 GEN_INT ((SSE_REGPARM_MAX -
2716 needed_sseregs + 1) * 16 +
2717 REGPARM_MAX * 8), GE, const1_rtx,
2718 SImode, 1, lab_false);
2721 /* Compute index to start of area used for integer regs. */
2724 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2725 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2726 if (r != int_addr_rtx)
2727 emit_move_insn (int_addr_rtx, r);
2731 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2732 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2733 if (r != sse_addr_rtx)
2734 emit_move_insn (sse_addr_rtx, r);
2741 /* Never use the memory itself, as it has the alias set. */
2742 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2743 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2744 set_mem_alias_set (mem, get_varargs_alias_set ());
2745 set_mem_align (mem, BITS_PER_UNIT);
/* Copy each register-sized piece from its save-area slot into the
   temporary, at the offset recorded in the PARALLEL.  */
2747 for (i = 0; i < XVECLEN (container, 0); i++)
2749 rtx slot = XVECEXP (container, 0, i);
2750 rtx reg = XEXP (slot, 0);
2751 enum machine_mode mode = GET_MODE (reg);
2757 if (SSE_REGNO_P (REGNO (reg)))
2759 src_addr = sse_addr_rtx;
2760 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2764 src_addr = int_addr_rtx;
2765 src_offset = REGNO (reg) * 8;
2767 src_mem = gen_rtx_MEM (mode, src_addr);
2768 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2769 src_mem = adjust_address (src_mem, mode, src_offset);
2770 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2771 emit_move_insn (dest_mem, src_mem);
/* Advance gp_offset / fp_offset past the consumed registers.  */
2778 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2779 build_int_2 (needed_intregs * 8, 0));
2780 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2781 TREE_SIDE_EFFECTS (t) = 1;
2782 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2787 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2788 build_int_2 (needed_sseregs * 16, 0));
2789 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2790 TREE_SIDE_EFFECTS (t) = 1;
2791 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2794 emit_jump_insn (gen_jump (lab_over));
2796 emit_label (lab_false);
2799 /* ... otherwise out of the overflow area. */
2801 /* Care for on-stack alignment if needed. */
2802 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
/* Round the overflow pointer up to the argument's boundary.  */
2806 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2807 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2808 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2812 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2814 emit_move_insn (addr_rtx, r);
/* Bump overflow_arg_area past the argument just fetched.  */
2817 build (PLUS_EXPR, TREE_TYPE (t), t,
2818 build_int_2 (rsize * UNITS_PER_WORD, 0));
2819 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2820 TREE_SIDE_EFFECTS (t) = 1;
2821 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2824 emit_label (lab_over);
2829 /* Return nonzero if OP is either a i387 or SSE fp register. */
/* NOTE(review): elided excerpt -- declarator lines and braces are missing
   from this view for the predicates below; comments describe only the
   visible tests.  */
2831 any_fp_register_operand (op, mode)
2833 enum machine_mode mode ATTRIBUTE_UNUSED;
2835 return ANY_FP_REG_P (op);
2838 /* Return nonzero if OP is an i387 fp register. */
2840 fp_register_operand (op, mode)
2842 enum machine_mode mode ATTRIBUTE_UNUSED;
2844 return FP_REG_P (op);
2847 /* Return nonzero if OP is a non-fp register_operand. */
2849 register_and_not_any_fp_reg_operand (op, mode)
2851 enum machine_mode mode;
2853 return register_operand (op, mode) && !ANY_FP_REG_P (op);
2856 /* Return nonzero of OP is a register operand other than an
2857 i387 fp register. */
2859 register_and_not_fp_reg_operand (op, mode)
2861 enum machine_mode mode;
2863 return register_operand (op, mode) && !FP_REG_P (op);
2866 /* Return nonzero if OP is general operand representable on x86_64. */
/* On x86-64, immediates must be 32-bit sign-extendable; non-immediates
   pass through unchecked.  */
2869 x86_64_general_operand (op, mode)
2871 enum machine_mode mode;
2874 return general_operand (op, mode);
2875 if (nonimmediate_operand (op, mode))
2877 return x86_64_sign_extended_value (op, 1);
2880 /* Return nonzero if OP is general operand representable on x86_64
2881 as either sign extended or zero extended constant. */
2884 x86_64_szext_general_operand (op, mode)
2886 enum machine_mode mode;
2889 return general_operand (op, mode);
2890 if (nonimmediate_operand (op, mode))
2892 return x86_64_sign_extended_value (op, 1) || x86_64_zero_extended_value (op);
2895 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2898 x86_64_nonmemory_operand (op, mode)
2900 enum machine_mode mode;
2903 return nonmemory_operand (op, mode);
2904 if (register_operand (op, mode))
2906 return x86_64_sign_extended_value (op, 1);
2909 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
/* movabs can take a full 64-bit immediate, but under PIC symbolic
   references are rejected.  */
2912 x86_64_movabs_operand (op, mode)
2914 enum machine_mode mode;
2916 if (!TARGET_64BIT || !flag_pic)
2917 return nonmemory_operand (op, mode);
2918 if (register_operand (op, mode) || x86_64_sign_extended_value (op, 0))
2920 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2925 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2928 x86_64_szext_nonmemory_operand (op, mode)
2930 enum machine_mode mode;
2933 return nonmemory_operand (op, mode);
2934 if (register_operand (op, mode))
2936 return x86_64_sign_extended_value (op, 0) || x86_64_zero_extended_value (op);
2939 /* Return nonzero if OP is immediate operand representable on x86_64. */
2942 x86_64_immediate_operand (op, mode)
2944 enum machine_mode mode;
2947 return immediate_operand (op, mode);
2948 return x86_64_sign_extended_value (op, 0);
2951 /* Return nonzero if OP is immediate operand representable on x86_64. */
2954 x86_64_zext_immediate_operand (op, mode)
2956 enum machine_mode mode ATTRIBUTE_UNUSED;
2958 return x86_64_zero_extended_value (op);
2961 /* Return nonzero if OP is (const_int 1), else return zero. */
2964 const_int_1_operand (op, mode)
2966 enum machine_mode mode ATTRIBUTE_UNUSED;
2968 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2971 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2972 for shift & compare patterns, as shifting by 0 does not change flags),
2973 else return zero. */
2976 const_int_1_31_operand (op, mode)
2978 enum machine_mode mode ATTRIBUTE_UNUSED;
2980 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
2983 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2984 reference and a constant. */
2987 symbolic_operand (op, mode)
2989 enum machine_mode mode ATTRIBUTE_UNUSED;
2991 switch (GET_CODE (op))
/* Inside a CONST wrapper: accept plain symbols/labels, PIC unspecs,
   and symbol+CONST_INT sums (offsets only allowed on @GOTOFF unspecs).  */
2999 if (GET_CODE (op) == SYMBOL_REF
3000 || GET_CODE (op) == LABEL_REF
3001 || (GET_CODE (op) == UNSPEC
3002 && (XINT (op, 1) == UNSPEC_GOT
3003 || XINT (op, 1) == UNSPEC_GOTOFF
3004 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3006 if (GET_CODE (op) != PLUS
3007 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3011 if (GET_CODE (op) == SYMBOL_REF
3012 || GET_CODE (op) == LABEL_REF)
3014 /* Only @GOTOFF gets offsets. */
3015 if (GET_CODE (op) != UNSPEC
3016 || XINT (op, 1) != UNSPEC_GOTOFF)
3019 op = XVECEXP (op, 0, 0);
3020 if (GET_CODE (op) == SYMBOL_REF
3021 || GET_CODE (op) == LABEL_REF)
3030 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
/* NOTE(review): elided excerpt -- braces and some guard lines are missing
   from this view; only the visible tests are documented.  */
3033 pic_symbolic_operand (op, mode)
3035 enum machine_mode mode ATTRIBUTE_UNUSED;
3037 if (GET_CODE (op) != CONST)
/* Accept (const (unspec ...)) and (const (plus (unspec ...) const_int)).  */
3042 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3047 if (GET_CODE (op) == UNSPEC)
3049 if (GET_CODE (op) != PLUS
3050 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3053 if (GET_CODE (op) == UNSPEC)
3059 /* Return true if OP is a symbolic operand that resolves locally. */
3062 local_symbolic_operand (op, mode)
3064 enum machine_mode mode ATTRIBUTE_UNUSED;
/* Strip a symbol+offset wrapper; under CM_SMALL_PIC the offset must stay
   within +/-16MB so the addend remains addressable.  */
3066 if (GET_CODE (op) == CONST
3067 && GET_CODE (XEXP (op, 0)) == PLUS
3068 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3069 && (ix86_cmodel != CM_SMALL_PIC
3070 || (INTVAL (XEXP (XEXP (op, 0), 1)) >= -16*1024*1024
3071 && INTVAL (XEXP (XEXP (op, 0), 1)) < 16*1024*1024)))
3072 op = XEXP (XEXP (op, 0), 0);
3074 if (GET_CODE (op) == LABEL_REF)
3077 if (GET_CODE (op) != SYMBOL_REF)
3080 /* These we've been told are local by varasm and encode_section_info
3082 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3085 /* There is, however, a not insubstantial body of code in the rest of
3086 the compiler that assumes it can just stick the results of
3087 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3088 /* ??? This is a hack. Should update the body of the compiler to
3089 always create a DECL an invoke targetm.encode_section_info. */
3090 if (strncmp (XSTR (op, 0), internal_label_prefix,
3091 internal_label_prefix_len) == 0)
3097 /* Test for various thread-local symbols. See ix86_encode_section_info. */
/* TLS model is encoded as a '%' prefix plus a model character in the
   symbol name; returns the index into tls_model_chars.  */
3100 tls_symbolic_operand (op, mode)
3102 enum machine_mode mode ATTRIBUTE_UNUSED;
3104 const char *symbol_str;
3106 if (GET_CODE (op) != SYMBOL_REF)
3108 symbol_str = XSTR (op, 0);
3110 if (symbol_str[0] != '%')
3112 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
/* Helper: nonzero when OP is a SYMBOL_REF encoded with exactly the TLS
   model KIND.  */
3116 tls_symbolic_operand_1 (op, kind)
3118 enum tls_model kind;
3120 const char *symbol_str;
3122 if (GET_CODE (op) != SYMBOL_REF)
3124 symbol_str = XSTR (op, 0);
3126 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3130 global_dynamic_symbolic_operand (op, mode)
3132 enum machine_mode mode ATTRIBUTE_UNUSED;
3134 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3138 local_dynamic_symbolic_operand (op, mode)
3140 enum machine_mode mode ATTRIBUTE_UNUSED;
3142 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3146 initial_exec_symbolic_operand (op, mode)
3148 enum machine_mode mode ATTRIBUTE_UNUSED;
3150 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3154 local_exec_symbolic_operand (op, mode)
3156 enum machine_mode mode ATTRIBUTE_UNUSED;
3158 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3161 /* Test for a valid operand for a call instruction. Don't allow the
3162 arg pointer register or virtual regs since they may decay into
3163 reg + const, which the patterns can't handle. */
/* NOTE(review): elided excerpt -- declarators/braces are missing from this
   view for the predicates below.  */
3166 call_insn_operand (op, mode)
3168 enum machine_mode mode ATTRIBUTE_UNUSED;
3170 /* Disallow indirect through a virtual register. This leads to
3171 compiler aborts when trying to eliminate them. */
3172 if (GET_CODE (op) == REG
3173 && (op == arg_pointer_rtx
3174 || op == frame_pointer_rtx
3175 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3176 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3179 /* Disallow `call 1234'. Due to varying assembler lameness this
3180 gets either rejected or translated to `call .+1234'. */
3181 if (GET_CODE (op) == CONST_INT)
3184 /* Explicitly allow SYMBOL_REF even if pic. */
3185 if (GET_CODE (op) == SYMBOL_REF)
3188 /* Otherwise we can allow any general_operand in the address. */
3189 return general_operand (op, Pmode);
/* Nonzero for a direct call target: a SYMBOL_REF, optionally inside a
   (const (plus symbol const_int)) wrapper.  */
3193 constant_call_address_operand (op, mode)
3195 enum machine_mode mode ATTRIBUTE_UNUSED;
3197 if (GET_CODE (op) == CONST
3198 && GET_CODE (XEXP (op, 0)) == PLUS
3199 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3200 op = XEXP (XEXP (op, 0), 0);
3201 return GET_CODE (op) == SYMBOL_REF;
3204 /* Match exactly zero and one. */
3207 const0_operand (op, mode)
3209 enum machine_mode mode;
3211 return op == CONST0_RTX (mode);
3215 const1_operand (op, mode)
3217 enum machine_mode mode ATTRIBUTE_UNUSED;
3219 return op == const1_rtx;
3222 /* Match 2, 4, or 8. Used for leal multiplicands. */
3225 const248_operand (op, mode)
3227 enum machine_mode mode ATTRIBUTE_UNUSED;
3229 return (GET_CODE (op) == CONST_INT
3230 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3233 /* True if this is a constant appropriate for an increment or decremenmt. */
3236 incdec_operand (op, mode)
3238 enum machine_mode mode ATTRIBUTE_UNUSED;
3240 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3241 registers, since carry flag is not set. */
3242 if (TARGET_PENTIUM4 && !optimize_size)
3244 return op == const1_rtx || op == constm1_rtx;
3247 /* Return nonzero if OP is acceptable as operand of DImode shift
/* Visible: memory operands are allowed on one path (presumably 64-bit or
   splitter context -- TODO confirm against elided guard), registers only
   on the other.  */
3251 shiftdi_operand (op, mode)
3253 enum machine_mode mode ATTRIBUTE_UNUSED;
3256 return nonimmediate_operand (op, mode);
3258 return register_operand (op, mode);
3261 /* Return false if this is the stack pointer, or any other fake
3262 register eliminable to the stack pointer. Otherwise, this is
3265 This is used to prevent esp from being used as an index reg.
3266 Which would only happen in pathological cases. */
3269 reg_no_sp_operand (op, mode)
3271 enum machine_mode mode;
3274 if (GET_CODE (t) == SUBREG)
3276 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3279 return register_operand (op, mode);
/* Nonzero when OP is an MMX hard register.  */
3283 mmx_reg_operand (op, mode)
3285 enum machine_mode mode ATTRIBUTE_UNUSED;
3287 return MMX_REG_P (op);
3290 /* Return false if this is any eliminable register. Otherwise
3294 general_no_elim_operand (op, mode)
3296 enum machine_mode mode;
3299 if (GET_CODE (t) == SUBREG)
3301 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3302 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3303 || t == virtual_stack_dynamic_rtx)
3306 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3307 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3310 return general_operand (op, mode);
3313 /* Return false if this is any eliminable register. Otherwise
3314 register_operand or const_int. */
3317 nonmemory_no_elim_operand (op, mode)
3319 enum machine_mode mode;
3322 if (GET_CODE (t) == SUBREG)
3324 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3325 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3326 || t == virtual_stack_dynamic_rtx)
3329 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3332 /* Return false if this is any eliminable register or stack register,
3333 otherwise work like register_operand. */
3336 index_register_operand (op, mode)
3338 enum machine_mode mode;
3341 if (GET_CODE (t) == SUBREG)
3345 if (t == arg_pointer_rtx
3346 || t == frame_pointer_rtx
3347 || t == virtual_incoming_args_rtx
3348 || t == virtual_stack_vars_rtx
3349 || t == virtual_stack_dynamic_rtx
3350 || REGNO (t) == STACK_POINTER_REGNUM)
3353 return general_operand (op, mode);
3356 /* Return true if op is a Q_REGS class register. */
3359 q_regs_operand (op, mode)
3361 enum machine_mode mode;
3363 if (mode != VOIDmode && GET_MODE (op) != mode)
3365 if (GET_CODE (op) == SUBREG)
3366 op = SUBREG_REG (op);
3367 return ANY_QI_REG_P (op);
3370 /* Return true if op is a NON_Q_REGS class register. */
3373 non_q_regs_operand (op, mode)
3375 enum machine_mode mode;
3377 if (mode != VOIDmode && GET_MODE (op) != mode)
3379 if (GET_CODE (op) == SUBREG)
3380 op = SUBREG_REG (op);
3381 return NON_QI_REG_P (op);
3384 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3387 sse_comparison_operator (op, mode)
3389 enum machine_mode mode ATTRIBUTE_UNUSED;
3391 enum rtx_code code = GET_CODE (op);
3394 /* Operations supported directly. */
3404 /* These are equivalent to ones above in non-IEEE comparisons. */
3411 return !TARGET_IEEE_FP;
3416 /* Return 1 if OP is a valid comparison operator in valid mode. */
3418 ix86_comparison_operator (op, mode)
3420 enum machine_mode mode;
3422 enum machine_mode inmode;
3423 enum rtx_code code = GET_CODE (op);
3424 if (mode != VOIDmode && GET_MODE (op) != mode)
/* Must be a comparison rtx class. */
3426 if (GET_RTX_CLASS (code) != '<')
3428 inmode = GET_MODE (XEXP (op, 0));
/* Floating-point compares: valid only when expressible as a single
   integer condition -- no bypass or secondary jump needed. */
3430 if (inmode == CCFPmode || inmode == CCFPUmode)
3432 enum rtx_code second_code, bypass_code;
3433 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3434 return (bypass_code == NIL && second_code == NIL);
3441 if (inmode == CCmode || inmode == CCGCmode
3442 || inmode == CCGOCmode || inmode == CCNOmode)
/* Unsigned and (un)ordered tests need the full carry/parity flags,
   which only plain CCmode guarantees. */
3445 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3446 if (inmode == CCmode)
3450 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3458 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3461 fcmov_comparison_operator (op, mode)
3463 enum machine_mode mode;
3465 enum machine_mode inmode;
3466 enum rtx_code code = GET_CODE (op);
3467 if (mode != VOIDmode && GET_MODE (op) != mode)
/* Must be a comparison rtx class. */
3469 if (GET_RTX_CLASS (code) != '<')
3471 inmode = GET_MODE (XEXP (op, 0));
/* FP compares must map to a single integer condition before fcmov
   can use them. */
3472 if (inmode == CCFPmode || inmode == CCFPUmode)
3474 enum rtx_code second_code, bypass_code;
3475 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3476 if (bypass_code != NIL || second_code != NIL)
3478 code = ix86_fp_compare_code_to_integer (code);
3480 /* i387 supports just limited amount of conditional codes. */
3483 case LTU: case GTU: case LEU: case GEU:
3484 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3487 case ORDERED: case UNORDERED:
3495 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3498 promotable_binary_operator (op, mode)
3500 enum machine_mode mode ATTRIBUTE_UNUSED;
3502 switch (GET_CODE (op))
3505 /* Modern CPUs have same latency for HImode and SImode multiply,
3506 but 386 and 486 do HImode multiply faster. */
/* So only promote multiplies when tuning for a CPU newer than the 486. */
3507 return ix86_cpu > PROCESSOR_I486;
3519 /* Nearly general operand, but accept any const_double, since we wish
3520 to be able to drop them into memory rather than have them get pulled
3524 cmp_fp_expander_operand (op, mode)
3526 enum machine_mode mode;
3528 if (mode != VOIDmode && mode != GET_MODE (op))
/* CONST_DOUBLEs are accepted unconditionally; everything else must
   satisfy general_operand. */
3530 if (GET_CODE (op) == CONST_DOUBLE)
3532 return general_operand (op, mode);
3535 /* Match an SI or HImode register for a zero_extract. */
3538 ext_register_operand (op, mode)
3540 enum machine_mode mode ATTRIBUTE_UNUSED;
3543 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3544 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3547 if (!register_operand (op, VOIDmode))
3550 /* Be careful to accept only registers having upper parts:
   hard regs 0-3 (AX/DX/CX/BX) have %ah-style high-byte halves;
   pseudos (> LAST_VIRTUAL_REGISTER) may still be allocated to one. */
3551 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3552 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3555 /* Return 1 if this is a valid binary floating-point operation.
3556 OP is the expression matched, and MODE is its mode. */
3559 binary_fp_operator (op, mode)
3561 enum machine_mode mode;
3563 if (mode != VOIDmode && mode != GET_MODE (op))
3566 switch (GET_CODE (op))
/* Arithmetic codes qualify only when the operation itself is in a
   floating-point mode. */
3572 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
/* Match a MULT rtx; MODE is ignored. */
3580 mult_operator (op, mode)
3582 enum machine_mode mode ATTRIBUTE_UNUSED;
3584 return GET_CODE (op) == MULT;
/* Match a DIV rtx; MODE is ignored. */
3588 div_operator (op, mode)
3590 enum machine_mode mode ATTRIBUTE_UNUSED;
3592 return GET_CODE (op) == DIV;
/* Match any commutative ('c') or two-operand arithmetic ('2') operator
   in MODE (or any mode when MODE is VOIDmode). */
3596 arith_or_logical_operator (op, mode)
3598 enum machine_mode mode;
3600 return ((mode == VOIDmode || GET_MODE (op) == mode)
3601 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3602 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3605 /* Returns 1 if OP is memory operand with a displacement. */
3608 memory_displacement_operand (op, mode)
3610 enum machine_mode mode;
3612 struct ix86_address parts;
3614 if (! memory_operand (op, mode))
/* Decompose the address; reject addresses we cannot parse. */
3617 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3620 return parts.disp != NULL_RTX;
3623 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3624 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3626 ??? It seems likely that this will only work because cmpsi is an
3627 expander, and no actual insns use this. */
3630 cmpsi_operand (op, mode)
3632 enum machine_mode mode;
3634 if (nonimmediate_operand (op, mode))
/* Additionally accept (and (zero_extract X 8 8) const) -- a test of
   bits 8..15, i.e. the %ah-style high-byte subregister. */
3637 if (GET_CODE (op) == AND
3638 && GET_MODE (op) == SImode
3639 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3640 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3641 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3642 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3643 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3644 && GET_CODE (XEXP (op, 1)) == CONST_INT
3650 /* Returns 1 if OP is memory operand that can not be represented by the
3654 long_memory_operand (op, mode)
3656 enum machine_mode mode;
3658 if (! memory_operand (op, mode))
/* A nonzero encoded address length means the short form cannot be used. */
3661 return memory_address_length (op) != 0;
3664 /* Return nonzero if the rtx is known aligned. */
3667 aligned_operand (op, mode)
3669 enum machine_mode mode;
3671 struct ix86_address parts;
3673 if (!general_operand (op, mode))
3676 /* Registers and immediate operands are always "aligned". */
3677 if (GET_CODE (op) != MEM)
3680 /* Don't even try to do any aligned optimizations with volatiles. */
3681 if (MEM_VOLATILE_P (op))
3686 /* Pushes and pops are only valid on the stack pointer. */
3687 if (GET_CODE (op) == PRE_DEC
3688 || GET_CODE (op) == POST_INC)
3691 /* Decode the address. */
3692 if (! ix86_decompose_address (op, &parts))
/* Work on the underlying registers of any SUBREGs. */
3695 if (parts.base && GET_CODE (parts.base) == SUBREG)
3696 parts.base = SUBREG_REG (parts.base);
3697 if (parts.index && GET_CODE (parts.index) == SUBREG)
3698 parts.index = SUBREG_REG (parts.index);
3700 /* Look for some component that isn't known to be aligned:
   base/index must have at least 32-bit (4-byte) known alignment and
   the displacement must be a multiple of 4. */
3704 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3709 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3714 if (GET_CODE (parts.disp) != CONST_INT
3715 || (INTVAL (parts.disp) & 3) != 0)
3719 /* Didn't find one -- this must be an aligned address. */
3723 /* Return true if the constant is something that can be loaded with
3724 a special instruction. Only handle 0.0 and 1.0; others are less
3728 standard_80387_constant_p (x)
3731 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3733 /* Note that the 80387 has other constants, such as pi, that we should
3734 support too. On some machines, these are much slower to load as standard
3735 constants than to load from doubles in memory. */
/* 0.0 maps to fldz, 1.0 to fld1 (distinct nonzero return codes,
   presumably -- the return values are elided in this listing). */
3736 if (x == CONST0_RTX (GET_MODE (x)))
3738 if (x == CONST1_RTX (GET_MODE (x)))
3743 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3746 standard_sse_constant_p (x)
/* Only all-zero constants qualify (loadable via xorps/pxor). */
3749 if (x == const0_rtx)
3751 return (x == CONST0_RTX (GET_MODE (x)));
3754 /* Returns 1 if OP contains a symbol reference */
3757 symbolic_reference_mentioned_p (op)
3760 register const char *fmt;
3763 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
/* Recursively walk every rtx ('e') and rtx-vector ('E') operand. */
3766 fmt = GET_RTX_FORMAT (GET_CODE (op));
3767 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3773 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3774 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3778 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3785 /* Return 1 if it is appropriate to emit `ret' instructions in the
3786 body of a function. Do this only if the epilogue is simple, needing a
3787 couple of insns. Prior to reloading, we can't tell how many registers
3788 must be saved, so return 0 then. Return 0 if there is no frame
3789 marker to de-allocate.
3791 If NON_SAVING_SETJMP is defined and true, then it is not possible
3792 for the epilogue to be simple, so return 0. This is a special case
3793 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3794 until final, but jump_optimize may need to know sooner if a
3798 ix86_can_use_return_insn_p ()
3800 struct ix86_frame frame;
3802 #ifdef NON_SAVING_SETJMP
3803 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3807 if (! reload_completed || frame_pointer_needed)
3810 /* Don't allow more than 32 pop, since that's all we can do
3811 with one instruction.
   NOTE(review): the test below actually allows up to 32767 bytes --
   the "32" in this comment looks stale; verify against `ret $imm16'. */
3812 if (current_function_pops_args
3813 && current_function_args_size >= 32768)
/* A bare `ret' works only when nothing remains to deallocate or pop. */
3816 ix86_compute_frame_layout (&frame);
3817 return frame.to_allocate == 0 && frame.nregs == 0;
3820 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3822 x86_64_sign_extended_value (value, allow_rip)
3826 switch (GET_CODE (value))
3828 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3829 to be at least 32 and all acceptable constants are
3830 represented as CONST_INT. */
/* A CONST_INT matches iff it survives a round trip through SImode,
   i.e. it fits in a sign-extended 32-bit immediate. */
3832 if (HOST_BITS_PER_WIDE_INT == 32)
3836 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3837 return trunc_int_for_mode (val, SImode) == val;
3841 /* For certain code models, the symbolic references are known to fit.
3842 in CM_SMALL_PIC model we know it fits if it is local to the shared
3843 library. Don't count TLS SYMBOL_REFs here, since they should fit
3844 only if inside of UNSPEC handled below. */
3846 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL
3848 && ix86_cmodel == CM_SMALL_PIC
3849 && (CONSTANT_POOL_ADDRESS_P (value)
3850 || SYMBOL_REF_FLAG (value))
3851 && ! tls_symbolic_operand (value, GET_MODE (value))));
3853 /* For certain code models, the code is near as well. */
3855 return ix86_cmodel != CM_LARGE
3856 && (allow_rip || ix86_cmodel != CM_SMALL_PIC);
3858 /* We also may accept the offsetted memory references in certain special
/* Bare UNSPECs that the linker resolves to 32-bit quantities. */
3861 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
3862 switch (XINT (XEXP (value, 0), 1))
3864 case UNSPEC_GOTPCREL:
3866 case UNSPEC_GOTNTPOFF:
/* symbol + constant-offset forms, validated per code model. */
3872 if (GET_CODE (XEXP (value, 0)) == PLUS)
3874 rtx op1 = XEXP (XEXP (value, 0), 0);
3875 rtx op2 = XEXP (XEXP (value, 0), 1);
3876 HOST_WIDE_INT offset;
3878 if (ix86_cmodel == CM_LARGE)
3880 if (GET_CODE (op2) != CONST_INT)
3882 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3883 switch (GET_CODE (op1))
3886 /* For CM_SMALL assume that latest object is 16MB before
3887 end of 31bits boundary. We may also accept pretty
3888 large negative constants knowing that all objects are
3889 in the positive half of address space. */
3890 if (ix86_cmodel == CM_SMALL
3891 && offset < 16*1024*1024
3892 && trunc_int_for_mode (offset, SImode) == offset)
3894 /* For CM_KERNEL we know that all objects reside in the
3895 negative half of 32bits address space. We may not
3896 accept negative offsets, since they may be just off
3897 and we may accept pretty large positive ones. */
3898 if (ix86_cmodel == CM_KERNEL
3900 && trunc_int_for_mode (offset, SImode) == offset)
3902 /* For CM_SMALL_PIC, we can make similar assumptions
3903 as for CM_SMALL model, if we know the symbol is local
3904 to the shared library. Disallow any TLS symbols,
3905 since they should always be enclosed in an UNSPEC. */
3906 if (ix86_cmodel == CM_SMALL_PIC
3908 && (CONSTANT_POOL_ADDRESS_P (op1)
3909 || SYMBOL_REF_FLAG (op1))
3910 && ! tls_symbolic_operand (op1, GET_MODE (op1))
3911 && offset < 16*1024*1024
3912 && offset >= -16*1024*1024
3913 && trunc_int_for_mode (offset, SImode) == offset)
3917 /* These conditions are similar to SYMBOL_REF ones, just the
3918 constraints for code models differ. */
3919 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
3920 || (ix86_cmodel == CM_SMALL_PIC && allow_rip
3921 && offset >= -16*1024*1024))
3922 && offset < 16*1024*1024
3923 && trunc_int_for_mode (offset, SImode) == offset)
3925 if (ix86_cmodel == CM_KERNEL
3927 && trunc_int_for_mode (offset, SImode) == offset)
/* UNSPEC + offset forms (e.g. TLS relocations with addends). */
3931 switch (XINT (op1, 1))
3936 && trunc_int_for_mode (offset, SImode) == offset)
3950 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3952 x86_64_zero_extended_value (value)
3955 switch (GET_CODE (value))
/* On 32-bit hosts a CONST_DOUBLE carries the high part explicitly;
   it fits iff the high 32 bits are zero. */
3958 if (HOST_BITS_PER_WIDE_INT == 32)
3959 return (GET_MODE (value) == VOIDmode
3960 && !CONST_DOUBLE_HIGH (value));
/* A CONST_INT fits iff no bits above bit 31 are set. */
3964 if (HOST_BITS_PER_WIDE_INT == 32)
3965 return INTVAL (value) >= 0;
3967 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3970 /* For certain code models, the symbolic references are known to fit. */
3972 return ix86_cmodel == CM_SMALL;
3974 /* For certain code models, the code is near as well. */
3976 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3978 /* We also may accept the offsetted memory references in certain special
/* symbol + constant-offset forms, validated per code model. */
3981 if (GET_CODE (XEXP (value, 0)) == PLUS)
3983 rtx op1 = XEXP (XEXP (value, 0), 0);
3984 rtx op2 = XEXP (XEXP (value, 0), 1);
3986 if (ix86_cmodel == CM_LARGE)
3988 switch (GET_CODE (op1))
3992 /* For small code model we may accept pretty large positive
3993 offsets, since one bit is available for free. Negative
3994 offsets are limited by the size of NULL pointer area
3995 specified by the ABI. */
3996 if (ix86_cmodel == CM_SMALL
3997 && GET_CODE (op2) == CONST_INT
3998 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3999 && (trunc_int_for_mode (INTVAL (op2), SImode)
4002 /* ??? For the kernel, we may accept adjustment of
4003 -0x10000000, since we know that it will just convert
4004 negative address space to positive, but perhaps this
4005 is not worthwhile. */
4008 /* These conditions are similar to SYMBOL_REF ones, just the
4009 constraints for code models differ. */
4010 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4011 && GET_CODE (op2) == CONST_INT
4012 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4013 && (trunc_int_for_mode (INTVAL (op2), SImode)
4027 /* Value should be nonzero if functions must have frame pointers.
4028 Zero means the frame pointer need not be set up (and parms may
4029 be accessed via the stack pointer) in functions that seem suitable. */
4032 ix86_frame_pointer_required ()
4034 /* If we accessed previous frames, then the generated code expects
4035 to be able to access the saved ebp value in our frame. */
4036 if (cfun->machine->accesses_prev_frame)
4039 /* Several x86 os'es need a frame pointer for other reasons,
4040 usually pertaining to setjmp. */
4041 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4044 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4045 the frame pointer by default. Turn it back on now if we've not
4046 got a leaf function. */
4047 if (TARGET_OMIT_LEAF_FRAME_POINTER
4048 && (!current_function_is_leaf || current_function_profile))
4054 /* Record that the current function accesses previous call frames.
   Checked later by ix86_frame_pointer_required. */
4057 ix86_setup_frame_addresses ()
4059 cfun->machine->accesses_prev_frame = 1;
/* Emit hidden link-once pc thunks only when the assembler and linker
   support the required features. */
4062 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4063 # define USE_HIDDEN_LINKONCE 1
4065 # define USE_HIDDEN_LINKONCE 0
/* Bitmask of registers for which a pc thunk has been requested;
   bit N set means a thunk for register N must be emitted at file end. */
4068 static int pic_labels_used;
4070 /* Fills in the label name that should be used for a pc thunk for
4071 the given register. */
4074 get_pc_thunk_name (name, regno)
4078 if (USE_HIDDEN_LINKONCE)
/* Shared, linker-deduplicated name when link-once sections work ... */
4079 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
/* ... otherwise a file-local internal label. */
4081 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4085 /* This function generates code for -fpic that loads %ebx with
4086 the return address of the caller and then returns. */
4089 ix86_asm_file_end (file)
/* Emit one get_pc thunk for every register recorded in pic_labels_used. */
4095 for (regno = 0; regno < 8; ++regno)
4099 if (! ((pic_labels_used >> regno) & 1))
4102 get_pc_thunk_name (name, regno);
4104 if (USE_HIDDEN_LINKONCE)
/* Build a minimal FUNCTION_DECL so the thunk can live in its own
   link-once section and be hidden from dynamic linking. */
4108 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4110 TREE_PUBLIC (decl) = 1;
4111 TREE_STATIC (decl) = 1;
4112 DECL_ONE_ONLY (decl) = 1;
4114 (*targetm.asm_out.unique_section) (decl, 0);
4115 named_section (decl, NULL, 0);
4117 (*targetm.asm_out.globalize_label) (file, name);
4118 fputs ("\t.hidden\t", file);
4119 assemble_name (file, name);
4121 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4126 ASM_OUTPUT_LABEL (file, name);
/* Thunk body: copy the return address (at the top of the stack)
   into the target register, then return. */
4129 xops[0] = gen_rtx_REG (SImode, regno);
4130 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4131 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4132 output_asm_insn ("ret", xops);
4136 /* Emit code for the SET_GOT patterns. */
4139 output_set_got (dest)
4145 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
/* Classic call/pop sequence: call the next instruction, then pop the
   pushed return address to obtain the pc. */
4147 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4149 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4152 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4154 output_asm_insn ("call\t%a2", xops);
4157 /* Output the "canonical" label name ("Lxx$pb") here too. This
4158 is what will be referred to by the Mach-O PIC subsystem. */
4159 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4161 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4162 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4165 output_asm_insn ("pop{l}\t%0", xops);
/* Otherwise call a shared pc thunk, which keeps the CPU's call/return
   branch predictor stack balanced. */
4170 get_pc_thunk_name (name, REGNO (dest));
4171 pic_labels_used |= 1 << REGNO (dest);
4173 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4174 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4175 output_asm_insn ("call\t%X2", xops);
/* Finally add the GOT offset to turn the pc into the GOT pointer. */
4178 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4179 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4180 else if (!TARGET_MACHO)
4181 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4186 /* Generate a "push" pattern for input ARG:
   (set (mem (pre_dec sp)) arg). */
4192 return gen_rtx_SET (VOIDmode,
4194 gen_rtx_PRE_DEC (Pmode,
4195 stack_pointer_rtx)),
4199 /* Return >= 0 if there is an unused call-clobbered register available
4200 for the entire function. */
4203 ix86_select_alt_pic_regnum ()
/* Only safe in leaf, non-profiled functions, where call-clobbered
   registers cannot be trashed by calls. */
4205 if (current_function_is_leaf && !current_function_profile)
/* Scan %ecx, %edx, %eax (regs 2..0) for one never used. */
4208 for (i = 2; i >= 0; --i)
4209 if (!regs_ever_live[i])
4213 return INVALID_REGNUM;
4216 /* Return 1 if we need to save REGNO. */
4218 ix86_save_reg (regno, maybe_eh_return)
4220 int maybe_eh_return;
/* The real PIC register must be saved when it is live, unless an unused
   alternate register can carry the GOT pointer instead. */
4222 if (pic_offset_table_rtx
4223 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4224 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4225 || current_function_profile
4226 || current_function_calls_eh_return))
4228 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
/* When requested, also treat the EH return data registers as saved. */
4233 if (current_function_calls_eh_return && maybe_eh_return)
4238 unsigned test = EH_RETURN_DATA_REGNO (i);
4239 if (test == INVALID_REGNUM)
/* Default rule: live, callee-saved, non-fixed registers need saving;
   the hard frame pointer is handled by the frame-pointer setup itself. */
4246 return (regs_ever_live[regno]
4247 && !call_used_regs[regno]
4248 && !fixed_regs[regno]
4249 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4252 /* Return number of registers to be saved on the stack.
   (Function name elided in this listing; presumably ix86_nsaved_regs.) */
4260 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4261 if (ix86_save_reg (regno, true))
4266 /* Return the offset between two registers, one to be eliminated, and the other
4267 its replacement, at the start of a routine. */
4270 ix86_initial_elimination_offset (from, to)
4274 struct ix86_frame frame;
4275 ix86_compute_frame_layout (&frame);
4277 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4278 return frame.hard_frame_pointer_offset;
4279 else if (from == FRAME_POINTER_REGNUM
4280 && to == HARD_FRAME_POINTER_REGNUM)
4281 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* Remaining cases eliminate to the stack pointer. */
4284 if (to != STACK_POINTER_REGNUM)
4286 else if (from == ARG_POINTER_REGNUM)
4287 return frame.stack_pointer_offset;
4288 else if (from != FRAME_POINTER_REGNUM)
4291 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4295 /* Fill structure ix86_frame about frame of currently computed function. */
4298 ix86_compute_frame_layout (frame)
4299 struct ix86_frame *frame;
4301 HOST_WIDE_INT total_size;
4302 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4304 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4305 HOST_WIDE_INT size = get_frame_size ();
4307 frame->nregs = ix86_nsaved_regs ();
4310 /* Skip return address and saved base pointer. */
4311 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4313 frame->hard_frame_pointer_offset = offset;
4315 /* Do some sanity checking of stack_alignment_needed and
4316 preferred_alignment, since i386 port is the only using those features
4317 that may break easily. */
4319 if (size && !stack_alignment_needed)
4321 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4323 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4325 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4328 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4329 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4331 /* Register save area */
4332 offset += frame->nregs * UNITS_PER_WORD;
/* Varargs register save area (x86-64 only). */
4335 if (ix86_save_varrargs_registers)
4337 offset += X86_64_VARARGS_SIZE;
4338 frame->va_arg_size = X86_64_VARARGS_SIZE;
4341 frame->va_arg_size = 0;
4343 /* Align start of frame for local function. */
4344 frame->padding1 = ((offset + stack_alignment_needed - 1)
4345 & -stack_alignment_needed) - offset;
4347 offset += frame->padding1;
4349 /* Frame pointer points here. */
4350 frame->frame_pointer_offset = offset;
4354 /* Add outgoing arguments area. Can be skipped if we eliminated
4355 all the function calls as dead code. */
4356 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4358 offset += current_function_outgoing_args_size;
4359 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4362 frame->outgoing_arguments_size = 0;
4364 /* Align stack boundary. Only needed if we're calling another function
4366 if (!current_function_is_leaf || current_function_calls_alloca)
4367 frame->padding2 = ((offset + preferred_alignment - 1)
4368 & -preferred_alignment) - offset;
4370 frame->padding2 = 0;
4372 offset += frame->padding2;
4374 /* We've reached end of stack frame. */
4375 frame->stack_pointer_offset = offset;
4377 /* Size prologue needs to allocate. */
4378 frame->to_allocate =
4379 (size + frame->padding1 + frame->padding2
4380 + frame->outgoing_arguments_size + frame->va_arg_size);
/* Leaf functions on x86-64 may keep their frame entirely inside the
   red zone below the stack pointer, avoiding an explicit allocation. */
4382 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4383 && current_function_is_leaf)
4385 frame->red_zone_size = frame->to_allocate;
4386 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4387 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4390 frame->red_zone_size = 0;
4391 frame->to_allocate -= frame->red_zone_size;
4392 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout -- presumably guarded by a debug
   conditional elided from this listing; confirm against full source. */
4394 fprintf (stderr, "nregs: %i\n", frame->nregs);
4395 fprintf (stderr, "size: %i\n", size);
4396 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4397 fprintf (stderr, "padding1: %i\n", frame->padding1);
4398 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4399 fprintf (stderr, "padding2: %i\n", frame->padding2);
4400 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4401 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4402 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4403 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4404 frame->hard_frame_pointer_offset);
4405 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4409 /* Emit code to save registers in the prologue, one push per register
   needing a save (highest register number first). */
4412 ix86_emit_save_regs ()
4417 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4418 if (ix86_save_reg (regno, true))
4420 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
/* Mark for DWARF call-frame information. */
4421 RTX_FRAME_RELATED_P (insn) = 1;
4425 /* Emit code to save registers using MOV insns. First register
4426 is saved at POINTER + OFFSET. */
4428 ix86_emit_save_regs_using_mov (pointer, offset)
4430 HOST_WIDE_INT offset;
4435 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4436 if (ix86_save_reg (regno, true))
4438 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4440 gen_rtx_REG (Pmode, regno));
/* Mark for DWARF call-frame information. */
4441 RTX_FRAME_RELATED_P (insn) = 1;
4442 offset += UNITS_PER_WORD;
4446 /* Expand the prologue into a bunch of separate insns. */
4449 ix86_expand_prologue ()
4453 struct ix86_frame frame;
4455 HOST_WIDE_INT allocate;
/* Decide between push-based and mov-based register saving; movs allow
   better scheduling but cost code size, so reserve them for functions
   that are not size-critical. */
4459 use_fast_prologue_epilogue
4460 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4461 if (TARGET_PROLOGUE_USING_MOVE)
4462 use_mov = use_fast_prologue_epilogue;
4464 ix86_compute_frame_layout (&frame);
4466 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4467 slower on all targets. Also sdb doesn't like it. */
4469 if (frame_pointer_needed)
4471 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4472 RTX_FRAME_RELATED_P (insn) = 1;
4474 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4475 RTX_FRAME_RELATED_P (insn) = 1;
4478 allocate = frame.to_allocate;
4479 /* In case we are dealing only with single register and empty frame,
4480 push is equivalent of the mov+add sequence. */
4481 if (allocate == 0 && frame.nregs <= 1)
4485 ix86_emit_save_regs ();
/* With mov-based saving, fold the register area into the allocation. */
4487 allocate += frame.nregs * UNITS_PER_WORD;
4491 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4493 insn = emit_insn (gen_pro_epilogue_adjust_stack
4494 (stack_pointer_rtx, stack_pointer_rtx,
4495 GEN_INT (-allocate)));
4496 RTX_FRAME_RELATED_P (insn) = 1;
4500 /* ??? Is this only valid for Win32? */
/* Large allocation with stack probing: call _alloca with the size
   in %eax so each page is touched. */
4507 arg0 = gen_rtx_REG (SImode, 0);
4508 emit_move_insn (arg0, GEN_INT (allocate));
4510 sym = gen_rtx_MEM (FUNCTION_MODE,
4511 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4512 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4514 CALL_INSN_FUNCTION_USAGE (insn)
4515 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4516 CALL_INSN_FUNCTION_USAGE (insn));
/* Mov-based saves address off esp when no frame pointer (or nothing was
   allocated), otherwise below the hard frame pointer. */
4520 if (!frame_pointer_needed || !frame.to_allocate)
4521 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4523 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4524 -frame.nregs * UNITS_PER_WORD);
4527 #ifdef SUBTARGET_PROLOGUE
/* Set up the PIC register if the function needs the GOT pointer. */
4531 pic_reg_used = false;
4532 if (pic_offset_table_rtx
4533 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4534 || current_function_profile))
4536 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4538 if (alt_pic_reg_used != INVALID_REGNUM)
4539 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4541 pic_reg_used = true;
4546 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4548 /* Even with accurate pre-reload life analysis, we can wind up
4549 deleting all references to the pic register after reload.
4550 Consider if cross-jumping unifies two sides of a branch
4551 controlled by a comparison vs the only read from a global.
4552 In which case, allow the set_got to be deleted, though we're
4553 too late to do anything about the ebx save in the prologue. */
4554 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4557 /* Prevent function calls from being scheduled before the call to mcount.
4558 In the pic_reg_used case, make sure that the got load isn't deleted. */
4559 if (current_function_profile)
4560 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4563 /* Emit code to restore saved registers using MOV insns. First register
4564 is restored from POINTER + OFFSET. */
4566 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4569 int maybe_eh_return;
4573 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4574 if (ix86_save_reg (regno, maybe_eh_return))
4576 emit_move_insn (gen_rtx_REG (Pmode, regno),
4577 adjust_address (gen_rtx_MEM (Pmode, pointer),
4579 offset += UNITS_PER_WORD;
4583 /* Restore function stack, frame, and registers. */
4586 ix86_expand_epilogue (style)
/* esp addresses the save area validly only when no frame pointer is in
   use or the stack pointer provably did not move. */
4590 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4591 struct ix86_frame frame;
4592 HOST_WIDE_INT offset;
4594 ix86_compute_frame_layout (&frame);
4596 /* Calculate start of saved registers relative to ebp. Special care
4597 must be taken for the normal return case of a function using
4598 eh_return: the eax and edx registers are marked as saved, but not
4599 restored along this path. */
4600 offset = frame.nregs;
4601 if (current_function_calls_eh_return && style != 2)
4603 offset *= -UNITS_PER_WORD;
4605 /* If we're only restoring one register and sp is not valid then
4606 using a move instruction to restore the register since it's
4607 less work than reloading sp and popping the register.
4609 The default code results in stack adjustment using add/lea instruction,
4610 while this code results in LEAVE instruction (or discrete equivalent),
4611 so it is profitable in some other cases as well. Especially when there
4612 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4613 and there is exactly one register to pop. This heuristic may need some
4614 tuning in future. */
4615 if ((!sp_valid && frame.nregs <= 1)
4616 || (TARGET_EPILOGUE_USING_MOVE
4617 && use_fast_prologue_epilogue
4618 && (frame.nregs > 1 || frame.to_allocate))
4619 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4620 || (frame_pointer_needed && TARGET_USE_LEAVE
4621 && use_fast_prologue_epilogue && frame.nregs == 1)
4622 || current_function_calls_eh_return)
4624 /* Restore registers. We can use ebp or esp to address the memory
4625 locations. If both are available, default to ebp, since offsets
4626 are known to be small. Only exception is esp pointing directly to the
4627 end of block of saved registers, where we may simplify addressing
4630 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4631 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4632 frame.to_allocate, style == 2);
4634 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4635 offset, style == 2);
4637 /* eh_return epilogues need %ecx added to the stack pointer. */
4640 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4642 if (frame_pointer_needed)
/* Set sp to point at the saved frame pointer plus the EH adjustment,
   reload ebp from its save slot, then finalize sp. */
4644 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4645 tmp = plus_constant (tmp, UNITS_PER_WORD);
4646 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4648 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4649 emit_move_insn (hard_frame_pointer_rtx, tmp);
4651 emit_insn (gen_pro_epilogue_adjust_stack
4652 (stack_pointer_rtx, sa, const0_rtx));
4656 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4657 tmp = plus_constant (tmp, (frame.to_allocate
4658 + frame.nregs * UNITS_PER_WORD));
4659 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4662 else if (!frame_pointer_needed)
4663 emit_insn (gen_pro_epilogue_adjust_stack
4664 (stack_pointer_rtx, stack_pointer_rtx,
4665 GEN_INT (frame.to_allocate
4666 + frame.nregs * UNITS_PER_WORD)));
4667 /* If not an i386, mov & pop is faster than "leave". */
4668 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4669 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
/* Discrete equivalent of leave: mov ebp->esp, then pop ebp. */
4672 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4673 hard_frame_pointer_rtx,
4676 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4678 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4683 /* First step is to deallocate the stack frame so that we can
4684 pop the registers. */
4687 if (!frame_pointer_needed)
4689 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4690 hard_frame_pointer_rtx,
4693 else if (frame.to_allocate)
4694 emit_insn (gen_pro_epilogue_adjust_stack
4695 (stack_pointer_rtx, stack_pointer_rtx,
4696 GEN_INT (frame.to_allocate)));
/* Pop each saved register back, lowest register number first. */
4698 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4699 if (ix86_save_reg (regno, false))
4702 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4704 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4706 if (frame_pointer_needed)
4708 /* Leave results in shorter dependency chains on CPUs that are
4709 able to grok it fast. */
4710 if (TARGET_USE_LEAVE)
4711 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4712 else if (TARGET_64BIT)
4713 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4715 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4719 /* Sibcall epilogues don't want a return instruction. */
4723 if (current_function_pops_args && current_function_args_size)
4725 rtx popc = GEN_INT (current_function_pops_args);
4727 /* i386 can only pop 64K bytes. If asked to pop more, pop
4728 return address, do explicit add, and jump indirectly to the
4731 if (current_function_pops_args >= 65536)
4733 rtx ecx = gen_rtx_REG (SImode, 2);
4735 /* There is no "pascal" calling convention in 64bit ABI. */
4739 emit_insn (gen_popsi1 (ecx));
4740 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4741 emit_jump_insn (gen_return_indirect_internal (ecx));
4744 emit_jump_insn (gen_return_pop_internal (popc));
4747 emit_jump_insn (gen_return_internal ());
4750 /* Reset from the function's potential modifications:
   the prologue may have redirected pic_offset_table_rtx to an alternate
   register (see ix86_expand_prologue); put the real one back. */
4753 ix86_output_function_epilogue (file, size)
4754 FILE *file ATTRIBUTE_UNUSED;
4755 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
4757 if (pic_offset_table_rtx)
4758 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4761 /* Extract the parts of an RTL expression that is a valid memory address
4762 for an instruction. Return 0 if the structure of the address is
4763 grossly off. Return -1 if the address contains ASHIFT, so it is not
4764 strictly valid, but still used for computing length of lea instruction.
/* Split ADDR into the base/index/displacement/scale quadruple used by
   x86 ModR/M+SIB addressing and store the parts in *OUT.  Per the
   header comment above: returns 0 on malformed addresses, -1 when an
   ASHIFT is present (valid only for computing lea length).
   NOTE(review): this extract is missing interior lines (braces, the
   ADDR parameter declaration, return statements).  */
4768 ix86_decompose_address (addr, out)
4770 struct ix86_address *out;
4772 rtx base = NULL_RTX;
4773 rtx index = NULL_RTX;
4774 rtx disp = NULL_RTX;
4775 HOST_WIDE_INT scale = 1;
4776 rtx scale_rtx = NULL_RTX;
/* Classify the top-level RTX shape: plain register, PLUS of up to
   base+index*scale+disp, bare MULT, ASHIFT (lea only), or a bare
   displacement.  */
4779 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4781 else if (GET_CODE (addr) == PLUS)
4783 rtx op0 = XEXP (addr, 0);
4784 rtx op1 = XEXP (addr, 1);
4785 enum rtx_code code0 = GET_CODE (op0);
4786 enum rtx_code code1 = GET_CODE (op1);
4788 if (code0 == REG || code0 == SUBREG)
4790 if (code1 == REG || code1 == SUBREG)
4791 index = op0, base = op1; /* index + base */
4793 base = op0, disp = op1; /* base + displacement */
4795 else if (code0 == MULT)
4797 index = XEXP (op0, 0);
4798 scale_rtx = XEXP (op0, 1);
4799 if (code1 == REG || code1 == SUBREG)
4800 base = op1; /* index*scale + base */
4802 disp = op1; /* index*scale + disp */
4804 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4806 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4807 scale_rtx = XEXP (XEXP (op0, 0), 1);
4808 base = XEXP (op0, 1);
4811 else if (code0 == PLUS)
4813 index = XEXP (op0, 0); /* index + base + disp */
4814 base = XEXP (op0, 1);
4820 else if (GET_CODE (addr) == MULT)
4822 index = XEXP (addr, 0); /* index*scale */
4823 scale_rtx = XEXP (addr, 1);
4825 else if (GET_CODE (addr) == ASHIFT)
4829 /* We're called for lea too, which implements ashift on occasion.  */
4830 index = XEXP (addr, 0);
4831 tmp = XEXP (addr, 1);
4832 if (GET_CODE (tmp) != CONST_INT)
4834 scale = INTVAL (tmp);
/* Shift counts 0..3 correspond to scales 1/2/4/8; anything larger
   cannot be encoded in a SIB byte.  */
4835 if ((unsigned HOST_WIDE_INT) scale > 3)
4841 disp = addr; /* displacement */
4843 /* Extract the integral value of scale.  */
4846 if (GET_CODE (scale_rtx) != CONST_INT)
4848 scale = INTVAL (scale_rtx);
4851 /* Allow arg pointer and stack pointer as index if there is no scaling.  */
4852 if (base && index && scale == 1
4853 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4854 || index == stack_pointer_rtx))
4861 /* Special case: %ebp cannot be encoded as a base without a displacement.  */
4862 if ((base == hard_frame_pointer_rtx
4863 || base == frame_pointer_rtx
4864 || base == arg_pointer_rtx) && !disp)
4867 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4868 Avoid this by transforming to [%esi+0].  */
4869 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4870 && base && !index && !disp
4872 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4875 /* Special case: encode reg+reg instead of reg*2.  */
4876 if (!base && index && scale && scale == 2)
4877 base = index, scale = 1;
4879 /* Special case: scaling cannot be encoded without base or displacement.  */
4880 if (!base && !disp && index && scale != 1)
4891 /* Return cost of the memory address x.
4892 For i386, it is better to use a complex address than let gcc copy
4893 the address into a reg and make a new pseudo. But not if the address
4894 requires two regs - that would mean more pseudos with longer
/* Return the cost of memory address X (see the comment above: complex
   addresses are preferred over copying the address into a register,
   except when two registers would be tied up).
   NOTE(review): this extract is missing interior lines (the X
   parameter declaration, the cost accumulator and returns).  */
4897 ix86_address_cost (x)
4900 struct ix86_address parts;
4903 if (!ix86_decompose_address (x, &parts))
/* Look through SUBREGs so the pseudo/hard-register tests below see
   the underlying register.  */
4906 if (parts.base && GET_CODE (parts.base) == SUBREG)
4907 parts.base = SUBREG_REG (parts.base);
4908 if (parts.index && GET_CODE (parts.index) == SUBREG)
4909 parts.index = SUBREG_REG (parts.index);
4911 /* More complex memory references are better.  */
4912 if (parts.disp && parts.disp != const0_rtx)
4915 /* Attempt to minimize number of registers in the address.  */
4917 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4919 && (!REG_P (parts.index)
4920 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4924 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4926 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4927 && parts.base != parts.index)
4930 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4931 since its predecode logic can't detect the length of instructions
4932 and it degenerates to vector decoded.  Increase cost of such
4933 addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
4934 to split such addresses or even refuse such addresses at all.
4936 The following addressing modes are affected:
4941 The first and last case may be avoidable by explicitly coding the zero in
4942 the memory address, but I don't have an AMD-K6 machine handy to check this
4946 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4947 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4948 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4954 /* If X is a machine specific address (i.e. a symbol or label being
4955 referenced as a displacement from the GOT implemented using an
4956 UNSPEC), then return the base term. Otherwise return X. */
/* FIND_BASE_TERM hook: if X is a GOT-relative displacement
   (@GOTPCREL on x86-64, pic_reg + @GOTOFF on ia32), return the
   underlying SYMBOL_REF/LABEL_REF; otherwise return X itself.
   NOTE(review): this extract is missing interior lines (the X
   parameter, TARGET_64BIT dispatch, braces and returns).  */
4959 ix86_find_base_term (x)
/* 64-bit path: look inside (const (plus (unspec GOTPCREL) const)).  */
4966 if (GET_CODE (x) != CONST)
4969 if (GET_CODE (term) == PLUS
4970 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4971 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4972 term = XEXP (term, 0);
4973 if (GET_CODE (term) != UNSPEC
4974 || XINT (term, 1) != UNSPEC_GOTPCREL)
4977 term = XVECEXP (term, 0, 0);
4979 if (GET_CODE (term) != SYMBOL_REF
4980 && GET_CODE (term) != LABEL_REF)
/* 32-bit path: look inside (plus pic_reg (const ... UNSPEC_GOTOFF)).  */
4986 if (GET_CODE (x) != PLUS
4987 || XEXP (x, 0) != pic_offset_table_rtx
4988 || GET_CODE (XEXP (x, 1)) != CONST)
4991 term = XEXP (XEXP (x, 1), 0);
4993 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4994 term = XEXP (term, 0);
4996 if (GET_CODE (term) != UNSPEC
4997 || XINT (term, 1) != UNSPEC_GOTOFF)
5000 term = XVECEXP (term, 0, 0);
5002 if (GET_CODE (term) != SYMBOL_REF
5003 && GET_CODE (term) != LABEL_REF)
5009 /* Determine if a given RTX is a valid constant. We already know this
5010 satisfies CONSTANT_P. */
/* Return nonzero if X is a valid constant operand (X already
   satisfies CONSTANT_P).  TLS symbols and TLS-symbol offsets are
   rejected; only selected UNSPECs are accepted inside a CONST.
   NOTE(review): this extract is missing interior lines (case labels,
   braces, return statements).  */
5013 legitimate_constant_p (x)
5018 switch (GET_CODE (x))
5021 /* TLS symbols are not constant.  */
5022 if (tls_symbolic_operand (x, Pmode))
5027 inner = XEXP (x, 0);
5029 /* Offsets of TLS symbols are never valid.
5030 Discourage CSE from creating them.  */
5031 if (GET_CODE (inner) == PLUS
5032 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5035 /* Only some unspecs are valid as "constants".  */
5036 if (GET_CODE (inner) == UNSPEC)
5037 switch (XINT (inner, 1))
5040 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5050 /* Otherwise we handle everything else in the move patterns.  */
5054 /* Determine if a given RTX is a valid constant address. */
/* Return nonzero if X is a constant that can be used directly as a
   memory address.  Under PIC only Mach-O CONSTs are believed; the
   fallback requires !flag_pic plus legitimate_constant_p.
   NOTE(review): this extract is missing interior lines (case labels,
   braces).  */
5057 constant_address_p (x)
5060 switch (GET_CODE (x))
/* Presumably the 64-bit-only case here is a code label or RIP-relative
   form — the case label is missing from this extract; confirm against
   the full source.  */
5067 return TARGET_64BIT;
5070 /* For Mach-O, really believe the CONST.  */
5073 /* Otherwise fall through.  */
5075 return !flag_pic && legitimate_constant_p (x);
5082 /* Nonzero if the constant value X is a legitimate general operand
5083 when generating PIC code. It is given that flag_pic is on and
5084 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
/* Nonzero if constant X is a legitimate general operand when
   generating PIC code (flag_pic is on; X satisfies CONSTANT_P or is
   a CONST_DOUBLE — see comment above).
   NOTE(review): this extract is missing interior lines (case labels,
   braces, default returns).  */
5087 legitimate_pic_operand_p (x)
5092 switch (GET_CODE (x))
5095 inner = XEXP (x, 0);
5097 /* Only some unspecs are valid as "constants".  */
5098 if (GET_CODE (inner) == UNSPEC)
5099 switch (XINT (inner, 1))
5102 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
/* Symbolic references fall through to the PIC displacement check.  */
5110 return legitimate_pic_address_disp_p (x);
5117 /* Determine if a given CONST RTX is a valid memory displacement
/* Determine if CONST RTX DISP is a valid PIC memory displacement:
   either a direct local symbol (64-bit), a @GOTPCREL reference
   (64-bit), a Mach-O picbase difference, or one of the recognized
   GOT/TLS unspecs.
   NOTE(review): this extract is missing interior lines (TARGET_64BIT
   dispatch, braces, returns).  */
5121 legitimate_pic_address_disp_p (disp)
5126 /* In 64bit mode we can allow direct addresses of symbols and labels
5127 when they are not dynamic symbols.  */
5128 if (TARGET_64BIT && local_symbolic_operand (disp, Pmode))
5130 if (GET_CODE (disp) != CONST)
5132 disp = XEXP (disp, 0);
5136 /* It is unsafe to allow PLUS expressions here; that would extend the
5137 allowed distance into GOT tables.  We should not need these anyway.  */
5138 if (GET_CODE (disp) != UNSPEC
5139 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5142 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5143 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit path: strip a constant offset before inspecting the unspec.  */
5149 if (GET_CODE (disp) == PLUS)
5151 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5153 disp = XEXP (disp, 0);
5157 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O.  */
5158 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5160 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5161 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5162 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5164 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5165 if (strstr (sym_name, "$pb") != 0)
5170 if (GET_CODE (disp) != UNSPEC)
5173 switch (XINT (disp, 1))
5178 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5180 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5181 case UNSPEC_GOTTPOFF:
5182 case UNSPEC_GOTNTPOFF:
5183 case UNSPEC_INDNTPOFF:
5186 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5188 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5190 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5196 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5197 memory address for an instruction. The MODE argument is the machine mode
5198 for the MEM expression that wants to use this address.
5200 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5201 convert common non-canonical forms to canonical form so that they will
/* Worker for GO_IF_LEGITIMATE_ADDRESS: nonzero if ADDR is a valid
   memory address for MODE.  STRICT selects hard-register checks
   (REG_OK_FOR_*_STRICT_P) vs. the relaxed pre-reload checks.  On
   failure the code falls through to the "reason" diagnostics at the
   bottom (enabled by TARGET_DEBUG_ADDR).
   NOTE(review): this extract is missing interior lines (the ADDR and
   STRICT parameter declarations, braces, goto labels, returns).  */
5205 legitimate_address_p (mode, addr, strict)
5206 enum machine_mode mode;
5210 struct ix86_address parts;
5211 rtx base, index, disp;
5212 HOST_WIDE_INT scale;
5213 const char *reason = NULL;
5214 rtx reason_rtx = NULL_RTX;
5216 if (TARGET_DEBUG_ADDR)
5219 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5220 GET_MODE_NAME (mode), strict);
/* A bare thread-pointer unspec is always a legitimate address.  */
5224 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5226 if (TARGET_DEBUG_ADDR)
5227 fprintf (stderr, "Success.\n");
5231 if (ix86_decompose_address (addr, &parts) <= 0)
5233 reason = "decomposition failed";
5238 index = parts.index;
5240 scale = parts.scale;
5242 /* Validate base register.
5244 Don't allow SUBREG's here, it can lead to spill failures when the base
5245 is one word out of a two word structure, which is represented internally
5253 if (GET_CODE (base) == SUBREG)
5254 reg = SUBREG_REG (base);
5258 if (GET_CODE (reg) != REG)
5260 reason = "base is not a register";
5264 if (GET_MODE (base) != Pmode)
5266 reason = "base is not in Pmode";
5270 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5271 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5273 reason = "base is not valid";
5278 /* Validate index register.
5280 Don't allow SUBREG's here, it can lead to spill failures when the index
5281 is one word out of a two word structure, which is represented internally
5289 if (GET_CODE (index) == SUBREG)
5290 reg = SUBREG_REG (index);
5294 if (GET_CODE (reg) != REG)
5296 reason = "index is not a register";
5300 if (GET_MODE (index) != Pmode)
5302 reason = "index is not in Pmode";
5306 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5307 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5309 reason = "index is not valid";
5314 /* Validate scale factor.  */
5317 reason_rtx = GEN_INT (scale);
5320 reason = "scale without index";
5324 if (scale != 2 && scale != 4 && scale != 8)
5326 reason = "scale is not a valid multiplier";
5331 /* Validate displacement.  */
/* On x86-64 the displacement must fit in a sign-extended 32-bit
   immediate (zero-extended only when there is no base/index).  */
5338 if (!x86_64_sign_extended_value (disp, !(index || base)))
5340 reason = "displacement is out of range";
5346 if (GET_CODE (disp) == CONST_DOUBLE)
5348 reason = "displacement is a const_double";
5353 if (GET_CODE (disp) == CONST
5354 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5355 switch (XINT (XEXP (disp, 0), 1))
5359 case UNSPEC_GOTPCREL:
5362 goto is_legitimate_pic;
5364 case UNSPEC_GOTTPOFF:
5365 case UNSPEC_GOTNTPOFF:
5366 case UNSPEC_INDNTPOFF:
5372 reason = "invalid address unspec";
5376 else if (flag_pic && (SYMBOLIC_CONST (disp)
5378 && !machopic_operand_p (disp)
5383 if (TARGET_64BIT && (index || base))
5385 /* foo@dtpoff(%rX) is ok.  */
5386 if (GET_CODE (disp) != CONST
5387 || GET_CODE (XEXP (disp, 0)) != PLUS
5388 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5389 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5390 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5391 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5393 reason = "non-constant pic memory reference";
5397 else if (! legitimate_pic_address_disp_p (disp))
5399 reason = "displacement is an invalid pic construct";
5403 /* This code used to verify that a symbolic pic displacement
5404 includes the pic_offset_table_rtx register.
5406 While this is a good idea, unfortunately these constructs may
5407 be created by "adds using lea" optimization for incorrect
5416 This code is nonsensical, but results in addressing
5417 GOT table with pic_offset_table_rtx base.  We can't
5418 just refuse it easily, since it gets matched by
5419 "addsi3" pattern, that later gets split to lea in the
5420 case output register differs from input.  While this
5421 can be handled by separate addsi pattern for this case
5422 that never results in lea, this seems to be easier and
5423 correct fix for crash to disable this test.  */
5425 else if (!CONSTANT_ADDRESS_P (disp))
5427 reason = "displacement is not constant";
5432 /* Everything looks valid.  */
5433 if (TARGET_DEBUG_ADDR)
5434 fprintf (stderr, "Success.\n");
/* Shared failure exit: report the reason and offending rtx.  */
5438 if (TARGET_DEBUG_ADDR)
5440 fprintf (stderr, "Error: %s\n", reason);
5441 debug_rtx (reason_rtx);
5446 /* Return an unique alias set for the GOT. */
/* Return a unique alias set for GOT (and thread-pointer) memory
   references, lazily allocated on first use.
   NOTE(review): this extract is missing lines (the `set == -1` guard
   and the return).  */
5448 static HOST_WIDE_INT
5449 ix86_GOT_alias_set ()
5451 static HOST_WIDE_INT set = -1;
5453 set = new_alias_set ();
5457 /* Return a legitimate reference for ORIG (an address) using the
5458 register REG. If REG is 0, a new pseudo is generated.
5460 There are two types of references that must be handled:
5462 1. Global data references must load the address from the GOT, via
5463 the PIC reg. An insn is emitted to do this load, and the reg is
5466 2. Static data references, constant pool addresses, and code labels
5467 compute the address as an offset from the GOT, whose base is in
5468 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5469 differentiate them from global data objects. The returned
5470 address is the PIC reg + an unspec constant.
5472 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5473 reg also appears in the address. */
/* Legitimize ORIG, a symbolic address, for PIC code using register
   REG (a fresh pseudo when REG is 0).  Local symbols become
   pic_reg + @GOTOFF; global symbols become loads from the GOT
   (@GOTPCREL on x86-64, pic_reg + @GOT on ia32).  See the long
   comment above for the two reference types handled.
   NOTE(review): this extract is missing interior lines (declarations
   of `new`/`addr`/`base`, TARGET_64BIT branches, braces, returns).  */
5476 legitimize_pic_address (orig, reg)
5486 reg = gen_reg_rtx (Pmode);
5487 /* Use the generic Mach-O PIC machinery.  */
5488 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5491 if (local_symbolic_operand (addr, Pmode))
5493 /* In 64bit mode we can address such objects directly.  */
5498 /* This symbol may be referenced via a displacement from the PIC
5499 base address (@GOTOFF).  */
5501 if (reload_in_progress)
5502 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5503 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5504 new = gen_rtx_CONST (Pmode, new);
5505 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5509 emit_move_insn (reg, new);
5514 else if (GET_CODE (addr) == SYMBOL_REF)
/* 64-bit: load the address through a RIP-relative GOT slot.  */
5518 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5519 new = gen_rtx_CONST (Pmode, new);
5520 new = gen_rtx_MEM (Pmode, new);
5521 RTX_UNCHANGING_P (new) = 1;
5522 set_mem_alias_set (new, ix86_GOT_alias_set ());
5525 reg = gen_reg_rtx (Pmode);
5526 /* Use gen_movsi directly; otherwise the address is loaded
5527 into a register for CSE.  We don't want to CSE these addresses;
5528 instead we CSE addresses from the GOT table, so skip this.  */
5529 emit_insn (gen_movsi (reg, new));
5534 /* This symbol must be referenced via a load from the
5535 Global Offset Table (@GOT).  */
5537 if (reload_in_progress)
5538 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5539 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5540 new = gen_rtx_CONST (Pmode, new);
5541 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5542 new = gen_rtx_MEM (Pmode, new);
5543 RTX_UNCHANGING_P (new) = 1;
5544 set_mem_alias_set (new, ix86_GOT_alias_set ());
5547 reg = gen_reg_rtx (Pmode);
5548 emit_move_insn (reg, new);
/* Not a plain symbol: look inside CONST / PLUS combinations.  */
5554 if (GET_CODE (addr) == CONST)
5556 addr = XEXP (addr, 0);
5558 /* We must match stuff we generate before.  Assume the only
5559 unspecs that can get here are ours.  Not that we could do
5560 anything with them anyway...  */
5561 if (GET_CODE (addr) == UNSPEC
5562 || (GET_CODE (addr) == PLUS
5563 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5565 if (GET_CODE (addr) != PLUS)
5568 if (GET_CODE (addr) == PLUS)
5570 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5572 /* Check first to see if this is a constant offset from a @GOTOFF
5573 symbol reference.  */
5574 if (local_symbolic_operand (op0, Pmode)
5575 && GET_CODE (op1) == CONST_INT)
5579 if (reload_in_progress)
5580 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5581 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5583 new = gen_rtx_PLUS (Pmode, new, op1);
5584 new = gen_rtx_CONST (Pmode, new);
5585 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5589 emit_move_insn (reg, new);
/* 64-bit: offsets beyond +/-16MB must go through a register —
   presumably to stay within the signed-32-bit reach of RIP-relative
   addressing; confirm against the full source.  */
5595 if (INTVAL (op1) < -16*1024*1024
5596 || INTVAL (op1) >= 16*1024*1024)
5597 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
/* General PLUS: legitimize each operand and recombine.  */
5602 base = legitimize_pic_address (XEXP (addr, 0), reg);
5603 new = legitimize_pic_address (XEXP (addr, 1),
5604 base == reg ? NULL_RTX : reg);
5606 if (GET_CODE (new) == CONST_INT)
5607 new = plus_constant (base, INTVAL (new));
5610 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5612 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5613 new = XEXP (new, 1);
5615 new = gen_rtx_PLUS (Pmode, base, new);
/* TARGET_ENCODE_SECTION_INFO hook.  Mark SYMBOL_REF_FLAG on locally
   binding symbols (so PIC code can use @GOTOFF), and prefix
   thread-local symbol names with '%' plus a TLS-model character so
   later passes can recover the model from the name.
   NOTE(review): this extract is missing interior lines (braces, early
   returns, the flag_pic condition, newstr[0] assignment).  */
5624 ix86_encode_section_info (decl, first)
5626 int first ATTRIBUTE_UNUSED;
5628 bool local_p = (*targetm.binds_local_p) (decl);
5631 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5632 if (GET_CODE (rtl) != MEM)
5634 symbol = XEXP (rtl, 0);
5635 if (GET_CODE (symbol) != SYMBOL_REF)
5638 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5639 symbol so that we may access it directly in the GOT.  */
5642 SYMBOL_REF_FLAG (symbol) = local_p;
5644 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5645 "local dynamic", "initial exec" or "local exec" TLS models
5648 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5650 const char *symbol_str;
5653 enum tls_model kind = decl_tls_model (decl);
5655 if (TARGET_64BIT && ! flag_pic)
5657 /* x86-64 doesn't allow non-pic code for shared libraries,
5658 so don't generate GD/LD TLS models for non-pic code.  */
5661 case TLS_MODEL_GLOBAL_DYNAMIC:
5662 kind = TLS_MODEL_INITIAL_EXEC; break;
5663 case TLS_MODEL_LOCAL_DYNAMIC:
5664 kind = TLS_MODEL_LOCAL_EXEC; break;
5670 symbol_str = XSTR (symbol, 0);
/* Already encoded with the same model?  Nothing to do.  */
5672 if (symbol_str[0] == '%')
5674 if (symbol_str[1] == tls_model_chars[kind])
/* Build "%<model-char><original-name>" in GC-allocated storage.  */
5678 len = strlen (symbol_str) + 1;
5679 newstr = alloca (len + 2);
5682 newstr[1] = tls_model_chars[kind];
5683 memcpy (newstr + 2, symbol_str, len);
5685 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
5689 /* Undo the above when printing symbol names. */
/* TARGET_STRIP_NAME_ENCODING hook: strip the encoding added by
   ix86_encode_section_info (the '%' + TLS-model-char prefix) when
   printing symbol names.  NOTE(review): the body of this function is
   missing from this extract.  */
5692 ix86_strip_name_encoding (str)
5702 /* Load the thread pointer into a register. */
/* Load the thread pointer into a fresh pseudo and return it.  The
   load is a MEM of an UNSPEC_TP address, marked unchanging and given
   the GOT alias set so it does not conflict with ordinary stores.
   NOTE(review): this extract is missing lines (declaration of `tp`,
   braces, the return).  */
5705 get_thread_pointer ()
5709 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5710 tp = gen_rtx_MEM (Pmode, tp);
5711 RTX_UNCHANGING_P (tp) = 1;
5712 set_mem_alias_set (tp, ix86_GOT_alias_set ());
5713 tp = force_reg (Pmode, tp);
5718 /* Try machine-dependent ways of modifying an illegitimate address
5719 to be legitimate. If we find one, return the new, valid address.
5720 This macro is used in only one place: `memory_address' in explow.c.
5722 OLDX is the address as it was before break_out_memory_refs was called.
5723 In some cases it is useful to look at this to decide what needs to be done.
5725 MODE and WIN are passed so that this macro can use
5726 GO_IF_LEGITIMATE_ADDRESS.
5728 It is always safe for this macro to do nothing. It exists to recognize
5729 opportunities to optimize the output.
5731 For the 80386, we handle X+REG by loading X into a register R and
5732 using R+REG. R will go in a general reg and indexing will be used.
5733 However, if REG is a broken-out memory address or multiplication,
5734 nothing needs to be done because REG can certainly go in a general reg.
5736 When -fpic is used, special handling is needed for symbolic references.
5737 See comments by legitimize_pic_address in i386.c for details. */
/* Worker for LEGITIMIZE_ADDRESS (see the long comment above): try
   machine-dependent transformations to make X a legitimate address —
   TLS expansion, PIC legitimization, and canonicalization of
   shift/mult/plus combinations.
   NOTE(review): this extract is missing interior lines (the X
   parameter, `log`/`changed` declarations, switch headers, braces,
   returns).  */
5740 legitimize_address (x, oldx, mode)
5742 register rtx oldx ATTRIBUTE_UNUSED;
5743 enum machine_mode mode;
5748 if (TARGET_DEBUG_ADDR)
5750 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5751 GET_MODE_NAME (mode));
/* TLS symbols are expanded here according to their model.  */
5755 log = tls_symbolic_operand (x, mode);
5758 rtx dest, base, off, pic;
5763 case TLS_MODEL_GLOBAL_DYNAMIC:
5764 dest = gen_reg_rtx (Pmode);
/* 64-bit GD: a call to __tls_get_addr returns the address in %rax.  */
5767 rtx rax = gen_rtx_REG (Pmode, 0), insns;
5770 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5771 insns = get_insns ();
5774 emit_libcall_block (insns, dest, rax, x);
5777 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5780 case TLS_MODEL_LOCAL_DYNAMIC:
5781 base = gen_reg_rtx (Pmode);
5784 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5787 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5788 insns = get_insns ();
5791 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5792 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5793 emit_libcall_block (insns, base, rax, note);
5796 emit_insn (gen_tls_local_dynamic_base_32 (base));
/* LD: add the DTP-relative offset of X to the module base.  */
5798 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5799 off = gen_rtx_CONST (Pmode, off);
5801 return gen_rtx_PLUS (Pmode, base, off);
5803 case TLS_MODEL_INITIAL_EXEC:
5807 type = UNSPEC_GOTNTPOFF;
5811 if (reload_in_progress)
5812 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5813 pic = pic_offset_table_rtx;
5814 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
5816 else if (!TARGET_GNU_TLS)
5818 pic = gen_reg_rtx (Pmode);
5819 emit_insn (gen_set_got (pic));
5820 type = UNSPEC_GOTTPOFF;
5825 type = UNSPEC_INDNTPOFF;
/* IE: load the TP-relative offset from the GOT, add to the TP.  */
5828 base = get_thread_pointer ();
5830 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
5831 off = gen_rtx_CONST (Pmode, off);
5833 off = gen_rtx_PLUS (Pmode, pic, off);
5834 off = gen_rtx_MEM (Pmode, off);
5835 RTX_UNCHANGING_P (off) = 1;
5836 set_mem_alias_set (off, ix86_GOT_alias_set ());
5837 dest = gen_reg_rtx (Pmode);
5839 if (TARGET_64BIT || TARGET_GNU_TLS)
5841 emit_move_insn (dest, off);
5842 return gen_rtx_PLUS (Pmode, base, dest);
/* Sun-style TLS: offsets are subtracted from the thread pointer.  */
5845 emit_insn (gen_subsi3 (dest, base, off));
5848 case TLS_MODEL_LOCAL_EXEC:
5849 base = get_thread_pointer ();
5851 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5852 (TARGET_64BIT || TARGET_GNU_TLS)
5853 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5854 off = gen_rtx_CONST (Pmode, off);
5856 if (TARGET_64BIT || TARGET_GNU_TLS)
5857 return gen_rtx_PLUS (Pmode, base, off);
5860 dest = gen_reg_rtx (Pmode);
5861 emit_insn (gen_subsi3 (dest, base, off));
/* Non-TLS: hand symbolic constants to the PIC legitimizer.  */
5872 if (flag_pic && SYMBOLIC_CONST (x))
5873 return legitimize_pic_address (x, 0);
5875 /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
5876 if (GET_CODE (x) == ASHIFT
5877 && GET_CODE (XEXP (x, 1)) == CONST_INT
5878 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5881 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5882 GEN_INT (1 << log));
5885 if (GET_CODE (x) == PLUS)
5887 /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
5889 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5890 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5891 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5894 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5895 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5896 GEN_INT (1 << log));
5899 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5900 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5901 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5904 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5905 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5906 GEN_INT (1 << log));
5909 /* Put multiply first if it isn't already.  */
5910 if (GET_CODE (XEXP (x, 1)) == MULT)
5912 rtx tmp = XEXP (x, 0);
5913 XEXP (x, 0) = XEXP (x, 1);
5918 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5919 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
5920 created by virtual register instantiation, register elimination, and
5921 similar optimizations.  */
5922 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5925 x = gen_rtx_PLUS (Pmode,
5926 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5927 XEXP (XEXP (x, 1), 0)),
5928 XEXP (XEXP (x, 1), 1));
5932 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5933 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
5934 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5935 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5936 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5937 && CONSTANT_P (XEXP (x, 1)))
5940 rtx other = NULL_RTX;
5942 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5944 constant = XEXP (x, 1);
5945 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5947 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5949 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5950 other = XEXP (x, 1);
5958 x = gen_rtx_PLUS (Pmode,
5959 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5960 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5961 plus_constant (other, INTVAL (constant)));
5965 if (changed && legitimate_address_p (mode, x, FALSE))
/* Force MULT operands into registers where needed.  */
5968 if (GET_CODE (XEXP (x, 0)) == MULT)
5971 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5974 if (GET_CODE (XEXP (x, 1)) == MULT)
5977 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5981 && GET_CODE (XEXP (x, 1)) == REG
5982 && GET_CODE (XEXP (x, 0)) == REG)
5985 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5988 x = legitimize_pic_address (x, 0);
5991 if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: move one operand into a register and re-add.  */
5994 if (GET_CODE (XEXP (x, 0)) == REG)
5996 register rtx temp = gen_reg_rtx (Pmode);
5997 register rtx val = force_operand (XEXP (x, 1), temp);
5999 emit_move_insn (temp, val);
6005 else if (GET_CODE (XEXP (x, 1)) == REG)
6007 register rtx temp = gen_reg_rtx (Pmode);
6008 register rtx val = force_operand (XEXP (x, 0), temp);
6010 emit_move_insn (temp, val);
6020 /* Print an integer constant expression in assembler syntax. Addition
6021 and subtraction are the only arithmetic that may appear in these
6022 expressions. FILE is the stdio stream to write to, X is the rtx, and
6023 CODE is the operand print code from the output string. */
/* Print constant expression X to FILE in assembler syntax, emitting
   the PIC/TLS relocation suffix (@GOT, @GOTOFF, @PLT, @TPOFF, ...)
   implied by any UNSPEC wrapper.  CODE is the operand print code.
   Only addition and subtraction may appear (see comment above).
   NOTE(review): this extract is missing interior lines (case labels,
   braces, `+`/`-` output, breaks).  */
6026 output_pic_addr_const (file, x, code)
6033 switch (GET_CODE (x))
6043 assemble_name (file, XSTR (x, 0));
/* 'P' requests a PLT reference for non-local symbols.  */
6044 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
6045 fputs ("@PLT", file);
6052 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6053 assemble_name (asm_out_file, buf);
6057 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6061 /* This used to output parentheses around the expression,
6062 but that does not work on the 386 (either ATT or BSD assembler).  */
6063 output_pic_addr_const (file, XEXP (x, 0), code);
6067 if (GET_MODE (x) == VOIDmode)
6069 /* We can use %d if the number is <32 bits and positive.  */
6070 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6071 fprintf (file, "0x%lx%08lx",
6072 (unsigned long) CONST_DOUBLE_HIGH (x),
6073 (unsigned long) CONST_DOUBLE_LOW (x));
6075 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6078 /* We can't handle floating point constants;
6079 PRINT_OPERAND must handle them.  */
6080 output_operand_lossage ("floating constant misused");
6084 /* Some assemblers need integer constants to appear first.  */
6085 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6087 output_pic_addr_const (file, XEXP (x, 0), code);
6089 output_pic_addr_const (file, XEXP (x, 1), code);
6091 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6093 output_pic_addr_const (file, XEXP (x, 1), code);
6095 output_pic_addr_const (file, XEXP (x, 0), code);
/* MINUS: bracket the difference, Intel vs. AT&T dialect.  */
6103 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6104 output_pic_addr_const (file, XEXP (x, 0), code);
6106 output_pic_addr_const (file, XEXP (x, 1), code);
6108 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: print the wrapped operand, then the relocation suffix.  */
6112 if (XVECLEN (x, 0) != 1)
6114 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6115 switch (XINT (x, 1))
6118 fputs ("@GOT", file);
6121 fputs ("@GOTOFF", file);
6123 case UNSPEC_GOTPCREL:
6124 fputs ("@GOTPCREL(%rip)", file);
6126 case UNSPEC_GOTTPOFF:
6127 /* FIXME: This might be @TPOFF in Sun ld too.  */
6128 fputs ("@GOTTPOFF", file);
6131 fputs ("@TPOFF", file);
6135 fputs ("@TPOFF", file);
6137 fputs ("@NTPOFF", file);
6140 fputs ("@DTPOFF", file);
6142 case UNSPEC_GOTNTPOFF:
6144 fputs ("@GOTTPOFF(%rip)", file);
6146 fputs ("@GOTNTPOFF", file);
6148 case UNSPEC_INDNTPOFF:
6149 fputs ("@INDNTPOFF", file);
6152 output_operand_lossage ("invalid UNSPEC as operand");
6158 output_operand_lossage ("invalid expression as operand");
6162 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6163 We need to handle our special PIC relocations. */
/* ASM_OUTPUT_DWARF_ADDR_CONST worker (see comment above): emit an
   address-sized data directive followed by X, routing PIC constants
   through output_pic_addr_const for the relocation suffixes.
   NOTE(review): this extract is missing interior lines (the flag_pic
   condition, the trailing newline output, braces).  */
6166 i386_dwarf_output_addr_const (file, x)
6171 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6175 fprintf (file, "%s", ASM_LONG);
6178 output_pic_addr_const (file, x, '\0');
6180 output_addr_const (file, x);
6184 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6185 We need to emit DTP-relative relocations. */
/* ASM_OUTPUT_DWARF_DTPREL worker (see comment above): emit a
   DTP-relative relocation for X — "<directive> x@DTPOFF" — padded
   with ", 0" in the (presumably 8-byte) SIZE case so the entry fills
   the requested width; confirm against the full source.
   NOTE(review): this extract is missing interior lines (the SIZE
   switch, braces).  */
6188 i386_output_dwarf_dtprel (file, size, x)
6193 fputs (ASM_LONG, file);
6194 output_addr_const (file, x);
6195 fputs ("@DTPOFF", file);
6201 fputs (", 0", file);
6208 /* In the name of slightly smaller debug output, and to cater to
6209 general assembler lossage, recognize PIC+GOTOFF and turn it back
6210 into a direct symbol reference. */
/* For debug output: recognize GOT/GOTOFF address forms generated by
   legitimize_pic_address and turn them back into direct symbol
   references (see the comment above about assembler lossage).
   NOTE(review): this extract is missing interior lines (declarations
   of `x`/`y`, braces, early returns).  */
6213 i386_simplify_dwarf_addr (orig_x)
6218 if (GET_CODE (x) == MEM)
/* 64-bit: a MEM of (const (unspec GOTPCREL)) is just the symbol.  */
6223 if (GET_CODE (x) != CONST
6224 || GET_CODE (XEXP (x, 0)) != UNSPEC
6225 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6226 || GET_CODE (orig_x) != MEM)
6228 return XVECEXP (XEXP (x, 0), 0, 0);
6231 if (GET_CODE (x) != PLUS
6232 || GET_CODE (XEXP (x, 1)) != CONST)
6235 if (GET_CODE (XEXP (x, 0)) == REG
6236 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6237 /* %ebx + GOT/GOTOFF */
6239 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6241 /* %ebx + %reg * scale + GOT/GOTOFF */
6243 if (GET_CODE (XEXP (y, 0)) == REG
6244 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6246 else if (GET_CODE (XEXP (y, 1)) == REG
6247 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6251 if (GET_CODE (y) != REG
6252 && GET_CODE (y) != MULT
6253 && GET_CODE (y) != ASHIFT)
6259 x = XEXP (XEXP (x, 1), 0);
/* @GOT references only make sense inside a MEM; @GOTOFF outside.  */
6260 if (GET_CODE (x) == UNSPEC
6261 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6262 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6265 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6266 return XVECEXP (x, 0, 0);
/* Same, with an extra constant offset around the unspec.  */
6269 if (GET_CODE (x) == PLUS
6270 && GET_CODE (XEXP (x, 0)) == UNSPEC
6271 && GET_CODE (XEXP (x, 1)) == CONST_INT
6272 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6273 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6274 && GET_CODE (orig_x) != MEM)))
6276 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6278 return gen_rtx_PLUS (Pmode, y, x);
/* Write the condition-code suffix ("e", "ne", "a", ...) for CODE in
   CC mode MODE to FILE.  REVERSE inverts the condition; FP selects
   the fcmov/fp-friendly spellings.
   NOTE(review): this extract is missing interior lines (remaining
   parameter declarations, case labels, braces, abort calls).  */
6286 put_condition_code (code, mode, reverse, fp, file)
6288 enum machine_mode mode;
/* FP compares are first mapped onto an integer condition code.  */
6294 if (mode == CCFPmode || mode == CCFPUmode)
6296 enum rtx_code second_code, bypass_code;
6297 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6298 if (bypass_code != NIL || second_code != NIL)
6300 code = ix86_fp_compare_code_to_integer (code);
6304 code = reverse_condition (code);
6315 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6320 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6321 Those same assemblers have the same but opposite lossage on cmov.  */
6324 suffix = fp ? "nbe" : "a";
6327 if (mode == CCNOmode || mode == CCGOCmode)
6329 else if (mode == CCmode || mode == CCGCmode)
6340 if (mode == CCNOmode || mode == CCGOCmode)
6342 else if (mode == CCmode || mode == CCGCmode)
6351 suffix = fp ? "nb" : "ae";
6354 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6364 suffix = fp ? "u" : "p";
6367 suffix = fp ? "nu" : "np";
6372 fputs (suffix, file);
6376 print_reg (x, code, file)
6381 if (REGNO (x) == ARG_POINTER_REGNUM
6382 || REGNO (x) == FRAME_POINTER_REGNUM
6383 || REGNO (x) == FLAGS_REG
6384 || REGNO (x) == FPSR_REG)
6387 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6390 if (code == 'w' || MMX_REG_P (x))
6392 else if (code == 'b')
6394 else if (code == 'k')
6396 else if (code == 'q')
6398 else if (code == 'y')
6400 else if (code == 'h')
6403 code = GET_MODE_SIZE (GET_MODE (x));
6405 /* Irritatingly, AMD extended registers use different naming convention
6406 from the normal registers. */
6407 if (REX_INT_REG_P (x))
6414 error ("extended registers have no high halves");
6417 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6420 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6423 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6426 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6429 error ("unsupported operand size for extended register");
6437 if (STACK_TOP_P (x))
6439 fputs ("st(0)", file);
6446 if (! ANY_FP_REG_P (x))
6447 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6451 fputs (hi_reg_name[REGNO (x)], file);
6454 fputs (qi_reg_name[REGNO (x)], file);
6457 fputs (qi_high_reg_name[REGNO (x)], file);
6464 /* Locate some local-dynamic symbol still in use by this function
6465 so that we can print its name in some tls_local_dynamic_base
6469 get_some_local_dynamic_name ()
6473 if (cfun->machine->some_ld_name)
6474 return cfun->machine->some_ld_name;
6476 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6478 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6479 return cfun->machine->some_ld_name;
6485 get_some_local_dynamic_name_1 (px, data)
6487 void *data ATTRIBUTE_UNUSED;
6491 if (GET_CODE (x) == SYMBOL_REF
6492 && local_dynamic_symbolic_operand (x, Pmode))
6494 cfun->machine->some_ld_name = XSTR (x, 0);
6502 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6503 C -- print opcode suffix for set/cmov insn.
6504 c -- like C, but print reversed condition
6505 F,f -- likewise, but for floating-point.
6506 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6508 R -- print the prefix for register names.
6509 z -- print the opcode suffix for the size of the current operand.
6510 * -- print a star (in certain assembler syntax)
6511 A -- print an absolute memory reference.
6512 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6513 s -- print a shift double count, followed by the assemblers argument
6515 b -- print the QImode name of the register for the indicated operand.
6516 %b0 would print %al if operands[0] is reg 0.
6517 w -- likewise, print the HImode name of the register.
6518 k -- likewise, print the SImode name of the register.
6519 q -- likewise, print the DImode name of the register.
6520 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6521 y -- print "st(0)" instead of "st" as a register.
6522 D -- print condition for SSE cmp instruction.
6523 P -- if PIC, print an @PLT suffix.
6524 X -- don't print any sort of PIC '@' suffix for a symbol.
6525 & -- print some in-use local-dynamic symbol name.
6529 print_operand (file, x, code)
6539 if (ASSEMBLER_DIALECT == ASM_ATT)
6544 assemble_name (file, get_some_local_dynamic_name ());
6548 if (ASSEMBLER_DIALECT == ASM_ATT)
6550 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6552 /* Intel syntax. For absolute addresses, registers should not
6553 be surrounded by braces. */
6554 if (GET_CODE (x) != REG)
6557 PRINT_OPERAND (file, x, 0);
6565 PRINT_OPERAND (file, x, 0);
6570 if (ASSEMBLER_DIALECT == ASM_ATT)
6575 if (ASSEMBLER_DIALECT == ASM_ATT)
6580 if (ASSEMBLER_DIALECT == ASM_ATT)
6585 if (ASSEMBLER_DIALECT == ASM_ATT)
6590 if (ASSEMBLER_DIALECT == ASM_ATT)
6595 if (ASSEMBLER_DIALECT == ASM_ATT)
6600 /* 387 opcodes don't get size suffixes if the operands are
6602 if (STACK_REG_P (x))
6605 /* Likewise if using Intel opcodes. */
6606 if (ASSEMBLER_DIALECT == ASM_INTEL)
6609 /* This is the size of op from size of operand. */
6610 switch (GET_MODE_SIZE (GET_MODE (x)))
6613 #ifdef HAVE_GAS_FILDS_FISTS
6619 if (GET_MODE (x) == SFmode)
6634 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6636 #ifdef GAS_MNEMONICS
6662 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6664 PRINT_OPERAND (file, x, 0);
6670 /* Little bit of braindamage here. The SSE compare instructions
6671 does use completely different names for the comparisons that the
6672 fp conditional moves. */
6673 switch (GET_CODE (x))
6688 fputs ("unord", file);
6692 fputs ("neq", file);
6696 fputs ("nlt", file);
6700 fputs ("nle", file);
6703 fputs ("ord", file);
6711 #ifdef CMOV_SUN_AS_SYNTAX
6712 if (ASSEMBLER_DIALECT == ASM_ATT)
6714 switch (GET_MODE (x))
6716 case HImode: putc ('w', file); break;
6718 case SFmode: putc ('l', file); break;
6720 case DFmode: putc ('q', file); break;
6728 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6731 #ifdef CMOV_SUN_AS_SYNTAX
6732 if (ASSEMBLER_DIALECT == ASM_ATT)
6735 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6738 /* Like above, but reverse condition */
6740 /* Check to see if argument to %c is really a constant
6741 and not a condition code which needs to be reversed. */
6742 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
6744 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6747 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6750 #ifdef CMOV_SUN_AS_SYNTAX
6751 if (ASSEMBLER_DIALECT == ASM_ATT)
6754 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6760 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6763 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6766 int pred_val = INTVAL (XEXP (x, 0));
6768 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6769 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6771 int taken = pred_val > REG_BR_PROB_BASE / 2;
6772 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6774 /* Emit hints only in the case default branch prediction
6775 heruistics would fail. */
6776 if (taken != cputaken)
6778 /* We use 3e (DS) prefix for taken branches and
6779 2e (CS) prefix for not taken branches. */
6781 fputs ("ds ; ", file);
6783 fputs ("cs ; ", file);
6790 output_operand_lossage ("invalid operand code `%c'", code);
6794 if (GET_CODE (x) == REG)
6796 PRINT_REG (x, code, file);
6799 else if (GET_CODE (x) == MEM)
6801 /* No `byte ptr' prefix for call instructions. */
6802 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6805 switch (GET_MODE_SIZE (GET_MODE (x)))
6807 case 1: size = "BYTE"; break;
6808 case 2: size = "WORD"; break;
6809 case 4: size = "DWORD"; break;
6810 case 8: size = "QWORD"; break;
6811 case 12: size = "XWORD"; break;
6812 case 16: size = "XMMWORD"; break;
6817 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6820 else if (code == 'w')
6822 else if (code == 'k')
6826 fputs (" PTR ", file);
6830 if (flag_pic && CONSTANT_ADDRESS_P (x))
6831 output_pic_addr_const (file, x, code);
6832 /* Avoid (%rip) for call operands. */
6833 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
6834 && GET_CODE (x) != CONST_INT)
6835 output_addr_const (file, x);
6836 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6837 output_operand_lossage ("invalid constraints for operand");
6842 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6847 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6848 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6850 if (ASSEMBLER_DIALECT == ASM_ATT)
6852 fprintf (file, "0x%lx", l);
6855 /* These float cases don't actually occur as immediate operands. */
6856 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6860 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6861 fprintf (file, "%s", dstr);
6864 else if (GET_CODE (x) == CONST_DOUBLE
6865 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
6869 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6870 fprintf (file, "%s", dstr);
6877 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6879 if (ASSEMBLER_DIALECT == ASM_ATT)
6882 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6883 || GET_CODE (x) == LABEL_REF)
6885 if (ASSEMBLER_DIALECT == ASM_ATT)
6888 fputs ("OFFSET FLAT:", file);
6891 if (GET_CODE (x) == CONST_INT)
6892 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6894 output_pic_addr_const (file, x, code);
6896 output_addr_const (file, x);
6900 /* Print a memory operand whose address is ADDR. */
6903 print_operand_address (file, addr)
6907 struct ix86_address parts;
6908 rtx base, index, disp;
6911 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
6913 if (ASSEMBLER_DIALECT == ASM_INTEL)
6914 fputs ("DWORD PTR ", file);
6915 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6918 fputs ("fs:0", file);
6920 fputs ("gs:0", file);
6924 if (! ix86_decompose_address (addr, &parts))
6928 index = parts.index;
6930 scale = parts.scale;
6932 if (!base && !index)
6934 /* Displacement only requires special attention. */
6936 if (GET_CODE (disp) == CONST_INT)
6938 if (ASSEMBLER_DIALECT == ASM_INTEL)
6940 if (USER_LABEL_PREFIX[0] == 0)
6942 fputs ("ds:", file);
6944 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6947 output_pic_addr_const (file, addr, 0);
6949 output_addr_const (file, addr);
6951 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6953 && ((GET_CODE (addr) == SYMBOL_REF
6954 && ! tls_symbolic_operand (addr, GET_MODE (addr)))
6955 || GET_CODE (addr) == LABEL_REF
6956 || (GET_CODE (addr) == CONST
6957 && GET_CODE (XEXP (addr, 0)) == PLUS
6958 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
6959 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
6960 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
6961 fputs ("(%rip)", file);
6965 if (ASSEMBLER_DIALECT == ASM_ATT)
6970 output_pic_addr_const (file, disp, 0);
6971 else if (GET_CODE (disp) == LABEL_REF)
6972 output_asm_label (disp);
6974 output_addr_const (file, disp);
6979 PRINT_REG (base, 0, file);
6983 PRINT_REG (index, 0, file);
6985 fprintf (file, ",%d", scale);
6991 rtx offset = NULL_RTX;
6995 /* Pull out the offset of a symbol; print any symbol itself. */
6996 if (GET_CODE (disp) == CONST
6997 && GET_CODE (XEXP (disp, 0)) == PLUS
6998 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7000 offset = XEXP (XEXP (disp, 0), 1);
7001 disp = gen_rtx_CONST (VOIDmode,
7002 XEXP (XEXP (disp, 0), 0));
7006 output_pic_addr_const (file, disp, 0);
7007 else if (GET_CODE (disp) == LABEL_REF)
7008 output_asm_label (disp);
7009 else if (GET_CODE (disp) == CONST_INT)
7012 output_addr_const (file, disp);
7018 PRINT_REG (base, 0, file);
7021 if (INTVAL (offset) >= 0)
7023 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7027 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7034 PRINT_REG (index, 0, file);
7036 fprintf (file, "*%d", scale);
7044 output_addr_const_extra (file, x)
7050 if (GET_CODE (x) != UNSPEC)
7053 op = XVECEXP (x, 0, 0);
7054 switch (XINT (x, 1))
7056 case UNSPEC_GOTTPOFF:
7057 output_addr_const (file, op);
7058 /* FIXME: This might be @TPOFF in Sun ld. */
7059 fputs ("@GOTTPOFF", file);
7062 output_addr_const (file, op);
7063 fputs ("@TPOFF", file);
7066 output_addr_const (file, op);
7068 fputs ("@TPOFF", file);
7070 fputs ("@NTPOFF", file);
7073 output_addr_const (file, op);
7074 fputs ("@DTPOFF", file);
7076 case UNSPEC_GOTNTPOFF:
7077 output_addr_const (file, op);
7079 fputs ("@GOTTPOFF(%rip)", file);
7081 fputs ("@GOTNTPOFF", file);
7083 case UNSPEC_INDNTPOFF:
7084 output_addr_const (file, op);
7085 fputs ("@INDNTPOFF", file);
7095 /* Split one or more DImode RTL references into pairs of SImode
7096 references. The RTL can be REG, offsettable MEM, integer constant, or
7097 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7098 split and "num" is its length. lo_half and hi_half are output arrays
7099 that parallel "operands". */
/* NOTE(review): this file is a non-contiguous excerpt -- the embedded
   original line numbers skip values (7102 -> 7105 -> 7109 ...), so the
   return type, remaining parameter declarations, loop header and
   closing braces of this function are elided from this view.  Comments
   below describe only the visible lines.  */
7102 split_di (operands, num, lo_half, hi_half)
7105 rtx lo_half[], hi_half[];
/* Per-operand work: split one value into two SImode halves at byte
   offsets 0 and 4.  */
7109 rtx op = operands[num];
7111 /* simplify_subreg refuses to split volatile memory addresses,
7112 but we still have to handle it. */
7113 if (GET_CODE (op) == MEM)
/* MEM case: re-address the low and high SImode words directly.  */
7115 lo_half[num] = adjust_address (op, SImode, 0);
7116 hi_half[num] = adjust_address (op, SImode, 4);
/* Otherwise take SImode subregs; VOIDmode constants (CONST_INT /
   CONST_DOUBLE) are treated as DImode sources.  */
7120 lo_half[num] = simplify_gen_subreg (SImode, op,
7121 GET_MODE (op) == VOIDmode
7122 ? DImode : GET_MODE (op), 0);
7123 hi_half[num] = simplify_gen_subreg (SImode, op,
7124 GET_MODE (op) == VOIDmode
7125 ? DImode : GET_MODE (op), 4);
7129 /* Split one or more TImode RTL references into pairs of DImode
7130 references. The RTL can be REG, offsettable MEM, integer constant, or
7131 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7132 split and "num" is its length. lo_half and hi_half are output arrays
7133 that parallel "operands". */
/* NOTE(review): non-contiguous excerpt -- the embedded line numbers skip
   (7136 -> 7139 -> 7143 ...), so this function's return type, remaining
   parameter declarations, loop header and closing braces are elided from
   this view.  Comments below describe only the visible lines.  */
7136 split_ti (operands, num, lo_half, hi_half)
7139 rtx lo_half[], hi_half[];
/* Per-operand work: split one TImode value into two DImode halves at
   byte offsets 0 and 8.  */
7143 rtx op = operands[num];
7145 /* simplify_subreg refuses to split volatile memory addresses, but we
7146 still have to handle it. */
7147 if (GET_CODE (op) == MEM)
/* MEM case: re-address the low and high DImode words directly.  */
7149 lo_half[num] = adjust_address (op, DImode, 0);
7150 hi_half[num] = adjust_address (op, DImode, 8);
/* Otherwise take DImode subregs of the TImode operand.  */
7154 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7155 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7160 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7161 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7162 is the expression of the binary operation. The output may either be
7163 emitted here, or returned to the caller, like all output_* functions.
7165 There is no guarantee that the operands are the same mode, as they
7166 might be within FLOAT or FLOAT_EXTEND expressions. */
7168 #ifndef SYSV386_COMPAT
7169 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7170 wants to fix the assemblers because that causes incompatibility
7171 with gcc. No-one wants to fix gcc because that causes
7172 incompatibility with assemblers... You can use the option of
7173 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7174 #define SYSV386_COMPAT 1
7178 output_387_binary_op (insn, operands)
7182 static char buf[30];
7185 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7187 #ifdef ENABLE_CHECKING
7188 /* Even if we do not want to check the inputs, this documents input
7189 constraints. Which helps in understanding the following code. */
7190 if (STACK_REG_P (operands[0])
7191 && ((REG_P (operands[1])
7192 && REGNO (operands[0]) == REGNO (operands[1])
7193 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7194 || (REG_P (operands[2])
7195 && REGNO (operands[0]) == REGNO (operands[2])
7196 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7197 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7203 switch (GET_CODE (operands[3]))
7206 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7207 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7215 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7216 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7224 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7225 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7233 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7234 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7248 if (GET_MODE (operands[0]) == SFmode)
7249 strcat (buf, "ss\t{%2, %0|%0, %2}");
7251 strcat (buf, "sd\t{%2, %0|%0, %2}");
7256 switch (GET_CODE (operands[3]))
7260 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7262 rtx temp = operands[2];
7263 operands[2] = operands[1];
7267 /* know operands[0] == operands[1]. */
7269 if (GET_CODE (operands[2]) == MEM)
7275 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7277 if (STACK_TOP_P (operands[0]))
7278 /* How is it that we are storing to a dead operand[2]?
7279 Well, presumably operands[1] is dead too. We can't
7280 store the result to st(0) as st(0) gets popped on this
7281 instruction. Instead store to operands[2] (which I
7282 think has to be st(1)). st(1) will be popped later.
7283 gcc <= 2.8.1 didn't have this check and generated
7284 assembly code that the Unixware assembler rejected. */
7285 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7287 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7291 if (STACK_TOP_P (operands[0]))
7292 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7294 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7299 if (GET_CODE (operands[1]) == MEM)
7305 if (GET_CODE (operands[2]) == MEM)
7311 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7314 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7315 derived assemblers, confusingly reverse the direction of
7316 the operation for fsub{r} and fdiv{r} when the
7317 destination register is not st(0). The Intel assembler
7318 doesn't have this brain damage. Read !SYSV386_COMPAT to
7319 figure out what the hardware really does. */
7320 if (STACK_TOP_P (operands[0]))
7321 p = "{p\t%0, %2|rp\t%2, %0}";
7323 p = "{rp\t%2, %0|p\t%0, %2}";
7325 if (STACK_TOP_P (operands[0]))
7326 /* As above for fmul/fadd, we can't store to st(0). */
7327 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7329 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7334 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7337 if (STACK_TOP_P (operands[0]))
7338 p = "{rp\t%0, %1|p\t%1, %0}";
7340 p = "{p\t%1, %0|rp\t%0, %1}";
7342 if (STACK_TOP_P (operands[0]))
7343 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7345 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7350 if (STACK_TOP_P (operands[0]))
7352 if (STACK_TOP_P (operands[1]))
7353 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7355 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7358 else if (STACK_TOP_P (operands[1]))
7361 p = "{\t%1, %0|r\t%0, %1}";
7363 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7369 p = "{r\t%2, %0|\t%0, %2}";
7371 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7384 /* Output code to initialize control word copies used by
7385 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7386 is set to control word rounding downwards. */
/* NOTE(review): excerpt with elided lines -- the return type, opening
   brace, the tail of the `if' condition at 7395 (line 7396) and the
   `else' introducing 7399 are not visible in this view.  */
7388 emit_i387_cw_initialization (normal, round_down)
7389 rtx normal, round_down;
/* Scratch HImode pseudo in which the modified control word is built.  */
7391 rtx reg = gen_reg_rtx (HImode);
/* Capture the current x87 control word into NORMAL, then copy it.  */
7393 emit_insn (gen_x86_fnstcw_1 (normal));
7394 emit_move_insn (reg, normal);
/* Two equivalent ways of setting the rounding-control field: an insert
   of 0xc via movsi_insv_1, or an OR of 0xc00 -- presumably both target
   the RC bits of the control word; TODO confirm against the x87
   control-word layout.  */
7395 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7397 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7399 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7400 emit_move_insn (round_down, reg);
7403 /* Output code for INSN to convert a float to a signed int. OPERANDS
7404 are the insn operands. The output may be [HSD]Imode and the input
7405 operand may be [SDX]Fmode. */
/* NOTE(review): excerpt with elided lines -- the return type, the bodies
   of the two sanity `if's at 7421/7424 (presumably aborts -- TODO
   confirm), the `else' before 7431 and the final return are not visible
   in this view.  */
7408 output_fix_trunc (insn, operands)
/* Does the value on top of the 387 stack die in this insn?  */
7412 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7413 int dimode_p = GET_MODE (operands[0]) == DImode;
7415 /* Jump through a hoop or two for DImode, since the hardware has no
7416 non-popping instruction. We used to do this a different way, but
7417 that was somewhat fragile and broke with post-reload splitters. */
7418 if (dimode_p && !stack_top_dies)
7419 output_asm_insn ("fld\t%y1", operands);
/* Sanity checks: input must be the stack top, output must be memory.  */
7421 if (!STACK_TOP_P (operands[1]))
7424 if (GET_CODE (operands[0]) != MEM)
/* Load the modified control word (%3), store the integer (popping when
   the stack top dies or for DImode), then restore the original control
   word (%2).  */
7427 output_asm_insn ("fldcw\t%3", operands);
7428 if (stack_top_dies || dimode_p)
7429 output_asm_insn ("fistp%z0\t%0", operands);
7431 output_asm_insn ("fist%z0\t%0", operands);
7432 output_asm_insn ("fldcw\t%2", operands);
7437 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7438 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7439 when fucom should be used. */
7442 output_fp_compare (insn, operands, eflags_p, unordered_p)
7445 int eflags_p, unordered_p;
7448 rtx cmp_op0 = operands[0];
7449 rtx cmp_op1 = operands[1];
7450 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7455 cmp_op1 = operands[2];
7459 if (GET_MODE (operands[0]) == SFmode)
7461 return "ucomiss\t{%1, %0|%0, %1}";
7463 return "comiss\t{%1, %0|%0, %y}";
7466 return "ucomisd\t{%1, %0|%0, %1}";
7468 return "comisd\t{%1, %0|%0, %y}";
7471 if (! STACK_TOP_P (cmp_op0))
7474 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7476 if (STACK_REG_P (cmp_op1)
7478 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7479 && REGNO (cmp_op1) != FIRST_STACK_REG)
7481 /* If both the top of the 387 stack dies, and the other operand
7482 is also a stack register that dies, then this must be a
7483 `fcompp' float compare */
7487 /* There is no double popping fcomi variant. Fortunately,
7488 eflags is immune from the fstp's cc clobbering. */
7490 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7492 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7500 return "fucompp\n\tfnstsw\t%0";
7502 return "fcompp\n\tfnstsw\t%0";
7515 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7517 static const char * const alt[24] =
7529 "fcomi\t{%y1, %0|%0, %y1}",
7530 "fcomip\t{%y1, %0|%0, %y1}",
7531 "fucomi\t{%y1, %0|%0, %y1}",
7532 "fucomip\t{%y1, %0|%0, %y1}",
7539 "fcom%z2\t%y2\n\tfnstsw\t%0",
7540 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7541 "fucom%z2\t%y2\n\tfnstsw\t%0",
7542 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7544 "ficom%z2\t%y2\n\tfnstsw\t%0",
7545 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7553 mask = eflags_p << 3;
7554 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7555 mask |= unordered_p << 1;
7556 mask |= stack_top_dies;
7569 ix86_output_addr_vec_elt (file, value)
7573 const char *directive = ASM_LONG;
7578 directive = ASM_QUAD;
7584 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7588 ix86_output_addr_diff_elt (file, value, rel)
7593 fprintf (file, "%s%s%d-%s%d\n",
7594 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7595 else if (HAVE_AS_GOTOFF_IN_DATA)
7596 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7598 else if (TARGET_MACHO)
7599 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7600 machopic_function_base_name () + 1);
7603 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7604 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7607 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* NOTE(review): excerpt with elided lines -- the rest of this header
   comment, the local declarations (`rtx tmp;' etc.), the body of the
   `!reload_completed' check, and the final emit_insn/closing brace are
   not visible in this view.  */
7611 ix86_expand_clear (dest)
7616 /* We play register width games, which are only valid after reload. */
7617 if (!reload_completed)
7620 /* Avoid HImode and its attendant prefix byte. */
7621 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7622 dest = gen_rtx_REG (SImode, REGNO (dest));
/* Build (set dest 0); below it may be wrapped in a PARALLEL that also
   clobbers the condition codes, matching the xor patterns.  */
7624 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7626 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7627 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
/* NOTE(review): hard register 17 is presumably the flags register --
   elsewhere in this file the same clobber is written with FLAGS_REG
   (e.g. ix86_expand_binary_operator); consider using the macro here
   for consistency.  */
7629 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7630 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7636 /* X is an unchanging MEM. If it is a constant pool reference, return
7637 the constant pool rtx, else NULL. */
/* NOTE(review): excerpt with elided lines -- the return type, the
   unwrapping assignments between the guard checks (presumably
   `x = XEXP (x, ...);' steps), the early `return NULL_RTX;' bodies and
   the closing lines are not visible in this view.  */
7640 maybe_get_pool_constant (x)
/* Under 32-bit PIC the guards below expect the address shape
   (plus pic_offset_table_rtx (const (unspec [sym] UNSPEC_GOTOFF))),
   peeling those wrappers off to reach the SYMBOL_REF.  */
7645 if (flag_pic && ! TARGET_64BIT)
7647 if (GET_CODE (x) != PLUS)
7649 if (XEXP (x, 0) != pic_offset_table_rtx)
7652 if (GET_CODE (x) != CONST)
7655 if (GET_CODE (x) != UNSPEC)
7657 if (XINT (x, 1) != UNSPEC_GOTOFF)
7659 x = XVECEXP (x, 0, 0);
/* A SYMBOL_REF addressing the constant pool yields its constant.  */
7662 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7663 return get_pool_constant (x);
7669 ix86_expand_move (mode, operands)
7670 enum machine_mode mode;
7673 int strict = (reload_in_progress || reload_completed);
7674 rtx insn, op0, op1, tmp;
7679 /* ??? We have a slight problem. We need to say that tls symbols are
7680 not legitimate constants so that reload does not helpfully reload
7681 these constants from a REG_EQUIV, which we cannot handle. (Recall
7682 that general- and local-dynamic address resolution requires a
7685 However, if we say that tls symbols are not legitimate constants,
7686 then emit_move_insn helpfully drop them into the constant pool.
7688 It is far easier to work around emit_move_insn than reload. Recognize
7689 the MEM that we would have created and extract the symbol_ref. */
7692 && GET_CODE (op1) == MEM
7693 && RTX_UNCHANGING_P (op1))
7695 tmp = maybe_get_pool_constant (op1);
7696 /* Note that we only care about symbolic constants here, which
7697 unlike CONST_INT will always have a proper mode. */
7698 if (tmp && GET_MODE (tmp) == Pmode)
7702 if (tls_symbolic_operand (op1, Pmode))
7704 op1 = legitimize_address (op1, op1, VOIDmode);
7705 if (GET_CODE (op0) == MEM)
7707 tmp = gen_reg_rtx (mode);
7708 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7712 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7717 rtx temp = ((reload_in_progress
7718 || ((op0 && GET_CODE (op0) == REG)
7720 ? op0 : gen_reg_rtx (Pmode));
7721 op1 = machopic_indirect_data_reference (op1, temp);
7722 op1 = machopic_legitimize_pic_address (op1, mode,
7723 temp == op1 ? 0 : temp);
7727 if (MACHOPIC_INDIRECT)
7728 op1 = machopic_indirect_data_reference (op1, 0);
7732 insn = gen_rtx_SET (VOIDmode, op0, op1);
7736 #endif /* TARGET_MACHO */
7737 if (GET_CODE (op0) == MEM)
7738 op1 = force_reg (Pmode, op1);
7742 if (GET_CODE (temp) != REG)
7743 temp = gen_reg_rtx (Pmode);
7744 temp = legitimize_pic_address (op1, temp);
7752 if (GET_CODE (op0) == MEM
7753 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7754 || !push_operand (op0, mode))
7755 && GET_CODE (op1) == MEM)
7756 op1 = force_reg (mode, op1);
7758 if (push_operand (op0, mode)
7759 && ! general_no_elim_operand (op1, mode))
7760 op1 = copy_to_mode_reg (mode, op1);
7762 /* Force large constants in 64bit compilation into register
7763 to get them CSEed. */
7764 if (TARGET_64BIT && mode == DImode
7765 && immediate_operand (op1, mode)
7766 && !x86_64_zero_extended_value (op1)
7767 && !register_operand (op0, mode)
7768 && optimize && !reload_completed && !reload_in_progress)
7769 op1 = copy_to_mode_reg (mode, op1);
7771 if (FLOAT_MODE_P (mode))
7773 /* If we are loading a floating point constant to a register,
7774 force the value to memory now, since we'll get better code
7775 out the back end. */
7779 else if (GET_CODE (op1) == CONST_DOUBLE
7780 && register_operand (op0, mode))
7781 op1 = validize_mem (force_const_mem (mode, op1));
7785 insn = gen_rtx_SET (VOIDmode, op0, op1);
/* NOTE(review): excerpt with elided lines -- the return type, the
   `operands' parameter declaration, braces, and any early return after
   the second branch are not visible in this view.  The header comment
   says "constants other than zero" but no zero-constant check is
   visible in the condition below; it may sit on an elided line -- TODO
   confirm against the full source.  */
7791 ix86_expand_vector_move (mode, operands)
7792 enum machine_mode mode;
7795 /* Force constants other than zero into memory. We do not know how
7796 the instructions used to build constants modify the upper 64 bits
7797 of the register, once we have that information we may be able
7798 to handle some of them more efficiently. */
7799 if ((reload_in_progress | reload_completed) == 0
7800 && register_operand (operands[0], mode)
7801 && CONSTANT_P (operands[1]))
/* Replace the constant with a MEM whose address lives in a fresh
   pseudo, pointing at a constant-pool copy.  */
7803 rtx addr = gen_reg_rtx (Pmode);
7804 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
7805 operands[1] = gen_rtx_MEM (mode, addr);
7808 /* Make operand1 a register if it isn't already. */
/* i.e. when neither side is a register, route the move through a fresh
   register to avoid a mem-to-mem move.  */
7809 if ((reload_in_progress | reload_completed) == 0
7810 && !register_operand (operands[0], mode)
7811 && !register_operand (operands[1], mode))
7813 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
7814 emit_move_insn (operands[0], temp);
/* Emit the (possibly rewritten) move.  */
7818 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
7821 /* Attempt to expand a binary operator. Make the expansion closer to the
7822 actual machine, then just general_operand, which will allow 3 separate
7823 memory references (one output, two input) in a single insn. */
7826 ix86_expand_binary_operator (code, mode, operands)
7828 enum machine_mode mode;
7831 int matching_memory;
7832 rtx src1, src2, dst, op, clob;
7838 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7839 if (GET_RTX_CLASS (code) == 'c'
7840 && (rtx_equal_p (dst, src2)
7841 || immediate_operand (src1, mode)))
7848 /* If the destination is memory, and we do not have matching source
7849 operands, do things in registers. */
7850 matching_memory = 0;
7851 if (GET_CODE (dst) == MEM)
7853 if (rtx_equal_p (dst, src1))
7854 matching_memory = 1;
7855 else if (GET_RTX_CLASS (code) == 'c'
7856 && rtx_equal_p (dst, src2))
7857 matching_memory = 2;
7859 dst = gen_reg_rtx (mode);
7862 /* Both source operands cannot be in memory. */
7863 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7865 if (matching_memory != 2)
7866 src2 = force_reg (mode, src2);
7868 src1 = force_reg (mode, src1);
7871 /* If the operation is not commutable, source 1 cannot be a constant
7872 or non-matching memory. */
7873 if ((CONSTANT_P (src1)
7874 || (!matching_memory && GET_CODE (src1) == MEM))
7875 && GET_RTX_CLASS (code) != 'c')
7876 src1 = force_reg (mode, src1);
7878 /* If optimizing, copy to regs to improve CSE */
7879 if (optimize && ! no_new_pseudos)
7881 if (GET_CODE (dst) == MEM)
7882 dst = gen_reg_rtx (mode);
7883 if (GET_CODE (src1) == MEM)
7884 src1 = force_reg (mode, src1);
7885 if (GET_CODE (src2) == MEM)
7886 src2 = force_reg (mode, src2);
7889 /* Emit the instruction. */
7891 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7892 if (reload_in_progress)
7894 /* Reload doesn't know about the flags register, and doesn't know that
7895 it doesn't want to clobber it. We can only do this with PLUS. */
7902 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7903 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7906 /* Fix up the destination if needed. */
7907 if (dst != operands[0])
7908 emit_move_insn (operands[0], dst);
7911 /* Return TRUE or FALSE depending on whether the binary operator meets the
7912 appropriate constraints. */
/* NOTE(review): excerpt with elided lines -- the return type, the
   (presumed) `return FALSE;' body under each guard and the final
   `return TRUE;' are not visible in this view (embedded line numbers
   7922, 7925, 7931 and 7937+ are skipped).  Each guard below rejects an
   operand arrangement the machine cannot encode.  */
7915 ix86_binary_operator_ok (code, mode, operands)
7917 enum machine_mode mode ATTRIBUTE_UNUSED;
7920 /* Both source operands cannot be in memory. */
7921 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7923 /* If the operation is not commutable, source 1 cannot be a constant. */
7924 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
7926 /* If the destination is memory, we must have a matching source operand. */
7927 if (GET_CODE (operands[0]) == MEM
7928 && ! (rtx_equal_p (operands[0], operands[1])
7929 || (GET_RTX_CLASS (code) == 'c'
7930 && rtx_equal_p (operands[0], operands[2]))))
7932 /* If the operation is not commutable and the source 1 is memory, we must
7933 have a matching destination. */
7934 if (GET_CODE (operands[1]) == MEM
7935 && GET_RTX_CLASS (code) != 'c'
7936 && ! rtx_equal_p (operands[0], operands[1]))
/* NOTE(review): extract is gappy (embedded numbering skips); the dst/src
   initialization from operands[] and several braces are among the dropped
   lines.  Comments below describe only what the visible code shows.  */
7941 /* Attempt to expand a unary operator. Make the expansion closer to the
7942 actual machine, then just general_operand, which will allow 2 separate
7943 memory references (one output, one input) in a single insn. */
7946 ix86_expand_unary_operator (code, mode, operands)
7948 enum machine_mode mode;
7951 int matching_memory;
7952 rtx src, dst, op, clob;
7957 /* If the destination is memory, and we do not have matching source
7958 operands, do things in registers. */
7959 matching_memory = 0;
7960 if (GET_CODE (dst) == MEM)
7962 if (rtx_equal_p (dst, src))
7963 matching_memory = 1;
7965 dst = gen_reg_rtx (mode);
7968 /* When source operand is memory, destination must match. */
7969 if (!matching_memory && GET_CODE (src) == MEM)
7970 src = force_reg (mode, src);
7972 /* If optimizing, copy to regs to improve CSE */
7973 if (optimize && ! no_new_pseudos)
7975 if (GET_CODE (dst) == MEM)
7976 dst = gen_reg_rtx (mode);
7977 if (GET_CODE (src) == MEM)
7978 src = force_reg (mode, src);
7981 /* Emit the instruction. */
7983 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7984 if (reload_in_progress || code == NOT)
7986 /* Reload doesn't know about the flags register, and doesn't know that
7987 it doesn't want to clobber it. */
/* NOT does not alter EFLAGS, so during reload (or for NOT itself) the
   pattern is presumably emitted without the flags CLOBBER -- the branch
   body is among the missing lines; confirm against the full source.  */
7994 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7995 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7998 /* Fix up the destination if needed. */
7999 if (dst != operands[0])
8000 emit_move_insn (operands[0], dst);
/* NOTE(review): the return statements for both the failing and succeeding
   paths are among the lines dropped by extraction (numbering skips
   8015 -> 8020); presumably `return FALSE;` / `return TRUE;`.  */
8003 /* Return TRUE or FALSE depending on whether the unary operator meets the
8004 appropriate constraints. */
8007 ix86_unary_operator_ok (code, mode, operands)
8008 enum rtx_code code ATTRIBUTE_UNUSED;
8009 enum machine_mode mode ATTRIBUTE_UNUSED;
8010 rtx operands[2] ATTRIBUTE_UNUSED;
8012 /* If one of operands is memory, source and destination must match. */
8013 if ((GET_CODE (operands[0]) == MEM
8014 || GET_CODE (operands[1]) == MEM)
8015 && ! rtx_equal_p (operands[0], operands[1]))
/* NOTE(review): extract is gappy -- the early `return 0;` statements after
   the failing tests and the bodies of the req_mode case checks (lines
   8047-8067) are missing here; confirm against the full i386.c.  */
8020 /* Return TRUE or FALSE depending on whether the first SET in INSN
8021 has source and destination with matching CC modes, and that the
8022 CC mode is at least as constrained as REQ_MODE. */
8025 ix86_match_ccmode (insn, req_mode)
8027 enum machine_mode req_mode;
8030 enum machine_mode set_mode;
/* A PARALLEL (e.g. compare plus clobber) is examined via its first
   element, which must be a SET whose source is a COMPARE.  */
8032 set = PATTERN (insn);
8033 if (GET_CODE (set) == PARALLEL)
8034 set = XVECEXP (set, 0, 0);
8035 if (GET_CODE (set) != SET)
8037 if (GET_CODE (SET_SRC (set)) != COMPARE)
8040 set_mode = GET_MODE (SET_DEST (set));
8044 if (req_mode != CCNOmode
8045 && (req_mode != CCmode
8046 || XEXP (SET_SRC (set), 1) != const0_rtx))
8050 if (req_mode == CCGCmode)
8054 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8058 if (req_mode == CCZmode)
/* Accept only when the COMPARE itself carries the mode found on the
   destination register.  */
8068 return (GET_MODE (SET_SRC (set)) == set_mode);
/* NOTE(review): declarations of `tmp` and `flags` (and the return type /
   parameter declarations) fall in the lines dropped by extraction.  */
8071 /* Generate insn patterns to do an integer compare of OPERANDS. */
8074 ix86_expand_int_compare (code, op0, op1)
8078 enum machine_mode cmpmode;
/* Pick the least constrained CC mode that still supports CODE, then emit
   flags = compare (op0, op1) into the hard flags register.  */
8081 cmpmode = SELECT_CC_MODE (code, op0, op1);
8082 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8084 /* This is very simple, but making the interface the same as in the
8085 FP case makes the rest of the code easier. */
8086 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8087 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8089 /* Return the test that should be put into the flags user, i.e.
8090 the bcc, scc, or cmov instruction. */
8091 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8094 /* Figure out whether to use ordered or unordered fp comparisons.
8095 Return the appropriate mode to use. */
8098 ix86_fp_compare_mode (code)
8099 enum rtx_code code ATTRIBUTE_UNUSED;
8101 /* ??? In order to make all comparisons reversible, we do all comparisons
8102 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8103 all forms trapping and nontrapping comparisons, we can make inequality
8104 comparisons trapping again, since it results in better code when using
8105 FCOM based compares. */
/* CCFPUmode = unordered (non-trapping) FP compare; CCFPmode = ordered.
   CODE is currently ignored (see ??? above).  */
8106 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the CC mode needed for comparison CODE on OP0/OP1.
   NOTE(review): the `switch (code)` header, the per-case `return`
   statements (CCZmode/CCmode/CCNOmode/CCGCmode/CCGOCmode, presumably) and
   the default case were dropped by extraction -- only the case labels and
   their flag-usage comments survive; confirm against the full i386.c.  */
8110 ix86_cc_mode (code, op0, op1)
8114 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8115 return ix86_fp_compare_mode (code);
8118 /* Only zero flag is needed. */
8120 case NE: /* ZF!=0 */
8122 /* Codes needing carry flag. */
8123 case GEU: /* CF=0 */
8124 case GTU: /* CF=0 & ZF=0 */
8125 case LTU: /* CF=1 */
8126 case LEU: /* CF=1 | ZF=1 */
8128 /* Codes possibly doable only with sign flag when
8129 comparing against zero. */
8130 case GE: /* SF=OF or SF=0 */
8131 case LT: /* SF<>OF or SF=1 */
8132 if (op1 == const0_rtx)
8135 /* For other cases Carry flag is not required. */
8137 /* Codes doable only with sign flag when comparing
8138 against zero, but we miss jump instruction for it
8139 so we need to use relational tests agains overflow
8140 that thus needs to be zero. */
8141 case GT: /* ZF=0 & SF=OF */
8142 case LE: /* ZF=1 | SF<>OF */
8143 if (op1 == const0_rtx)
8147 /* strcmp pattern do (use flags) and combine may ask us for proper
8156 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8159 ix86_use_fcomi_compare (code)
8160 enum rtx_code code ATTRIBUTE_UNUSED;
8162 enum rtx_code swapped_code = swap_condition (code);
/* FCOMI is chosen when it is the cheapest strategy for either the
   comparison as written or its operand-swapped form.  */
8163 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8164 || (ix86_fp_comparison_cost (swapped_code)
8165 == ix86_fp_comparison_fcomi_cost (swapped_code)));
/* NOTE(review): extract is gappy -- parameter declarations, the SSE branch
   condition around line 8186, and the final writeback of op0/op1 through
   *pop0/*pop1 plus `return code;` are among the dropped lines.  */
8168 /* Swap, force into registers, or otherwise massage the two operands
8169 to a fp comparison. The operands are updated in place; the new
8170 comparsion code is returned. */
8172 static enum rtx_code
8173 ix86_prepare_fp_compare_args (code, pop0, pop1)
8177 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8178 rtx op0 = *pop0, op1 = *pop1;
8179 enum machine_mode op_mode = GET_MODE (op0);
8180 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8182 /* All of the unordered compare instructions only work on registers.
8183 The same is true of the XFmode compare instructions. The same is
8184 true of the fcomi compare instructions. */
8187 && (fpcmp_mode == CCFPUmode
8188 || op_mode == XFmode
8189 || op_mode == TFmode
8190 || ix86_use_fcomi_compare (code)))
8192 op0 = force_reg (op_mode, op0);
8193 op1 = force_reg (op_mode, op1);
8197 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8198 things around if they appear profitable, otherwise force op0
8201 if (standard_80387_constant_p (op0) == 0
8202 || (GET_CODE (op0) == MEM
8203 && ! (standard_80387_constant_p (op1) == 0
8204 || GET_CODE (op1) == MEM)))
8207 tmp = op0, op0 = op1, op1 = tmp;
8208 code = swap_condition (code);
8211 if (GET_CODE (op0) != REG)
8212 op0 = force_reg (op_mode, op0);
/* Constants usable by fld (0.0/1.0) go to a register; other constants
   are spilled to the constant pool and accessed through memory.  */
8214 if (CONSTANT_P (op1))
8216 if (standard_80387_constant_p (op1))
8217 op1 = force_reg (op_mode, op1);
8219 op1 = validize_mem (force_const_mem (op_mode, op1));
8223 /* Try to rearrange the comparison to make it cheaper. */
8224 if (ix86_fp_comparison_cost (code)
8225 > ix86_fp_comparison_cost (swap_condition (code))
8226 && (GET_CODE (op1) == REG || !no_new_pseudos))
8229 tmp = op0, op0 = op1, op1 = tmp;
8230 code = swap_condition (code);
8231 if (GET_CODE (op0) != REG)
8232 op0 = force_reg (op_mode, op0);
/* NOTE(review): only the header of this function survived extraction --
   the entire switch body (lines 8245-8273) mapping FP comparison codes to
   integer codes is missing; do not infer the mapping from here.  */
8240 /* Convert comparison codes we use to represent FP comparison to integer
8241 code that will result in proper branch. Return UNKNOWN if no such code
8243 static enum rtx_code
8244 ix86_fp_compare_code_to_integer (code)
/* NOTE(review): extract is gappy -- the initial `*bypass_code = NIL;` /
   `*second_code = NIL;` assignments, the `switch` header, the per-case
   assignments to *first_code, the `break`s, and the !TARGET_IEEE_FP
   override body (which presumably clears bypass/second) are all among the
   dropped lines; only case labels and some assignments survive.  */
8274 /* Split comparison code CODE into comparisons we can do using branch
8275 instructions. BYPASS_CODE is comparison code for branch that will
8276 branch around FIRST_CODE and SECOND_CODE. If some of branches
8277 is not required, set value to NIL.
8278 We never require more than two branches. */
8280 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8281 enum rtx_code code, *bypass_code, *first_code, *second_code;
8287 /* The fcomi comparison sets flags as follows:
8297 case GT: /* GTU - CF=0 & ZF=0 */
8298 case GE: /* GEU - CF=0 */
8299 case ORDERED: /* PF=0 */
8300 case UNORDERED: /* PF=1 */
8301 case UNEQ: /* EQ - ZF=1 */
8302 case UNLT: /* LTU - CF=1 */
8303 case UNLE: /* LEU - CF=1 | ZF=1 */
8304 case LTGT: /* EQ - ZF=0 */
8306 case LT: /* LTU - CF=1 - fails on unordered */
8308 *bypass_code = UNORDERED;
8310 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8312 *bypass_code = UNORDERED;
8314 case EQ: /* EQ - ZF=1 - fails on unordered */
8316 *bypass_code = UNORDERED;
8318 case NE: /* NE - ZF=0 - fails on unordered */
8320 *second_code = UNORDERED;
8322 case UNGE: /* GEU - CF=0 - fails on unordered */
8324 *second_code = UNORDERED;
8326 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8328 *second_code = UNORDERED;
8333 if (!TARGET_IEEE_FP)
/* NOTE(review): nearly the whole body (lines 8351-8377, presumably a
   switch over CODE returning per-code instruction counts) was dropped by
   extraction; only the header and the non-IEEE early path remain.  */
8340 /* Return cost of comparison done fcom + arithmetics operations on AX.
8341 All following functions do use number of instructions as an cost metrics.
8342 In future this should be tweaked to compute bytes for optimize_size and
8343 take into account performance of various instructions on various CPUs. */
8345 ix86_fp_comparison_arithmetics_cost (code)
8348 if (!TARGET_IEEE_FP)
8350 /* The cost of code output by ix86_expand_fp_compare. */
/* NOTE(review): the TARGET_CMOVE guard that returns the "arbitrarily high"
   cost (around line 8387-8388) is among the lines dropped by extraction.  */
8378 /* Return cost of comparison done using fcomi operation.
8379 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8381 ix86_fp_comparison_fcomi_cost (code)
8384 enum rtx_code bypass_code, first_code, second_code;
8385 /* Return arbitarily high cost when instruction is not supported - this
8386 prevents gcc from using it. */
/* Cost = fcomi + jump (2), plus 1 more if a bypass or second branch is
   required for this comparison code.  */
8389 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8390 return (bypass_code != NIL || second_code != NIL) + 2;
/* NOTE(review): the early-return value after the !TARGET_USE_SAHF test
   (line 8403, presumably the same "arbitrarily high" constant) was
   dropped by extraction.  */
8393 /* Return cost of comparison done using sahf operation.
8394 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8396 ix86_fp_comparison_sahf_cost (code)
8399 enum rtx_code bypass_code, first_code, second_code;
8400 /* Return arbitarily high cost when instruction is not preferred - this
8401 avoids gcc from using it. */
8402 if (!TARGET_USE_SAHF && !optimize_size)
/* Cost = fnstsw + sahf + jump (3), plus 1 if an extra branch is needed.  */
8404 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8405 return (bypass_code != NIL || second_code != NIL) + 3;
/* NOTE(review): the declaration of `min`, the `min = ...` updates inside
   the two ifs, and the final `return min;` were dropped by extraction.  */
8408 /* Compute cost of the comparison done using any method.
8409 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8411 ix86_fp_comparison_cost (code)
8414 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8417 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8418 sahf_cost = ix86_fp_comparison_sahf_cost (code);
/* Take the minimum of the arithmetics, sahf and fcomi strategies.  */
8420 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8421 if (min > sahf_cost)
8423 if (min > fcomi_cost)
/* NOTE(review): extract is gappy throughout -- declarations of tmp/tmp2,
   the if/else structure separating the fcomi path from the sahf path,
   several switch/case labels in the bit-twiddling section, and the
   trailing const0_rtx arguments of some gen_rtx_fmt_ee calls are among
   the dropped lines.  Comments below describe only the visible code.  */
8428 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8431 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8433 rtx op0, op1, scratch;
8437 enum machine_mode fpcmp_mode, intcmp_mode;
8439 int cost = ix86_fp_comparison_cost (code);
8440 enum rtx_code bypass_code, first_code, second_code;
8442 fpcmp_mode = ix86_fp_compare_mode (code);
8443 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8446 *second_test = NULL_RTX;
8448 *bypass_test = NULL_RTX;
8450 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8452 /* Do fcomi/sahf based test when profitable. */
8453 if ((bypass_code == NIL || bypass_test)
8454 && (second_code == NIL || second_test)
8455 && ix86_fp_comparison_arithmetics_cost (code) > cost)
/* fcomi path: compare straight into the flags register.  */
8459 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8460 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* sahf path: fnstsw into a HImode scratch, then sahf to load AH into
   the flags.  */
8466 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8467 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8469 scratch = gen_reg_rtx (HImode);
8470 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8471 emit_insn (gen_x86_sahf_1 (scratch));
8474 /* The FP codes work out to act like unsigned. */
8475 intcmp_mode = fpcmp_mode;
8477 if (bypass_code != NIL)
8478 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8479 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8481 if (second_code != NIL)
8482 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8483 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8488 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8489 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8490 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8492 scratch = gen_reg_rtx (HImode);
8493 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8495 /* In the unordered case, we have to check C2 for NaN's, which
8496 doesn't happen to work out to anything nice combination-wise.
8497 So do some bit twiddling on the value we've got in AH to come
8498 up with an appropriate set of condition codes. */
/* The masks below (0x45, 0x44, 0x40, 0x05, 0x01, 0x04) presumably select
   the C0/C2/C3 condition bits of the x87 status word as seen in AH --
   confirm against the x87 FNSTSW documentation.  */
8500 intcmp_mode = CCNOmode;
8505 if (code == GT || !TARGET_IEEE_FP)
8507 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8512 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8513 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8514 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8515 intcmp_mode = CCmode;
8521 if (code == LT && TARGET_IEEE_FP)
8523 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8524 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8525 intcmp_mode = CCmode;
8530 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8536 if (code == GE || !TARGET_IEEE_FP)
8538 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8543 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8544 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8551 if (code == LE && TARGET_IEEE_FP)
8553 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8554 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8555 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8556 intcmp_mode = CCmode;
8561 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8567 if (code == EQ && TARGET_IEEE_FP)
8569 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8570 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8571 intcmp_mode = CCmode;
8576 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8583 if (code == NE && TARGET_IEEE_FP)
8585 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8586 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8592 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8598 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8602 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8611 /* Return the test that should be put into the flags user, i.e.
8612 the bcc, scc, or cmov instruction. */
8613 return gen_rtx_fmt_ee (code, VOIDmode,
8614 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand a comparison of the globals ix86_compare_op0/op1 under CODE,
   dispatching to the FP or integer expander, and return the flags test
   rtx for the consumer (bcc/scc/cmov).
   NOTE(review): declarations of op0/op1/ret, the NULL guards around the
   *second_test/*bypass_test stores, and the final `return ret;` are
   among the lines dropped by extraction.  */
8619 ix86_expand_compare (code, second_test, bypass_test)
8621 rtx *second_test, *bypass_test;
8624 op0 = ix86_compare_op0;
8625 op1 = ix86_compare_op1;
8628 *second_test = NULL_RTX;
8630 *bypass_test = NULL_RTX;
8632 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8633 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8634 second_test, bypass_test);
8636 ret = ix86_expand_int_compare (code, op0, op1);
8641 /* Return true if the CODE will result in nontrivial jump sequence. */
8643 ix86_fp_jump_nontrivial_p (code)
8646 enum rtx_code bypass_code, first_code, second_code;
/* Nontrivial = the comparison needs a bypass branch or a second branch
   in addition to the primary one.  */
8649 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8650 return bypass_code != NIL || second_code != NIL;
/* Emit a conditional branch to LABEL for comparison CODE of the globals
   ix86_compare_op0/op1, switching on the operands' machine mode.
   NOTE(review): extract is gappy -- the switch's case labels (presumably
   QImode/HImode/SImode, the FP modes, and DImode), many braces/breaks,
   and several statements are among the dropped lines.  */
8654 ix86_expand_branch (code, label)
8660 switch (GET_MODE (ix86_compare_op0))
/* Simple integer case: expand the compare and emit one conditional jump.  */
8666 tmp = ix86_expand_compare (code, NULL, NULL);
8667 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8668 gen_rtx_LABEL_REF (VOIDmode, label),
8670 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8680 enum rtx_code bypass_code, first_code, second_code;
8682 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8685 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8687 /* Check whether we will use the natural sequence with one jump. If
8688 so, we can expand jump early. Otherwise delay expansion by
8689 creating compound insn to not confuse optimizers. */
8690 if (bypass_code == NIL && second_code == NIL
8693 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8694 gen_rtx_LABEL_REF (VOIDmode, label),
8699 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8700 ix86_compare_op0, ix86_compare_op1);
8701 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8702 gen_rtx_LABEL_REF (VOIDmode, label),
8704 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
/* Bundle the jump with clobbers of the two FP flag registers (hard regs
   18 and 17 here) and, for the non-fcomi path, a HImode scratch.  */
8706 use_fcomi = ix86_use_fcomi_compare (code);
8707 vec = rtvec_alloc (3 + !use_fcomi);
8708 RTVEC_ELT (vec, 0) = tmp;
8710 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8712 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8715 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8717 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8725 /* Expand DImode branch into multiple compare+branch. */
8727 rtx lo[2], hi[2], label2;
8728 enum rtx_code code1, code2, code3;
/* Canonicalize so a constant, if any, is operand 1.  */
8730 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8732 tmp = ix86_compare_op0;
8733 ix86_compare_op0 = ix86_compare_op1;
8734 ix86_compare_op1 = tmp;
8735 code = swap_condition (code);
8737 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8738 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8740 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8741 avoid two branches. This costs one extra insn, so disable when
8742 optimizing for size. */
8744 if ((code == EQ || code == NE)
8746 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8751 if (hi[1] != const0_rtx)
8752 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8753 NULL_RTX, 0, OPTAB_WIDEN);
8756 if (lo[1] != const0_rtx)
8757 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8758 NULL_RTX, 0, OPTAB_WIDEN);
8760 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8761 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse: compare the OR of the XORs against zero in SImode.  */
8763 ix86_compare_op0 = tmp;
8764 ix86_compare_op1 = const0_rtx;
8765 ix86_expand_branch (code, label);
8769 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8770 op1 is a constant and the low word is zero, then we can just
8771 examine the high word. */
8773 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8776 case LT: case LTU: case GE: case GEU:
8777 ix86_compare_op0 = hi[0];
8778 ix86_compare_op1 = hi[1];
8779 ix86_expand_branch (code, label);
8785 /* Otherwise, we need two or three jumps. */
8787 label2 = gen_label_rtx ();
8790 code2 = swap_condition (code);
8791 code3 = unsigned_condition (code);
8795 case LT: case GT: case LTU: case GTU:
8798 case LE: code1 = LT; code2 = GT; break;
8799 case GE: code1 = GT; code2 = LT; break;
8800 case LEU: code1 = LTU; code2 = GTU; break;
8801 case GEU: code1 = GTU; code2 = LTU; break;
8803 case EQ: code1 = NIL; code2 = NE; break;
8804 case NE: code2 = NIL; break;
8812 * if (hi(a) < hi(b)) goto true;
8813 * if (hi(a) > hi(b)) goto false;
8814 * if (lo(a) < lo(b)) goto true;
8818 ix86_compare_op0 = hi[0];
8819 ix86_compare_op1 = hi[1];
8822 ix86_expand_branch (code1, label);
8824 ix86_expand_branch (code2, label2);
8826 ix86_compare_op0 = lo[0];
8827 ix86_compare_op1 = lo[1];
8828 ix86_expand_branch (code3, label);
8831 emit_label (label2);
/* NOTE(review): extract is gappy -- declarations of condition/second/
   bypass/i, the swap of target1/target2 when target2 != pc_rtx, and the
   pc_rtx destinations inside the gen_rtx_SET calls are among the dropped
   lines.  */
8840 /* Split branch based on floating point condition. */
8842 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
8844 rtx op1, op2, target1, target2, tmp;
8847 rtx label = NULL_RTX;
8849 int bypass_probability = -1, second_probability = -1, probability = -1;
8852 if (target2 != pc_rtx)
8855 code = reverse_condition_maybe_unordered (code);
8860 condition = ix86_expand_fp_compare (code, op1, op2,
8861 tmp, &second, &bypass);
8863 if (split_branch_probability >= 0)
8865 /* Distribute the probabilities across the jumps.
8866 Assume the BYPASS and SECOND to be always test
8868 probability = split_branch_probability;
8870 /* Value of 1 is low enough to make no need for probability
8871 to be updated. Later we may run some experiments and see
8872 if unordered values are more frequent in practice. */
8874 bypass_probability = 1;
8876 second_probability = 1;
/* Emit up to three jumps: the bypass jump (around the main tests), the
   primary conditional jump, and the second jump, each annotated with a
   REG_BR_PROB note when a probability is known.  */
8878 if (bypass != NULL_RTX)
8880 label = gen_label_rtx ();
8881 i = emit_jump_insn (gen_rtx_SET
8883 gen_rtx_IF_THEN_ELSE (VOIDmode,
8885 gen_rtx_LABEL_REF (VOIDmode,
8888 if (bypass_probability >= 0)
8890 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8891 GEN_INT (bypass_probability),
8894 i = emit_jump_insn (gen_rtx_SET
8896 gen_rtx_IF_THEN_ELSE (VOIDmode,
8897 condition, target1, target2)));
8898 if (probability >= 0)
8900 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8901 GEN_INT (probability),
8903 if (second != NULL_RTX)
8905 i = emit_jump_insn (gen_rtx_SET
8907 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8909 if (second_probability >= 0)
8911 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8912 GEN_INT (second_probability),
/* The bypass label, if created, is emitted after all jumps.  */
8915 if (label != NULL_RTX)
/* Expand a setcc of comparison CODE into QImode register DEST; returns
   1 on success (DONE) and 0 on failure (FAIL), matching the expander
   convention.
   NOTE(review): extract is gappy -- the !TARGET_64BIT half of the DImode
   guard, the abort for non-QImode DEST, the tmp/tmpreg setup around line
   8936-8939, and the test==NULL fallback in the two-test merge are among
   the dropped lines.  */
8920 ix86_expand_setcc (code, dest)
8924 rtx ret, tmp, tmpreg;
8925 rtx second_test, bypass_test;
8927 if (GET_MODE (ix86_compare_op0) == DImode
8929 return 0; /* FAIL */
8931 if (GET_MODE (dest) != QImode)
8934 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8935 PUT_MODE (ret, QImode);
8940 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
8941 if (bypass_test || second_test)
/* A second/bypass test is merged into the result with AND (bypass,
   reversed) or OR (second) on the QImode flags.  */
8943 rtx test = second_test;
8945 rtx tmp2 = gen_reg_rtx (QImode);
8952 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8954 PUT_MODE (test, QImode);
8955 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8958 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8960 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8963 return 1; /* DONE */
/* Expand an integer conditional move (operands[0] = operands[1] ?
   operands[2] : operands[3]) using sbb/setcc/lea arithmetic tricks when
   both arms are constants, falling back to a real cmov.  Returns 1 (DONE)
   or 0 (FAIL) per the expander convention.
   NOTE(review): extract is gappy throughout -- start_sequence/end_sequence
   around the compare, the `diff = ct - cf` computation, several braces,
   else-arms and case labels are among the dropped lines; comments below
   describe only what the visible statements show.  */
8967 ix86_expand_int_movcc (operands)
8970 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8971 rtx compare_seq, compare_op;
8972 rtx second_test, bypass_test;
8973 enum machine_mode mode = GET_MODE (operands[0]);
8975 /* When the compare code is not LTU or GEU, we can not use sbbl case.
8976 In case comparsion is done with immediate, we can convert it to LTU or
8977 GEU by altering the integer. */
8979 if ((code == LEU || code == GTU)
8980 && GET_CODE (ix86_compare_op1) == CONST_INT
8982 && INTVAL (ix86_compare_op1) != -1
8983 /* For x86-64, the immediate field in the instruction is 32-bit
8984 signed, so we can't increment a DImode value above 0x7fffffff. */
8986 || GET_MODE (ix86_compare_op0) != DImode
8987 || INTVAL (ix86_compare_op1) != 0x7fffffff)
8988 && GET_CODE (operands[2]) == CONST_INT
8989 && GET_CODE (operands[3]) == CONST_INT)
/* LEU x,C == LTU x,C+1 and GTU x,C == GEU x,C+1.  */
8995 ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8996 GET_MODE (ix86_compare_op0));
9000 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9001 compare_seq = get_insns ();
9004 compare_code = GET_CODE (compare_op);
9006 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9007 HImode insns, we'd be swallowed in word prefix ops. */
9010 && (mode != DImode || TARGET_64BIT)
9011 && GET_CODE (operands[2]) == CONST_INT
9012 && GET_CODE (operands[3]) == CONST_INT)
9014 rtx out = operands[0];
9015 HOST_WIDE_INT ct = INTVAL (operands[2]);
9016 HOST_WIDE_INT cf = INTVAL (operands[3]);
/* sbb trick: LTU/GEU materialize 0/-1 directly via sbb reg,reg.  */
9019 if ((compare_code == LTU || compare_code == GEU)
9020 && !second_test && !bypass_test)
9022 /* Detect overlap between destination and compare sources. */
9025 /* To simplify rest of code, restrict to the GEU case. */
9026 if (compare_code == LTU)
9031 compare_code = reverse_condition (compare_code);
9032 code = reverse_condition (code);
9036 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9037 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9038 tmp = gen_reg_rtx (mode);
9040 emit_insn (compare_seq);
9042 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
9044 emit_insn (gen_x86_movsicc_0_m1 (tmp));
/* Fold the 0/-1 mask into the requested ct/cf constants with the
   cheapest of inc/or/not/and sequences.  */
9056 tmp = expand_simple_binop (mode, PLUS,
9058 tmp, 1, OPTAB_DIRECT);
9069 tmp = expand_simple_binop (mode, IOR,
9071 tmp, 1, OPTAB_DIRECT);
9073 else if (diff == -1 && ct)
9083 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
9085 tmp = expand_simple_binop (mode, PLUS,
9087 tmp, 1, OPTAB_DIRECT);
9095 * andl cf - ct, dest
9105 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
9108 tmp = expand_simple_binop (mode, AND,
9110 gen_int_mode (cf - ct, mode),
9111 tmp, 1, OPTAB_DIRECT);
9113 tmp = expand_simple_binop (mode, PLUS,
9115 tmp, 1, OPTAB_DIRECT);
9119 emit_move_insn (out, tmp);
9121 return 1; /* DONE */
/* Normalize so diff = ct - cf is positive, reversing the condition.  */
9128 tmp = ct, ct = cf, cf = tmp;
9130 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9132 /* We may be reversing unordered compare to normal compare, that
9133 is not valid in general (we may convert non-trapping condition
9134 to trapping one), however on i386 we currently emit all
9135 comparisons unordered. */
9136 compare_code = reverse_condition_maybe_unordered (compare_code);
9137 code = reverse_condition_maybe_unordered (code);
9141 compare_code = reverse_condition (compare_code);
9142 code = reverse_condition (code);
9147 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9148 && GET_CODE (ix86_compare_op1) == CONST_INT)
9150 if (ix86_compare_op1 == const0_rtx
9151 && (code == LT || code == GE))
9152 compare_code = code;
9153 else if (ix86_compare_op1 == constm1_rtx)
9157 else if (code == GT)
9162 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9163 if (compare_code != NIL
9164 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9165 && (cf == -1 || ct == -1))
9167 /* If lea code below could be used, only optimize
9168 if it results in a 2 insn sequence. */
9170 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9171 || diff == 3 || diff == 5 || diff == 9)
9172 || (compare_code == LT && ct == -1)
9173 || (compare_code == GE && cf == -1))
9176 * notl op1 (if necessary)
9184 code = reverse_condition (code);
9187 out = emit_store_flag (out, code, ix86_compare_op0,
9188 ix86_compare_op1, VOIDmode, 0, -1);
9190 out = expand_simple_binop (mode, IOR,
9192 out, 1, OPTAB_DIRECT);
9193 if (out != operands[0])
9194 emit_move_insn (operands[0], out);
9196 return 1; /* DONE */
/* lea trick: for diffs expressible as a scaled-index address
   (1,2,3,4,5,8,9) compute cf + flag*(ct-cf) in one lea.  */
9200 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9201 || diff == 3 || diff == 5 || diff == 9)
9202 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf), 0)))
9208 * lea cf(dest*(ct-cf)),dest
9212 * This also catches the degenerate setcc-only case.
9218 out = emit_store_flag (out, code, ix86_compare_op0,
9219 ix86_compare_op1, VOIDmode, 0, 1);
9222 /* On x86_64 the lea instruction operates on Pmode, so we need
9223 to get arithmetics done in proper mode to match. */
9230 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9234 tmp = gen_rtx_PLUS (mode, tmp, out1);
9240 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9244 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
9250 clob = gen_rtx_REG (CCmode, FLAGS_REG);
9251 clob = gen_rtx_CLOBBER (VOIDmode, clob);
9253 tmp = gen_rtx_SET (VOIDmode, out, tmp);
9254 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9258 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
9260 if (out != operands[0])
9261 emit_move_insn (operands[0], copy_rtx (out));
9263 return 1; /* DONE */
9267 * General case: Jumpful:
9268 * xorl dest,dest cmpl op1, op2
9269 * cmpl op1, op2 movl ct, dest
9271 * decl dest movl cf, dest
9272 * andl (cf-ct),dest 1:
9277 * This is reasonably steep, but branch mispredict costs are
9278 * high on modern cpus, so consider failing only if optimizing
9281 * %%% Parameterize branch_cost on the tuning architecture, then
9282 * use that. The 80386 couldn't care less about mispredicts.
9285 if (!optimize_size && !TARGET_CMOVE)
9291 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9292 /* We may be reversing unordered compare to normal compare,
9293 that is not valid in general (we may convert non-trapping
9294 condition to trapping one), however on i386 we currently
9295 emit all comparisons unordered. */
9296 code = reverse_condition_maybe_unordered (code);
9299 code = reverse_condition (code);
9300 if (compare_code != NIL)
9301 compare_code = reverse_condition (compare_code);
9305 if (compare_code != NIL)
9307 /* notl op1 (if needed)
9312 For x < 0 (resp. x <= -1) there will be no notl,
9313 so if possible swap the constants to get rid of the
9315 True/false will be -1/0 while code below (store flag
9316 followed by decrement) is 0/-1, so the constants need
9317 to be exchanged once more. */
9319 if (compare_code == GE || !cf)
9321 code = reverse_condition (code);
9326 HOST_WIDE_INT tmp = cf;
9331 out = emit_store_flag (out, code, ix86_compare_op0,
9332 ix86_compare_op1, VOIDmode, 0, -1);
9336 out = emit_store_flag (out, code, ix86_compare_op0,
9337 ix86_compare_op1, VOIDmode, 0, 1);
9339 out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
9340 out, 1, OPTAB_DIRECT);
9343 out = expand_simple_binop (mode, AND, out,
9344 gen_int_mode (cf - ct, mode),
9345 out, 1, OPTAB_DIRECT);
9347 out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
9348 out, 1, OPTAB_DIRECT);
9349 if (out != operands[0])
9350 emit_move_insn (operands[0], out);
9352 return 1; /* DONE */
9358 /* Try a few things more with specific constants and a variable. */
9361 rtx var, orig_out, out, tmp;
9364 return 0; /* FAIL */
9366 /* If one of the two operands is an interesting constant, load a
9367 constant with the above and mask it in with a logical operation. */
9369 if (GET_CODE (operands[2]) == CONST_INT)
9372 if (INTVAL (operands[2]) == 0)
9373 operands[3] = constm1_rtx, op = and_optab;
9374 else if (INTVAL (operands[2]) == -1)
9375 operands[3] = const0_rtx, op = ior_optab;
9377 return 0; /* FAIL */
9379 else if (GET_CODE (operands[3]) == CONST_INT)
9382 if (INTVAL (operands[3]) == 0)
9383 operands[2] = constm1_rtx, op = and_optab;
9384 else if (INTVAL (operands[3]) == -1)
9385 operands[2] = const0_rtx, op = ior_optab;
9387 return 0; /* FAIL */
9390 return 0; /* FAIL */
9392 orig_out = operands[0];
9393 tmp = gen_reg_rtx (mode);
9396 /* Recurse to get the constant loaded. */
9397 if (ix86_expand_int_movcc (operands) == 0)
9398 return 0; /* FAIL */
9400 /* Mask in the interesting variable. */
9401 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9403 if (out != orig_out)
9404 emit_move_insn (orig_out, out);
9406 return 1; /* DONE */
9410 * For comparison with above,
/* True cmov path: force both arms into registers/memory as required and
   emit the conditional-move pattern(s), chaining extra cmovs for any
   second/bypass tests.  */
9420 if (! nonimmediate_operand (operands[2], mode))
9421 operands[2] = force_reg (mode, operands[2]);
9422 if (! nonimmediate_operand (operands[3], mode))
9423 operands[3] = force_reg (mode, operands[3]);
9425 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9427 rtx tmp = gen_reg_rtx (mode);
9428 emit_move_insn (tmp, operands[3]);
9431 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9433 rtx tmp = gen_reg_rtx (mode);
9434 emit_move_insn (tmp, operands[2]);
9437 if (! register_operand (operands[2], VOIDmode)
9438 && ! register_operand (operands[3], VOIDmode))
9439 operands[2] = force_reg (mode, operands[2]);
9441 emit_insn (compare_seq);
9442 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9443 gen_rtx_IF_THEN_ELSE (mode,
9444 compare_op, operands[2],
9447 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9448 gen_rtx_IF_THEN_ELSE (mode,
9453 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9454 gen_rtx_IF_THEN_ELSE (mode,
9459 return 1; /* DONE */
/* Expand a floating-point conditional move for OPERANDS.
   operands[0] = destination, operands[1] = comparison rtx,
   operands[2]/operands[3] = the two move sources; the comparison inputs
   live in the globals ix86_compare_op0/ix86_compare_op1.  Presumably
   returns nonzero ("DONE") once all insns are emitted -- TODO confirm,
   the return statements are not visible in this extract.
   NOTE(review): this extract has interior lines elided; the control
   flow below is incomplete as shown -- do not edit logic from this
   view alone.  */
9463 ix86_expand_fp_movcc (operands)
9468 rtx compare_op, second_test, bypass_test;
9470 /* For SF/DFmode conditional moves based on comparisons
9471 in same mode, we may want to use SSE min/max instructions. */
9472 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9473 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9474 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9475 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
9477 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9478 /* We may be called from the post-reload splitter. */
9479 && (!REG_P (operands[0])
9480 || SSE_REG_P (operands[0])
9481 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9483 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9484 code = GET_CODE (operands[1]);
9486 /* See if we have (cross) match between comparison operands and
9487 conditional move operands. */
9488 if (rtx_equal_p (operands[2], op1))
/* Cross match: invert the condition so the straight-match test
   below can recognize a min/max.  */
9493 code = reverse_condition_maybe_unordered (code);
9495 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9497 /* Check for min operation. */
9500 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9501 if (memory_operand (op0, VOIDmode))
9502 op0 = force_reg (GET_MODE (operands[0]), op0);
9503 if (GET_MODE (operands[0]) == SFmode)
9504 emit_insn (gen_minsf3 (operands[0], op0, op1));
9506 emit_insn (gen_mindf3 (operands[0], op0, op1));
9509 /* Check for max operation. */
9512 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9513 if (memory_operand (op0, VOIDmode))
9514 op0 = force_reg (GET_MODE (operands[0]), op0);
9515 if (GET_MODE (operands[0]) == SFmode)
9516 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9518 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9522 /* Manage condition to be sse_comparison_operator. In case we are
9523 in non-ieee mode, try to canonicalize the destination operand
9524 to be first in the comparison - this helps reload to avoid extra
9526 if (!sse_comparison_operator (operands[1], VOIDmode)
9527 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
/* Swap comparison inputs and the condition code together so the
   overall predicate is unchanged.  */
9529 rtx tmp = ix86_compare_op0;
9530 ix86_compare_op0 = ix86_compare_op1;
9531 ix86_compare_op1 = tmp;
9532 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9533 VOIDmode, ix86_compare_op0,
9536 /* Similary try to manage result to be first operand of conditional
9537 move. We also don't support the NE comparison on SSE, so try to
9539 if ((rtx_equal_p (operands[0], operands[3])
9540 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9541 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
/* Exchange the move arms and reverse the condition (keeping
   unordered semantics) so they still select the same value.  */
9543 rtx tmp = operands[2];
9544 operands[2] = operands[3];
9546 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9547 (GET_CODE (operands[1])),
9548 VOIDmode, ix86_compare_op0,
9551 if (GET_MODE (operands[0]) == SFmode)
9552 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9553 operands[2], operands[3],
9554 ix86_compare_op0, ix86_compare_op1));
9556 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9557 operands[2], operands[3],
9558 ix86_compare_op0, ix86_compare_op1));
9562 /* The floating point conditional move instructions don't directly
9563 support conditions resulting from a signed integer comparison. */
9565 code = GET_CODE (operands[1]);
9566 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9568 /* The floating point conditional move instructions don't directly
9569 support signed integer comparisons. */
9571 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9573 if (second_test != NULL || bypass_test != NULL)
/* Materialize the condition into a QImode register and re-compare
   it against zero, which fcmov can handle.  */
9575 tmp = gen_reg_rtx (QImode);
9576 ix86_expand_setcc (code, tmp);
9578 ix86_compare_op0 = tmp;
9579 ix86_compare_op1 = const0_rtx;
9580 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
/* If the destination overlaps a source used by the extra bypass or
   second test, copy that source into a fresh register first.  */
9582 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9584 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9585 emit_move_insn (tmp, operands[3]);
9588 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9590 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9591 emit_move_insn (tmp, operands[2]);
9595 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9596 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9601 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9602 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9607 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9608 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9616 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9617 works for floating pointer parameters and nonoffsetable memories.
9618 For pushes, it returns just stack offsets; the values will be saved
9619 in the right order. Maximally three parts are generated. */
/* OPERAND: the value to split; PARTS: output array (up to 3 entries);
   MODE: the machine mode being decomposed.  Presumably returns the
   number of parts generated (callers such as ix86_split_long_move use
   the result as "nparts") -- TODO confirm, the return statement is not
   visible here.
   NOTE(review): interior lines are elided in this extract; do not edit
   logic from this view alone.  */
9622 ix86_split_to_parts (operand, parts, mode)
9625 enum machine_mode mode;
/* Part count: 32-bit target splits into 4-byte pieces (TFmode is a
   special 3-part case); 64-bit target splits into 8-byte pieces.  */
9630 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9632 size = (GET_MODE_SIZE (mode) + 4) / 8;
9634 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9636 if (size < 2 || size > 3)
9639 /* Optimize constant pool reference to immediates. This is used by fp
9640 moves, that force all constants to memory to allow combining. */
9641 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9643 rtx tmp = maybe_get_pool_constant (operand);
9648 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9650 /* The only non-offsetable memories we handle are pushes. */
9651 if (! push_operand (operand, VOIDmode))
9654 operand = copy_rtx (operand);
9655 PUT_MODE (operand, Pmode);
9656 parts[0] = parts[1] = parts[2] = operand;
9658 else if (!TARGET_64BIT)
9661 split_di (&operand, 1, &parts[0], &parts[1]);
9664 if (REG_P (operand))
/* Before reload, consecutive hard/pseudo register numbers hold the
   consecutive SImode words.  */
9666 if (!reload_completed)
9668 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9669 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9671 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9673 else if (offsettable_memref_p (operand))
9675 operand = adjust_address (operand, SImode, 0);
9677 parts[1] = adjust_address (operand, SImode, 4);
9679 parts[2] = adjust_address (operand, SImode, 8);
9681 else if (GET_CODE (operand) == CONST_DOUBLE)
/* Constant FP value: convert to target word images and emit them
   as immediates.  */
9686 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9691 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9692 parts[2] = gen_int_mode (l[2], SImode);
9695 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9700 parts[1] = gen_int_mode (l[1], SImode);
9701 parts[0] = gen_int_mode (l[0], SImode);
/* 64-bit target path below: DImode-sized parts.  */
9710 split_ti (&operand, 1, &parts[0], &parts[1]);
9711 if (mode == XFmode || mode == TFmode)
9713 if (REG_P (operand))
9715 if (!reload_completed)
9717 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9718 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9720 else if (offsettable_memref_p (operand))
9722 operand = adjust_address (operand, DImode, 0);
9724 parts[1] = adjust_address (operand, SImode, 8);
9726 else if (GET_CODE (operand) == CONST_DOUBLE)
9731 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9732 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9733 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9734 if (HOST_BITS_PER_WIDE_INT >= 64)
9737 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9738 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9741 parts[0] = immed_double_const (l[0], l[1], DImode);
9742 parts[1] = gen_int_mode (l[2], SImode);
9752 /* Emit insns to perform a move or push of DI, DF, and XF values.
9753 Return false when normal moves are needed; true when all required
9754 insns have been emitted. Operands 2-4 contain the input values
9755 int the correct order; operands 5-7 contain the output values. */
/* NOTE(review): interior lines of this function are elided in this
   extract (the local declarations of part[], push, nparts, collisions
   are not visible); do not edit logic from this view alone.  */
9758 ix86_split_long_move (operands)
9765 enum machine_mode mode = GET_MODE (operands[0]);
9767 /* The DFmode expanders may ask us to move double.
9768 For 64bit target this is single move. By hiding the fact
9769 here we simplify i386.md splitters. */
9770 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9772 /* Optimize constant pool reference to immediates. This is used by
9773 fp moves, that force all constants to memory to allow combining. */
9775 if (GET_CODE (operands[1]) == MEM
9776 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9777 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9778 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9779 if (push_operand (operands[0], VOIDmode))
9781 operands[0] = copy_rtx (operands[0]);
9782 PUT_MODE (operands[0], Pmode);
/* 8-byte value on a 64-bit target: a single DImode move suffices.  */
9785 operands[0] = gen_lowpart (DImode, operands[0]);
9786 operands[1] = gen_lowpart (DImode, operands[1]);
9787 emit_move_insn (operands[0], operands[1]);
9791 /* The only non-offsettable memory we handle is push. */
9792 if (push_operand (operands[0], VOIDmode))
9794 else if (GET_CODE (operands[0]) == MEM
9795 && ! offsettable_memref_p (operands[0]))
9798 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9799 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9801 /* When emitting push, take care for source operands on the stack. */
9802 if (push && GET_CODE (operands[1]) == MEM
9803 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
/* Rebase the later source parts on the earlier parts' addresses so
   the push-induced stack-pointer changes are accounted for.  */
9806 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9807 XEXP (part[1][2], 0));
9808 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9809 XEXP (part[1][1], 0));
9812 /* We need to do copy in the right order in case an address register
9813 of the source overlaps the destination. */
9814 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9816 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9818 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9821 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9824 /* Collision in the middle part can be handled by reordering. */
9825 if (collisions == 1 && nparts == 3
9826 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9829 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9830 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9833 /* If there are more collisions, we can't handle it by reordering.
9834 Do an lea to the last part and use only one colliding move. */
9835 else if (collisions > 1)
9838 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
9839 XEXP (part[1][0], 0)));
9840 part[1][0] = change_address (part[1][0],
9841 TARGET_64BIT ? DImode : SImode,
9842 part[0][nparts - 1]);
9843 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
9845 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
9855 /* We use only first 12 bytes of TFmode value, but for pushing we
9856 are required to adjust stack as if we were pushing real 16byte
9858 if (mode == TFmode && !TARGET_64BIT)
9859 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
9861 emit_move_insn (part[0][2], part[1][2]);
9866 /* In 64bit mode we don't have 32bit push available. In case this is
9867 register, it is OK - we will just use larger counterpart. We also
9868 retype memory - these comes from attempt to avoid REX prefix on
9869 moving of second half of TFmode value. */
9870 if (GET_MODE (part[1][1]) == SImode)
9872 if (GET_CODE (part[1][1]) == MEM)
9873 part[1][1] = adjust_address (part[1][1], DImode, 0);
9874 else if (REG_P (part[1][1]))
9875 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9878 if (GET_MODE (part[1][0]) == SImode)
9879 part[1][0] = part[1][1];
9882 emit_move_insn (part[0][1], part[1][1]);
9883 emit_move_insn (part[0][0], part[1][0]);
9887 /* Choose correct order to not overwrite the source before it is copied. */
9888 if ((REG_P (part[0][0])
9889 && REG_P (part[1][1])
9890 && (REGNO (part[0][0]) == REGNO (part[1][1])
9892 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9894 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* High-to-low order: destination low word overlaps a source part.  */
9898 operands[2] = part[0][2];
9899 operands[3] = part[0][1];
9900 operands[4] = part[0][0];
9901 operands[5] = part[1][2];
9902 operands[6] = part[1][1];
9903 operands[7] = part[1][0];
9907 operands[2] = part[0][1];
9908 operands[3] = part[0][0];
9909 operands[5] = part[1][1];
9910 operands[6] = part[1][0];
/* Low-to-high order: no overlap hazard.  */
9917 operands[2] = part[0][0];
9918 operands[3] = part[0][1];
9919 operands[4] = part[0][2];
9920 operands[5] = part[1][0];
9921 operands[6] = part[1][1];
9922 operands[7] = part[1][2];
9926 operands[2] = part[0][0];
9927 operands[3] = part[0][1];
9928 operands[5] = part[1][0];
9929 operands[6] = part[1][1];
9932 emit_move_insn (operands[2], operands[5]);
9933 emit_move_insn (operands[3], operands[6]);
9935 emit_move_insn (operands[4], operands[7]);
/* Split a 64-bit left shift (DImode ashift) into 32-bit operations.
   OPERANDS: 0 = dest, 1 = src, 2 = shift count; SCRATCH is an optional
   spare SImode register used by the variable-count adjustment path.
   The count is masked with 63, matching the hardware shift behavior.
   NOTE(review): interior lines are elided in this extract (e.g. the
   else-arms pairing some if bodies); do not edit logic from this view
   alone.  */
9941 ix86_split_ashldi (operands, scratch)
9942 rtx *operands, scratch;
9944 rtx low[2], high[2];
9947 if (GET_CODE (operands[2]) == CONST_INT)
9949 split_di (operands, 2, low, high);
9950 count = INTVAL (operands[2]) & 63;
/* Count >= 32: low word shifts entirely into the high word.  */
9954 emit_move_insn (high[0], low[1]);
9955 emit_move_insn (low[0], const0_rtx);
9958 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
/* Count < 32: shld propagates low bits into the high word.  */
9962 if (!rtx_equal_p (operands[0], operands[1]))
9963 emit_move_insn (operands[0], operands[1]);
9964 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9965 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
/* Variable count: shift both halves, then fix up when count >= 32.  */
9970 if (!rtx_equal_p (operands[0], operands[1]))
9971 emit_move_insn (operands[0], operands[1]);
9973 split_di (operands, 1, low, high);
9975 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9976 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
9978 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9980 if (! no_new_pseudos)
9981 scratch = force_reg (SImode, const0_rtx);
9983 emit_move_insn (scratch, const0_rtx);
9985 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9989 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a 64-bit arithmetic right shift (DImode ashiftrt) into 32-bit
   operations.  OPERANDS: 0 = dest, 1 = src, 2 = shift count; SCRATCH
   is an optional spare SImode register for the variable-count path
   (it receives a copy of the sign bits).
   NOTE(review): interior lines are elided in this extract; do not edit
   logic from this view alone.  */
9994 ix86_split_ashrdi (operands, scratch)
9995 rtx *operands, scratch;
9997 rtx low[2], high[2];
10000 if (GET_CODE (operands[2]) == CONST_INT)
10002 split_di (operands, 2, low, high);
10003 count = INTVAL (operands[2]) & 63;
/* Count >= 32: high word moves into the low word and the high word
   becomes a sign-bit fill (arithmetic shift by 31).  */
10007 emit_move_insn (low[0], high[1]);
10009 if (! reload_completed)
10010 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10013 emit_move_insn (high[0], low[0]);
10014 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10018 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
/* Count < 32: shrd propagates high bits into the low word.  */
10022 if (!rtx_equal_p (operands[0], operands[1]))
10023 emit_move_insn (operands[0], operands[1]);
10024 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10025 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count path.  */
10030 if (!rtx_equal_p (operands[0], operands[1]))
10031 emit_move_insn (operands[0], operands[1]);
10033 split_di (operands, 1, low, high);
10035 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10036 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]))
10038 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10040 if (! no_new_pseudos)
10041 scratch = gen_reg_rtx (SImode);
10042 emit_move_insn (scratch, high[0]);
10043 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10044 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10048 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a 64-bit logical right shift (DImode lshiftrt) into 32-bit
   operations.  OPERANDS: 0 = dest, 1 = src, 2 = shift count; SCRATCH
   is an optional spare SImode register (zeroed) for the
   variable-count adjustment path.
   NOTE(review): interior lines are elided in this extract; do not edit
   logic from this view alone.  */
10053 ix86_split_lshrdi (operands, scratch)
10054 rtx *operands, scratch;
10056 rtx low[2], high[2];
10059 if (GET_CODE (operands[2]) == CONST_INT)
10061 split_di (operands, 2, low, high);
10062 count = INTVAL (operands[2]) & 63;
/* Count >= 32: high word moves into the low word; high word becomes
   zero (logical, not arithmetic, shift).  */
10066 emit_move_insn (low[0], high[1]);
10067 emit_move_insn (high[0], const0_rtx);
10070 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
/* Count < 32: shrd propagates high bits into the low word.  */
10074 if (!rtx_equal_p (operands[0], operands[1]))
10075 emit_move_insn (operands[0], operands[1]);
10076 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10077 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count path.  */
10082 if (!rtx_equal_p (operands[0], operands[1]))
10083 emit_move_insn (operands[0], operands[1]);
10085 split_di (operands, 1, low, high);
10087 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10088 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10090 /* Heh. By reversing the arguments, we can reuse this pattern. */
10091 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10093 if (! no_new_pseudos)
10094 scratch = force_reg (SImode, const0_rtx);
10096 emit_move_insn (scratch, const0_rtx);
10098 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10102 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10106 /* Helper function for the string operations below. Dest VARIABLE whether
10107 it is aligned to VALUE bytes. If true, jump to the label. */
/* Emits (VARIABLE & VALUE) == 0 test and a conditional jump; returns
   the label to be placed after the unaligned-case code.  NOTE(review):
   the parameter declarations and return are elided in this extract.  */
10109 ix86_expand_aligntest (variable, value)
10113 rtx label = gen_label_rtx ();
10114 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
/* Mask off the low bits in the mode matching VARIABLE.  */
10115 if (GET_MODE (variable) == DImode)
10116 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10118 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10119 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10124 /* Adjust COUNTER by the VALUE. */
/* Subtracts VALUE from COUNTREG in whichever mode the register has
   (DImode on 64-bit targets, SImode otherwise).  */
10126 ix86_adjust_counter (countreg, value)
10128 HOST_WIDE_INT value;
10130 if (GET_MODE (countreg) == DImode)
10131 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10133 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10136 /* Zero extend possibly SImode EXP to Pmode register. */
/* Returns a Pmode register holding EXP: constants (VOIDmode) are
   forced into a register, Pmode values are copied, and SImode values
   are zero-extended with zero_extendsidi2.  */
10138 ix86_zero_extend_to_Pmode (exp)
10142 if (GET_MODE (exp) == VOIDmode)
10143 return force_reg (Pmode, exp);
10144 if (GET_MODE (exp) == Pmode)
10145 return copy_to_mode_reg (Pmode, exp);
10146 r = gen_reg_rtx (Pmode);
10147 emit_insn (gen_zero_extendsidi2 (r, exp));
10151 /* Expand string move (memcpy) operation. Use i386 string operations when
10152 profitable. expand_clrstr contains similar code. */
/* DST/SRC: MEM operands; COUNT_EXP: byte count; ALIGN_EXP: known
   alignment.  Presumably returns nonzero when the copy was expanded
   inline and zero to fall back to a library call -- TODO confirm, the
   return statements are elided in this extract.
   NOTE(review): many interior lines are elided; do not edit logic from
   this view alone.  */
10154 ix86_expand_movstr (dst, src, count_exp, align_exp)
10155 rtx dst, src, count_exp, align_exp;
10157 rtx srcreg, destreg, countreg;
10158 enum machine_mode counter_mode;
10159 HOST_WIDE_INT align = 0;
10160 unsigned HOST_WIDE_INT count = 0;
10165 if (GET_CODE (align_exp) == CONST_INT)
10166 align = INTVAL (align_exp);
10168 /* This simple hack avoids all inlining code and simplifies code below. */
10169 if (!TARGET_ALIGN_STRINGOPS)
10172 if (GET_CODE (count_exp) == CONST_INT)
10173 count = INTVAL (count_exp);
10175 /* Figure out proper mode for counter. For 32bits it is always SImode,
10176 for 64bits use SImode when possible, otherwise DImode.
10177 Set count to number of bytes copied when known at compile time. */
10178 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10179 || x86_64_zero_extended_value (count_exp))
10180 counter_mode = SImode;
10182 counter_mode = DImode;
10184 if (counter_mode != SImode && counter_mode != DImode)
10187 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10188 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* String insns require the direction flag cleared.  */
10190 emit_insn (gen_cld ());
10192 /* When optimizing for size emit simple rep ; movsb instruction for
10193 counts not divisible by 4. */
10195 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10197 countreg = ix86_zero_extend_to_Pmode (count_exp);
10199 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10200 destreg, srcreg, countreg));
10202 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10203 destreg, srcreg, countreg));
10206 /* For constant aligned (or small unaligned) copies use rep movsl
10207 followed by code copying the rest. For PentiumPro ensure 8 byte
10208 alignment to allow rep movsl acceleration. */
10210 else if (count != 0
10212 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10213 || optimize_size || count < (unsigned int) 64))
10215 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10216 if (count & ~(size - 1))
/* Word-sized bulk copy; the masking keeps the count within the
   32-bit rep range on 32-bit targets.  */
10218 countreg = copy_to_mode_reg (counter_mode,
10219 GEN_INT ((count >> (size == 4 ? 2 : 3))
10220 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10221 countreg = ix86_zero_extend_to_Pmode (countreg);
10225 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10226 destreg, srcreg, countreg));
10228 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10229 destreg, srcreg, countreg));
10232 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10233 destreg, srcreg, countreg));
/* Copy the remaining tail bytes.  */
10235 if (size == 8 && (count & 0x04))
10236 emit_insn (gen_strmovsi (destreg, srcreg));
10238 emit_insn (gen_strmovhi (destreg, srcreg));
10240 emit_insn (gen_strmovqi (destreg, srcreg));
10242 /* The generic code based on the glibc implementation:
10243 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10244 allowing accelerated copying there)
10245 - copy the data using rep movsl
10246 - copy the rest. */
10251 int desired_alignment = (TARGET_PENTIUMPRO
10252 && (count == 0 || count >= (unsigned int) 260)
10253 ? 8 : UNITS_PER_WORD);
10255 /* In case we don't know anything about the alignment, default to
10256 library version, since it is usually equally fast and result in
10258 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10264 if (TARGET_SINGLE_STRINGOP)
10265 emit_insn (gen_cld ());
10267 countreg2 = gen_reg_rtx (Pmode);
10268 countreg = copy_to_mode_reg (counter_mode, count_exp);
10270 /* We don't use loops to align destination and to copy parts smaller
10271 than 4 bytes, because gcc is able to optimize such code better (in
10272 the case the destination or the count really is aligned, gcc is often
10273 able to predict the branches) and also it is friendlier to the
10274 hardware branch prediction.
10276 Using loops is benefical for generic case, because we can
10277 handle small counts using the loops. Many CPUs (such as Athlon)
10278 have large REP prefix setup costs.
10280 This is quite costy. Maybe we can revisit this decision later or
10281 add some customizability to this code. */
10283 if (count == 0 && align < desired_alignment)
10285 label = gen_label_rtx ();
10286 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10287 LEU, 0, counter_mode, 1, label);
/* Align destination byte-by-byte/halfword/word as needed.  */
10291 rtx label = ix86_expand_aligntest (destreg, 1);
10292 emit_insn (gen_strmovqi (destreg, srcreg));
10293 ix86_adjust_counter (countreg, 1);
10294 emit_label (label);
10295 LABEL_NUSES (label) = 1;
10299 rtx label = ix86_expand_aligntest (destreg, 2);
10300 emit_insn (gen_strmovhi (destreg, srcreg));
10301 ix86_adjust_counter (countreg, 2);
10302 emit_label (label);
10303 LABEL_NUSES (label) = 1;
10305 if (align <= 4 && desired_alignment > 4)
10307 rtx label = ix86_expand_aligntest (destreg, 4);
10308 emit_insn (gen_strmovsi (destreg, srcreg));
10309 ix86_adjust_counter (countreg, 4);
10310 emit_label (label);
10311 LABEL_NUSES (label) = 1;
10314 if (label && desired_alignment > 4 && !TARGET_64BIT)
10316 emit_label (label);
10317 LABEL_NUSES (label) = 1;
10320 if (!TARGET_SINGLE_STRINGOP)
10321 emit_insn (gen_cld ());
/* Bulk copy: count/8 (64-bit) or count/4 (32-bit) words.  */
10324 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10326 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10327 destreg, srcreg, countreg2));
10331 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10332 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10333 destreg, srcreg, countreg2));
10338 emit_label (label);
10339 LABEL_NUSES (label) = 1;
/* Copy the remaining 1-7 tail bytes, guarded by alignment/count.  */
10341 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10342 emit_insn (gen_strmovsi (destreg, srcreg));
10343 if ((align <= 4 || count == 0) && TARGET_64BIT)
10345 rtx label = ix86_expand_aligntest (countreg, 4);
10346 emit_insn (gen_strmovsi (destreg, srcreg));
10347 emit_label (label);
10348 LABEL_NUSES (label) = 1;
10350 if (align > 2 && count != 0 && (count & 2))
10351 emit_insn (gen_strmovhi (destreg, srcreg));
10352 if (align <= 2 || count == 0)
10354 rtx label = ix86_expand_aligntest (countreg, 2);
10355 emit_insn (gen_strmovhi (destreg, srcreg));
10356 emit_label (label);
10357 LABEL_NUSES (label) = 1;
10359 if (align > 1 && count != 0 && (count & 1))
10360 emit_insn (gen_strmovqi (destreg, srcreg));
10361 if (align <= 1 || count == 0)
10363 rtx label = ix86_expand_aligntest (countreg, 1);
10364 emit_insn (gen_strmovqi (destreg, srcreg));
10365 emit_label (label);
10366 LABEL_NUSES (label) = 1;
10370 insns = get_insns ();
10373 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10378 /* Expand string clear operation (bzero). Use i386 string operations when
10379 profitable. expand_movstr contains similar code. */
/* SRC: the MEM to clear (note the parameter is named "src" though it
   is the destination of the stores); COUNT_EXP: byte count;
   ALIGN_EXP: known alignment.  Presumably returns nonzero when
   expanded inline -- TODO confirm, return statements are elided.
   NOTE(review): many interior lines are elided in this extract; do not
   edit logic from this view alone.  */
10381 ix86_expand_clrstr (src, count_exp, align_exp)
10382 rtx src, count_exp, align_exp;
10384 rtx destreg, zeroreg, countreg;
10385 enum machine_mode counter_mode;
10386 HOST_WIDE_INT align = 0;
10387 unsigned HOST_WIDE_INT count = 0;
10389 if (GET_CODE (align_exp) == CONST_INT)
10390 align = INTVAL (align_exp);
10392 /* This simple hack avoids all inlining code and simplifies code below. */
10393 if (!TARGET_ALIGN_STRINGOPS)
10396 if (GET_CODE (count_exp) == CONST_INT)
10397 count = INTVAL (count_exp);
10398 /* Figure out proper mode for counter. For 32bits it is always SImode,
10399 for 64bits use SImode when possible, otherwise DImode.
10400 Set count to number of bytes copied when known at compile time. */
10401 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10402 || x86_64_zero_extended_value (count_exp))
10403 counter_mode = SImode;
10405 counter_mode = DImode;
10407 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* String insns require the direction flag cleared.  */
10409 emit_insn (gen_cld ());
10411 /* When optimizing for size emit simple rep ; movsb instruction for
10412 counts not divisible by 4. */
10414 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10416 countreg = ix86_zero_extend_to_Pmode (count_exp);
10417 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10419 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10420 destreg, countreg));
10422 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10423 destreg, countreg));
10425 else if (count != 0
10427 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10428 || optimize_size || count < (unsigned int) 64))
10430 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10431 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10432 if (count & ~(size - 1))
/* Word-sized bulk store; masking keeps the count in rep range.  */
10434 countreg = copy_to_mode_reg (counter_mode,
10435 GEN_INT ((count >> (size == 4 ? 2 : 3))
10436 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10437 countreg = ix86_zero_extend_to_Pmode (countreg);
10441 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10442 destreg, countreg));
10444 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10445 destreg, countreg));
10448 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10449 destreg, countreg));
/* Store the remaining tail bytes from subwords of ZEROREG.  */
10451 if (size == 8 && (count & 0x04))
10452 emit_insn (gen_strsetsi (destreg,
10453 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10455 emit_insn (gen_strsethi (destreg,
10456 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10458 emit_insn (gen_strsetqi (destreg,
10459 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10465 /* Compute desired alignment of the string operation. */
10466 int desired_alignment = (TARGET_PENTIUMPRO
10467 && (count == 0 || count >= (unsigned int) 260)
10468 ? 8 : UNITS_PER_WORD);
10470 /* In case we don't know anything about the alignment, default to
10471 library version, since it is usually equally fast and result in
10473 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10476 if (TARGET_SINGLE_STRINGOP)
10477 emit_insn (gen_cld ());
10479 countreg2 = gen_reg_rtx (Pmode);
10480 countreg = copy_to_mode_reg (counter_mode, count_exp);
10481 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10483 if (count == 0 && align < desired_alignment)
10485 label = gen_label_rtx ();
10486 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10487 LEU, 0, counter_mode, 1, label);
/* Align destination byte/halfword/word at a time.  */
10491 rtx label = ix86_expand_aligntest (destreg, 1);
10492 emit_insn (gen_strsetqi (destreg,
10493 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10494 ix86_adjust_counter (countreg, 1);
10495 emit_label (label);
10496 LABEL_NUSES (label) = 1;
10500 rtx label = ix86_expand_aligntest (destreg, 2);
10501 emit_insn (gen_strsethi (destreg,
10502 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10503 ix86_adjust_counter (countreg, 2);
10504 emit_label (label);
10505 LABEL_NUSES (label) = 1;
10507 if (align <= 4 && desired_alignment > 4)
10509 rtx label = ix86_expand_aligntest (destreg, 4);
10510 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10511 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10513 ix86_adjust_counter (countreg, 4);
10514 emit_label (label);
10515 LABEL_NUSES (label) = 1;
10518 if (label && desired_alignment > 4 && !TARGET_64BIT)
10520 emit_label (label);
10521 LABEL_NUSES (label) = 1;
10525 if (!TARGET_SINGLE_STRINGOP)
10526 emit_insn (gen_cld ());
/* Bulk clear: count/8 (64-bit) or count/4 (32-bit) words.  */
10529 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10531 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10532 destreg, countreg2));
10536 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10537 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10538 destreg, countreg2));
10542 emit_label (label);
10543 LABEL_NUSES (label) = 1;
/* Clear the remaining 1-7 tail bytes, guarded by alignment/count.  */
10546 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10547 emit_insn (gen_strsetsi (destreg,
10548 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10549 if (TARGET_64BIT && (align <= 4 || count == 0))
10551 rtx label = ix86_expand_aligntest (countreg, 4);
10552 emit_insn (gen_strsetsi (destreg,
10553 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10554 emit_label (label);
10555 LABEL_NUSES (label) = 1;
10557 if (align > 2 && count != 0 && (count & 2))
10558 emit_insn (gen_strsethi (destreg,
10559 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10560 if (align <= 2 || count == 0)
10562 rtx label = ix86_expand_aligntest (countreg, 2);
10563 emit_insn (gen_strsethi (destreg,
10564 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10565 emit_label (label);
10566 LABEL_NUSES (label) = 1;
10568 if (align > 1 && count != 0 && (count & 1))
10569 emit_insn (gen_strsetqi (destreg,
10570 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10571 if (align <= 1 || count == 0)
10573 rtx label = ix86_expand_aligntest (countreg, 1);
10574 emit_insn (gen_strsetqi (destreg,
10575 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10576 emit_label (label);
10577 LABEL_NUSES (label) = 1;
10582 /* Expand strlen. */
/* OUT: result register; SRC: MEM holding the string; EOSCHAR: the
   terminator character (const0_rtx for the ordinary NUL-terminated
   case); ALIGN: known alignment.  Two strategies are visible: an
   unrolled SIMD-in-register scan (ix86_expand_strlensi_unroll_1) when
   profitable, otherwise repnz scasb via the strlenqi patterns followed
   by NOT and -1 adjustment to turn the remaining count into a length.
   NOTE(review): interior lines are elided in this extract; do not edit
   logic from this view alone.  */
10584 ix86_expand_strlen (out, src, eoschar, align)
10585 rtx out, src, eoschar, align;
10587 rtx addr, scratch1, scratch2, scratch3, scratch4;
10589 /* The generic case of strlen expander is long. Avoid it's
10590 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
10592 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10593 && !TARGET_INLINE_ALL_STRINGOPS
10595 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10598 addr = force_reg (Pmode, XEXP (src, 0));
10599 scratch1 = gen_reg_rtx (Pmode);
10601 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10604 /* Well it seems that some optimizer does not combine a call like
10605 foo(strlen(bar), strlen(bar));
10606 when the move and the subtraction is done here. It does calculate
10607 the length just once when these instructions are done inside of
10608 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10609 often used and I use one fewer register for the lifetime of
10610 output_strlen_unroll() this is better. */
10612 emit_move_insn (out, addr);
10614 ix86_expand_strlensi_unroll_1 (out, align);
10616 /* strlensi_unroll_1 returns the address of the zero at the end of
10617 the string, like memchr(), so compute the length by subtracting
10618 the start address. */
10620 emit_insn (gen_subdi3 (out, out, addr));
10622 emit_insn (gen_subsi3 (out, out, addr));
/* repnz scasb path.  */
10626 scratch2 = gen_reg_rtx (Pmode);
10627 scratch3 = gen_reg_rtx (Pmode);
/* scas decrements the count from -1; the final NOT/-1 below recovers
   the length.  */
10628 scratch4 = force_reg (Pmode, constm1_rtx);
10630 emit_move_insn (scratch3, addr);
10631 eoschar = force_reg (QImode, eoschar);
10633 emit_insn (gen_cld ());
10636 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10637 align, scratch4, scratch3));
10638 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10639 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10643 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10644 align, scratch4, scratch3));
10645 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10646 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10652 /* Expand the appropriate insns for doing strlen if not just doing
10655    out = result, initialized with the start address
10656    align_rtx = alignment of the address.
10657    scratch = scratch register, initialized with the start address when
10658 	not aligned, otherwise undefined
10660    This is just the body. It needs the initializations mentioned above and
10661    some address computing at the end.  These things are done in i386.md.  */
/* NOTE(review): many interior lines of this function were lost in
   extraction (the embedded numbering jumps); comments below describe only
   what the visible lines establish.  The overall shape: byte-step until
   OUT is 4-byte aligned, then scan a word per iteration using the
   borrow/complement trick to detect an embedded zero byte, then back OUT
   up to the exact NUL position without branching.  */
10664 ix86_expand_strlensi_unroll_1 (out, align_rtx)
10665      rtx out, align_rtx;
10669   rtx align_2_label = NULL_RTX;
10670   rtx align_3_label = NULL_RTX;
10671   rtx align_4_label = gen_label_rtx ();
10672   rtx end_0_label = gen_label_rtx ();
10674   rtx tmpreg = gen_reg_rtx (SImode);
10675   rtx scratch = gen_reg_rtx (SImode);
10678   if (GET_CODE (align_rtx) == CONST_INT)
10679     align = INTVAL (align_rtx);
10681   /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
10683   /* Is there a known alignment and is it less than 4?  */
10686       rtx scratch1 = gen_reg_rtx (Pmode);
10687       emit_move_insn (scratch1, out);
10688       /* Is there a known alignment and is it not 2? */
10691 	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10692 	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10694 	  /* Leave just the 3 lower bits.  */
10695 	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10696 				    NULL_RTX, 0, OPTAB_WIDEN);
10698 	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10699 				   Pmode, 1, align_4_label);
10700 	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
10701 				   Pmode, 1, align_2_label);
10702 	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
10703 				   Pmode, 1, align_3_label);
10707 	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
10708 	     check if is aligned to 4 - byte.  */
10710 	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
10711 				    NULL_RTX, 0, OPTAB_WIDEN);
10713 	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10714 				   Pmode, 1, align_4_label);
10717       mem = gen_rtx_MEM (QImode, out);
10719       /* Now compare the bytes.  */
10721       /* Compare the first n unaligned byte on a byte per byte basis.  */
10722       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
10723 			       QImode, 1, end_0_label);
10725       /* Increment the address.  */
/* The adddi3/addsi3 pairs below are presumably selected on TARGET_64BIT;
   the guarding conditionals were elided in extraction -- confirm against
   the full source.  */
10727 	emit_insn (gen_adddi3 (out, out, const1_rtx));
10729 	emit_insn (gen_addsi3 (out, out, const1_rtx));
10731       /* Not needed with an alignment of 2 */
10734 	  emit_label (align_2_label);
10736 	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10740 	    emit_insn (gen_adddi3 (out, out, const1_rtx));
10742 	    emit_insn (gen_addsi3 (out, out, const1_rtx));
10744 	  emit_label (align_3_label);
10747       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10751 	emit_insn (gen_adddi3 (out, out, const1_rtx));
10753 	emit_insn (gen_addsi3 (out, out, const1_rtx));
10756   /* Generate loop to check 4 bytes at a time.  It is not a good idea to
10757      align this loop.  It gives only huge programs, but does not help to
10759   emit_label (align_4_label);
10761   mem = gen_rtx_MEM (SImode, out);
10762   emit_move_insn (scratch, mem);
10764     emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10766     emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
10768   /* This formula yields a nonzero result iff one of the bytes is zero.
10769      This saves three branches inside loop and many cycles.  */
/* (word - 0x01010101) & ~word & 0x80808080 -- a byte's high bit is set in
   the result only when that byte borrowed, i.e. was zero.  */
10771   emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10772   emit_insn (gen_one_cmplsi2 (scratch, scratch));
10773   emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
10774   emit_insn (gen_andsi3 (tmpreg, tmpreg,
10775 			 gen_int_mode (0x80808080, SImode)));
10776   emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
10781       rtx reg = gen_reg_rtx (SImode);
10782       rtx reg2 = gen_reg_rtx (Pmode);
10783       emit_move_insn (reg, tmpreg);
10784       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10786       /* If zero is not in the first two bytes, move two bytes forward.  */
10787       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10788       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10789       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10790       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10791 			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
10794       /* Emit lea manually to avoid clobbering of flags.  */
10795       emit_insn (gen_rtx_SET (SImode, reg2,
10796 			      gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
10798       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10799       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10800       emit_insn (gen_rtx_SET (VOIDmode, out,
10801 			      gen_rtx_IF_THEN_ELSE (Pmode, tmp,
10808       rtx end_2_label = gen_label_rtx ();
10809       /* Is zero in the first two bytes? */
10811       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10812       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10813       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10814       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10815 				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10817       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10818       JUMP_LABEL (tmp) = end_2_label;
10820       /* Not in the first two.  Move two bytes forward.  */
10821       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
10823 	emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
10825 	emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
10827       emit_label (end_2_label);
10831   /* Avoid branch in fixing the byte.  */
/* addqi3_cc shifts the zero-flag bit into the carry, so the
   subtract-with-borrow corrects OUT by 3 or 2 without a branch.  */
10832   tmpreg = gen_lowpart (QImode, tmpreg);
10833   emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
10835     emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
10837     emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
10839   emit_label (end_0_label);
/* Emit a call to FNADDR, possibly with return value RETVAL, argument-count
   register use CALLARG1/CALLARG2, and a POP adjustment of the stack after
   return.  NOTE(review): interior lines (return type, braces, some guards)
   were elided in extraction -- comments describe only the visible code.  */
10843 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
10844      rtx retval, fnaddr, callarg1, callarg2, pop;
10846   rtx use = NULL, call;
10848   if (pop == const0_rtx)
10850   if (TARGET_64BIT && pop)
10854   if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
10855     fnaddr = machopic_indirect_call_target (fnaddr);
10857   /* Static functions and indirect calls don't need the pic register.  */
10858   if (! TARGET_64BIT && flag_pic
10859       && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
10860       && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
10861     use_reg (&use, pic_offset_table_rtx);
/* x86-64 varargs ABI: %al carries the number of SSE registers used.  */
10863   if (TARGET_64BIT && INTVAL (callarg2) >= 0)
10865       rtx al = gen_rtx_REG (QImode, 0);
10866       emit_move_insn (al, callarg2);
10867       use_reg (&use, al);
10869 #endif /* TARGET_MACHO */
10871   if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
10873       fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10874       fnaddr = gen_rtx_MEM (QImode, fnaddr);
10877   call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
10879     call = gen_rtx_SET (VOIDmode, retval, call);
/* Fold the callee-pop stack adjustment into the call pattern itself.  */
10882       pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
10883       pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
10884       call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
10887   call = emit_call_insn (call);
10889     CALL_INSN_FUNCTION_USAGE (call) = use;
10893 /* Clear stack slot assignments remembered from previous functions.
10894    This is called from INIT_EXPANDERS once before RTL is emitted for each
10897 static struct machine_function *
10898 ix86_init_machine_status ()
/* GC-allocate a zeroed per-function machine_function record.  */
10900   return ggc_alloc_cleared (sizeof (struct machine_function));
10903 /* Return a MEM corresponding to a stack slot with mode MODE.
10904    Allocate a new slot if necessary.
10906    The RTL for a function can have several slots available: N is
10907    which slot to use.  */
10910 assign_386_stack_local (mode, n)
10911      enum machine_mode mode;
/* N outside [0, MAX_386_STACK_LOCALS) is a caller bug (aborts; the abort
   line was elided in extraction).  */
10914   if (n < 0 || n >= MAX_386_STACK_LOCALS)
/* Lazily allocate and cache the slot per (mode, n) pair.  */
10917   if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
10918     ix86_stack_locals[(int) mode][n]
10919       = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10921   return ix86_stack_locals[(int) mode][n];
10924 /* Construct the SYMBOL_REF for the tls_get_addr function.  */
10926 static GTY(()) rtx ix86_tls_symbol;
10928 ix86_tls_get_addr ()
/* Build the symbol once and cache it; the GNU TLS dialect on 32-bit uses
   the triple-underscore entry point, otherwise the standard one.  */
10931   if (!ix86_tls_symbol)
10933       ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
10934 					    (TARGET_GNU_TLS && !TARGET_64BIT)
10935 					    ? "___tls_get_addr"
10936 					    : "__tls_get_addr");
10939   return ix86_tls_symbol;
10942 /* Calculate the length of the memory address in the instruction
10943    encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
10946 memory_address_length (addr)
10949   struct ix86_address parts;
10950   rtx base, index, disp;
/* Auto-inc/dec addressing encodes no extra address bytes.  */
10953   if (GET_CODE (addr) == PRE_DEC
10954       || GET_CODE (addr) == POST_INC
10955       || GET_CODE (addr) == PRE_MODIFY
10956       || GET_CODE (addr) == POST_MODIFY)
10959   if (! ix86_decompose_address (addr, &parts))
10963   index = parts.index;
10967   /* Register Indirect.  */
10968   if (base && !index && !disp)
10970       /* Special cases: ebp and esp need the two-byte modrm form.  */
10971       if (addr == stack_pointer_rtx
10972 	  || addr == arg_pointer_rtx
10973 	  || addr == frame_pointer_rtx
10974 	  || addr == hard_frame_pointer_rtx)
10978   /* Direct Addressing.  */
10979   else if (disp && !base && !index)
10984       /* Find the length of the displacement constant.  */
/* 'K' constraint = signed 8-bit; such displacements need one byte,
   otherwise a full 32-bit displacement is emitted.  */
10987 	  if (GET_CODE (disp) == CONST_INT
10988 	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
10994       /* An index requires the two-byte modrm form.  */
11002 /* Compute default value for "length_immediate" attribute.  When SHORTFORM
11003    is set, expect that insn have 8bit immediate alternative.  */
11005 ix86_attr_length_immediate_default (insn, shortform)
/* Scan the recognized operands for the (at most one) constant and return
   its encoded size in bytes based on the insn's mode attribute.  */
11011   extract_insn_cached (insn);
11012   for (i = recog_data.n_operands - 1; i >= 0; --i)
11013     if (CONSTANT_P (recog_data.operand[i]))
/* 'K' = fits in a signed byte, usable when the short form exists.  */
11018 	    && GET_CODE (recog_data.operand[i]) == CONST_INT
11019 	    && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11023 	  switch (get_attr_mode (insn))
11034 	      /* Immediates for DImode instructions are encoded as 32bit sign extended values.  */
11039 	      fatal_insn ("unknown insn mode", insn);
11045 /* Compute default value for "length_address" attribute.  */
11047 ix86_attr_length_address_default (insn)
/* Return the address-encoding length of the first MEM operand found;
   the no-MEM fallback return was elided in extraction.  */
11051   extract_insn_cached (insn);
11052   for (i = recog_data.n_operands - 1; i >= 0; --i)
11053     if (GET_CODE (recog_data.operand[i]) == MEM)
11055 	return memory_address_length (XEXP (recog_data.operand[i], 0));
11061 /* Return the maximum number of instructions a cpu can issue.  */
/* NOTE(review): the function signature and the per-case return values were
   elided in extraction; only the switch cases are visible.  */
11068     case PROCESSOR_PENTIUM:
11072     case PROCESSOR_PENTIUMPRO:
11073     case PROCESSOR_PENTIUM4:
11074     case PROCESSOR_ATHLON:
11082 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11083 by DEP_INSN and nothing set by DEP_INSN. */
11086 ix86_flags_dependant (insn, dep_insn, insn_type)
11087 rtx insn, dep_insn;
11088 enum attr_type insn_type;
11092 /* Simplify the test for uninteresting insns. */
11093 if (insn_type != TYPE_SETCC
11094 && insn_type != TYPE_ICMOV
11095 && insn_type != TYPE_FCMOV
11096 && insn_type != TYPE_IBR)
11099 if ((set = single_set (dep_insn)) != 0)
11101 set = SET_DEST (set);
11104 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11105 && XVECLEN (PATTERN (dep_insn), 0) == 2
11106 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11107 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11109 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11110 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11115 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11118 /* This test is true if the dependent insn reads the flags but
11119 not any other potentially set register. */
11120 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11123 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11129 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11130    address with operands set by DEP_INSN.  */
11133 ix86_agi_dependant (insn, dep_insn, insn_type)
11134      rtx insn, dep_insn;
11135      enum attr_type insn_type;
/* For LEA the "address" is the SET_SRC itself; otherwise take the address
   of the first MEM operand.  Branches with elided lines handle insns that
   are neither -- confirm against the full source.  */
11139   if (insn_type == TYPE_LEA
11142       addr = PATTERN (insn);
11143       if (GET_CODE (addr) == SET)
11145       else if (GET_CODE (addr) == PARALLEL
11146 	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11147 	addr = XVECEXP (addr, 0, 0);
11150       addr = SET_SRC (addr);
11155       extract_insn_cached (insn);
11156       for (i = recog_data.n_operands - 1; i >= 0; --i)
11157 	if (GET_CODE (recog_data.operand[i]) == MEM)
11159 	    addr = XEXP (recog_data.operand[i], 0);
/* AGI exists when DEP_INSN writes anything the address reads.  */
11166   return modified_in_p (addr, dep_insn);
/* Scheduler hook: adjust the COST of the dependence LINK between INSN and
   DEP_INSN for the selected CPU.  NOTE(review): the return statements and
   several cost adjustments were elided in extraction; comments describe
   only the visible structure.  */
11170 ix86_adjust_cost (insn, link, dep_insn, cost)
11171      rtx insn, link, dep_insn;
11174   enum attr_type insn_type, dep_insn_type;
11175   enum attr_memory memory, dep_memory;
11177   int dep_insn_code_number;
11179   /* Anti and output dependencies have zero cost on all CPUs.  */
11180   if (REG_NOTE_KIND (link) != 0)
11183   dep_insn_code_number = recog_memoized (dep_insn);
11185   /* If we can't recognize the insns, we can't really do anything.  */
11186   if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11189   insn_type = get_attr_type (insn);
11190   dep_insn_type = get_attr_type (dep_insn);
11194     case PROCESSOR_PENTIUM:
11195       /* Address Generation Interlock adds a cycle of latency.  */
11196       if (ix86_agi_dependant (insn, dep_insn, insn_type))
11199       /* ??? Compares pair with jump/setcc.  */
11200       if (ix86_flags_dependant (insn, dep_insn, insn_type))
11203       /* Floating point stores require value to be ready one cycle earlier.  */
11204       if (insn_type == TYPE_FMOV
11205 	  && get_attr_memory (insn) == MEMORY_STORE
11206 	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
11210     case PROCESSOR_PENTIUMPRO:
11211       memory = get_attr_memory (insn);
11212       dep_memory = get_attr_memory (dep_insn);
11214       /* Since we can't represent delayed latencies of load+operation,
11215 	 increase the cost here for non-imov insns.  */
11216       if (dep_insn_type != TYPE_IMOV
11217 	  && dep_insn_type != TYPE_FMOV
11218 	  && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11221       /* INT->FP conversion is expensive.  */
11222       if (get_attr_fp_int_src (dep_insn))
11225       /* There is one cycle extra latency between an FP op and a store.  */
11226       if (insn_type == TYPE_FMOV
11227 	  && (set = single_set (dep_insn)) != NULL_RTX
11228 	  && (set2 = single_set (insn)) != NULL_RTX
11229 	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11230 	  && GET_CODE (SET_DEST (set2)) == MEM)
11233       /* Show ability of reorder buffer to hide latency of load by executing
11234 	 in parallel with previous instruction in case
11235 	 previous instruction is not needed to compute the address.  */
11236       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11237 	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
11239 	  /* Claim moves to take one cycle, as core can issue one load
11240 	     at time and the next load can start cycle later.  */
11241 	  if (dep_insn_type == TYPE_IMOV
11242 	      || dep_insn_type == TYPE_FMOV)
/* The case label for this section (presumably PROCESSOR_K6) was elided in
   extraction.  */
11250       memory = get_attr_memory (insn);
11251       dep_memory = get_attr_memory (dep_insn);
11252       /* The esp dependency is resolved before the instruction is really
11254       if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11255 	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11258       /* Since we can't represent delayed latencies of load+operation,
11259 	 increase the cost here for non-imov insns.  */
11260       if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11261 	cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11263       /* INT->FP conversion is expensive.  */
11264       if (get_attr_fp_int_src (dep_insn))
11267       /* Show ability of reorder buffer to hide latency of load by executing
11268 	 in parallel with previous instruction in case
11269 	 previous instruction is not needed to compute the address.  */
11270       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11271 	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
11273 	  /* Claim moves to take one cycle, as core can issue one load
11274 	     at time and the next load can start cycle later.  */
11275 	  if (dep_insn_type == TYPE_IMOV
11276 	      || dep_insn_type == TYPE_FMOV)
11285     case PROCESSOR_ATHLON:
11286       memory = get_attr_memory (insn);
11287       dep_memory = get_attr_memory (dep_insn);
11289       if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11291 	  if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
11296       /* Show ability of reorder buffer to hide latency of load by executing
11297 	 in parallel with previous instruction in case
11298 	 previous instruction is not needed to compute the address.  */
11299       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11300 	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
11302 	  /* Claim moves to take one cycle, as core can issue one load
11303 	     at time and the next load can start cycle later.  */
11304 	  if (dep_insn_type == TYPE_IMOV
11305 	      || dep_insn_type == TYPE_FMOV)
11307 	  else if (cost >= 3)
/* Per-cycle PPro decoder state used by the scheduling hooks below.
   NOTE(review): the decode[] slot array member was elided in extraction.  */
11322   struct ppro_sched_data
11325     int issued_this_cycle;
/* Return INSN's ppro_uops attribute, defaulting unrecognizable insns to
   the conservative PPRO_UOPS_MANY classification.  */
11329 static enum attr_ppro_uops
11330 ix86_safe_ppro_uops (insn)
11333   if (recog_memoized (insn) >= 0)
11334     return get_attr_ppro_uops (insn);
11336     return PPRO_UOPS_MANY;
/* Debug helper: print the UIDs of the insns currently occupying the three
   PPro decoder slots to DUMP, if slot 0 is occupied.  */
11340 ix86_dump_ppro_packet (dump)
11343   if (ix86_sched_data.ppro.decode[0])
11345       fprintf (dump, "PPRO packet: %d",
11346 	       INSN_UID (ix86_sched_data.ppro.decode[0]));
11347       if (ix86_sched_data.ppro.decode[1])
11348 	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11349       if (ix86_sched_data.ppro.decode[2])
11350 	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11351       fputc ('\n', dump);
11355 /* We're beginning a new block.  Initialize data structures as necessary.  */
11358 ix86_sched_init (dump, sched_verbose, veclen)
11359      FILE *dump ATTRIBUTE_UNUSED;
11360      int sched_verbose ATTRIBUTE_UNUSED;
11361      int veclen ATTRIBUTE_UNUSED;
/* Reset all per-block scheduler state to zero.  */
11363   memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11366 /* Shift INSN to SLOT, and shift everything else down.  */
11369 ix86_reorder_insn (insnp, slot)
/* Slide each entry one position toward INSNP until SLOT is reached; the
   line storing the saved insn into *SLOT was elided in extraction.  */
11376       insnp[0] = insnp[1];
11377     while (++insnp != slot);
/* Reorder the ready list for the PPro 4-1-1 decoder template: pick one
   multi-uop insn for slot 0 and up to two single-uop insns for the other
   slots.  READY points at the lowest-priority entry, E_READY at the
   highest.  NOTE(review): several loop-control lines were elided in
   extraction.  */
11383 ix86_sched_reorder_ppro (ready, e_ready)
11388   enum attr_ppro_uops cur_uops;
11389   int issued_this_cycle;
11393   /* At this point .ppro.decode contains the state of the three
11394      decoders from last "cycle".  That is, those insns that were
11395      actually independent.  But here we're scheduling for the
11396      decoder, and we may find things that are decodable in the
11399   memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11400   issued_this_cycle = 0;
11403   cur_uops = ix86_safe_ppro_uops (*insnp);
11405   /* If the decoders are empty, and we've a complex insn at the
11406      head of the priority queue, let it issue without complaint.  */
11407   if (decode[0] == NULL)
11409       if (cur_uops == PPRO_UOPS_MANY)
11411 	  decode[0] = *insnp;
11415       /* Otherwise, search for a 2-4 uop unsn to issue.  */
11416       while (cur_uops != PPRO_UOPS_FEW)
11418 	  if (insnp == ready)
11420 	  cur_uops = ix86_safe_ppro_uops (*--insnp);
11423       /* If so, move it to the head of the line.  */
11424       if (cur_uops == PPRO_UOPS_FEW)
11425 	ix86_reorder_insn (insnp, e_ready);
11427       /* Issue the head of the queue.  */
11428       issued_this_cycle = 1;
11429       decode[0] = *e_ready--;
11432   /* Look for simple insns to fill in the other two slots.  */
11433   for (i = 1; i < 3; ++i)
11434     if (decode[i] == NULL)
11436 	if (ready > e_ready)
11440 	cur_uops = ix86_safe_ppro_uops (*insnp);
11441 	while (cur_uops != PPRO_UOPS_ONE)
11443 	    if (insnp == ready)
11445 	    cur_uops = ix86_safe_ppro_uops (*--insnp);
11448 	/* Found one.  Move it to the head of the queue and issue it.  */
11449 	if (cur_uops == PPRO_UOPS_ONE)
11451 	    ix86_reorder_insn (insnp, e_ready);
11452 	    decode[i] = *e_ready--;
11453 	    issued_this_cycle++;
11457 	/* ??? Didn't find one.  Ideally, here we would do a lazy split
11458 	   of 2-uop insns, issue one and queue the other.  */
/* Guarantee forward progress even when nothing matched the templates.  */
11462   if (issued_this_cycle == 0)
11463     issued_this_cycle = 1;
11464   ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11467 /* We are about to begin issuing insns for this clock cycle.
11468    Override the default sort algorithm to better slot instructions.  */
11470 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11471      FILE *dump ATTRIBUTE_UNUSED;
11472      int sched_verbose ATTRIBUTE_UNUSED;
11475      int clock_var ATTRIBUTE_UNUSED;
11477   int n_ready = *n_readyp;
11478   rtx *e_ready = ready + n_ready - 1;
11480   /* Make sure to go ahead and initialize key items in
11481      ix86_sched_data if we are not going to bother trying to
11482      reorder the ready queue.  */
11485       ix86_sched_data.ppro.issued_this_cycle = 1;
/* Only the PPro needs special reordering; other CPUs fall through to the
   default order (switch header elided in extraction).  */
11494     case PROCESSOR_PENTIUMPRO:
11495       ix86_sched_reorder_ppro (ready, e_ready);
11500   return ix86_issue_rate ();
11503 /* We are about to issue INSN.  Return the number of insns left on the
11504    ready queue that can be issued this cycle.  */
11507 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11511      int can_issue_more;
/* Default (non-PPro) policy: simple count-down.  */
11517       return can_issue_more - 1;
11519     case PROCESSOR_PENTIUMPRO:
11521 	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
/* A many-uop insn monopolizes the decoders: flush the packet and start a
   fresh one containing only INSN.  */
11523 	if (uops == PPRO_UOPS_MANY)
11526 	      ix86_dump_ppro_packet (dump);
11527 	    ix86_sched_data.ppro.decode[0] = insn;
11528 	    ix86_sched_data.ppro.decode[1] = NULL;
11529 	    ix86_sched_data.ppro.decode[2] = NULL;
11531 	      ix86_dump_ppro_packet (dump);
11532 	    ix86_sched_data.ppro.decode[0] = NULL;
11534 	else if (uops == PPRO_UOPS_FEW)
11537 	      ix86_dump_ppro_packet (dump);
11538 	    ix86_sched_data.ppro.decode[0] = insn;
11539 	    ix86_sched_data.ppro.decode[1] = NULL;
11540 	    ix86_sched_data.ppro.decode[2] = NULL;
/* Single-uop insn: place it in the first free decoder slot; if the packet
   fills, dump it and reset all three slots.  */
11544 	    for (i = 0; i < 3; ++i)
11545 	      if (ix86_sched_data.ppro.decode[i] == NULL)
11547 		  ix86_sched_data.ppro.decode[i] = insn;
11555 		  ix86_dump_ppro_packet (dump);
11556 		  ix86_sched_data.ppro.decode[0] = NULL;
11557 		  ix86_sched_data.ppro.decode[1] = NULL;
11558 		  ix86_sched_data.ppro.decode[2] = NULL;
11562 	return --ix86_sched_data.ppro.issued_this_cycle;
/* Target hook: nonzero when the DFA pipeline description should be used.
   Only the Pentium model has one here (return lines elided in
   extraction).  */
11567 ia32_use_dfa_pipeline_interface ()
11569   if (ix86_cpu == PROCESSOR_PENTIUM)
11574 /* How many alternative schedules to try.  This should be as wide as the
11575    scheduling freedom in the DFA, but no wider.  Making this value too
11576    large results extra work for the scheduler.  */
11579 ia32_multipass_dfa_lookahead ()
/* Non-Pentium return value elided in extraction.  */
11581   if (ix86_cpu == PROCESSOR_PENTIUM)
11588 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11589    SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
11593 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11595      rtx dstref, srcref, dstreg, srcreg;
/* Delegate the per-insn rewriting to the recursive helper below.  */
11599   for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11601       ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11605 /* Subroutine of above to actually do the updating by recursively walking
11609 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11611      rtx dstref, srcref, dstreg, srcreg;
11613   enum rtx_code code = GET_CODE (x);
11614   const char *format_ptr = GET_RTX_FORMAT (code);
/* Copy attributes when X is a MEM addressed exactly (pointer-equal) by
   one of the tracked registers.  */
11617   if (code == MEM && XEXP (x, 0) == dstreg)
11618     MEM_COPY_ATTRIBUTES (x, dstref);
11619   else if (code == MEM && XEXP (x, 0) == srcreg)
11620     MEM_COPY_ATTRIBUTES (x, srcref);
/* Recurse into every rtx ('e') and rtx-vector ('E') operand.  */
11622   for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11624       if (*format_ptr == 'e')
11625 	ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11627       else if (*format_ptr == 'E')
11628 	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11629 	  ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
11634 /* Compute the alignment given to a constant that is being placed in memory.
11635    EXP is the constant and ALIGN is the alignment that the object would
11637    The value of this function is used instead of that alignment to align
11641 ix86_constant_alignment (exp, align)
/* Widen doubles to 64-bit and 128-bit-mode reals to 128-bit alignment;
   long strings also get raised (return values elided in extraction).  */
11645   if (TREE_CODE (exp) == REAL_CST)
11647       if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11649       else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11652   else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
11659 /* Compute the alignment for a static variable.
11660    TYPE is the data type, and ALIGN is the alignment that
11661    the object would ordinarily have.  The value of this function is used
11662    instead of that alignment to align the object.  */
11665 ix86_data_alignment (type, align)
/* Large aggregates (>= 256 bits) get 256-bit alignment.  */
11669   if (AGGREGATE_TYPE_P (type)
11670        && TYPE_SIZE (type)
11671        && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11672        && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11673 	   || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11676   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11677      to 16byte boundary.  */
11680       if (AGGREGATE_TYPE_P (type)
11681 	   && TYPE_SIZE (type)
11682 	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11683 	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11684 	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Per-type-class widening: arrays/complex/records/scalars whose (first
   field's) mode is DFmode go to 64, 128-bit modes go to 128.  Return
   values elided in extraction.  */
11688   if (TREE_CODE (type) == ARRAY_TYPE)
11690       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11692       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11695   else if (TREE_CODE (type) == COMPLEX_TYPE)
11698       if (TYPE_MODE (type) == DCmode && align < 64)
11700       if (TYPE_MODE (type) == XCmode && align < 128)
11703   else if ((TREE_CODE (type) == RECORD_TYPE
11704 	    || TREE_CODE (type) == UNION_TYPE
11705 	    || TREE_CODE (type) == QUAL_UNION_TYPE)
11706 	   && TYPE_FIELDS (type))
11708       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11710       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11713   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11714 	   || TREE_CODE (type) == INTEGER_TYPE)
11716       if (TYPE_MODE (type) == DFmode && align < 64)
11718       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11725 /* Compute the alignment for a local variable.
11726    TYPE is the data type, and ALIGN is the alignment that
11727    the object would ordinarily have.  The value of this macro is used
11728    instead of that alignment to align the object.  */
11731 ix86_local_alignment (type, align)
11735   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11736      to 16byte boundary.  */
11739       if (AGGREGATE_TYPE_P (type)
11740 	   && TYPE_SIZE (type)
11741 	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11742 	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11743 	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Same per-type-class widening scheme as ix86_data_alignment above; the
   return statements were elided in extraction.  */
11746   if (TREE_CODE (type) == ARRAY_TYPE)
11748       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11750       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11753   else if (TREE_CODE (type) == COMPLEX_TYPE)
11755       if (TYPE_MODE (type) == DCmode && align < 64)
11757       if (TYPE_MODE (type) == XCmode && align < 128)
11760   else if ((TREE_CODE (type) == RECORD_TYPE
11761 	    || TREE_CODE (type) == UNION_TYPE
11762 	    || TREE_CODE (type) == QUAL_UNION_TYPE)
11763 	   && TYPE_FIELDS (type))
11765       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11767       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11770   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11771 	   || TREE_CODE (type) == INTEGER_TYPE)
11774       if (TYPE_MODE (type) == DFmode && align < 64)
11776       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11782 /* Emit RTL insns to initialize the variable parts of a trampoline.
11783    FNADDR is an RTX for the address of the function's pure code.
11784    CXT is an RTX for the static chain value for the function.  */
11786 x86_initialize_trampoline (tramp, fnaddr, cxt)
11787      rtx tramp, fnaddr, cxt;
/* 32-bit template: "movl $cxt, %ecx" (0xb9) then "jmp rel32" (0xe9).
   The TARGET_64BIT branch below instead builds mov-imm into %r11/%r10
   plus "jmpq *%r11"; the guarding conditional and some offset updates
   were elided in extraction.  */
11791       /* Compute offset from the end of the jmp to the target function.  */
11792       rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11793 			       plus_constant (tramp, 10),
11794 			       NULL_RTX, 1, OPTAB_DIRECT);
11795       emit_move_insn (gen_rtx_MEM (QImode, tramp),
11796 		      gen_int_mode (0xb9, QImode));
11797       emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11798       emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
11799 		      gen_int_mode (0xe9, QImode));
11800       emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11805       /* Try to load address using shorter movl instead of movabs.
11806          We may want to support movq for kernel mode, but kernel does not use
11807          trampolines at the moment.  */
11808       if (x86_64_zero_extended_value (fnaddr))
11810 	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
11811 	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11812 			  gen_int_mode (0xbb41, HImode));
11813 	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11814 			  gen_lowpart (SImode, fnaddr));
11819 	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11820 			  gen_int_mode (0xbb49, HImode));
11821 	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11825       /* Load static chain using movabs to r10.  */
11826       emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11827 		      gen_int_mode (0xba49, HImode));
11828       emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11831       /* Jump to the r11 */
11832       emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11833 		      gen_int_mode (0xff49, HImode));
11834       emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
11835 		      gen_int_mode (0xe3, QImode));
/* Sanity check: the emitted template must fit in TRAMPOLINE_SIZE.  */
11837       if (offset > TRAMPOLINE_SIZE)
11841 #ifdef TRANSFER_FROM_TRAMPOLINE
11842   emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
11843 		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
/* Register builtin NAME with signature TYPE and code CODE, but only when
   the ISA bits in MASK are enabled in target_flags.  */
11847 #define def_builtin(MASK, NAME, TYPE, CODE)			\
11849     if ((MASK) & target_flags)					\
11850       builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
11851 			NULL, NULL_TREE);			\
/* Table-entry type for the bdesc_* builtin tables below: enabling mask,
   expander insn, user-visible name, builtin code, and the comparison
   code/operand-swap flag used by comparison builtins.  */
11854 struct builtin_description
11856   const unsigned int mask;
11857   const enum insn_code icode;
11858   const char *const name;
11859   const enum ix86_builtins code;
11860   const enum rtx_code comparison;
11861   const unsigned int flag;
11864 /* Used for builtins that are enabled both by -msse and -msse2.  */
11865 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
/* COMISS/UCOMISS (SSE) and COMISD/UCOMISD (SSE2) scalar compare builtins.
   The UNEQ/UNLT/UNLE/LTGT codes model the unordered (NaN) behavior of the
   EFLAGS results.  */
11867 static const struct builtin_description bdesc_comi[] =
11869   { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
11870   { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
11871   { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
11872   { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
11873   { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
11874   { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
11875   { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
11876   { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
11877   { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
11878   { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
11879   { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
11880   { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
11881   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
11882   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
11883   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
11884   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
11885   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
11886   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
11887   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
11888   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
11889   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
11890   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
11891   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
11892   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
11895 static const struct builtin_description bdesc_2arg[] =
11898 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11899 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11900 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11901 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11902 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11903 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11904 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11905 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11907 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11908 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11909 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11910 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11911 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11912 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11913 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11914 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11915 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11916 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11917 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11918 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11919 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11920 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11921 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11922 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11923 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11924 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11925 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11926 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11928 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11929 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11930 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11931 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11933 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
11934 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
11935 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
11936 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
11938 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11939 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11940 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11941 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11942 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
11945 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11946 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11947 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11948 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11949 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11950 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11952 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11953 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11954 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11955 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11956 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11957 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11958 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11959 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11961 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11962 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
11963 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
11965 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11966 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11967 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11968 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11970 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11971 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
11973 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11974 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11975 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11976 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11977 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11978 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11980 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11981 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11982 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11983 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
11985 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11986 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11987 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11988 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11989 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11990 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
11993 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11994 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11995 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11997 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11998 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12000 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12001 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12002 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12003 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12004 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12005 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12007 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12008 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12009 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12010 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12011 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12012 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12014 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12015 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12016 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12017 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12019 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12020 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12023 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12024 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12025 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12026 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12027 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12028 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12029 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12030 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12032 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12033 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12034 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12035 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12036 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12037 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12038 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12039 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12040 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12041 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12042 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12043 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12044 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12045 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12046 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12047 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12048 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12049 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12050 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12051 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12053 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12054 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12055 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12056 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12058 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12059 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12060 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12061 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12063 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12064 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12065 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12068 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12069 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12070 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12071 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12072 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12073 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12074 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12075 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12077 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12078 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12079 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12080 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12081 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12082 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12083 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12084 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12086 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12087 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12088 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12089 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12091 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12092 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12093 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12094 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12096 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12097 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12099 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12100 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12101 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12102 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12103 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12104 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12106 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12107 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12108 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12109 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12111 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12112 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12113 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12114 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12115 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12116 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12117 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12118 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12120 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12121 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12122 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12124 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12125 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12127 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12128 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12129 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12130 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12131 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12132 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12134 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12135 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12136 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12137 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12138 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12139 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12141 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12142 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12143 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12144 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12146 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12148 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12149 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12150 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
12153 static const struct builtin_description bdesc_1arg[] =
12155 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12156 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12158 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12159 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12160 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12162 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12163 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12164 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12165 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12167 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12168 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12169 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12170 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12172 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12174 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12175 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12177 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12178 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12179 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12180 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12181 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12183 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12185 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12186 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12188 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12189 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12190 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12192 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
12196 ix86_init_builtins ()
12199 ix86_init_mmx_sse_builtins ();
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   portion of the builtins.  */
12206 ix86_init_mmx_sse_builtins ()
12208 const struct builtin_description * d;
12211 tree pchar_type_node = build_pointer_type (char_type_node);
12212 tree pfloat_type_node = build_pointer_type (float_type_node);
12213 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12214 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12215 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12218 tree int_ftype_v4sf_v4sf
12219 = build_function_type_list (integer_type_node,
12220 V4SF_type_node, V4SF_type_node, NULL_TREE);
12221 tree v4si_ftype_v4sf_v4sf
12222 = build_function_type_list (V4SI_type_node,
12223 V4SF_type_node, V4SF_type_node, NULL_TREE);
12224 /* MMX/SSE/integer conversions. */
12225 tree int_ftype_v4sf
12226 = build_function_type_list (integer_type_node,
12227 V4SF_type_node, NULL_TREE);
12228 tree int_ftype_v8qi
12229 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12230 tree v4sf_ftype_v4sf_int
12231 = build_function_type_list (V4SF_type_node,
12232 V4SF_type_node, integer_type_node, NULL_TREE);
12233 tree v4sf_ftype_v4sf_v2si
12234 = build_function_type_list (V4SF_type_node,
12235 V4SF_type_node, V2SI_type_node, NULL_TREE);
12236 tree int_ftype_v4hi_int
12237 = build_function_type_list (integer_type_node,
12238 V4HI_type_node, integer_type_node, NULL_TREE);
12239 tree v4hi_ftype_v4hi_int_int
12240 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12241 integer_type_node, integer_type_node,
12243 /* Miscellaneous. */
12244 tree v8qi_ftype_v4hi_v4hi
12245 = build_function_type_list (V8QI_type_node,
12246 V4HI_type_node, V4HI_type_node, NULL_TREE);
12247 tree v4hi_ftype_v2si_v2si
12248 = build_function_type_list (V4HI_type_node,
12249 V2SI_type_node, V2SI_type_node, NULL_TREE);
12250 tree v4sf_ftype_v4sf_v4sf_int
12251 = build_function_type_list (V4SF_type_node,
12252 V4SF_type_node, V4SF_type_node,
12253 integer_type_node, NULL_TREE);
12254 tree v2si_ftype_v4hi_v4hi
12255 = build_function_type_list (V2SI_type_node,
12256 V4HI_type_node, V4HI_type_node, NULL_TREE);
12257 tree v4hi_ftype_v4hi_int
12258 = build_function_type_list (V4HI_type_node,
12259 V4HI_type_node, integer_type_node, NULL_TREE);
12260 tree v4hi_ftype_v4hi_di
12261 = build_function_type_list (V4HI_type_node,
12262 V4HI_type_node, long_long_unsigned_type_node,
12264 tree v2si_ftype_v2si_di
12265 = build_function_type_list (V2SI_type_node,
12266 V2SI_type_node, long_long_unsigned_type_node,
12268 tree void_ftype_void
12269 = build_function_type (void_type_node, void_list_node);
12270 tree void_ftype_unsigned
12271 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12272 tree unsigned_ftype_void
12273 = build_function_type (unsigned_type_node, void_list_node);
12275 = build_function_type (long_long_unsigned_type_node, void_list_node);
12276 tree v4sf_ftype_void
12277 = build_function_type (V4SF_type_node, void_list_node);
12278 tree v2si_ftype_v4sf
12279 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12280 /* Loads/stores. */
12281 tree void_ftype_v8qi_v8qi_pchar
12282 = build_function_type_list (void_type_node,
12283 V8QI_type_node, V8QI_type_node,
12284 pchar_type_node, NULL_TREE);
12285 tree v4sf_ftype_pfloat
12286 = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
12287 /* @@@ the type is bogus */
12288 tree v4sf_ftype_v4sf_pv2si
12289 = build_function_type_list (V4SF_type_node,
12290 V4SF_type_node, pv2di_type_node, NULL_TREE);
12291 tree void_ftype_pv2si_v4sf
12292 = build_function_type_list (void_type_node,
12293 pv2di_type_node, V4SF_type_node, NULL_TREE);
12294 tree void_ftype_pfloat_v4sf
12295 = build_function_type_list (void_type_node,
12296 pfloat_type_node, V4SF_type_node, NULL_TREE);
12297 tree void_ftype_pdi_di
12298 = build_function_type_list (void_type_node,
12299 pdi_type_node, long_long_unsigned_type_node,
12301 tree void_ftype_pv2di_v2di
12302 = build_function_type_list (void_type_node,
12303 pv2di_type_node, V2DI_type_node, NULL_TREE);
12304 /* Normal vector unops. */
12305 tree v4sf_ftype_v4sf
12306 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12308 /* Normal vector binops. */
12309 tree v4sf_ftype_v4sf_v4sf
12310 = build_function_type_list (V4SF_type_node,
12311 V4SF_type_node, V4SF_type_node, NULL_TREE);
12312 tree v8qi_ftype_v8qi_v8qi
12313 = build_function_type_list (V8QI_type_node,
12314 V8QI_type_node, V8QI_type_node, NULL_TREE);
12315 tree v4hi_ftype_v4hi_v4hi
12316 = build_function_type_list (V4HI_type_node,
12317 V4HI_type_node, V4HI_type_node, NULL_TREE);
12318 tree v2si_ftype_v2si_v2si
12319 = build_function_type_list (V2SI_type_node,
12320 V2SI_type_node, V2SI_type_node, NULL_TREE);
12321 tree di_ftype_di_di
12322 = build_function_type_list (long_long_unsigned_type_node,
12323 long_long_unsigned_type_node,
12324 long_long_unsigned_type_node, NULL_TREE);
12326 tree v2si_ftype_v2sf
12327 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12328 tree v2sf_ftype_v2si
12329 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12330 tree v2si_ftype_v2si
12331 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12332 tree v2sf_ftype_v2sf
12333 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12334 tree v2sf_ftype_v2sf_v2sf
12335 = build_function_type_list (V2SF_type_node,
12336 V2SF_type_node, V2SF_type_node, NULL_TREE);
12337 tree v2si_ftype_v2sf_v2sf
12338 = build_function_type_list (V2SI_type_node,
12339 V2SF_type_node, V2SF_type_node, NULL_TREE);
12340 tree pint_type_node = build_pointer_type (integer_type_node);
12341 tree pdouble_type_node = build_pointer_type (double_type_node);
12342 tree int_ftype_v2df_v2df
12343 = build_function_type_list (integer_type_node,
12344 V2DF_type_node, V2DF_type_node, NULL_TREE);
12347 = build_function_type (intTI_type_node, void_list_node);
12348 tree v2di_ftype_void
12349 = build_function_type (V2DI_type_node, void_list_node);
12350 tree ti_ftype_ti_ti
12351 = build_function_type_list (intTI_type_node,
12352 intTI_type_node, intTI_type_node, NULL_TREE);
12353 tree void_ftype_pvoid
12354 = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
12356 = build_function_type_list (V2DI_type_node,
12357 long_long_unsigned_type_node, NULL_TREE);
12359 = build_function_type_list (long_long_unsigned_type_node,
12360 V2DI_type_node, NULL_TREE);
12361 tree v4sf_ftype_v4si
12362 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12363 tree v4si_ftype_v4sf
12364 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12365 tree v2df_ftype_v4si
12366 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12367 tree v4si_ftype_v2df
12368 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12369 tree v2si_ftype_v2df
12370 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12371 tree v4sf_ftype_v2df
12372 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12373 tree v2df_ftype_v2si
12374 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12375 tree v2df_ftype_v4sf
12376 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12377 tree int_ftype_v2df
12378 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12379 tree v2df_ftype_v2df_int
12380 = build_function_type_list (V2DF_type_node,
12381 V2DF_type_node, integer_type_node, NULL_TREE);
12382 tree v4sf_ftype_v4sf_v2df
12383 = build_function_type_list (V4SF_type_node,
12384 V4SF_type_node, V2DF_type_node, NULL_TREE);
12385 tree v2df_ftype_v2df_v4sf
12386 = build_function_type_list (V2DF_type_node,
12387 V2DF_type_node, V4SF_type_node, NULL_TREE);
12388 tree v2df_ftype_v2df_v2df_int
12389 = build_function_type_list (V2DF_type_node,
12390 V2DF_type_node, V2DF_type_node,
12393 tree v2df_ftype_v2df_pv2si
12394 = build_function_type_list (V2DF_type_node,
12395 V2DF_type_node, pv2si_type_node, NULL_TREE);
12396 tree void_ftype_pv2si_v2df
12397 = build_function_type_list (void_type_node,
12398 pv2si_type_node, V2DF_type_node, NULL_TREE);
12399 tree void_ftype_pdouble_v2df
12400 = build_function_type_list (void_type_node,
12401 pdouble_type_node, V2DF_type_node, NULL_TREE);
12402 tree void_ftype_pint_int
12403 = build_function_type_list (void_type_node,
12404 pint_type_node, integer_type_node, NULL_TREE);
12405 tree void_ftype_v16qi_v16qi_pchar
12406 = build_function_type_list (void_type_node,
12407 V16QI_type_node, V16QI_type_node,
12408 pchar_type_node, NULL_TREE);
12409 tree v2df_ftype_pdouble
12410 = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
12411 tree v2df_ftype_v2df_v2df
12412 = build_function_type_list (V2DF_type_node,
12413 V2DF_type_node, V2DF_type_node, NULL_TREE);
12414 tree v16qi_ftype_v16qi_v16qi
12415 = build_function_type_list (V16QI_type_node,
12416 V16QI_type_node, V16QI_type_node, NULL_TREE);
12417 tree v8hi_ftype_v8hi_v8hi
12418 = build_function_type_list (V8HI_type_node,
12419 V8HI_type_node, V8HI_type_node, NULL_TREE);
12420 tree v4si_ftype_v4si_v4si
12421 = build_function_type_list (V4SI_type_node,
12422 V4SI_type_node, V4SI_type_node, NULL_TREE);
12423 tree v2di_ftype_v2di_v2di
12424 = build_function_type_list (V2DI_type_node,
12425 V2DI_type_node, V2DI_type_node, NULL_TREE);
12426 tree v2di_ftype_v2df_v2df
12427 = build_function_type_list (V2DI_type_node,
12428 V2DF_type_node, V2DF_type_node, NULL_TREE);
12429 tree v2df_ftype_v2df
12430 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12431 tree v2df_ftype_double
12432 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12433 tree v2df_ftype_double_double
12434 = build_function_type_list (V2DF_type_node,
12435 double_type_node, double_type_node, NULL_TREE);
12436 tree int_ftype_v8hi_int
12437 = build_function_type_list (integer_type_node,
12438 V8HI_type_node, integer_type_node, NULL_TREE);
12439 tree v8hi_ftype_v8hi_int_int
12440 = build_function_type_list (V8HI_type_node,
12441 V8HI_type_node, integer_type_node,
12442 integer_type_node, NULL_TREE);
12443 tree v2di_ftype_v2di_int
12444 = build_function_type_list (V2DI_type_node,
12445 V2DI_type_node, integer_type_node, NULL_TREE);
12446 tree v4si_ftype_v4si_int
12447 = build_function_type_list (V4SI_type_node,
12448 V4SI_type_node, integer_type_node, NULL_TREE);
12449 tree v8hi_ftype_v8hi_int
12450 = build_function_type_list (V8HI_type_node,
12451 V8HI_type_node, integer_type_node, NULL_TREE);
12452 tree v8hi_ftype_v8hi_v2di
12453 = build_function_type_list (V8HI_type_node,
12454 V8HI_type_node, V2DI_type_node, NULL_TREE);
12455 tree v4si_ftype_v4si_v2di
12456 = build_function_type_list (V4SI_type_node,
12457 V4SI_type_node, V2DI_type_node, NULL_TREE);
12458 tree v4si_ftype_v8hi_v8hi
12459 = build_function_type_list (V4SI_type_node,
12460 V8HI_type_node, V8HI_type_node, NULL_TREE);
12461 tree di_ftype_v8qi_v8qi
12462 = build_function_type_list (long_long_unsigned_type_node,
12463 V8QI_type_node, V8QI_type_node, NULL_TREE);
12464 tree v2di_ftype_v16qi_v16qi
12465 = build_function_type_list (V2DI_type_node,
12466 V16QI_type_node, V16QI_type_node, NULL_TREE);
12467 tree int_ftype_v16qi
12468 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12469 tree v16qi_ftype_pchar
12470 = build_function_type_list (V16QI_type_node, pchar_type_node, NULL_TREE);
12471 tree void_ftype_pchar_v16qi
12472 = build_function_type_list (void_type_node,
12473 pchar_type_node, V16QI_type_node, NULL_TREE);
12474 tree v4si_ftype_pchar
12475 = build_function_type_list (V4SI_type_node, pchar_type_node, NULL_TREE);
12476 tree void_ftype_pchar_v4si
12477 = build_function_type_list (void_type_node,
12478 pchar_type_node, V4SI_type_node, NULL_TREE);
12479 tree v2di_ftype_v2di
12480 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
12482 /* Add all builtins that are more or less simple operations on two
12484 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12486 /* Use one of the operands; the target can have a different mode for
12487 mask-generating compares. */
12488 enum machine_mode mode;
12493 mode = insn_data[d->icode].operand[1].mode;
12498 type = v16qi_ftype_v16qi_v16qi;
12501 type = v8hi_ftype_v8hi_v8hi;
12504 type = v4si_ftype_v4si_v4si;
12507 type = v2di_ftype_v2di_v2di;
12510 type = v2df_ftype_v2df_v2df;
12513 type = ti_ftype_ti_ti;
12516 type = v4sf_ftype_v4sf_v4sf;
12519 type = v8qi_ftype_v8qi_v8qi;
12522 type = v4hi_ftype_v4hi_v4hi;
12525 type = v2si_ftype_v2si_v2si;
12528 type = di_ftype_di_di;
12535 /* Override for comparisons. */
12536 if (d->icode == CODE_FOR_maskcmpv4sf3
12537 || d->icode == CODE_FOR_maskncmpv4sf3
12538 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12539 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12540 type = v4si_ftype_v4sf_v4sf;
12542 if (d->icode == CODE_FOR_maskcmpv2df3
12543 || d->icode == CODE_FOR_maskncmpv2df3
12544 || d->icode == CODE_FOR_vmmaskcmpv2df3
12545 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12546 type = v2di_ftype_v2df_v2df;
12548 def_builtin (d->mask, d->name, type, d->code);
12551 /* Add the remaining MMX insns with somewhat more complicated types. */
12552 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12553 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12554 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12555 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12556 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12557 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12558 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12560 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12561 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12562 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12564 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12565 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12567 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12568 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12570 /* comi/ucomi insns. */
12571 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12572 if (d->mask == MASK_SSE2)
12573 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12575 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12577 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12578 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12579 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12581 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12582 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12583 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12584 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12585 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12586 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12588 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12589 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12591 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12593 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12594 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12595 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12596 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12597 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12598 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12600 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12601 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12602 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12603 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12605 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12606 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12607 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12608 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12610 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12612 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12614 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12615 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12616 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12617 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12618 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12619 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12621 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12623 /* Original 3DNow! */
12624 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12625 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12626 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12627 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12628 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12629 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12630 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12631 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12632 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12633 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12634 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12635 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12636 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12637 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12638 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12639 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12640 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12641 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12642 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12643 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12645 /* 3DNow! extension as used in the Athlon CPU. */
12646 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12647 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12648 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12649 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12650 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12651 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12653 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12656 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12657 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12659 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12660 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12661 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
12663 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
12664 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
12665 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
12666 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12667 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12668 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12670 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12671 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12672 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12673 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12675 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12676 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12677 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12678 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12679 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12681 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12682 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12683 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12684 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12686 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12687 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12689 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12691 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12692 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12694 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12695 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12696 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12697 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12698 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12700 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12702 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12703 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12705 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12706 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12707 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12709 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12710 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12711 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12713 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12714 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12715 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12716 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
12717 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
12718 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12719 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12721 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
12722 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12723 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
12725 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pchar, IX86_BUILTIN_LOADDQA);
12726 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pchar, IX86_BUILTIN_LOADDQU);
12727 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pchar, IX86_BUILTIN_LOADD);
12728 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
12729 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
12730 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pchar_v4si, IX86_BUILTIN_STORED);
12731 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
12733 def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
12735 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12736 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12737 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12739 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12740 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12741 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12743 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12744 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12746 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
12747 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12748 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12749 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12751 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
12752 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12753 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12754 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12756 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12757 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12759 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
12762 /* Errors in the source file can cause expand_expr to return const0_rtx
12763 where we expect a vector. To avoid crashing, use one of the vector
12764 clear instructions. */
/* safe_vector_operand (X, MODE): if X is const0_rtx, replace it with a
   freshly allocated register of MODE that has been explicitly cleared,
   so downstream insn patterns never see a scalar zero where a vector
   operand is required.  Otherwise X is returned unchanged (the early
   "if (x != const0_rtx)" guard; its return is elided in this listing).  */
12766 safe_vector_operand (x, mode)
12768 enum machine_mode mode;
12770 if (x != const0_rtx)
12772 x = gen_reg_rtx (mode);
/* MMX and 3DNow! modes live in the 64-bit MMX registers: clear through a
   DImode view of the new register (a SUBREG when MODE is not DImode).  */
12774 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
12775 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12776 : gen_rtx_SUBREG (DImode, x, 0)));
/* NOTE(review): the "else" keyword between the two emit_insn calls is
   elided in this listing; all other vector modes are cleared through a
   V4SFmode view using the SSE clear pattern.  */
12778 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12779 : gen_rtx_SUBREG (V4SFmode, x, 0)));
12783 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expand a two-operand builtin: evaluate the two arguments from ARGLIST,
   coerce them into the modes the insn pattern ICODE demands, and emit the
   pattern with TARGET as the destination.  Returns the result rtx (the
   final emit/return lines are elided from this listing).  */
12786 ix86_expand_binop_builtin (icode, arglist, target)
12787 enum insn_code icode;
/* Pull the two argument trees off the TREE_LIST and expand each to RTL.  */
12792 tree arg0 = TREE_VALUE (arglist);
12793 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12794 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12795 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
/* Operand modes come straight from the insn's machine-description data:
   operand[0] is the result, operand[1]/[2] are the two inputs.  */
12796 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12797 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12798 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx standing in for a vector (see
   safe_vector_operand above).  */
12800 if (VECTOR_MODE_P (mode0))
12801 op0 = safe_vector_operand (op0, mode0);
12802 if (VECTOR_MODE_P (mode1))
12803 op1 = safe_vector_operand (op1, mode1);
/* Reuse TARGET only if it exists, has the right mode, and satisfies the
   destination predicate (the leading "if (! target" line is elided).  */
12806 || GET_MODE (target) != tmode
12807 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12808 target = gen_reg_rtx (tmode);
12810 /* In case the insn wants input operands in modes different from
12811 the result, abort. */
12812 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
/* Force each input into a register of the expected mode when it fails
   the pattern's operand predicate.  */
12815 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12816 op0 = copy_to_mode_reg (mode0, op0);
12817 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12818 op1 = copy_to_mode_reg (mode1, op1);
12820 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12821 yet one of the two must not be a memory. This is normally enforced
12822 by expanders, but we didn't bother to create one here. */
12823 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12824 op0 = copy_to_mode_reg (mode0, op0);
/* Generate the insn; the pat-NULL check and emit are elided here.  */
12826 pat = GEN_FCN (icode) (target, op0, op1);
12833 /* Subroutine of ix86_expand_builtin to take care of stores. */
/* Expand a store builtin of the form (mem) = value: arg0 is the pointer
   to store through, arg1 is the value.  Emits the store pattern ICODE;
   the trailing pat check/emit/return lines are elided in this listing.  */
12836 ix86_expand_store_builtin (icode, arglist)
12837 enum insn_code icode;
12841 tree arg0 = TREE_VALUE (arglist);
12842 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12843 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12844 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
/* operand[0] is the memory destination, operand[1] the stored value.  */
12845 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12846 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
/* Substitute a cleared vector register if the value expanded to
   const0_rtx (error recovery, see safe_vector_operand).  */
12848 if (VECTOR_MODE_P (mode1))
12849 op1 = safe_vector_operand (op1, mode1);
/* Turn the pointer argument into a MEM of the destination mode, forcing
   the address into a Pmode register first.  */
12851 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12853 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12854 op1 = copy_to_mode_reg (mode1, op1);
12856 pat = GEN_FCN (icode) (op0, op1);
12862 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
/* Expand a one-operand builtin.  When DO_LOAD is nonzero the single
   argument is a pointer and is wrapped in a MEM (used for the load*
   builtins); otherwise it is the value itself.  The branch structure
   around the two cases (if/else lines) is elided in this listing.  */
12865 ix86_expand_unop_builtin (icode, arglist, target, do_load)
12866 enum insn_code icode;
12872 tree arg0 = TREE_VALUE (arglist);
12873 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12874 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12875 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Reuse TARGET only if usable as the destination (leading "if (! target"
   line elided), else allocate a fresh register of the result mode.  */
12878 || GET_MODE (target) != tmode
12879 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12880 target = gen_reg_rtx (tmode);
/* do_load case: dereference the pointer argument.  */
12882 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
/* non-load case: guard against const0_rtx-for-vector and coerce to the
   mode the pattern's predicate accepts.  */
12885 if (VECTOR_MODE_P (mode0))
12886 op0 = safe_vector_operand (op0, mode0);
12888 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12889 op0 = copy_to_mode_reg (mode0, op0);
12892 pat = GEN_FCN (icode) (target, op0);
12899 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12900 sqrtss, rsqrtss, rcpss. */
/* These scalar SSE insns take TWO vector operands even though the builtin
   is logically unary: the low element is computed from op1 while the
   upper elements pass through from op0, so the same source feeds both
   operand slots (the "op1 = op0" assignment is elided in this listing).  */
12903 ix86_expand_unop1_builtin (icode, arglist, target)
12904 enum insn_code icode;
12909 tree arg0 = TREE_VALUE (arglist);
12910 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12911 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12912 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Reuse TARGET only if usable as the destination (leading "if (! target"
   line elided).  */
12915 || GET_MODE (target) != tmode
12916 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12917 target = gen_reg_rtx (tmode);
12919 if (VECTOR_MODE_P (mode0))
12920 op0 = safe_vector_operand (op0, mode0);
12922 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12923 op0 = copy_to_mode_reg (mode0, op0);
/* op1 gets the same source; both input operands share mode0 here.  */
12926 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12927 op1 = copy_to_mode_reg (mode0, op1);
12929 pat = GEN_FCN (icode) (target, op0, op1);
12936 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expand an SSE compare builtin described by D (cmpps/cmppd family).
   The comparison code is carried as an extra rtx operand (op2) alongside
   the two vector inputs; the pattern selects the right immediate from it.
   The final pat check/emit/return lines are elided in this listing.  */
12939 ix86_expand_sse_compare (d, arglist, target)
12940 const struct builtin_description *d;
12945 tree arg0 = TREE_VALUE (arglist);
12946 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12947 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12948 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12950 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12951 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12952 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
/* The rtx comparison code (EQ, LT, ...) recorded in the builtin table.  */
12953 enum rtx_code comparison = d->comparison;
12955 if (VECTOR_MODE_P (mode0))
12956 op0 = safe_vector_operand (op0, mode0)
12957 if (VECTOR_MODE_P (mode1))
12958 op1 = safe_vector_operand (op1, mode1);
12960 /* Swap operands if we have a comparison that isn't available in
/* NOTE(review): the swap condition and the actual exchange of op0/op1
   (plus the updated comparison code) are elided between these lines;
   only the temporary used for the swap is visible.  */
12964 rtx tmp = gen_reg_rtx (mode1);
12965 emit_move_insn (tmp, op1);
/* Reuse TARGET only if usable as the destination (leading "if (! target"
   line elided).  */
12971 || GET_MODE (target) != tmode
12972 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
12973 target = gen_reg_rtx (tmode);
12975 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12976 op0 = copy_to_mode_reg (mode0, op0);
12977 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12978 op1 = copy_to_mode_reg (mode1, op1);
/* Build the comparison rtx itself and pass it as the insn's 4th operand.  */
12980 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12981 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12988 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expand a comiss/ucomiss-style builtin: the insn compares the low
   elements and sets the flags; the result is materialized as a 0/1 int
   by zero-extending a QImode setcc written into the low byte of an
   SImode pseudo (via STRICT_LOW_PART below).  */
12991 ix86_expand_sse_comi (d, arglist, target)
12992 const struct builtin_description *d;
12997 tree arg0 = TREE_VALUE (arglist);
12998 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12999 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13000 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13002 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13003 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13004 enum rtx_code comparison = d->comparison;
13006 if (VECTOR_MODE_P (mode0))
13007 op0 = safe_vector_operand (op0, mode0);
13008 if (VECTOR_MODE_P (mode1))
13009 op1 = safe_vector_operand (op1, mode1);
13011 /* Swap operands if we have a comparison that isn't available in
/* NOTE(review): the swap condition/body (lines 13012-13018) is elided
   from this listing.  */
/* Result is built in a zeroed SImode register; TARGET is then retargeted
   to a QImode SUBREG of it so only the low byte is written by the setcc.  */
13020 target = gen_reg_rtx (SImode);
13021 emit_move_insn (target, const0_rtx);
13022 target = gen_rtx_SUBREG (QImode, target, 0);
13024 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13025 op0 = copy_to_mode_reg (mode0, op0);
13026 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13027 op1 = copy_to_mode_reg (mode1, op1);
/* NOTE(review): op2 is constructed here but the visible call to GEN_FCN
   passes only (op0, op1); whether op2 is consumed elsewhere is not
   visible in this elided listing -- confirm against the full source.  */
13029 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13030 pat = GEN_FCN (d->icode) (op0, op1);
/* Store the flag-derived 0/1 into the low byte only, leaving the upper
   bytes (already zeroed above) intact, then return the full SImode reg.  */
13034 emit_insn (gen_rtx_SET (VOIDmode,
13035 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13036 gen_rtx_fmt_ee (comparison, QImode,
13040 return SUBREG_REG (target);
13043 /* Expand an expression EXP that calls a built-in function,
13044 with result going to TARGET if that's convenient
13045 (and in mode MODE if that's convenient).
13046 SUBTARGET may be used as the target for computing one of EXP's operands.
13047 IGNORE is nonzero if the value is to be ignored. */
13050 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
13053 rtx subtarget ATTRIBUTE_UNUSED;
13054 enum machine_mode mode ATTRIBUTE_UNUSED;
13055 int ignore ATTRIBUTE_UNUSED;
13057 const struct builtin_description *d;
13059 enum insn_code icode;
13060 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13061 tree arglist = TREE_OPERAND (exp, 1);
13062 tree arg0, arg1, arg2;
13063 rtx op0, op1, op2, pat;
13064 enum machine_mode tmode, mode0, mode1, mode2;
13065 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13069 case IX86_BUILTIN_EMMS:
13070 emit_insn (gen_emms ());
13073 case IX86_BUILTIN_SFENCE:
13074 emit_insn (gen_sfence ());
13077 case IX86_BUILTIN_PEXTRW:
13078 case IX86_BUILTIN_PEXTRW128:
13079 icode = (fcode == IX86_BUILTIN_PEXTRW
13080 ? CODE_FOR_mmx_pextrw
13081 : CODE_FOR_sse2_pextrw);
13082 arg0 = TREE_VALUE (arglist);
13083 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13084 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13085 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13086 tmode = insn_data[icode].operand[0].mode;
13087 mode0 = insn_data[icode].operand[1].mode;
13088 mode1 = insn_data[icode].operand[2].mode;
13090 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13091 op0 = copy_to_mode_reg (mode0, op0);
13092 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13094 /* @@@ better error message */
13095 error ("selector must be an immediate");
13096 return gen_reg_rtx (tmode);
13099 || GET_MODE (target) != tmode
13100 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13101 target = gen_reg_rtx (tmode);
13102 pat = GEN_FCN (icode) (target, op0, op1);
13108 case IX86_BUILTIN_PINSRW:
13109 case IX86_BUILTIN_PINSRW128:
13110 icode = (fcode == IX86_BUILTIN_PINSRW
13111 ? CODE_FOR_mmx_pinsrw
13112 : CODE_FOR_sse2_pinsrw);
13113 arg0 = TREE_VALUE (arglist);
13114 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13115 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13116 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13117 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13118 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13119 tmode = insn_data[icode].operand[0].mode;
13120 mode0 = insn_data[icode].operand[1].mode;
13121 mode1 = insn_data[icode].operand[2].mode;
13122 mode2 = insn_data[icode].operand[3].mode;
13124 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13125 op0 = copy_to_mode_reg (mode0, op0);
13126 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13127 op1 = copy_to_mode_reg (mode1, op1);
13128 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13130 /* @@@ better error message */
13131 error ("selector must be an immediate");
13135 || GET_MODE (target) != tmode
13136 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13137 target = gen_reg_rtx (tmode);
13138 pat = GEN_FCN (icode) (target, op0, op1, op2);
13144 case IX86_BUILTIN_MASKMOVQ:
13145 case IX86_BUILTIN_MASKMOVDQU:
13146 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13147 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13148 : CODE_FOR_sse2_maskmovdqu);
13149 /* Note the arg order is different from the operand order. */
13150 arg1 = TREE_VALUE (arglist);
13151 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13152 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13153 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13154 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13155 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13156 mode0 = insn_data[icode].operand[0].mode;
13157 mode1 = insn_data[icode].operand[1].mode;
13158 mode2 = insn_data[icode].operand[2].mode;
13160 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13161 op0 = copy_to_mode_reg (mode0, op0);
13162 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13163 op1 = copy_to_mode_reg (mode1, op1);
13164 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13165 op2 = copy_to_mode_reg (mode2, op2);
13166 pat = GEN_FCN (icode) (op0, op1, op2);
13172 case IX86_BUILTIN_SQRTSS:
13173 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13174 case IX86_BUILTIN_RSQRTSS:
13175 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13176 case IX86_BUILTIN_RCPSS:
13177 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13179 case IX86_BUILTIN_LOADAPS:
13180 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13182 case IX86_BUILTIN_LOADUPS:
13183 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13185 case IX86_BUILTIN_STOREAPS:
13186 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13188 case IX86_BUILTIN_STOREUPS:
13189 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13191 case IX86_BUILTIN_LOADSS:
13192 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13194 case IX86_BUILTIN_STORESS:
13195 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13197 case IX86_BUILTIN_LOADHPS:
13198 case IX86_BUILTIN_LOADLPS:
13199 case IX86_BUILTIN_LOADHPD:
13200 case IX86_BUILTIN_LOADLPD:
13201 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13202 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13203 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13204 : CODE_FOR_sse2_movlpd);
13205 arg0 = TREE_VALUE (arglist);
13206 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13207 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13208 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13209 tmode = insn_data[icode].operand[0].mode;
13210 mode0 = insn_data[icode].operand[1].mode;
13211 mode1 = insn_data[icode].operand[2].mode;
13213 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13214 op0 = copy_to_mode_reg (mode0, op0);
13215 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13217 || GET_MODE (target) != tmode
13218 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13219 target = gen_reg_rtx (tmode);
13220 pat = GEN_FCN (icode) (target, op0, op1);
13226 case IX86_BUILTIN_STOREHPS:
13227 case IX86_BUILTIN_STORELPS:
13228 case IX86_BUILTIN_STOREHPD:
13229 case IX86_BUILTIN_STORELPD:
13230 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13231 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13232 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13233 : CODE_FOR_sse2_movlpd);
13234 arg0 = TREE_VALUE (arglist);
13235 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13236 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13237 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13238 mode0 = insn_data[icode].operand[1].mode;
13239 mode1 = insn_data[icode].operand[2].mode;
13241 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13242 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13243 op1 = copy_to_mode_reg (mode1, op1);
13245 pat = GEN_FCN (icode) (op0, op0, op1);
13251 case IX86_BUILTIN_MOVNTPS:
13252 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13253 case IX86_BUILTIN_MOVNTQ:
13254 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13256 case IX86_BUILTIN_LDMXCSR:
13257 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13258 target = assign_386_stack_local (SImode, 0);
13259 emit_move_insn (target, op0);
13260 emit_insn (gen_ldmxcsr (target));
13263 case IX86_BUILTIN_STMXCSR:
13264 target = assign_386_stack_local (SImode, 0);
13265 emit_insn (gen_stmxcsr (target));
13266 return copy_to_mode_reg (SImode, target);
13268 case IX86_BUILTIN_SHUFPS:
13269 case IX86_BUILTIN_SHUFPD:
13270 icode = (fcode == IX86_BUILTIN_SHUFPS
13271 ? CODE_FOR_sse_shufps
13272 : CODE_FOR_sse2_shufpd);
13273 arg0 = TREE_VALUE (arglist);
13274 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13275 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13276 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13277 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13278 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13279 tmode = insn_data[icode].operand[0].mode;
13280 mode0 = insn_data[icode].operand[1].mode;
13281 mode1 = insn_data[icode].operand[2].mode;
13282 mode2 = insn_data[icode].operand[3].mode;
13284 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13285 op0 = copy_to_mode_reg (mode0, op0);
13286 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13287 op1 = copy_to_mode_reg (mode1, op1);
13288 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13290 /* @@@ better error message */
13291 error ("mask must be an immediate");
13292 return gen_reg_rtx (tmode);
13295 || GET_MODE (target) != tmode
13296 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13297 target = gen_reg_rtx (tmode);
13298 pat = GEN_FCN (icode) (target, op0, op1, op2);
13304 case IX86_BUILTIN_PSHUFW:
13305 case IX86_BUILTIN_PSHUFD:
13306 case IX86_BUILTIN_PSHUFHW:
13307 case IX86_BUILTIN_PSHUFLW:
13308 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13309 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13310 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13311 : CODE_FOR_mmx_pshufw);
13312 arg0 = TREE_VALUE (arglist);
13313 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13314 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13315 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13316 tmode = insn_data[icode].operand[0].mode;
13317 mode1 = insn_data[icode].operand[1].mode;
13318 mode2 = insn_data[icode].operand[2].mode;
13320 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13321 op0 = copy_to_mode_reg (mode1, op0);
13322 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13324 /* @@@ better error message */
13325 error ("mask must be an immediate");
13329 || GET_MODE (target) != tmode
13330 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13331 target = gen_reg_rtx (tmode);
13332 pat = GEN_FCN (icode) (target, op0, op1);
13338 case IX86_BUILTIN_PSLLDQI128:
13339 case IX86_BUILTIN_PSRLDQI128:
13340 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13341 : CODE_FOR_sse2_lshrti3);
13342 arg0 = TREE_VALUE (arglist);
13343 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13344 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13345 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13346 tmode = insn_data[icode].operand[0].mode;
13347 mode1 = insn_data[icode].operand[1].mode;
13348 mode2 = insn_data[icode].operand[2].mode;
13350 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13352 op0 = copy_to_reg (op0);
13353 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13355 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13357 error ("shift must be an immediate");
13360 target = gen_reg_rtx (V2DImode);
13361 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
13367 case IX86_BUILTIN_FEMMS:
13368 emit_insn (gen_femms ());
13371 case IX86_BUILTIN_PAVGUSB:
13372 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13374 case IX86_BUILTIN_PF2ID:
13375 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13377 case IX86_BUILTIN_PFACC:
13378 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13380 case IX86_BUILTIN_PFADD:
13381 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13383 case IX86_BUILTIN_PFCMPEQ:
13384 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13386 case IX86_BUILTIN_PFCMPGE:
13387 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13389 case IX86_BUILTIN_PFCMPGT:
13390 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13392 case IX86_BUILTIN_PFMAX:
13393 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13395 case IX86_BUILTIN_PFMIN:
13396 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13398 case IX86_BUILTIN_PFMUL:
13399 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13401 case IX86_BUILTIN_PFRCP:
13402 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13404 case IX86_BUILTIN_PFRCPIT1:
13405 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13407 case IX86_BUILTIN_PFRCPIT2:
13408 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13410 case IX86_BUILTIN_PFRSQIT1:
13411 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13413 case IX86_BUILTIN_PFRSQRT:
13414 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13416 case IX86_BUILTIN_PFSUB:
13417 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13419 case IX86_BUILTIN_PFSUBR:
13420 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13422 case IX86_BUILTIN_PI2FD:
13423 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13425 case IX86_BUILTIN_PMULHRW:
13426 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13428 case IX86_BUILTIN_PF2IW:
13429 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13431 case IX86_BUILTIN_PFNACC:
13432 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13434 case IX86_BUILTIN_PFPNACC:
13435 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13437 case IX86_BUILTIN_PI2FW:
13438 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13440 case IX86_BUILTIN_PSWAPDSI:
13441 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13443 case IX86_BUILTIN_PSWAPDSF:
13444 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13446 case IX86_BUILTIN_SSE_ZERO:
13447 target = gen_reg_rtx (V4SFmode);
13448 emit_insn (gen_sse_clrv4sf (target));
13451 case IX86_BUILTIN_MMX_ZERO:
13452 target = gen_reg_rtx (DImode);
13453 emit_insn (gen_mmx_clrdi (target));
13456 case IX86_BUILTIN_CLRTI:
13457 target = gen_reg_rtx (V2DImode);
13458 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
13462 case IX86_BUILTIN_SQRTSD:
13463 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13464 case IX86_BUILTIN_LOADAPD:
13465 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13466 case IX86_BUILTIN_LOADUPD:
13467 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13469 case IX86_BUILTIN_STOREAPD:
13470 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13471 case IX86_BUILTIN_STOREUPD:
13472 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13474 case IX86_BUILTIN_LOADSD:
13475 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13477 case IX86_BUILTIN_STORESD:
13478 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13480 case IX86_BUILTIN_SETPD1:
13481 target = assign_386_stack_local (DFmode, 0);
13482 arg0 = TREE_VALUE (arglist);
13483 emit_move_insn (adjust_address (target, DFmode, 0),
13484 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13485 op0 = gen_reg_rtx (V2DFmode);
13486 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13487 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13490 case IX86_BUILTIN_SETPD:
13491 target = assign_386_stack_local (V2DFmode, 0);
13492 arg0 = TREE_VALUE (arglist);
13493 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13494 emit_move_insn (adjust_address (target, DFmode, 0),
13495 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13496 emit_move_insn (adjust_address (target, DFmode, 8),
13497 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13498 op0 = gen_reg_rtx (V2DFmode);
13499 emit_insn (gen_sse2_movapd (op0, target));
13502 case IX86_BUILTIN_LOADRPD:
13503 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13504 gen_reg_rtx (V2DFmode), 1);
13505 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13508 case IX86_BUILTIN_LOADPD1:
13509 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13510 gen_reg_rtx (V2DFmode), 1);
13511 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13514 case IX86_BUILTIN_STOREPD1:
13515 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13516 case IX86_BUILTIN_STORERPD:
13517 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13519 case IX86_BUILTIN_CLRPD:
13520 target = gen_reg_rtx (V2DFmode);
13521 emit_insn (gen_sse_clrv2df (target));
13524 case IX86_BUILTIN_MFENCE:
13525 emit_insn (gen_sse2_mfence ());
13527 case IX86_BUILTIN_LFENCE:
13528 emit_insn (gen_sse2_lfence ());
13531 case IX86_BUILTIN_CLFLUSH:
13532 arg0 = TREE_VALUE (arglist);
13533 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13534 icode = CODE_FOR_sse2_clflush;
13535 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
13536 op0 = copy_to_mode_reg (Pmode, op0);
13538 emit_insn (gen_sse2_clflush (op0));
13541 case IX86_BUILTIN_MOVNTPD:
13542 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13543 case IX86_BUILTIN_MOVNTDQ:
13544 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13545 case IX86_BUILTIN_MOVNTI:
13546 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13548 case IX86_BUILTIN_LOADDQA:
13549 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
13550 case IX86_BUILTIN_LOADDQU:
13551 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
13552 case IX86_BUILTIN_LOADD:
13553 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
13555 case IX86_BUILTIN_STOREDQA:
13556 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
13557 case IX86_BUILTIN_STOREDQU:
13558 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
13559 case IX86_BUILTIN_STORED:
13560 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
13566 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13567 if (d->code == fcode)
13569 /* Compares are treated specially. */
13570 if (d->icode == CODE_FOR_maskcmpv4sf3
13571 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13572 || d->icode == CODE_FOR_maskncmpv4sf3
13573 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13574 || d->icode == CODE_FOR_maskcmpv2df3
13575 || d->icode == CODE_FOR_vmmaskcmpv2df3
13576 || d->icode == CODE_FOR_maskncmpv2df3
13577 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13578 return ix86_expand_sse_compare (d, arglist, target);
13580 return ix86_expand_binop_builtin (d->icode, arglist, target);
13583 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13584 if (d->code == fcode)
13585 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13587 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13588 if (d->code == fcode)
13589 return ix86_expand_sse_comi (d, arglist, target);
13591 /* @@@ Should really do something sensible here. */
13595 /* Store OPERAND to the memory after reload is completed. This means
13596 that we can't easily use assign_stack_local. */
/* Returns a MEM rtx through which OPERAND can be reloaded.  The strategy
   depends on the target: on 64-bit with a red zone the value is stored
   below the stack pointer without adjusting it; otherwise it is pushed
   and must later be released with ix86_free_from_memory.
   NOTE(review): this extract is missing intermediate source lines
   (declarations, braces, some operands of the emitted insns), so the
   full control flow is not visible here.  */
13598 ix86_force_to_memory (mode, operand)
13599 enum machine_mode mode;
13603 if (!reload_completed)
13605 if (TARGET_64BIT && TARGET_RED_ZONE)
/* Red zone available: address memory below the stack pointer directly,
   no stack adjustment needed.  */
13607 result = gen_rtx_MEM (mode,
13608 gen_rtx_PLUS (Pmode,
13610 GEN_INT (-RED_ZONE_SIZE)));
13611 emit_move_insn (result, operand);
13613 else if (TARGET_64BIT && !TARGET_RED_ZONE)
/* 64-bit without red zone: push the value as one DImode word
   (pre-decrement of the stack pointer).  */
13619 operand = gen_lowpart (DImode, operand);
13623 gen_rtx_SET (VOIDmode,
13624 gen_rtx_MEM (DImode,
13625 gen_rtx_PRE_DEC (DImode,
13626 stack_pointer_rtx)),
13632 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit: a DImode operand is split into two SImode words and pushed
   with two pre-decrement stores.  */
13641 split_di (&operand, 1, operands, operands + 1);
13643 gen_rtx_SET (VOIDmode,
13644 gen_rtx_MEM (SImode,
13645 gen_rtx_PRE_DEC (Pmode,
13646 stack_pointer_rtx)),
13649 gen_rtx_SET (VOIDmode,
13650 gen_rtx_MEM (SImode,
13651 gen_rtx_PRE_DEC (Pmode,
13652 stack_pointer_rtx)),
13657 /* It is better to store HImodes as SImodes. */
13658 if (!TARGET_PARTIAL_REG_STALL)
13659 operand = gen_lowpart (SImode, operand)
13663 gen_rtx_SET (VOIDmode,
13664 gen_rtx_MEM (GET_MODE (operand),
13665 gen_rtx_PRE_DEC (SImode,
13666 stack_pointer_rtx)),
/* The pushed value now lives at the (new) top of stack.  */
13672 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13677 /* Free operand from the memory. */
/* Releases the stack space allocated by ix86_force_to_memory.  When a
   red zone was used no stack pointer adjustment was made, so there is
   nothing to free; otherwise the pushed slot is deallocated.
   NOTE(review): the size selection branches below are truncated in this
   extract — the assignments they guard are not visible.  */
13679 ix86_free_from_memory (mode)
13680 enum machine_mode mode;
13682 if (!TARGET_64BIT || !TARGET_RED_ZONE)
/* Compute the number of bytes that were pushed for MODE.  */
13686 if (mode == DImode || TARGET_64BIT)
13688 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13692 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13693 to pop or add instruction if registers are available. */
13694 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13695 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13700 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13701 QImode must go into class Q_REGS.
13702 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
13703 movdf to do mem-to-mem moves through integer regs. */
/* Implements PREFERRED_RELOAD_CLASS for IA-32: given a value X being
   reloaded into CLASS, return the class reload should actually use.
   NOTE(review): several return statements between the visible
   conditions are missing from this extract.  */
13705 ix86_preferred_reload_class (x, class)
13707 enum reg_class class;
/* Non-zero vector constants cannot be materialized in registers.  */
13709 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
13711 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13713 /* SSE can't load any constant directly yet. */
13714 if (SSE_CLASS_P (class))
13716 /* Floats can load 0 and 1. */
13717 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13719 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13720 if (MAYBE_SSE_CLASS_P (class))
13721 return (reg_class_subset_p (class, GENERAL_REGS)
13722 ? GENERAL_REGS : FLOAT_REGS);
13726 /* General regs can load everything. */
13727 if (reg_class_subset_p (class, GENERAL_REGS))
13728 return GENERAL_REGS;
13729 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13730 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
/* MMX registers cannot load constants directly either.  */
13733 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
/* QImode values must end up in a byte-addressable register.  */
13735 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13740 /* If we are copying between general and FP registers, we need a memory
13741 location. The same is true for SSE and MMX registers.
13743 The macro can't work reliably when one of the CLASSES is class containing
13744 registers from multiple units (SSE, MMX, integer). We avoid this by never
13745 combining those units in single alternative in the machine description.
13746 Ensure that this constraint holds to avoid unexpected surprises.
13748 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13749 enforce these sanity checks. */
/* Implements SECONDARY_MEMORY_NEEDED: returns nonzero when a move
   between CLASS1 and CLASS2 in MODE must go through memory.  */
13751 ix86_secondary_memory_needed (class1, class2, mode, strict)
13752 enum reg_class class1, class2;
13753 enum machine_mode mode;
/* Sanity check: a class must not mix registers from different units
   (see the comment above).  Skipped when STRICT is false.  */
13756 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13757 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13758 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13759 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13760 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13761 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
/* Memory is required when crossing the x87 boundary, or the SSE/MMX
   boundary except for SImode moves (which have direct movd paths).  */
13768 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13769 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13770 && (mode) != SImode)
13771 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13772 && (mode) != SImode));
13774 /* Return the cost of moving data from a register in class CLASS1 to
13775 one in class CLASS2.
13777 It is not required that the cost always equal 2 when FROM is the same as TO;
13778 on some machines it is expensive to move between registers if they are not
13779 general registers. */
13781 ix86_register_move_cost (mode, class1, class2)
13782 enum machine_mode mode;
13783 enum reg_class class1, class2;
13785 /* In case we require secondary memory, compute cost of the store followed
13786 by load. In order to avoid bad register allocation choices, we need
13787 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
13789 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
/* Cost is the worse of store/load for each side of the round trip.  */
13793 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
13794 MEMORY_MOVE_COST (mode, class1, 1));
13795 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
13796 MEMORY_MOVE_COST (mode, class2, 1));
13798 /* In case of copying from general_purpose_register we may emit multiple
13799 stores followed by single load causing memory size mismatch stall.
13800 Count this as arbitrarily high cost of 20. */
13801 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
13804 /* In the case of FP/MMX moves, the registers actually overlap, and we
13805 have to switch modes in order to treat them differently. */
13806 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
13807 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
13813 /* Moves between SSE/MMX and integer unit are expensive. */
13814 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13815 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
13816 return ix86_cost->mmxsse_to_integer;
/* Intra-unit moves: use the per-unit cost from the processor table.  */
13817 if (MAYBE_FLOAT_CLASS_P (class1))
13818 return ix86_cost->fp_move;
13819 if (MAYBE_SSE_CLASS_P (class1))
13820 return ix86_cost->sse_move;
13821 if (MAYBE_MMX_CLASS_P (class1))
13822 return ix86_cost->mmx_move;
13826 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13828 ix86_hard_regno_mode_ok (regno, mode)
13830 enum machine_mode mode;
13832 /* Flags and only flags can only hold CCmode values. */
13833 if (CC_REGNO_P (regno))
13834 return GET_MODE_CLASS (mode) == MODE_CC;
/* CC, RANDOM and PARTIAL_INT modes are rejected for all other
   registers (the visible guard; its result line is truncated here).  */
13835 if (GET_MODE_CLASS (mode) == MODE_CC
13836 || GET_MODE_CLASS (mode) == MODE_RANDOM
13837 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
/* Each register file accepts only its own valid mode set.  */
13839 if (FP_REGNO_P (regno))
13840 return VALID_FP_MODE_P (mode);
13841 if (SSE_REGNO_P (regno))
13842 return VALID_SSE_REG_MODE (mode);
13843 if (MMX_REGNO_P (regno))
13844 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
13845 /* We handle both integer and floats in the general purpose registers.
13846 In future we should be able to handle vector modes as well. */
13847 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13849 /* Take care for QImode values - they can be in non-QI regs, but then
13850 they do cause partial register stalls. */
13851 if (regno < 4 || mode != QImode || TARGET_64BIT)
/* Allow QImode in non-QI regs only when stalls don't matter (during
   reload, or when the target has no partial register stall).  */
13853 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13856 /* Return the cost of moving data of mode M between a
13857 register and memory. A value of 2 is the default; this cost is
13858 relative to those in `REGISTER_MOVE_COST'.
13860 If moving between registers and memory is more expensive than
13861 between two registers, you should define this macro to express the
13864 Model also increased moving costs of QImode registers in non
/* NOTE(review): the size-to-index switch bodies are truncated in this
   extract; each one maps GET_MODE_SIZE (mode) onto an index into the
   per-class load/store cost tables.  IN selects load (nonzero) vs
   store (zero).  */
13868 ix86_memory_move_cost (mode, class, in)
13869 enum machine_mode mode;
13870 enum reg_class class;
13873 if (FLOAT_CLASS_P (class))
13891 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
13893 if (SSE_CLASS_P (class))
13896 switch (GET_MODE_SIZE (mode))
13910 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
13912 if (MMX_CLASS_P (class))
13915 switch (GET_MODE_SIZE (mode))
13926 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer classes: cost depends on operand size.  */
13928 switch (GET_MODE_SIZE (mode))
/* Byte loads into non-Q classes go through movzbl, which has its own
   cost; byte stores into non-Q classes are penalized by 4.  */
13932 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13933 : ix86_cost->movzbl_load);
13935 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13936 : ix86_cost->int_store[0] + 4);
13939 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13941 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
13942 if (mode == TFmode)
13944 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
13945 * ((int) GET_MODE_SIZE (mode)
13946 + UNITS_PER_WORD -1 ) / UNITS_PER_WORD);
13950 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
/* SVR3-style constructor hook: emits a "pushl $symbol" into the init
   section so the startup code can call the constructor.  */
13952 ix86_svr3_asm_out_constructor (symbol, priority)
13954 int priority ATTRIBUTE_UNUSED;
13957 fputs ("\tpushl $", asm_out_file);
13958 assemble_name (asm_out_file, XSTR (symbol, 0));
13959 fputc ('\n', asm_out_file);
/* Counter used to generate unique local labels (LPC$n / L<n>$lz) for
   each emitted Mach-O stub.  */
13965 static int current_machopic_label_num;
13967 /* Given a symbol name and its associated stub, write out the
13968 definition of the stub. */
13971 machopic_output_stub (file, symb, stub)
13973 const char *symb, *stub;
13975 unsigned int length;
13976 char *binder_name, *symbol_name, lazy_ptr_name[32];
13977 int label = ++current_machopic_label_num;
13979 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
13980 symb = (*targetm.strip_name_encoding) (symb);
13982 length = strlen (stub);
13983 binder_name = alloca (length + 32);
13984 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
13986 length = strlen (symb);
13987 symbol_name = alloca (length + 32);
13988 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
13990 sprintf (lazy_ptr_name, "L%d$lz", label);
/* Select the PIC or non-PIC stub section (the guarding condition is
   truncated in this extract — presumably MACHOPIC_PURE; confirm
   against the full source).  */
13993 machopic_picsymbol_stub_section ();
13995 machopic_symbol_stub_section ();
13997 fprintf (file, "%s:\n", stub);
13998 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC variant: compute the pc in %eax, load the lazy pointer
   pc-relative, and jump through it.  */
14002 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14003 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14004 fprintf (file, "\tjmp %%edx\n");
/* Non-PIC variant: jump indirectly through the lazy pointer.  */
14007 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
/* The binder pushes the lazy pointer address and tail-calls dyld's
   stub binding helper, which resolves the symbol on first use.  */
14009 fprintf (file, "%s:\n", binder_name);
14013 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14014 fprintf (file, "\tpushl %%eax\n");
14017 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
14019 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
/* Emit the lazy pointer itself, initially pointing at the binder.  */
14021 machopic_lazy_symbol_ptr_section ();
14022 fprintf (file, "%s:\n", lazy_ptr_name);
14023 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14024 fprintf (file, "\t.long %s\n", binder_name);
14026 #endif /* TARGET_MACHO */
14028 /* Order the registers for register allocator. */
/* Fills reg_alloc_order[] with the preferred allocation order:
   call-clobbered GPRs, call-saved GPRs, then x87/SSE/MMX in an order
   that depends on whether FP math is done in SSE registers.  */
14031 x86_order_regs_for_local_alloc ()
14036 /* First allocate the local general purpose registers. */
14037 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14038 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14039 reg_alloc_order [pos++] = i;
14041 /* Global general purpose registers. */
14042 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14043 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14044 reg_alloc_order [pos++] = i;
14046 /* x87 registers come first in case we are doing FP math
/* (comment continuation truncated in this extract)  */
14048 if (!TARGET_SSE_MATH)
14049 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14050 reg_alloc_order [pos++] = i;
14052 /* SSE registers. */
14053 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14054 reg_alloc_order [pos++] = i;
14055 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14056 reg_alloc_order [pos++] = i;
14058 /* x87 registers. */
14059 if (TARGET_SSE_MATH)
14060 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14061 reg_alloc_order [pos++] = i;
/* MMX registers last among the real register files.  */
14063 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14064 reg_alloc_order [pos++] = i;
14066 /* Initialize the rest of array as we do not allocate some registers
14068 while (pos < FIRST_PSEUDO_REGISTER)
14069 reg_alloc_order [pos++] = 0;
14072 /* Returns an expression indicating where the this parameter is
14073 located on entry to the FUNCTION. */
14076 x86_this_parameter (function)
14079 tree type = TREE_TYPE (function);
/* 64-bit ABI: `this' is in the first integer parameter register, or
   the second when a hidden aggregate-return pointer occupies the
   first.  */
14083 int n = aggregate_value_p (TREE_TYPE (type)) != 0;
14084 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
/* 32-bit regparm functions pass `this' in a register.  */
14087 if (ix86_fntype_regparm (type) > 0)
14091 parm = TYPE_ARG_TYPES (type);
14092 /* Figure out whether or not the function has a variable number of
14094 for (; parm; parm = TREE_CHAIN (parm))
14095 if (TREE_VALUE (parm) == void_type_node)
14097 /* If not, the this parameter is in %eax. */
14099 return gen_rtx_REG (SImode, 0);
/* Otherwise `this' is on the stack, below the return address and any
   hidden aggregate-return pointer.  */
14102 if (aggregate_value_p (TREE_TYPE (type)))
14103 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
14105 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
14108 /* Determine whether x86_output_mi_thunk can succeed. */
/* Returns nonzero when a thunk with the given DELTA/VCALL_OFFSET can
   be emitted for FUNCTION; 32-bit needs a scratch register free.  */
14111 x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
14112 tree thunk ATTRIBUTE_UNUSED;
14113 HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
14114 HOST_WIDE_INT vcall_offset;
14117 /* 64-bit can handle anything. */
14121 /* For 32-bit, everything's fine if we have one free register. */
14122 if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
14125 /* Need a free register for vcall_offset. */
14129 /* Need a free register for GOT references. */
14130 if (flag_pic && !(*targetm.binds_local_p) (function))
14133 /* Otherwise ok. */
14137 /* Output the assembler code for a thunk function. THUNK_DECL is the
14138 declaration for the thunk function itself, FUNCTION is the decl for
14139 the target function. DELTA is an immediate constant offset to be
14140 added to THIS. If VCALL_OFFSET is non-zero, the word at
14141 *(*this + vcall_offset) should be added to THIS. */
/* NOTE(review): this extract is missing intermediate lines; several
   if-conditions and emitted move targets around the visible statements
   are not shown.  */
14144 x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
14145 FILE *file ATTRIBUTE_UNUSED;
14146 tree thunk ATTRIBUTE_UNUSED;
14147 HOST_WIDE_INT delta;
14148 HOST_WIDE_INT vcall_offset;
14152 rtx this = x86_this_parameter (function);
14155 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14156 pull it in now and let DELTA benefit. */
14159 else if (vcall_offset)
14161 /* Put the this parameter into %eax. */
14163 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
14164 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14167 this_reg = NULL_RTX;
14169 /* Adjust the this parameter by a fixed constant. */
14172 xops[0] = GEN_INT (delta);
14173 xops[1] = this_reg ? this_reg : this;
/* 64-bit deltas that do not fit an immediate are staged through R10.  */
14176 if (!x86_64_general_operand (xops[0], DImode))
14178 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14180 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
14184 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14187 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14190 /* Adjust the this parameter by a value stored in the vtable. */
/* Scratch register for the vtable pointer: R10 on 64-bit, ECX on
   32-bit.  */
14194 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14196 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14198 xops[0] = gen_rtx_MEM (Pmode, this_reg);
14201 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14203 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14205 /* Adjust the this parameter. */
14206 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
14207 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
/* Out-of-range vcall_offset: materialize it in R11 and index with a
   register+register address instead.  */
14209 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
14210 xops[0] = GEN_INT (vcall_offset);
14212 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14213 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
14215 xops[1] = this_reg;
14217 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14219 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14222 /* If necessary, drop THIS back to its stack slot. */
14223 if (this_reg && this_reg != this)
14225 xops[0] = this_reg;
14227 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
/* Finally, tail-jump to the real function — directly when it binds
   locally, otherwise through the GOT.  */
14230 xops[0] = DECL_RTL (function);
14233 if (!flag_pic || (*targetm.binds_local_p) (function))
14234 output_asm_insn ("jmp\t%P0", xops);
14236 output_asm_insn ("jmp\t*%P0@GOTPCREL(%%rip)", xops);
14240 if (!flag_pic || (*targetm.binds_local_p) (function))
14241 output_asm_insn ("jmp\t%P0", xops);
/* 32-bit PIC: load the GOT base into ECX, then jump through the
   function's GOT slot.  */
14244 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14245 output_set_got (tmp);
14248 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
14249 output_asm_insn ("jmp\t{*}%1", xops);
/* Implements ADJUST_FIELD_ALIGN: caps the alignment of double and
   integer struct fields at 32 bits on 32-bit targets without
   -malign-double, matching the traditional i386 ABI layout.  */
14255 x86_field_alignment (field, computed)
14259 enum machine_mode mode;
14260 tree type = TREE_TYPE (field);
/* 64-bit and -malign-double keep the natural alignment.  */
14262 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
/* For arrays, the element type determines the alignment.  */
14264 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
14265 ? get_inner_array_type (type) : type);
14266 if (mode == DFmode || mode == DCmode
14267 || GET_MODE_CLASS (mode) == MODE_INT
14268 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
14269 return MIN (32, computed);
14273 /* Implement machine specific optimizations.
14274 At the moment we implement single transformation: AMD Athlon works faster
14275 when RET is not destination of conditional jump or directly preceded
14276 by other jump instruction. We avoid the penalty by inserting NOP just
14277 before the RET instructions in such cases. */
14279 x86_machine_dependent_reorg (first)
14280 rtx first ATTRIBUTE_UNUSED;
/* Only relevant for Athlon tuning with optimization enabled (and not
   when optimizing for size, where the extra NOP would cost bytes).  */
14284 if (!TARGET_ATHLON || !optimize || optimize_size)
/* Walk every edge into the exit block; each source block may end in a
   return that needs padding.  */
14286 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
14288 basic_block bb = e->src;
14291 bool insert = false;
/* Only pad returns in blocks that are likely executed.  */
14293 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
14295 prev = prev_nonnote_insn (ret);
/* Case 1: the RET is a jump target — a preceding label reached by a
   taken (non-fallthru) edge.  */
14296 if (prev && GET_CODE (prev) == CODE_LABEL)
14299 for (e = bb->pred; e; e = e->pred_next)
14300 if (EDGE_FREQUENCY (e) && e->src->index > 0
14301 && !(e->flags & EDGE_FALLTHRU))
/* Case 2: the RET directly follows a conditional jump.  */
14306 prev = prev_real_insn (ret);
14307 if (prev && GET_CODE (prev) == JUMP_INSN
14308 && any_condjump_p (prev))
/* Insert the padding NOP just before the RET.  */
14312 emit_insn_before (gen_nop (), ret);
14316 #include "gt-i386.h"