1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
28 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "basic-block.h"
44 #include "target-def.h"
45 #include "langhooks.h"
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
51 /* Processor costs (relative to an add) */
53 struct processor_costs size_cost = { /* costs for tunning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of loading integer registers */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
87 2, /* cost of FADD and FSUB insns. */
88 2, /* cost of FMUL instruction. */
89 2, /* cost of FDIV instruction. */
90 2, /* cost of FABS instruction. */
91 2, /* cost of FCHS instruction. */
92 2, /* cost of FSQRT instruction. */
95 /* Processor costs (relative to an add) */
97 struct processor_costs i386_cost = { /* 386 specific costs */
98 1, /* cost of an add instruction */
99 1, /* cost of a lea instruction */
100 3, /* variable shift costs */
101 2, /* constant shift costs */
102 6, /* cost of starting a multiply */
103 1, /* cost of multiply per each bit set */
104 23, /* cost of a divide/mod */
105 3, /* cost of movsx */
106 2, /* cost of movzx */
107 15, /* "large" insn */
109 4, /* cost for loading QImode using movzbl */
110 {2, 4, 2}, /* cost of loading integer registers
111 in QImode, HImode and SImode.
112 Relative to reg-reg move (2). */
113 {2, 4, 2}, /* cost of storing integer registers */
114 2, /* cost of reg,reg fld/fst */
115 {8, 8, 8}, /* cost of loading fp registers
116 in SFmode, DFmode and XFmode */
117 {8, 8, 8}, /* cost of loading integer registers */
118 2, /* cost of moving MMX register */
119 {4, 8}, /* cost of loading MMX registers
120 in SImode and DImode */
121 {4, 8}, /* cost of storing MMX registers
122 in SImode and DImode */
123 2, /* cost of moving SSE register */
124 {4, 8, 16}, /* cost of loading SSE registers
125 in SImode, DImode and TImode */
126 {4, 8, 16}, /* cost of storing SSE registers
127 in SImode, DImode and TImode */
128 3, /* MMX or SSE register to integer */
129 0, /* size of prefetch block */
130 0, /* number of parallel prefetches */
131 23, /* cost of FADD and FSUB insns. */
132 27, /* cost of FMUL instruction. */
133 88, /* cost of FDIV instruction. */
134 22, /* cost of FABS instruction. */
135 24, /* cost of FCHS instruction. */
136 122, /* cost of FSQRT instruction. */
140 struct processor_costs i486_cost = { /* 486 specific costs */
141 1, /* cost of an add instruction */
142 1, /* cost of a lea instruction */
143 3, /* variable shift costs */
144 2, /* constant shift costs */
145 12, /* cost of starting a multiply */
146 1, /* cost of multiply per each bit set */
147 40, /* cost of a divide/mod */
148 3, /* cost of movsx */
149 2, /* cost of movzx */
150 15, /* "large" insn */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of loading integer registers */
161 2, /* cost of moving MMX register */
162 {4, 8}, /* cost of loading MMX registers
163 in SImode and DImode */
164 {4, 8}, /* cost of storing MMX registers
165 in SImode and DImode */
166 2, /* cost of moving SSE register */
167 {4, 8, 16}, /* cost of loading SSE registers
168 in SImode, DImode and TImode */
169 {4, 8, 16}, /* cost of storing SSE registers
170 in SImode, DImode and TImode */
171 3, /* MMX or SSE register to integer */
172 0, /* size of prefetch block */
173 0, /* number of parallel prefetches */
174 8, /* cost of FADD and FSUB insns. */
175 16, /* cost of FMUL instruction. */
176 73, /* cost of FDIV instruction. */
177 3, /* cost of FABS instruction. */
178 3, /* cost of FCHS instruction. */
179 83, /* cost of FSQRT instruction. */
183 struct processor_costs pentium_cost = {
184 1, /* cost of an add instruction */
185 1, /* cost of a lea instruction */
186 4, /* variable shift costs */
187 1, /* constant shift costs */
188 11, /* cost of starting a multiply */
189 0, /* cost of multiply per each bit set */
190 25, /* cost of a divide/mod */
191 3, /* cost of movsx */
192 2, /* cost of movzx */
193 8, /* "large" insn */
195 6, /* cost for loading QImode using movzbl */
196 {2, 4, 2}, /* cost of loading integer registers
197 in QImode, HImode and SImode.
198 Relative to reg-reg move (2). */
199 {2, 4, 2}, /* cost of storing integer registers */
200 2, /* cost of reg,reg fld/fst */
201 {2, 2, 6}, /* cost of loading fp registers
202 in SFmode, DFmode and XFmode */
203 {4, 4, 6}, /* cost of loading integer registers */
204 8, /* cost of moving MMX register */
205 {8, 8}, /* cost of loading MMX registers
206 in SImode and DImode */
207 {8, 8}, /* cost of storing MMX registers
208 in SImode and DImode */
209 2, /* cost of moving SSE register */
210 {4, 8, 16}, /* cost of loading SSE registers
211 in SImode, DImode and TImode */
212 {4, 8, 16}, /* cost of storing SSE registers
213 in SImode, DImode and TImode */
214 3, /* MMX or SSE register to integer */
215 0, /* size of prefetch block */
216 0, /* number of parallel prefetches */
217 3, /* cost of FADD and FSUB insns. */
218 3, /* cost of FMUL instruction. */
219 39, /* cost of FDIV instruction. */
220 1, /* cost of FABS instruction. */
221 1, /* cost of FCHS instruction. */
222 70, /* cost of FSQRT instruction. */
226 struct processor_costs pentiumpro_cost = {
227 1, /* cost of an add instruction */
228 1, /* cost of a lea instruction */
229 1, /* variable shift costs */
230 1, /* constant shift costs */
231 4, /* cost of starting a multiply */
232 0, /* cost of multiply per each bit set */
233 17, /* cost of a divide/mod */
234 1, /* cost of movsx */
235 1, /* cost of movzx */
236 8, /* "large" insn */
238 2, /* cost for loading QImode using movzbl */
239 {4, 4, 4}, /* cost of loading integer registers
240 in QImode, HImode and SImode.
241 Relative to reg-reg move (2). */
242 {2, 2, 2}, /* cost of storing integer registers */
243 2, /* cost of reg,reg fld/fst */
244 {2, 2, 6}, /* cost of loading fp registers
245 in SFmode, DFmode and XFmode */
246 {4, 4, 6}, /* cost of loading integer registers */
247 2, /* cost of moving MMX register */
248 {2, 2}, /* cost of loading MMX registers
249 in SImode and DImode */
250 {2, 2}, /* cost of storing MMX registers
251 in SImode and DImode */
252 2, /* cost of moving SSE register */
253 {2, 2, 8}, /* cost of loading SSE registers
254 in SImode, DImode and TImode */
255 {2, 2, 8}, /* cost of storing SSE registers
256 in SImode, DImode and TImode */
257 3, /* MMX or SSE register to integer */
258 32, /* size of prefetch block */
259 6, /* number of parallel prefetches */
260 3, /* cost of FADD and FSUB insns. */
261 5, /* cost of FMUL instruction. */
262 56, /* cost of FDIV instruction. */
263 2, /* cost of FABS instruction. */
264 2, /* cost of FCHS instruction. */
265 56, /* cost of FSQRT instruction. */
269 struct processor_costs k6_cost = {
270 1, /* cost of an add instruction */
271 2, /* cost of a lea instruction */
272 1, /* variable shift costs */
273 1, /* constant shift costs */
274 3, /* cost of starting a multiply */
275 0, /* cost of multiply per each bit set */
276 18, /* cost of a divide/mod */
277 2, /* cost of movsx */
278 2, /* cost of movzx */
279 8, /* "large" insn */
281 3, /* cost for loading QImode using movzbl */
282 {4, 5, 4}, /* cost of loading integer registers
283 in QImode, HImode and SImode.
284 Relative to reg-reg move (2). */
285 {2, 3, 2}, /* cost of storing integer registers */
286 4, /* cost of reg,reg fld/fst */
287 {6, 6, 6}, /* cost of loading fp registers
288 in SFmode, DFmode and XFmode */
289 {4, 4, 4}, /* cost of loading integer registers */
290 2, /* cost of moving MMX register */
291 {2, 2}, /* cost of loading MMX registers
292 in SImode and DImode */
293 {2, 2}, /* cost of storing MMX registers
294 in SImode and DImode */
295 2, /* cost of moving SSE register */
296 {2, 2, 8}, /* cost of loading SSE registers
297 in SImode, DImode and TImode */
298 {2, 2, 8}, /* cost of storing SSE registers
299 in SImode, DImode and TImode */
300 6, /* MMX or SSE register to integer */
301 32, /* size of prefetch block */
302 1, /* number of parallel prefetches */
303 2, /* cost of FADD and FSUB insns. */
304 2, /* cost of FMUL instruction. */
305 56, /* cost of FDIV instruction. */
306 2, /* cost of FABS instruction. */
307 2, /* cost of FCHS instruction. */
308 56, /* cost of FSQRT instruction. */
312 struct processor_costs athlon_cost = {
313 1, /* cost of an add instruction */
314 2, /* cost of a lea instruction */
315 1, /* variable shift costs */
316 1, /* constant shift costs */
317 5, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 42, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 8, /* "large" insn */
324 4, /* cost for loading QImode using movzbl */
325 {3, 4, 3}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {3, 4, 3}, /* cost of storing integer registers */
329 4, /* cost of reg,reg fld/fst */
330 {4, 4, 12}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {6, 6, 8}, /* cost of loading integer registers */
333 2, /* cost of moving MMX register */
334 {4, 4}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {4, 4}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {4, 4, 6}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {4, 4, 5}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 5, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 4, /* cost of FADD and FSUB insns. */
347 4, /* cost of FMUL instruction. */
348 24, /* cost of FDIV instruction. */
349 2, /* cost of FABS instruction. */
350 2, /* cost of FCHS instruction. */
351 35, /* cost of FSQRT instruction. */
355 struct processor_costs pentium4_cost = {
356 1, /* cost of an add instruction */
357 1, /* cost of a lea instruction */
358 8, /* variable shift costs */
359 8, /* constant shift costs */
360 30, /* cost of starting a multiply */
361 0, /* cost of multiply per each bit set */
362 112, /* cost of a divide/mod */
363 1, /* cost of movsx */
364 1, /* cost of movzx */
365 16, /* "large" insn */
367 2, /* cost for loading QImode using movzbl */
368 {4, 5, 4}, /* cost of loading integer registers
369 in QImode, HImode and SImode.
370 Relative to reg-reg move (2). */
371 {2, 3, 2}, /* cost of storing integer registers */
372 2, /* cost of reg,reg fld/fst */
373 {2, 2, 6}, /* cost of loading fp registers
374 in SFmode, DFmode and XFmode */
375 {4, 4, 6}, /* cost of loading integer registers */
376 2, /* cost of moving MMX register */
377 {2, 2}, /* cost of loading MMX registers
378 in SImode and DImode */
379 {2, 2}, /* cost of storing MMX registers
380 in SImode and DImode */
381 12, /* cost of moving SSE register */
382 {12, 12, 12}, /* cost of loading SSE registers
383 in SImode, DImode and TImode */
384 {2, 2, 8}, /* cost of storing SSE registers
385 in SImode, DImode and TImode */
386 10, /* MMX or SSE register to integer */
387 64, /* size of prefetch block */
388 6, /* number of parallel prefetches */
389 5, /* cost of FADD and FSUB insns. */
390 7, /* cost of FMUL instruction. */
391 43, /* cost of FDIV instruction. */
392 2, /* cost of FABS instruction. */
393 2, /* cost of FCHS instruction. */
394 43, /* cost of FSQRT instruction. */
397 const struct processor_costs *ix86_cost = &pentium_cost;
399 /* Processor feature/optimization bitmasks. */
400 #define m_386 (1<<PROCESSOR_I386)
401 #define m_486 (1<<PROCESSOR_I486)
402 #define m_PENT (1<<PROCESSOR_PENTIUM)
403 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
404 #define m_K6 (1<<PROCESSOR_K6)
405 #define m_ATHLON (1<<PROCESSOR_ATHLON)
406 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
408 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
409 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
410 const int x86_zero_extend_with_and = m_486 | m_PENT;
411 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
412 const int x86_double_with_add = ~m_386;
413 const int x86_use_bit_test = m_386;
414 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
415 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
416 const int x86_3dnow_a = m_ATHLON;
417 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
418 const int x86_branch_hints = m_PENT4;
419 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
420 const int x86_partial_reg_stall = m_PPRO;
421 const int x86_use_loop = m_K6;
422 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
423 const int x86_use_mov0 = m_K6;
424 const int x86_use_cltd = ~(m_PENT | m_K6);
425 const int x86_read_modify_write = ~m_PENT;
426 const int x86_read_modify = ~(m_PENT | m_PPRO);
427 const int x86_split_long_moves = m_PPRO;
428 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
429 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
430 const int x86_single_stringop = m_386 | m_PENT4;
431 const int x86_qimode_math = ~(0);
432 const int x86_promote_qi_regs = 0;
433 const int x86_himode_math = ~(m_PPRO);
434 const int x86_promote_hi_regs = m_PPRO;
435 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
436 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
437 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
438 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
439 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
440 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
441 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
442 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
443 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
444 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
445 const int x86_decompose_lea = m_PENT4;
446 const int x86_shift1 = ~m_486;
447 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
449 /* In case the avreage insn count for single function invocation is
450 lower than this constant, emit fast (but longer) prologue and
452 #define FAST_PROLOGUE_INSN_COUNT 30
454 /* Set by prologue expander and used by epilogue expander to determine
456 static int use_fast_prologue_epilogue;
458 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
459 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
460 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
461 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
463 /* Array of the smallest class containing reg number REGNO, indexed by
464 REGNO. Used by REGNO_REG_CLASS in i386.h. */
466 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
469 AREG, DREG, CREG, BREG,
471 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
473 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
474 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
477 /* flags, fpsr, dirflag, frame */
478 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
479 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
481 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
483 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
484 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
485 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
489 /* The "default" register map used in 32bit mode. */
491 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
493 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
494 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
495 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
496 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
497 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
498 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
499 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
502 static int const x86_64_int_parameter_registers[6] =
504 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
505 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
508 static int const x86_64_int_return_registers[4] =
510 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
513 /* The "default" register map used in 64bit mode. */
514 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
516 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
517 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
518 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
519 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
520 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
521 8,9,10,11,12,13,14,15, /* extended integer registers */
522 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
525 /* Define the register numbers to be used in Dwarf debugging information.
526 The SVR4 reference port C compiler uses the following register numbers
527 in its Dwarf output code:
528 0 for %eax (gcc regno = 0)
529 1 for %ecx (gcc regno = 2)
530 2 for %edx (gcc regno = 1)
531 3 for %ebx (gcc regno = 3)
532 4 for %esp (gcc regno = 7)
533 5 for %ebp (gcc regno = 6)
534 6 for %esi (gcc regno = 4)
535 7 for %edi (gcc regno = 5)
536 The following three DWARF register numbers are never generated by
537 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
538 believes these numbers have these meanings.
539 8 for %eip (no gcc equivalent)
540 9 for %eflags (gcc regno = 17)
541 10 for %trapno (no gcc equivalent)
542 It is not at all clear how we should number the FP stack registers
543 for the x86 architecture. If the version of SDB on x86/svr4 were
544 a bit less brain dead with respect to floating-point then we would
545 have a precedent to follow with respect to DWARF register numbers
546 for x86 FP registers, but the SDB on x86/svr4 is so completely
547 broken with respect to FP registers that it is hardly worth thinking
548 of it as something to strive for compatibility with.
549 The version of x86/svr4 SDB I have at the moment does (partially)
550 seem to believe that DWARF register number 11 is associated with
551 the x86 register %st(0), but that's about all. Higher DWARF
552 register numbers don't seem to be associated with anything in
553 particular, and even for DWARF regno 11, SDB only seems to under-
554 stand that it should say that a variable lives in %st(0) (when
555 asked via an `=' command) if we said it was in DWARF regno 11,
556 but SDB still prints garbage when asked for the value of the
557 variable in question (via a `/' command).
558 (Also note that the labels SDB prints for various FP stack regs
559 when doing an `x' command are all wrong.)
560 Note that these problems generally don't affect the native SVR4
561 C compiler because it doesn't allow the use of -O with -g and
562 because when it is *not* optimizing, it allocates a memory
563 location for each floating-point variable, and the memory
564 location is what gets described in the DWARF AT_location
565 attribute for the variable in question.
566 Regardless of the severe mental illness of the x86/svr4 SDB, we
567 do something sensible here and we use the following DWARF
568 register numbers. Note that these are all stack-top-relative
570 11 for %st(0) (gcc regno = 8)
571 12 for %st(1) (gcc regno = 9)
572 13 for %st(2) (gcc regno = 10)
573 14 for %st(3) (gcc regno = 11)
574 15 for %st(4) (gcc regno = 12)
575 16 for %st(5) (gcc regno = 13)
576 17 for %st(6) (gcc regno = 14)
577 18 for %st(7) (gcc regno = 15)
579 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
581 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
582 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
583 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
584 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
585 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
586 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded integer registers */
587 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded SSE registers */
590 /* Test and compare insns in i386.md store the information needed to
591 generate branch and scc insns here. */
593 rtx ix86_compare_op0 = NULL_RTX;
594 rtx ix86_compare_op1 = NULL_RTX;
596 /* The encoding characters for the four TLS models present in ELF. */
598 static char const tls_model_chars[] = " GLil";
600 #define MAX_386_STACK_LOCALS 3
601 /* Size of the register save area. */
602 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
604 /* Define the structure for the machine field in struct function. */
605 struct machine_function GTY(())
607 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
608 const char *some_ld_name;
609 int save_varrargs_registers;
610 int accesses_prev_frame;
613 #define ix86_stack_locals (cfun->machine->stack_locals)
614 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
616 /* Structure describing stack frame layout.
617 Stack grows downward:
623 saved frame pointer if frame_pointer_needed
624 <- HARD_FRAME_POINTER
630 > to_allocate <- FRAME_POINTER
642 int outgoing_arguments_size;
645 HOST_WIDE_INT to_allocate;
646 /* The offsets relative to ARG_POINTER. */
647 HOST_WIDE_INT frame_pointer_offset;
648 HOST_WIDE_INT hard_frame_pointer_offset;
649 HOST_WIDE_INT stack_pointer_offset;
652 /* Used to enable/disable debugging features. */
653 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
654 /* Code model option as passed by user. */
655 const char *ix86_cmodel_string;
657 enum cmodel ix86_cmodel;
659 const char *ix86_asm_string;
660 enum asm_dialect ix86_asm_dialect = ASM_ATT;
662 const char *ix86_tls_dialect_string;
663 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
665 /* Which unit we are generating floating point math for. */
666 enum fpmath_unit ix86_fpmath;
668 /* Which cpu are we scheduling for. */
669 enum processor_type ix86_cpu;
670 /* Which instruction set architecture to use. */
671 enum processor_type ix86_arch;
673 /* Strings to hold which cpu and instruction set architecture to use. */
674 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
675 const char *ix86_arch_string; /* for -march=<xxx> */
676 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
678 /* # of registers to use to pass arguments. */
679 const char *ix86_regparm_string;
681 /* true if sse prefetch instruction is not NOOP. */
682 int x86_prefetch_sse;
684 /* ix86_regparm_string as a number */
687 /* Alignment to use for loops and jumps: */
689 /* Power of two alignment for loops. */
690 const char *ix86_align_loops_string;
692 /* Power of two alignment for non-loop jumps. */
693 const char *ix86_align_jumps_string;
695 /* Power of two alignment for stack boundary in bytes. */
696 const char *ix86_preferred_stack_boundary_string;
698 /* Preferred alignment for stack boundary in bits. */
699 int ix86_preferred_stack_boundary;
701 /* Values 1-5: see jump.c */
702 int ix86_branch_cost;
703 const char *ix86_branch_cost_string;
705 /* Power of two alignment for functions. */
706 const char *ix86_align_funcs_string;
708 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
709 static char internal_label_prefix[16];
710 static int internal_label_prefix_len;
712 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
713 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
714 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
715 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
717 static const char *get_some_local_dynamic_name PARAMS ((void));
718 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
719 static rtx maybe_get_pool_constant PARAMS ((rtx));
720 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
721 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
723 static rtx get_thread_pointer PARAMS ((void));
724 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
725 static rtx gen_push PARAMS ((rtx));
726 static int memory_address_length PARAMS ((rtx addr));
727 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
728 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
729 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
730 static void ix86_dump_ppro_packet PARAMS ((FILE *));
731 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
732 static struct machine_function * ix86_init_machine_status PARAMS ((void));
733 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
734 static int ix86_nsaved_regs PARAMS ((void));
735 static void ix86_emit_save_regs PARAMS ((void));
736 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
737 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
738 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
739 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
740 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
741 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
742 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
743 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
744 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
745 static int ix86_issue_rate PARAMS ((void));
746 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
747 static void ix86_sched_init PARAMS ((FILE *, int, int));
748 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
749 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
750 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
751 static int ia32_multipass_dfa_lookahead PARAMS ((void));
752 static void ix86_init_mmx_sse_builtins PARAMS ((void));
753 static rtx ia32_this_parameter PARAMS ((tree));
754 static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT, tree));
755 static void x86_output_mi_vcall_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
756 HOST_WIDE_INT, tree));
760 rtx base, index, disp;
764 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
766 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
767 static const char *ix86_strip_name_encoding PARAMS ((const char *))
770 struct builtin_description;
771 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
773 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
775 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
776 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
777 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
778 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
779 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
780 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
781 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
785 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
787 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
788 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
789 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
790 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
791 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
792 static int ix86_save_reg PARAMS ((unsigned int, int));
793 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
794 static int ix86_comp_type_attributes PARAMS ((tree, tree));
795 static int ix86_fntype_regparm PARAMS ((tree));
796 const struct attribute_spec ix86_attribute_table[];
797 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
798 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
799 static int ix86_value_regno PARAMS ((enum machine_mode));
801 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
802 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
805 /* Register class used for passing given 64bit part of the argument.
806 These represent classes as documented by the PS ABI, with the exception
807 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
808 use SF or DFmode move instead of DImode to avoid reformating penalties.
810 Similary we play games with INTEGERSI_CLASS to use cheaper SImode moves
811 whenever possible (upper half does contain padding).
813 enum x86_64_reg_class
816 X86_64_INTEGER_CLASS,
817 X86_64_INTEGERSI_CLASS,
826 static const char * const x86_64_reg_class_name[] =
827 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
829 #define MAX_CLASSES 4
830 static int classify_argument PARAMS ((enum machine_mode, tree,
831 enum x86_64_reg_class [MAX_CLASSES],
833 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
835 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
837 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
838 enum x86_64_reg_class));
840 /* Initialize the GCC target structure. */
841 #undef TARGET_ATTRIBUTE_TABLE
842 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
843 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
844 # undef TARGET_MERGE_DECL_ATTRIBUTES
845 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
848 #undef TARGET_COMP_TYPE_ATTRIBUTES
849 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
851 #undef TARGET_INIT_BUILTINS
852 #define TARGET_INIT_BUILTINS ix86_init_builtins
854 #undef TARGET_EXPAND_BUILTIN
855 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
857 #undef TARGET_ASM_FUNCTION_EPILOGUE
858 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
860 #undef TARGET_ASM_OPEN_PAREN
861 #define TARGET_ASM_OPEN_PAREN ""
862 #undef TARGET_ASM_CLOSE_PAREN
863 #define TARGET_ASM_CLOSE_PAREN ""
865 #undef TARGET_ASM_ALIGNED_HI_OP
866 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
867 #undef TARGET_ASM_ALIGNED_SI_OP
868 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
870 #undef TARGET_ASM_ALIGNED_DI_OP
871 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
874 #undef TARGET_ASM_UNALIGNED_HI_OP
875 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
876 #undef TARGET_ASM_UNALIGNED_SI_OP
877 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
878 #undef TARGET_ASM_UNALIGNED_DI_OP
879 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
881 #undef TARGET_SCHED_ADJUST_COST
882 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
883 #undef TARGET_SCHED_ISSUE_RATE
884 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
885 #undef TARGET_SCHED_VARIABLE_ISSUE
886 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
887 #undef TARGET_SCHED_INIT
888 #define TARGET_SCHED_INIT ix86_sched_init
889 #undef TARGET_SCHED_REORDER
890 #define TARGET_SCHED_REORDER ix86_sched_reorder
891 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
892 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
893 ia32_use_dfa_pipeline_interface
894 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
895 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
896 ia32_multipass_dfa_lookahead
899 #undef TARGET_HAVE_TLS
900 #define TARGET_HAVE_TLS true
903 #undef TARGET_ASM_OUTPUT_MI_THUNK
904 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
905 #undef TARGET_ASM_OUTPUT_MI_VCALL_THUNK
906 #define TARGET_ASM_OUTPUT_MI_VCALL_THUNK x86_output_mi_vcall_thunk
908 struct gcc_target targetm = TARGET_INITIALIZER;
910 /* Sometimes certain combinations of command options do not make
911 sense on a particular target machine. You can define a macro
912 `OVERRIDE_OPTIONS' to take account of this. This macro, if
913 defined, is executed once just after all the command options have
916 Don't use this macro to turn on various extra optimizations for
917 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
923 /* Comes from final.c -- no real reason to change it. */
924 #define MAX_CODE_ALIGN 16
928 const struct processor_costs *cost; /* Processor costs */
929 const int target_enable; /* Target flags to enable. */
930 const int target_disable; /* Target flags to disable. */
931 const int align_loop; /* Default alignments. */
932 const int align_loop_max_skip;
933 const int align_jump;
934 const int align_jump_max_skip;
935 const int align_func;
936 const int branch_cost;
938 const processor_target_table[PROCESSOR_max] =
940 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
941 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
942 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
943 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
944 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
945 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
946 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
949 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
952 const char *const name; /* processor name or nickname. */
953 const enum processor_type processor;
959 PTA_PREFETCH_SSE = 8,
964 const processor_alias_table[] =
966 {"i386", PROCESSOR_I386, 0},
967 {"i486", PROCESSOR_I486, 0},
968 {"i586", PROCESSOR_PENTIUM, 0},
969 {"pentium", PROCESSOR_PENTIUM, 0},
970 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
971 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
972 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
973 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
974 {"i686", PROCESSOR_PENTIUMPRO, 0},
975 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
976 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
977 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
978 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
979 PTA_MMX | PTA_PREFETCH_SSE},
980 {"k6", PROCESSOR_K6, PTA_MMX},
981 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
982 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
983 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
985 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
986 | PTA_3DNOW | PTA_3DNOW_A},
987 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
988 | PTA_3DNOW_A | PTA_SSE},
989 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
990 | PTA_3DNOW_A | PTA_SSE},
991 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
992 | PTA_3DNOW_A | PTA_SSE},
995 int const pta_size = ARRAY_SIZE (processor_alias_table);
997 /* By default our XFmode is the 80-bit extended format. If we have
998 use TFmode instead, it's also the 80-bit format, but with padding. */
999 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1000 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1002 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1003 SUBTARGET_OVERRIDE_OPTIONS;
1006 if (!ix86_cpu_string && ix86_arch_string)
1007 ix86_cpu_string = ix86_arch_string;
1008 if (!ix86_cpu_string)
1009 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
1010 if (!ix86_arch_string)
1011 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
1013 if (ix86_cmodel_string != 0)
1015 if (!strcmp (ix86_cmodel_string, "small"))
1016 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1018 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1019 else if (!strcmp (ix86_cmodel_string, "32"))
1020 ix86_cmodel = CM_32;
1021 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1022 ix86_cmodel = CM_KERNEL;
1023 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1024 ix86_cmodel = CM_MEDIUM;
1025 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1026 ix86_cmodel = CM_LARGE;
1028 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1032 ix86_cmodel = CM_32;
1034 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1036 if (ix86_asm_string != 0)
1038 if (!strcmp (ix86_asm_string, "intel"))
1039 ix86_asm_dialect = ASM_INTEL;
1040 else if (!strcmp (ix86_asm_string, "att"))
1041 ix86_asm_dialect = ASM_ATT;
1043 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1045 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1046 error ("code model `%s' not supported in the %s bit mode",
1047 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1048 if (ix86_cmodel == CM_LARGE)
1049 sorry ("code model `large' not supported yet");
1050 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1051 sorry ("%i-bit mode not compiled in",
1052 (target_flags & MASK_64BIT) ? 64 : 32);
1054 for (i = 0; i < pta_size; i++)
1055 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1057 ix86_arch = processor_alias_table[i].processor;
1058 /* Default cpu tuning to the architecture. */
1059 ix86_cpu = ix86_arch;
1060 if (processor_alias_table[i].flags & PTA_MMX
1061 && !(target_flags_explicit & MASK_MMX))
1062 target_flags |= MASK_MMX;
1063 if (processor_alias_table[i].flags & PTA_3DNOW
1064 && !(target_flags_explicit & MASK_3DNOW))
1065 target_flags |= MASK_3DNOW;
1066 if (processor_alias_table[i].flags & PTA_3DNOW_A
1067 && !(target_flags_explicit & MASK_3DNOW_A))
1068 target_flags |= MASK_3DNOW_A;
1069 if (processor_alias_table[i].flags & PTA_SSE
1070 && !(target_flags_explicit & MASK_SSE))
1071 target_flags |= MASK_SSE;
1072 if (processor_alias_table[i].flags & PTA_SSE2
1073 && !(target_flags_explicit & MASK_SSE2))
1074 target_flags |= MASK_SSE2;
1075 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1076 x86_prefetch_sse = true;
1081 error ("bad value (%s) for -march= switch", ix86_arch_string);
1083 for (i = 0; i < pta_size; i++)
1084 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1086 ix86_cpu = processor_alias_table[i].processor;
1089 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1090 x86_prefetch_sse = true;
1092 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1095 ix86_cost = &size_cost;
1097 ix86_cost = processor_target_table[ix86_cpu].cost;
1098 target_flags |= processor_target_table[ix86_cpu].target_enable;
1099 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1101 /* Arrange to set up i386_stack_locals for all functions. */
1102 init_machine_status = ix86_init_machine_status;
1104 /* Validate -mregparm= value. */
1105 if (ix86_regparm_string)
1107 i = atoi (ix86_regparm_string);
1108 if (i < 0 || i > REGPARM_MAX)
1109 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1115 ix86_regparm = REGPARM_MAX;
1117 /* If the user has provided any of the -malign-* options,
1118 warn and use that value only if -falign-* is not set.
1119 Remove this code in GCC 3.2 or later. */
1120 if (ix86_align_loops_string)
1122 warning ("-malign-loops is obsolete, use -falign-loops");
1123 if (align_loops == 0)
1125 i = atoi (ix86_align_loops_string);
1126 if (i < 0 || i > MAX_CODE_ALIGN)
1127 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1129 align_loops = 1 << i;
1133 if (ix86_align_jumps_string)
1135 warning ("-malign-jumps is obsolete, use -falign-jumps");
1136 if (align_jumps == 0)
1138 i = atoi (ix86_align_jumps_string);
1139 if (i < 0 || i > MAX_CODE_ALIGN)
1140 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1142 align_jumps = 1 << i;
1146 if (ix86_align_funcs_string)
1148 warning ("-malign-functions is obsolete, use -falign-functions");
1149 if (align_functions == 0)
1151 i = atoi (ix86_align_funcs_string);
1152 if (i < 0 || i > MAX_CODE_ALIGN)
1153 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1155 align_functions = 1 << i;
1159 /* Default align_* from the processor table. */
1160 if (align_loops == 0)
1162 align_loops = processor_target_table[ix86_cpu].align_loop;
1163 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1165 if (align_jumps == 0)
1167 align_jumps = processor_target_table[ix86_cpu].align_jump;
1168 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1170 if (align_functions == 0)
1172 align_functions = processor_target_table[ix86_cpu].align_func;
1175 /* Validate -mpreferred-stack-boundary= value, or provide default.
1176 The default of 128 bits is for Pentium III's SSE __m128, but we
1177 don't want additional code to keep the stack aligned when
1178 optimizing for code size. */
1179 ix86_preferred_stack_boundary = (optimize_size
1180 ? TARGET_64BIT ? 128 : 32
1182 if (ix86_preferred_stack_boundary_string)
1184 i = atoi (ix86_preferred_stack_boundary_string);
1185 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1186 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1187 TARGET_64BIT ? 4 : 2);
1189 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1192 /* Validate -mbranch-cost= value, or provide default. */
1193 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1194 if (ix86_branch_cost_string)
1196 i = atoi (ix86_branch_cost_string);
1198 error ("-mbranch-cost=%d is not between 0 and 5", i);
1200 ix86_branch_cost = i;
1203 if (ix86_tls_dialect_string)
1205 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1206 ix86_tls_dialect = TLS_DIALECT_GNU;
1207 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1208 ix86_tls_dialect = TLS_DIALECT_SUN;
1210 error ("bad value (%s) for -mtls-dialect= switch",
1211 ix86_tls_dialect_string);
1215 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1217 /* Keep nonleaf frame pointers. */
1218 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1219 flag_omit_frame_pointer = 1;
1221 /* If we're doing fast math, we don't care about comparison order
1222 wrt NaNs. This lets us use a shorter comparison sequence. */
1223 if (flag_unsafe_math_optimizations)
1224 target_flags &= ~MASK_IEEE_FP;
1226 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1227 since the insns won't need emulation. */
1228 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1229 target_flags &= ~MASK_NO_FANCY_MATH_387;
1233 if (TARGET_ALIGN_DOUBLE)
1234 error ("-malign-double makes no sense in the 64bit mode");
1236 error ("-mrtd calling convention not supported in the 64bit mode");
1237 /* Enable by default the SSE and MMX builtins. */
1238 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1239 ix86_fpmath = FPMATH_SSE;
1242 ix86_fpmath = FPMATH_387;
1244 if (ix86_fpmath_string != 0)
1246 if (! strcmp (ix86_fpmath_string, "387"))
1247 ix86_fpmath = FPMATH_387;
1248 else if (! strcmp (ix86_fpmath_string, "sse"))
1252 warning ("SSE instruction set disabled, using 387 arithmetics");
1253 ix86_fpmath = FPMATH_387;
1256 ix86_fpmath = FPMATH_SSE;
1258 else if (! strcmp (ix86_fpmath_string, "387,sse")
1259 || ! strcmp (ix86_fpmath_string, "sse,387"))
1263 warning ("SSE instruction set disabled, using 387 arithmetics");
1264 ix86_fpmath = FPMATH_387;
1266 else if (!TARGET_80387)
1268 warning ("387 instruction set disabled, using SSE arithmetics");
1269 ix86_fpmath = FPMATH_SSE;
1272 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1275 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1278 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1282 target_flags |= MASK_MMX;
1283 x86_prefetch_sse = true;
1286 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1289 target_flags |= MASK_MMX;
1290 /* If we are targetting the Athlon architecture, enable the 3Dnow/MMX
1291 extensions it adds. */
1292 if (x86_3dnow_a & (1 << ix86_arch))
1293 target_flags |= MASK_3DNOW_A;
1295 if ((x86_accumulate_outgoing_args & CPUMASK)
1296 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1298 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1300 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1303 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1304 p = strchr (internal_label_prefix, 'X');
1305 internal_label_prefix_len = p - internal_label_prefix;
1309 /* In 64-bit mode, we do not have support for vcall thunks. */
1311 targetm.asm_out.output_mi_vcall_thunk = NULL;
1315 optimization_options (level, size)
1317 int size ATTRIBUTE_UNUSED;
1319 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1320 make the problem with not enough registers even worse. */
1321 #ifdef INSN_SCHEDULING
1323 flag_schedule_insns = 0;
1325 if (TARGET_64BIT && optimize >= 1)
1326 flag_omit_frame_pointer = 1;
1329 flag_pcc_struct_return = 0;
1330 flag_asynchronous_unwind_tables = 1;
1333 flag_omit_frame_pointer = 0;
1336 /* Table of valid machine attributes. */
1337 const struct attribute_spec ix86_attribute_table[] =
1339 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1340 /* Stdcall attribute says callee is responsible for popping arguments
1341 if they are not variable. */
1342 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1343 /* Cdecl attribute says the callee is a normal C declaration */
1344 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1345 /* Regparm attribute specifies how many integer arguments are to be
1346 passed in registers. */
1347 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1348 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1349 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1350 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1351 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1353 { NULL, 0, 0, false, false, false, NULL }
1356 /* Handle a "cdecl" or "stdcall" attribute;
1357 arguments as in struct attribute_spec.handler. */
1359 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1362 tree args ATTRIBUTE_UNUSED;
1363 int flags ATTRIBUTE_UNUSED;
1366 if (TREE_CODE (*node) != FUNCTION_TYPE
1367 && TREE_CODE (*node) != METHOD_TYPE
1368 && TREE_CODE (*node) != FIELD_DECL
1369 && TREE_CODE (*node) != TYPE_DECL)
1371 warning ("`%s' attribute only applies to functions",
1372 IDENTIFIER_POINTER (name));
1373 *no_add_attrs = true;
1378 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1379 *no_add_attrs = true;
1385 /* Handle a "regparm" attribute;
1386 arguments as in struct attribute_spec.handler. */
1388 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1392 int flags ATTRIBUTE_UNUSED;
1395 if (TREE_CODE (*node) != FUNCTION_TYPE
1396 && TREE_CODE (*node) != METHOD_TYPE
1397 && TREE_CODE (*node) != FIELD_DECL
1398 && TREE_CODE (*node) != TYPE_DECL)
1400 warning ("`%s' attribute only applies to functions",
1401 IDENTIFIER_POINTER (name));
1402 *no_add_attrs = true;
1408 cst = TREE_VALUE (args);
1409 if (TREE_CODE (cst) != INTEGER_CST)
1411 warning ("`%s' attribute requires an integer constant argument",
1412 IDENTIFIER_POINTER (name));
1413 *no_add_attrs = true;
1415 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1417 warning ("argument to `%s' attribute larger than %d",
1418 IDENTIFIER_POINTER (name), REGPARM_MAX);
1419 *no_add_attrs = true;
1426 /* Return 0 if the attributes for two types are incompatible, 1 if they
1427 are compatible, and 2 if they are nearly compatible (which causes a
1428 warning to be generated). */
1431 ix86_comp_type_attributes (type1, type2)
1435 /* Check for mismatch of non-default calling convention. */
1436 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1438 if (TREE_CODE (type1) != FUNCTION_TYPE)
1441 /* Check for mismatched return types (cdecl vs stdcall). */
1442 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1443 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1448 /* Return the regparm value for a fuctio with the indicated TYPE. */
1451 ix86_fntype_regparm (type)
1456 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1458 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1460 return ix86_regparm;
1463 /* Value is the number of bytes of arguments automatically
1464 popped when returning from a subroutine call.
1465 FUNDECL is the declaration node of the function (as a tree),
1466 FUNTYPE is the data type of the function (as a tree),
1467 or for a library call it is an identifier node for the subroutine name.
1468 SIZE is the number of bytes of arguments passed on the stack.
1470 On the 80386, the RTD insn may be used to pop them if the number
1471 of args is fixed, but if the number is variable then the caller
1472 must pop them all. RTD can't be used for library calls now
1473 because the library is compiled with the Unix compiler.
1474 Use of RTD is a selectable option, since it is incompatible with
1475 standard Unix calling sequences. If the option is not selected,
1476 the caller must always pop the args.
1478 The attribute stdcall is equivalent to RTD on a per module basis. */
1481 ix86_return_pops_args (fundecl, funtype, size)
1486 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1488 /* Cdecl functions override -mrtd, and never pop the stack. */
1489 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1491 /* Stdcall functions will pop the stack if not variable args. */
1492 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1496 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1497 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1498 == void_type_node)))
1502 /* Lose any fake structure return argument if it is passed on the stack. */
1503 if (aggregate_value_p (TREE_TYPE (funtype))
1506 int nregs = ix86_fntype_regparm (funtype);
1509 return GET_MODE_SIZE (Pmode);
1515 /* Argument support functions. */
1517 /* Return true when register may be used to pass function parameters. */
1519 ix86_function_arg_regno_p (regno)
1524 return (regno < REGPARM_MAX
1525 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1526 if (SSE_REGNO_P (regno) && TARGET_SSE)
1528 /* RAX is used as hidden argument to va_arg functions. */
1531 for (i = 0; i < REGPARM_MAX; i++)
1532 if (regno == x86_64_int_parameter_registers[i])
1537 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1538 for a call to a function whose data type is FNTYPE.
1539 For a library call, FNTYPE is 0. */
1542 init_cumulative_args (cum, fntype, libname)
1543 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1544 tree fntype; /* tree ptr for function decl */
1545 rtx libname; /* SYMBOL_REF of library name or 0 */
1547 static CUMULATIVE_ARGS zero_cum;
1548 tree param, next_param;
1550 if (TARGET_DEBUG_ARG)
1552 fprintf (stderr, "\ninit_cumulative_args (");
1554 fprintf (stderr, "fntype code = %s, ret code = %s",
1555 tree_code_name[(int) TREE_CODE (fntype)],
1556 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1558 fprintf (stderr, "no fntype");
1561 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1566 /* Set up the number of registers to use for passing arguments. */
1567 cum->nregs = ix86_regparm;
1568 cum->sse_nregs = SSE_REGPARM_MAX;
1569 if (fntype && !TARGET_64BIT)
1571 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1574 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1576 cum->maybe_vaarg = false;
1578 /* Determine if this function has variable arguments. This is
1579 indicated by the last argument being 'void_type_mode' if there
1580 are no variable arguments. If there are variable arguments, then
1581 we won't pass anything in registers */
1585 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1586 param != 0; param = next_param)
1588 next_param = TREE_CHAIN (param);
1589 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1593 cum->maybe_vaarg = true;
1597 if ((!fntype && !libname)
1598 || (fntype && !TYPE_ARG_TYPES (fntype)))
1599 cum->maybe_vaarg = 1;
1601 if (TARGET_DEBUG_ARG)
1602 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1607 /* x86-64 register passing impleemntation. See x86-64 ABI for details. Goal
1608 of this code is to classify each 8bytes of incoming argument by the register
1609 class and assign registers accordingly. */
1611 /* Return the union class of CLASS1 and CLASS2.
1612 See the x86-64 PS ABI for details. */
1614 static enum x86_64_reg_class
1615 merge_classes (class1, class2)
1616 enum x86_64_reg_class class1, class2;
1618 /* Rule #1: If both classes are equal, this is the resulting class. */
1619 if (class1 == class2)
1622 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1624 if (class1 == X86_64_NO_CLASS)
1626 if (class2 == X86_64_NO_CLASS)
1629 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1630 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1631 return X86_64_MEMORY_CLASS;
1633 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1634 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1635 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1636 return X86_64_INTEGERSI_CLASS;
1637 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1638 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1639 return X86_64_INTEGER_CLASS;
1641 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1642 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1643 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1644 return X86_64_MEMORY_CLASS;
1646 /* Rule #6: Otherwise class SSE is used. */
1647 return X86_64_SSE_CLASS;
1650 /* Classify the argument of type TYPE and mode MODE.
1651 CLASSES will be filled by the register class used to pass each word
1652 of the operand. The number of words is returned. In case the parameter
1653 should be passed in memory, 0 is returned. As a special case for zero
1654 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1656 BIT_OFFSET is used internally for handling records and specifies offset
1657 of the offset in bits modulo 256 to avoid overflow cases.
1659 See the x86-64 PS ABI for details.
1663 classify_argument (mode, type, classes, bit_offset)
1664 enum machine_mode mode;
1666 enum x86_64_reg_class classes[MAX_CLASSES];
1670 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1671 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1673 /* Variable sized entities are always passed/returned in memory. */
1677 if (type && AGGREGATE_TYPE_P (type))
1681 enum x86_64_reg_class subclasses[MAX_CLASSES];
1683 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1687 for (i = 0; i < words; i++)
1688 classes[i] = X86_64_NO_CLASS;
1690 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1691 signalize memory class, so handle it as special case. */
1694 classes[0] = X86_64_NO_CLASS;
1698 /* Classify each field of record and merge classes. */
1699 if (TREE_CODE (type) == RECORD_TYPE)
1701 /* For classes first merge in the field of the subclasses. */
1702 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1704 tree bases = TYPE_BINFO_BASETYPES (type);
1705 int n_bases = TREE_VEC_LENGTH (bases);
1708 for (i = 0; i < n_bases; ++i)
1710 tree binfo = TREE_VEC_ELT (bases, i);
1712 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1713 tree type = BINFO_TYPE (binfo);
1715 num = classify_argument (TYPE_MODE (type),
1717 (offset + bit_offset) % 256);
1720 for (i = 0; i < num; i++)
1722 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1724 merge_classes (subclasses[i], classes[i + pos]);
1728 /* And now merge the fields of structure. */
1729 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1731 if (TREE_CODE (field) == FIELD_DECL)
1735 /* Bitfields are always classified as integer. Handle them
1736 early, since later code would consider them to be
1737 misaligned integers. */
1738 if (DECL_BIT_FIELD (field))
1740 for (i = int_bit_position (field) / 8 / 8;
1741 i < (int_bit_position (field)
1742 + tree_low_cst (DECL_SIZE (field), 0)
1745 merge_classes (X86_64_INTEGER_CLASS,
1750 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1751 TREE_TYPE (field), subclasses,
1752 (int_bit_position (field)
1753 + bit_offset) % 256);
1756 for (i = 0; i < num; i++)
1759 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1761 merge_classes (subclasses[i], classes[i + pos]);
1767 /* Arrays are handled as small records. */
1768 else if (TREE_CODE (type) == ARRAY_TYPE)
1771 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1772 TREE_TYPE (type), subclasses, bit_offset);
1776 /* The partial classes are now full classes. */
1777 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1778 subclasses[0] = X86_64_SSE_CLASS;
1779 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1780 subclasses[0] = X86_64_INTEGER_CLASS;
1782 for (i = 0; i < words; i++)
1783 classes[i] = subclasses[i % num];
1785 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1786 else if (TREE_CODE (type) == UNION_TYPE
1787 || TREE_CODE (type) == QUAL_UNION_TYPE)
1789 /* For classes first merge in the field of the subclasses. */
1790 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1792 tree bases = TYPE_BINFO_BASETYPES (type);
1793 int n_bases = TREE_VEC_LENGTH (bases);
1796 for (i = 0; i < n_bases; ++i)
1798 tree binfo = TREE_VEC_ELT (bases, i);
1800 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1801 tree type = BINFO_TYPE (binfo);
1803 num = classify_argument (TYPE_MODE (type),
1805 (offset + (bit_offset % 64)) % 256);
1808 for (i = 0; i < num; i++)
1810 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1812 merge_classes (subclasses[i], classes[i + pos]);
1816 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1818 if (TREE_CODE (field) == FIELD_DECL)
1821 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1822 TREE_TYPE (field), subclasses,
1826 for (i = 0; i < num; i++)
1827 classes[i] = merge_classes (subclasses[i], classes[i]);
1834 /* Final merger cleanup. */
1835 for (i = 0; i < words; i++)
1837 /* If one class is MEMORY, everything should be passed in
1839 if (classes[i] == X86_64_MEMORY_CLASS)
1842 /* The X86_64_SSEUP_CLASS should be always preceded by
1843 X86_64_SSE_CLASS. */
1844 if (classes[i] == X86_64_SSEUP_CLASS
1845 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1846 classes[i] = X86_64_SSE_CLASS;
1848 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1849 if (classes[i] == X86_64_X87UP_CLASS
1850 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1851 classes[i] = X86_64_SSE_CLASS;
1856 /* Compute alignment needed. We align all types to natural boundaries with
1857 exception of XFmode that is aligned to 64bits. */
1858 if (mode != VOIDmode && mode != BLKmode)
1860 int mode_alignment = GET_MODE_BITSIZE (mode);
1863 mode_alignment = 128;
1864 else if (mode == XCmode)
1865 mode_alignment = 256;
1866 /* Misaligned fields are always returned in memory. */
1867 if (bit_offset % mode_alignment)
1871 /* Classification of atomic types. */
1881 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1882 classes[0] = X86_64_INTEGERSI_CLASS;
1884 classes[0] = X86_64_INTEGER_CLASS;
1888 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1891 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1892 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1895 if (!(bit_offset % 64))
1896 classes[0] = X86_64_SSESF_CLASS;
1898 classes[0] = X86_64_SSE_CLASS;
1901 classes[0] = X86_64_SSEDF_CLASS;
1904 classes[0] = X86_64_X87_CLASS;
1905 classes[1] = X86_64_X87UP_CLASS;
1908 classes[0] = X86_64_X87_CLASS;
1909 classes[1] = X86_64_X87UP_CLASS;
1910 classes[2] = X86_64_X87_CLASS;
1911 classes[3] = X86_64_X87UP_CLASS;
1914 classes[0] = X86_64_SSEDF_CLASS;
1915 classes[1] = X86_64_SSEDF_CLASS;
1918 classes[0] = X86_64_SSE_CLASS;
1926 classes[0] = X86_64_SSE_CLASS;
1927 classes[1] = X86_64_SSEUP_CLASS;
1942 /* Examine the argument and return set number of register required in each
1943 class. Return 0 iff parameter should be passed in memory. */
1945 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1946 enum machine_mode mode;
1948 int *int_nregs, *sse_nregs;
1951 enum x86_64_reg_class class[MAX_CLASSES];
1952 int n = classify_argument (mode, type, class, 0);
1958 for (n--; n >= 0; n--)
1961 case X86_64_INTEGER_CLASS:
1962 case X86_64_INTEGERSI_CLASS:
1965 case X86_64_SSE_CLASS:
1966 case X86_64_SSESF_CLASS:
1967 case X86_64_SSEDF_CLASS:
1970 case X86_64_NO_CLASS:
1971 case X86_64_SSEUP_CLASS:
1973 case X86_64_X87_CLASS:
1974 case X86_64_X87UP_CLASS:
1978 case X86_64_MEMORY_CLASS:
1983 /* Construct container for the argument used by GCC interface. See
1984 FUNCTION_ARG for the detailed description. */
1986 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1987 enum machine_mode mode;
1990 int nintregs, nsseregs;
1994 enum machine_mode tmpmode;
1996 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1997 enum x86_64_reg_class class[MAX_CLASSES];
2001 int needed_sseregs, needed_intregs;
2002 rtx exp[MAX_CLASSES];
2005 n = classify_argument (mode, type, class, 0);
2006 if (TARGET_DEBUG_ARG)
2009 fprintf (stderr, "Memory class\n");
2012 fprintf (stderr, "Classes:");
2013 for (i = 0; i < n; i++)
2015 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2017 fprintf (stderr, "\n");
2022 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2024 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2027 /* First construct simple cases. Avoid SCmode, since we want to use
2028 single register to pass this type. */
2029 if (n == 1 && mode != SCmode)
2032 case X86_64_INTEGER_CLASS:
2033 case X86_64_INTEGERSI_CLASS:
2034 return gen_rtx_REG (mode, intreg[0]);
2035 case X86_64_SSE_CLASS:
2036 case X86_64_SSESF_CLASS:
2037 case X86_64_SSEDF_CLASS:
2038 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2039 case X86_64_X87_CLASS:
2040 return gen_rtx_REG (mode, FIRST_STACK_REG);
2041 case X86_64_NO_CLASS:
2042 /* Zero sized array, struct or class. */
2047 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2048 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2050 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2051 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2052 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2053 && class[1] == X86_64_INTEGER_CLASS
2054 && (mode == CDImode || mode == TImode)
2055 && intreg[0] + 1 == intreg[1])
2056 return gen_rtx_REG (mode, intreg[0]);
2058 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2059 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2060 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2062 /* Otherwise figure out the entries of the PARALLEL. */
2063 for (i = 0; i < n; i++)
2067 case X86_64_NO_CLASS:
2069 case X86_64_INTEGER_CLASS:
2070 case X86_64_INTEGERSI_CLASS:
2071 /* Merge TImodes on aligned occassions here too. */
2072 if (i * 8 + 8 > bytes)
2073 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2074 else if (class[i] == X86_64_INTEGERSI_CLASS)
2078 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2079 if (tmpmode == BLKmode)
2081 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2082 gen_rtx_REG (tmpmode, *intreg),
2086 case X86_64_SSESF_CLASS:
2087 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2088 gen_rtx_REG (SFmode,
2089 SSE_REGNO (sse_regno)),
2093 case X86_64_SSEDF_CLASS:
2094 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2095 gen_rtx_REG (DFmode,
2096 SSE_REGNO (sse_regno)),
2100 case X86_64_SSE_CLASS:
2101 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
2102 tmpmode = TImode, i++;
2105 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2106 gen_rtx_REG (tmpmode,
2107 SSE_REGNO (sse_regno)),
2115 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2116 for (i = 0; i < nexps; i++)
2117 XVECEXP (ret, 0, i) = exp [i];
2121 /* Update the data in CUM to advance over an argument
2122 of mode MODE and data type TYPE.
2123 (TYPE is null for libcalls where that information may not be available.) */
2126 function_arg_advance (cum, mode, type, named)
2127 CUMULATIVE_ARGS *cum; /* current arg information */
2128 enum machine_mode mode; /* current arg mode */
2129 tree type; /* type of the argument or 0 if lib support */
2130 int named; /* whether or not the argument was named */
2133 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2134 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2136 if (TARGET_DEBUG_ARG)
2138 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2139 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2142 int int_nregs, sse_nregs;
2143 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2144 cum->words += words;
2145 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2147 cum->nregs -= int_nregs;
2148 cum->sse_nregs -= sse_nregs;
2149 cum->regno += int_nregs;
2150 cum->sse_regno += sse_nregs;
2153 cum->words += words;
2157 if (TARGET_SSE && mode == TImode)
2159 cum->sse_words += words;
2160 cum->sse_nregs -= 1;
2161 cum->sse_regno += 1;
2162 if (cum->sse_nregs <= 0)
2170 cum->words += words;
2171 cum->nregs -= words;
2172 cum->regno += words;
2174 if (cum->nregs <= 0)
2184 /* Define where to put the arguments to a function.
2185 Value is zero to push the argument on the stack,
2186 or a hard register in which to store the argument.
2188 MODE is the argument's machine mode.
2189 TYPE is the data type of the argument (as a tree).
2190 This is null for libcalls where that information may
2192 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2193 the preceding args and about the function being called.
2194 NAMED is nonzero if this argument is a named parameter
2195 (otherwise it is an extra parameter matching an ellipsis). */
2198 function_arg (cum, mode, type, named)
2199 CUMULATIVE_ARGS *cum; /* current arg information */
2200 enum machine_mode mode; /* current arg mode */
2201 tree type; /* type of the argument or 0 if lib support */
2202 int named; /* != 0 for normal args, == 0 for ... args */
2206 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2207 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2209 /* Handle an hidden AL argument containing number of registers for varargs
2210 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2212 if (mode == VOIDmode)
2215 return GEN_INT (cum->maybe_vaarg
2216 ? (cum->sse_nregs < 0
2224 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2225 &x86_64_int_parameter_registers [cum->regno],
2230 /* For now, pass fp/complex values on the stack. */
2239 if (words <= cum->nregs)
2240 ret = gen_rtx_REG (mode, cum->regno);
2244 ret = gen_rtx_REG (mode, cum->sse_regno);
2248 if (TARGET_DEBUG_ARG)
2251 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2252 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2255 print_simple_rtl (stderr, ret);
2257 fprintf (stderr, ", stack");
2259 fprintf (stderr, " )\n");
2265 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2269 ix86_function_arg_boundary (mode, type)
2270 enum machine_mode mode;
2275 return PARM_BOUNDARY;
2277 align = TYPE_ALIGN (type);
2279 align = GET_MODE_ALIGNMENT (mode);
2280 if (align < PARM_BOUNDARY)
2281 align = PARM_BOUNDARY;
2287 /* Return true if N is a possible register number of function value. */
2289 ix86_function_value_regno_p (regno)
2294 return ((regno) == 0
2295 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2296 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2298 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2299 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2300 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2303 /* Define how to find the value returned by a function.
2304 VALTYPE is the data type of the value (as a tree).
2305 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2306 otherwise, FUNC is 0. */
2308 ix86_function_value (valtype)
2313 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2314 REGPARM_MAX, SSE_REGPARM_MAX,
2315 x86_64_int_return_registers, 0);
2316 /* For zero sized structures, construct_continer return NULL, but we need
2317 to keep rest of compiler happy by returning meaningfull value. */
2319 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2323 return gen_rtx_REG (TYPE_MODE (valtype),
2324 ix86_value_regno (TYPE_MODE (valtype)));
2327 /* Return false iff type is returned in memory. */
2329 ix86_return_in_memory (type)
2332 int needed_intregs, needed_sseregs;
2335 return !examine_argument (TYPE_MODE (type), type, 1,
2336 &needed_intregs, &needed_sseregs);
2340 if (TYPE_MODE (type) == BLKmode
2341 || (VECTOR_MODE_P (TYPE_MODE (type))
2342 && int_size_in_bytes (type) == 8)
2343 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2344 && TYPE_MODE (type) != TFmode
2345 && !VECTOR_MODE_P (TYPE_MODE (type))))
2351 /* Define how to find the value returned by a library function
2352 assuming the value has mode MODE. */
2354 ix86_libcall_value (mode)
2355 enum machine_mode mode;
2365 return gen_rtx_REG (mode, FIRST_SSE_REG);
2368 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2370 return gen_rtx_REG (mode, 0);
2374 return gen_rtx_REG (mode, ix86_value_regno (mode));
2377 /* Given a mode, return the register to use for a return value. */
2380 ix86_value_regno (mode)
2381 enum machine_mode mode;
2383 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2384 return FIRST_FLOAT_REG;
2385 if (mode == TImode || VECTOR_MODE_P (mode))
2386 return FIRST_SSE_REG;
2390 /* Create the va_list data type. */
2393 ix86_build_va_list ()
2395 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2397 /* For i386 we use plain pointer to argument area. */
2399 return build_pointer_type (char_type_node);
2401 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2402 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2404 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2405 unsigned_type_node);
2406 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2407 unsigned_type_node);
2408 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2410 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2413 DECL_FIELD_CONTEXT (f_gpr) = record;
2414 DECL_FIELD_CONTEXT (f_fpr) = record;
2415 DECL_FIELD_CONTEXT (f_ovf) = record;
2416 DECL_FIELD_CONTEXT (f_sav) = record;
2418 TREE_CHAIN (record) = type_decl;
2419 TYPE_NAME (record) = type_decl;
2420 TYPE_FIELDS (record) = f_gpr;
2421 TREE_CHAIN (f_gpr) = f_fpr;
2422 TREE_CHAIN (f_fpr) = f_ovf;
2423 TREE_CHAIN (f_ovf) = f_sav;
2425 layout_type (record);
2427 /* The correct type is an array type of one element. */
2428 return build_array_type (record, build_index_type (size_zero_node));
2431 /* Perform any needed actions needed for a function that is receiving a
2432 variable number of arguments.
2436 MODE and TYPE are the mode and type of the current parameter.
2438 PRETEND_SIZE is a variable that should be set to the amount of stack
2439 that must be pushed by the prolog to pretend that our caller pushed
2442 Normally, this macro will push all remaining incoming registers on the
2443 stack and set PRETEND_SIZE to the length of the registers pushed. */
2446 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2447 CUMULATIVE_ARGS *cum;
2448 enum machine_mode mode;
2450 int *pretend_size ATTRIBUTE_UNUSED;
2454 CUMULATIVE_ARGS next_cum;
2455 rtx save_area = NULL_RTX, mem;
2468 /* Indicate to allocate space on the stack for varargs save area. */
2469 ix86_save_varrargs_registers = 1;
2471 fntype = TREE_TYPE (current_function_decl);
2472 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2473 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2474 != void_type_node));
2476 /* For varargs, we do not want to skip the dummy va_dcl argument.
2477 For stdargs, we do want to skip the last named argument. */
2480 function_arg_advance (&next_cum, mode, type, 1);
2483 save_area = frame_pointer_rtx;
2485 set = get_varargs_alias_set ();
2487 for (i = next_cum.regno; i < ix86_regparm; i++)
2489 mem = gen_rtx_MEM (Pmode,
2490 plus_constant (save_area, i * UNITS_PER_WORD));
2491 set_mem_alias_set (mem, set);
2492 emit_move_insn (mem, gen_rtx_REG (Pmode,
2493 x86_64_int_parameter_registers[i]));
2496 if (next_cum.sse_nregs)
2498 /* Now emit code to save SSE registers. The AX parameter contains number
2499 of SSE parameter regsiters used to call this function. We use
2500 sse_prologue_save insn template that produces computed jump across
2501 SSE saves. We need some preparation work to get this working. */
2503 label = gen_label_rtx ();
2504 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2506 /* Compute address to jump to :
2507 label - 5*eax + nnamed_sse_arguments*5 */
2508 tmp_reg = gen_reg_rtx (Pmode);
2509 nsse_reg = gen_reg_rtx (Pmode);
2510 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2511 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2512 gen_rtx_MULT (Pmode, nsse_reg,
2514 if (next_cum.sse_regno)
2517 gen_rtx_CONST (DImode,
2518 gen_rtx_PLUS (DImode,
2520 GEN_INT (next_cum.sse_regno * 4))));
2522 emit_move_insn (nsse_reg, label_ref);
2523 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2525 /* Compute address of memory block we save into. We always use pointer
2526 pointing 127 bytes after first byte to store - this is needed to keep
2527 instruction size limited by 4 bytes. */
2528 tmp_reg = gen_reg_rtx (Pmode);
2529 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2530 plus_constant (save_area,
2531 8 * REGPARM_MAX + 127)));
2532 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2533 set_mem_alias_set (mem, set);
2534 set_mem_align (mem, BITS_PER_WORD);
2536 /* And finally do the dirty job! */
2537 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2538 GEN_INT (next_cum.sse_regno), label));
2543 /* Implement va_start. */
2546 ix86_va_start (valist, nextarg)
2550 HOST_WIDE_INT words, n_gpr, n_fpr;
2551 tree f_gpr, f_fpr, f_ovf, f_sav;
2552 tree gpr, fpr, ovf, sav, t;
2554 /* Only 64bit target needs something special. */
2557 std_expand_builtin_va_start (valist, nextarg);
2561 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2562 f_fpr = TREE_CHAIN (f_gpr);
2563 f_ovf = TREE_CHAIN (f_fpr);
2564 f_sav = TREE_CHAIN (f_ovf);
2566 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2567 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2568 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2569 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2570 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2572 /* Count number of gp and fp argument registers used. */
2573 words = current_function_args_info.words;
2574 n_gpr = current_function_args_info.regno;
2575 n_fpr = current_function_args_info.sse_regno;
2577 if (TARGET_DEBUG_ARG)
2578 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2579 (int) words, (int) n_gpr, (int) n_fpr);
2581 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2582 build_int_2 (n_gpr * 8, 0));
2583 TREE_SIDE_EFFECTS (t) = 1;
2584 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2586 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2587 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2588 TREE_SIDE_EFFECTS (t) = 1;
2589 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2591 /* Find the overflow area. */
2592 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2594 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2595 build_int_2 (words * UNITS_PER_WORD, 0));
2596 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2597 TREE_SIDE_EFFECTS (t) = 1;
2598 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2600 /* Find the register save area.
2601 Prologue of the function save it right above stack frame. */
2602 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2603 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2604 TREE_SIDE_EFFECTS (t) = 1;
2605 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2608 /* Implement va_arg. */
2610 ix86_va_arg (valist, type)
2613 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2614 tree f_gpr, f_fpr, f_ovf, f_sav;
2615 tree gpr, fpr, ovf, sav, t;
2617 rtx lab_false, lab_over = NULL_RTX;
2621 /* Only 64bit target needs something special. */
2624 return std_expand_builtin_va_arg (valist, type);
2627 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2628 f_fpr = TREE_CHAIN (f_gpr);
2629 f_ovf = TREE_CHAIN (f_fpr);
2630 f_sav = TREE_CHAIN (f_ovf);
2632 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2633 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2634 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2635 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2636 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2638 size = int_size_in_bytes (type);
2639 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2641 container = construct_container (TYPE_MODE (type), type, 0,
2642 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2644 * Pull the value out of the saved registers ...
2647 addr_rtx = gen_reg_rtx (Pmode);
2651 rtx int_addr_rtx, sse_addr_rtx;
2652 int needed_intregs, needed_sseregs;
2655 lab_over = gen_label_rtx ();
2656 lab_false = gen_label_rtx ();
2658 examine_argument (TYPE_MODE (type), type, 0,
2659 &needed_intregs, &needed_sseregs);
2662 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2663 || TYPE_ALIGN (type) > 128);
2665 /* In case we are passing structure, verify that it is consetuctive block
2666 on the register save area. If not we need to do moves. */
2667 if (!need_temp && !REG_P (container))
2669 /* Verify that all registers are strictly consetuctive */
2670 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2674 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2676 rtx slot = XVECEXP (container, 0, i);
2677 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2678 || INTVAL (XEXP (slot, 1)) != i * 16)
2686 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2688 rtx slot = XVECEXP (container, 0, i);
2689 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2690 || INTVAL (XEXP (slot, 1)) != i * 8)
2697 int_addr_rtx = addr_rtx;
2698 sse_addr_rtx = addr_rtx;
2702 int_addr_rtx = gen_reg_rtx (Pmode);
2703 sse_addr_rtx = gen_reg_rtx (Pmode);
2705 /* First ensure that we fit completely in registers. */
2708 emit_cmp_and_jump_insns (expand_expr
2709 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2710 GEN_INT ((REGPARM_MAX - needed_intregs +
2711 1) * 8), GE, const1_rtx, SImode,
2716 emit_cmp_and_jump_insns (expand_expr
2717 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2718 GEN_INT ((SSE_REGPARM_MAX -
2719 needed_sseregs + 1) * 16 +
2720 REGPARM_MAX * 8), GE, const1_rtx,
2721 SImode, 1, lab_false);
2724 /* Compute index to start of area used for integer regs. */
2727 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2728 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2729 if (r != int_addr_rtx)
2730 emit_move_insn (int_addr_rtx, r);
2734 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2735 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2736 if (r != sse_addr_rtx)
2737 emit_move_insn (sse_addr_rtx, r);
2744 /* Never use the memory itself, as it has the alias set. */
2745 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2746 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2747 set_mem_alias_set (mem, get_varargs_alias_set ());
2748 set_mem_align (mem, BITS_PER_UNIT);
2750 for (i = 0; i < XVECLEN (container, 0); i++)
2752 rtx slot = XVECEXP (container, 0, i);
2753 rtx reg = XEXP (slot, 0);
2754 enum machine_mode mode = GET_MODE (reg);
2760 if (SSE_REGNO_P (REGNO (reg)))
2762 src_addr = sse_addr_rtx;
2763 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2767 src_addr = int_addr_rtx;
2768 src_offset = REGNO (reg) * 8;
2770 src_mem = gen_rtx_MEM (mode, src_addr);
2771 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2772 src_mem = adjust_address (src_mem, mode, src_offset);
2773 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2774 emit_move_insn (dest_mem, src_mem);
2781 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2782 build_int_2 (needed_intregs * 8, 0));
2783 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2784 TREE_SIDE_EFFECTS (t) = 1;
2785 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2790 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2791 build_int_2 (needed_sseregs * 16, 0));
2792 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2793 TREE_SIDE_EFFECTS (t) = 1;
2794 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2797 emit_jump_insn (gen_jump (lab_over));
2799 emit_label (lab_false);
2802 /* ... otherwise out of the overflow area. */
2804 /* Care for on-stack alignment if needed. */
2805 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2809 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2810 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2811 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2815 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2817 emit_move_insn (addr_rtx, r);
2820 build (PLUS_EXPR, TREE_TYPE (t), t,
2821 build_int_2 (rsize * UNITS_PER_WORD, 0));
2822 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2823 TREE_SIDE_EFFECTS (t) = 1;
2824 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2827 emit_label (lab_over);
2832 /* Return nonzero if OP is either a i387 or SSE fp register. */
2834 any_fp_register_operand (op, mode)
2836 enum machine_mode mode ATTRIBUTE_UNUSED;
2838 return ANY_FP_REG_P (op);
2841 /* Return nonzero if OP is an i387 fp register. */
2843 fp_register_operand (op, mode)
2845 enum machine_mode mode ATTRIBUTE_UNUSED;
2847 return FP_REG_P (op);
2850 /* Return nonzero if OP is a non-fp register_operand. */
2852 register_and_not_any_fp_reg_operand (op, mode)
2854 enum machine_mode mode;
2856 return register_operand (op, mode) && !ANY_FP_REG_P (op);
2859 /* Return nonzero of OP is a register operand other than an
2860 i387 fp register. */
2862 register_and_not_fp_reg_operand (op, mode)
2864 enum machine_mode mode;
2866 return register_operand (op, mode) && !FP_REG_P (op);
2869 /* Return nonzero if OP is general operand representable on x86_64. */
2872 x86_64_general_operand (op, mode)
2874 enum machine_mode mode;
2877 return general_operand (op, mode);
2878 if (nonimmediate_operand (op, mode))
2880 return x86_64_sign_extended_value (op, 1);
2883 /* Return nonzero if OP is general operand representable on x86_64
2884 as either sign extended or zero extended constant. */
2887 x86_64_szext_general_operand (op, mode)
2889 enum machine_mode mode;
2892 return general_operand (op, mode);
2893 if (nonimmediate_operand (op, mode))
2895 return x86_64_sign_extended_value (op, 1) || x86_64_zero_extended_value (op);
2898 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2901 x86_64_nonmemory_operand (op, mode)
2903 enum machine_mode mode;
2906 return nonmemory_operand (op, mode);
2907 if (register_operand (op, mode))
2909 return x86_64_sign_extended_value (op, 1);
2912 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2915 x86_64_movabs_operand (op, mode)
2917 enum machine_mode mode;
2919 if (!TARGET_64BIT || !flag_pic)
2920 return nonmemory_operand (op, mode);
2921 if (register_operand (op, mode) || x86_64_sign_extended_value (op, 0))
2923 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2928 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2931 x86_64_szext_nonmemory_operand (op, mode)
2933 enum machine_mode mode;
2936 return nonmemory_operand (op, mode);
2937 if (register_operand (op, mode))
2939 return x86_64_sign_extended_value (op, 0) || x86_64_zero_extended_value (op);
2942 /* Return nonzero if OP is immediate operand representable on x86_64. */
2945 x86_64_immediate_operand (op, mode)
2947 enum machine_mode mode;
2950 return immediate_operand (op, mode);
2951 return x86_64_sign_extended_value (op, 0);
2954 /* Return nonzero if OP is immediate operand representable on x86_64. */
2957 x86_64_zext_immediate_operand (op, mode)
2959 enum machine_mode mode ATTRIBUTE_UNUSED;
2961 return x86_64_zero_extended_value (op);
2964 /* Return nonzero if OP is (const_int 1), else return zero. */
2967 const_int_1_operand (op, mode)
2969 enum machine_mode mode ATTRIBUTE_UNUSED;
2971 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2974 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2975 for shift & compare patterns, as shifting by 0 does not change flags),
2976 else return zero. */
2979 const_int_1_31_operand (op, mode)
2981 enum machine_mode mode ATTRIBUTE_UNUSED;
2983 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
2986 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2987 reference and a constant. */
2990 symbolic_operand (op, mode)
2992 enum machine_mode mode ATTRIBUTE_UNUSED;
2994 switch (GET_CODE (op))
3002 if (GET_CODE (op) == SYMBOL_REF
3003 || GET_CODE (op) == LABEL_REF
3004 || (GET_CODE (op) == UNSPEC
3005 && (XINT (op, 1) == UNSPEC_GOT
3006 || XINT (op, 1) == UNSPEC_GOTOFF
3007 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3009 if (GET_CODE (op) != PLUS
3010 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3014 if (GET_CODE (op) == SYMBOL_REF
3015 || GET_CODE (op) == LABEL_REF)
3017 /* Only @GOTOFF gets offsets. */
3018 if (GET_CODE (op) != UNSPEC
3019 || XINT (op, 1) != UNSPEC_GOTOFF)
3022 op = XVECEXP (op, 0, 0);
3023 if (GET_CODE (op) == SYMBOL_REF
3024 || GET_CODE (op) == LABEL_REF)
3033 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3036 pic_symbolic_operand (op, mode)
3038 enum machine_mode mode ATTRIBUTE_UNUSED;
3040 if (GET_CODE (op) != CONST)
3045 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3050 if (GET_CODE (op) == UNSPEC)
3052 if (GET_CODE (op) != PLUS
3053 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3056 if (GET_CODE (op) == UNSPEC)
3062 /* Return true if OP is a symbolic operand that resolves locally. */
3065 local_symbolic_operand (op, mode)
3067 enum machine_mode mode ATTRIBUTE_UNUSED;
3069 if (GET_CODE (op) == LABEL_REF)
3072 if (GET_CODE (op) == CONST
3073 && GET_CODE (XEXP (op, 0)) == PLUS
3074 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3075 && (ix86_cmodel != CM_SMALL_PIC
3076 || (INTVAL (XEXP (XEXP (op, 0), 1)) >= -16*1024*1024
3077 && INTVAL (XEXP (XEXP (op, 0), 1)) < 16*1024*1024)))
3078 op = XEXP (XEXP (op, 0), 0);
3080 if (GET_CODE (op) != SYMBOL_REF)
3083 /* These we've been told are local by varasm and encode_section_info
3085 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3088 /* There is, however, a not insubstantial body of code in the rest of
3089 the compiler that assumes it can just stick the results of
3090 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3091 /* ??? This is a hack. Should update the body of the compiler to
3092 always create a DECL an invoke targetm.encode_section_info. */
3093 if (strncmp (XSTR (op, 0), internal_label_prefix,
3094 internal_label_prefix_len) == 0)
3100 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3103 tls_symbolic_operand (op, mode)
3105 enum machine_mode mode ATTRIBUTE_UNUSED;
3107 const char *symbol_str;
3109 if (GET_CODE (op) != SYMBOL_REF)
3111 symbol_str = XSTR (op, 0);
3113 if (symbol_str[0] != '%')
3115 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3119 tls_symbolic_operand_1 (op, kind)
3121 enum tls_model kind;
3123 const char *symbol_str;
3125 if (GET_CODE (op) != SYMBOL_REF)
3127 symbol_str = XSTR (op, 0);
3129 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3133 global_dynamic_symbolic_operand (op, mode)
3135 enum machine_mode mode ATTRIBUTE_UNUSED;
3137 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3141 local_dynamic_symbolic_operand (op, mode)
3143 enum machine_mode mode ATTRIBUTE_UNUSED;
3145 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3149 initial_exec_symbolic_operand (op, mode)
3151 enum machine_mode mode ATTRIBUTE_UNUSED;
3153 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3157 local_exec_symbolic_operand (op, mode)
3159 enum machine_mode mode ATTRIBUTE_UNUSED;
3161 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3164 /* Test for a valid operand for a call instruction. Don't allow the
3165 arg pointer register or virtual regs since they may decay into
3166 reg + const, which the patterns can't handle. */
3169 call_insn_operand (op, mode)
3171 enum machine_mode mode ATTRIBUTE_UNUSED;
3173 /* Disallow indirect through a virtual register. This leads to
3174 compiler aborts when trying to eliminate them. */
3175 if (GET_CODE (op) == REG
3176 && (op == arg_pointer_rtx
3177 || op == frame_pointer_rtx
3178 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3179 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3182 /* Disallow `call 1234'. Due to varying assembler lameness this
3183 gets either rejected or translated to `call .+1234'. */
3184 if (GET_CODE (op) == CONST_INT)
3187 /* Explicitly allow SYMBOL_REF even if pic. */
3188 if (GET_CODE (op) == SYMBOL_REF)
3191 /* Otherwise we can allow any general_operand in the address. */
3192 return general_operand (op, Pmode);
3196 constant_call_address_operand (op, mode)
3198 enum machine_mode mode ATTRIBUTE_UNUSED;
3200 if (GET_CODE (op) == CONST
3201 && GET_CODE (XEXP (op, 0)) == PLUS
3202 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3203 op = XEXP (XEXP (op, 0), 0);
3204 return GET_CODE (op) == SYMBOL_REF;
3207 /* Match exactly zero and one. */
3210 const0_operand (op, mode)
3212 enum machine_mode mode;
3214 return op == CONST0_RTX (mode);
3218 const1_operand (op, mode)
3220 enum machine_mode mode ATTRIBUTE_UNUSED;
3222 return op == const1_rtx;
3225 /* Match 2, 4, or 8. Used for leal multiplicands. */
3228 const248_operand (op, mode)
3230 enum machine_mode mode ATTRIBUTE_UNUSED;
3232 return (GET_CODE (op) == CONST_INT
3233 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3236 /* True if this is a constant appropriate for an increment or decremenmt. */
3239 incdec_operand (op, mode)
3241 enum machine_mode mode ATTRIBUTE_UNUSED;
3243 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3244 registers, since carry flag is not set. */
3245 if (TARGET_PENTIUM4 && !optimize_size)
3247 return op == const1_rtx || op == constm1_rtx;
3250 /* Return nonzero if OP is acceptable as operand of DImode shift
3254 shiftdi_operand (op, mode)
3256 enum machine_mode mode ATTRIBUTE_UNUSED;
3259 return nonimmediate_operand (op, mode);
3261 return register_operand (op, mode);
3264 /* Return false if this is the stack pointer, or any other fake
3265 register eliminable to the stack pointer. Otherwise, this is
3268 This is used to prevent esp from being used as an index reg.
3269 Which would only happen in pathological cases. */
3272 reg_no_sp_operand (op, mode)
3274 enum machine_mode mode;
3277 if (GET_CODE (t) == SUBREG)
3279 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3282 return register_operand (op, mode);
3286 mmx_reg_operand (op, mode)
3288 enum machine_mode mode ATTRIBUTE_UNUSED;
3290 return MMX_REG_P (op);
3293 /* Return false if this is any eliminable register. Otherwise
3297 general_no_elim_operand (op, mode)
3299 enum machine_mode mode;
3302 if (GET_CODE (t) == SUBREG)
3304 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3305 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3306 || t == virtual_stack_dynamic_rtx)
3309 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3310 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3313 return general_operand (op, mode);
3316 /* Return false if this is any eliminable register. Otherwise
3317 register_operand or const_int. */
3320 nonmemory_no_elim_operand (op, mode)
3322 enum machine_mode mode;
3325 if (GET_CODE (t) == SUBREG)
3327 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3328 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3329 || t == virtual_stack_dynamic_rtx)
3332 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3335 /* Return false if this is any eliminable register or stack register,
3336 otherwise work like register_operand. */
3339 index_register_operand (op, mode)
3341 enum machine_mode mode;
3344 if (GET_CODE (t) == SUBREG)
3348 if (t == arg_pointer_rtx
3349 || t == frame_pointer_rtx
3350 || t == virtual_incoming_args_rtx
3351 || t == virtual_stack_vars_rtx
3352 || t == virtual_stack_dynamic_rtx
3353 || REGNO (t) == STACK_POINTER_REGNUM)
3356 return general_operand (op, mode);
3359 /* Return true if op is a Q_REGS class register. */
3362 q_regs_operand (op, mode)
3364 enum machine_mode mode;
3366 if (mode != VOIDmode && GET_MODE (op) != mode)
3368 if (GET_CODE (op) == SUBREG)
3369 op = SUBREG_REG (op);
3370 return ANY_QI_REG_P (op);
3373 /* Return true if op is a NON_Q_REGS class register. */
3376 non_q_regs_operand (op, mode)
3378 enum machine_mode mode;
3380 if (mode != VOIDmode && GET_MODE (op) != mode)
3382 if (GET_CODE (op) == SUBREG)
3383 op = SUBREG_REG (op);
3384 return NON_QI_REG_P (op);
3387 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3390 sse_comparison_operator (op, mode)
3392 enum machine_mode mode ATTRIBUTE_UNUSED;
3394 enum rtx_code code = GET_CODE (op);
3397 /* Operations supported directly. */
3407 /* These are equivalent to ones above in non-IEEE comparisons. */
3414 return !TARGET_IEEE_FP;
3419 /* Return 1 if OP is a valid comparison operator in valid mode. */
3421 ix86_comparison_operator (op, mode)
3423 enum machine_mode mode;
3425 enum machine_mode inmode;
3426 enum rtx_code code = GET_CODE (op);
3427 if (mode != VOIDmode && GET_MODE (op) != mode)
3429 if (GET_RTX_CLASS (code) != '<')
3431 inmode = GET_MODE (XEXP (op, 0));
3433 if (inmode == CCFPmode || inmode == CCFPUmode)
3435 enum rtx_code second_code, bypass_code;
3436 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3437 return (bypass_code == NIL && second_code == NIL);
3444 if (inmode == CCmode || inmode == CCGCmode
3445 || inmode == CCGOCmode || inmode == CCNOmode)
3448 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3449 if (inmode == CCmode)
3453 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3461 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3464 fcmov_comparison_operator (op, mode)
3466 enum machine_mode mode;
3468 enum machine_mode inmode;
3469 enum rtx_code code = GET_CODE (op);
3470 if (mode != VOIDmode && GET_MODE (op) != mode)
3472 if (GET_RTX_CLASS (code) != '<')
3474 inmode = GET_MODE (XEXP (op, 0));
3475 if (inmode == CCFPmode || inmode == CCFPUmode)
3477 enum rtx_code second_code, bypass_code;
3478 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3479 if (bypass_code != NIL || second_code != NIL)
3481 code = ix86_fp_compare_code_to_integer (code);
3483 /* i387 supports just limited amount of conditional codes. */
3486 case LTU: case GTU: case LEU: case GEU:
3487 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3490 case ORDERED: case UNORDERED:
3498 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3501 promotable_binary_operator (op, mode)
3503 enum machine_mode mode ATTRIBUTE_UNUSED;
3505 switch (GET_CODE (op))
3508 /* Modern CPUs have same latency for HImode and SImode multiply,
3509 but 386 and 486 do HImode multiply faster. */
3510 return ix86_cpu > PROCESSOR_I486;
3522 /* Nearly general operand, but accept any const_double, since we wish
3523 to be able to drop them into memory rather than have them get pulled
3527 cmp_fp_expander_operand (op, mode)
3529 enum machine_mode mode;
3531 if (mode != VOIDmode && mode != GET_MODE (op))
3533 if (GET_CODE (op) == CONST_DOUBLE)
3535 return general_operand (op, mode);
3538 /* Match an SI or HImode register for a zero_extract. */
3541 ext_register_operand (op, mode)
3543 enum machine_mode mode ATTRIBUTE_UNUSED;
3546 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3547 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3550 if (!register_operand (op, VOIDmode))
3553 /* Be curefull to accept only registers having upper parts. */
3554 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3555 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3558 /* Return 1 if this is a valid binary floating-point operation.
3559 OP is the expression matched, and MODE is its mode. */
3562 binary_fp_operator (op, mode)
3564 enum machine_mode mode;
3566 if (mode != VOIDmode && mode != GET_MODE (op))
3569 switch (GET_CODE (op))
3575 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3583 mult_operator (op, mode)
3585 enum machine_mode mode ATTRIBUTE_UNUSED;
3587 return GET_CODE (op) == MULT;
3591 div_operator (op, mode)
3593 enum machine_mode mode ATTRIBUTE_UNUSED;
3595 return GET_CODE (op) == DIV;
3599 arith_or_logical_operator (op, mode)
3601 enum machine_mode mode;
3603 return ((mode == VOIDmode || GET_MODE (op) == mode)
3604 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3605 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3608 /* Returns 1 if OP is memory operand with a displacement. */
3611 memory_displacement_operand (op, mode)
3613 enum machine_mode mode;
3615 struct ix86_address parts;
3617 if (! memory_operand (op, mode))
3620 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3623 return parts.disp != NULL_RTX;
3626 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3627 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3629 ??? It seems likely that this will only work because cmpsi is an
3630 expander, and no actual insns use this. */
3633 cmpsi_operand (op, mode)
3635 enum machine_mode mode;
3637 if (nonimmediate_operand (op, mode))
3640 if (GET_CODE (op) == AND
3641 && GET_MODE (op) == SImode
3642 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3643 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3644 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3645 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3646 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3647 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3653 /* Returns 1 if OP is memory operand that can not be represented by the
3657 long_memory_operand (op, mode)
3659 enum machine_mode mode;
3661 if (! memory_operand (op, mode))
3664 return memory_address_length (op) != 0;
3667 /* Return nonzero if the rtx is known aligned. */
3670 aligned_operand (op, mode)
3672 enum machine_mode mode;
3674 struct ix86_address parts;
3676 if (!general_operand (op, mode))
3679 /* Registers and immediate operands are always "aligned". */
3680 if (GET_CODE (op) != MEM)
3683 /* Don't even try to do any aligned optimizations with volatiles. */
3684 if (MEM_VOLATILE_P (op))
3689 /* Pushes and pops are only valid on the stack pointer. */
3690 if (GET_CODE (op) == PRE_DEC
3691 || GET_CODE (op) == POST_INC)
3694 /* Decode the address. */
3695 if (! ix86_decompose_address (op, &parts))
3698 if (parts.base && GET_CODE (parts.base) == SUBREG)
3699 parts.base = SUBREG_REG (parts.base);
3700 if (parts.index && GET_CODE (parts.index) == SUBREG)
3701 parts.index = SUBREG_REG (parts.index);
3703 /* Look for some component that isn't known to be aligned. */
3707 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3712 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3717 if (GET_CODE (parts.disp) != CONST_INT
3718 || (INTVAL (parts.disp) & 3) != 0)
3722 /* Didn't find one -- this must be an aligned address. */
3726 /* Return true if the constant is something that can be loaded with
3727 a special instruction. Only handle 0.0 and 1.0; others are less
3731 standard_80387_constant_p (x)
3734 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3736 /* Note that on the 80387, other constants, such as pi, that we should support
3737 too. On some machines, these are much slower to load as standard constant,
3738 than to load from doubles in memory. */
3739 if (x == CONST0_RTX (GET_MODE (x)))
3741 if (x == CONST1_RTX (GET_MODE (x)))
3746 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3749 standard_sse_constant_p (x)
3752 if (GET_CODE (x) != CONST_DOUBLE)
3754 return (x == CONST0_RTX (GET_MODE (x)));
3757 /* Returns 1 if OP contains a symbol reference */
3760 symbolic_reference_mentioned_p (op)
3763 register const char *fmt;
3766 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3769 fmt = GET_RTX_FORMAT (GET_CODE (op));
3770 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3776 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3777 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3781 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3788 /* Return 1 if it is appropriate to emit `ret' instructions in the
3789 body of a function. Do this only if the epilogue is simple, needing a
3790 couple of insns. Prior to reloading, we can't tell how many registers
3791 must be saved, so return 0 then. Return 0 if there is no frame
3792 marker to de-allocate.
3794 If NON_SAVING_SETJMP is defined and true, then it is not possible
3795 for the epilogue to be simple, so return 0. This is a special case
3796 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3797 until final, but jump_optimize may need to know sooner if a
3801 ix86_can_use_return_insn_p ()
3803 struct ix86_frame frame;
3805 #ifdef NON_SAVING_SETJMP
3806 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3810 if (! reload_completed || frame_pointer_needed)
3813 /* Don't allow more than 32 pop, since that's all we can do
3814 with one instruction. */
3815 if (current_function_pops_args
3816 && current_function_args_size >= 32768)
3819 ix86_compute_frame_layout (&frame);
3820 return frame.to_allocate == 0 && frame.nregs == 0;
3823 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3825 x86_64_sign_extended_value (value, allow_rip)
3829 switch (GET_CODE (value))
3831 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3832 to be at least 32 and this all acceptable constants are
3833 represented as CONST_INT. */
3835 if (HOST_BITS_PER_WIDE_INT == 32)
3839 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3840 return trunc_int_for_mode (val, SImode) == val;
3844 /* For certain code models, the symbolic references are known to fit.
3845 in CM_SMALL_PIC model we know it fits if it is local to the shared
3846 library. Don't count TLS SYMBOL_REFs here, since they should fit
3847 only if inside of UNSPEC handled below. */
3849 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL
3851 && ix86_cmodel == CM_SMALL_PIC
3852 && (CONSTANT_POOL_ADDRESS_P (value)
3853 || SYMBOL_REF_FLAG (value))
3854 && ! tls_symbolic_operand (value, GET_MODE (value))));
3856 /* For certain code models, the code is near as well. */
3858 return ix86_cmodel != CM_LARGE
3859 && (allow_rip || ix86_cmodel != CM_SMALL_PIC);
3861 /* We also may accept the offsetted memory references in certain special
3864 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
3865 switch (XINT (XEXP (value, 0), 1))
3867 case UNSPEC_GOTPCREL:
3869 case UNSPEC_GOTNTPOFF:
3875 if (GET_CODE (XEXP (value, 0)) == PLUS)
3877 rtx op1 = XEXP (XEXP (value, 0), 0);
3878 rtx op2 = XEXP (XEXP (value, 0), 1);
3879 HOST_WIDE_INT offset;
3881 if (ix86_cmodel == CM_LARGE)
3883 if (GET_CODE (op2) != CONST_INT)
3885 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3886 switch (GET_CODE (op1))
3889 /* For CM_SMALL assume that latest object is 16MB before
3890 end of 31bits boundary. We may also accept pretty
3891 large negative constants knowing that all objects are
3892 in the positive half of address space. */
3893 if (ix86_cmodel == CM_SMALL
3894 && offset < 16*1024*1024
3895 && trunc_int_for_mode (offset, SImode) == offset)
3897 /* For CM_KERNEL we know that all object resist in the
3898 negative half of 32bits address space. We may not
3899 accept negative offsets, since they may be just off
3900 and we may accept pretty large positive ones. */
3901 if (ix86_cmodel == CM_KERNEL
3903 && trunc_int_for_mode (offset, SImode) == offset)
3905 /* For CM_SMALL_PIC, we can make similar assumptions
3906 as for CM_SMALL model, if we know the symbol is local
3907 to the shared library. Disallow any TLS symbols,
3908 since they should always be enclosed in an UNSPEC. */
3909 if (ix86_cmodel == CM_SMALL_PIC
3911 && (CONSTANT_POOL_ADDRESS_P (op1)
3912 || SYMBOL_REF_FLAG (op1))
3913 && ! tls_symbolic_operand (op1, GET_MODE (op1))
3914 && offset < 16*1024*1024
3915 && offset >= -16*1024*1024
3916 && trunc_int_for_mode (offset, SImode) == offset)
3920 /* These conditions are similar to SYMBOL_REF ones, just the
3921 constraints for code models differ. */
3922 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
3923 || (ix86_cmodel == CM_SMALL_PIC && allow_rip
3924 && offset >= -16*1024*1024))
3925 && offset < 16*1024*1024
3926 && trunc_int_for_mode (offset, SImode) == offset)
3928 if (ix86_cmodel == CM_KERNEL
3930 && trunc_int_for_mode (offset, SImode) == offset)
3934 switch (XINT (op1, 1))
3939 && trunc_int_for_mode (offset, SImode) == offset)
3953 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3955 x86_64_zero_extended_value (value)
3958 switch (GET_CODE (value))
3961 if (HOST_BITS_PER_WIDE_INT == 32)
3962 return (GET_MODE (value) == VOIDmode
3963 && !CONST_DOUBLE_HIGH (value));
3967 if (HOST_BITS_PER_WIDE_INT == 32)
3968 return INTVAL (value) >= 0;
3970 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3973 /* For certain code models, the symbolic references are known to fit. */
3975 return ix86_cmodel == CM_SMALL;
3977 /* For certain code models, the code is near as well. */
3979 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3981 /* We also may accept the offsetted memory references in certain special
3984 if (GET_CODE (XEXP (value, 0)) == PLUS)
3986 rtx op1 = XEXP (XEXP (value, 0), 0);
3987 rtx op2 = XEXP (XEXP (value, 0), 1);
3989 if (ix86_cmodel == CM_LARGE)
3991 switch (GET_CODE (op1))
3995 /* For small code model we may accept pretty large positive
3996 offsets, since one bit is available for free. Negative
3997 offsets are limited by the size of NULL pointer area
3998 specified by the ABI. */
3999 if (ix86_cmodel == CM_SMALL
4000 && GET_CODE (op2) == CONST_INT
4001 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4002 && (trunc_int_for_mode (INTVAL (op2), SImode)
4005 /* ??? For the kernel, we may accept adjustment of
4006 -0x10000000, since we know that it will just convert
4007 negative address space to positive, but perhaps this
4008 is not worthwhile. */
4011 /* These conditions are similar to SYMBOL_REF ones, just the
4012 constraints for code models differ. */
4013 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4014 && GET_CODE (op2) == CONST_INT
4015 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4016 && (trunc_int_for_mode (INTVAL (op2), SImode)
4030 /* Value should be nonzero if functions must have frame pointers.
4031 Zero means the frame pointer need not be set up (and parms may
4032 be accessed via the stack pointer) in functions that seem suitable. */
4035 ix86_frame_pointer_required ()
4037 /* If we accessed previous frames, then the generated code expects
4038 to be able to access the saved ebp value in our frame. */
4039 if (cfun->machine->accesses_prev_frame)
4042 /* Several x86 os'es need a frame pointer for other reasons,
4043 usually pertaining to setjmp. */
4044 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4047 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4048 the frame pointer by default. Turn it back on now if we've not
4049 got a leaf function. */
4050 if (TARGET_OMIT_LEAF_FRAME_POINTER
4051 && (!current_function_is_leaf || current_function_profile))
4057 /* Record that the current function accesses previous call frames. */
4060 ix86_setup_frame_addresses ()
4062 cfun->machine->accesses_prev_frame = 1;
4065 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4066 # define USE_HIDDEN_LINKONCE 1
4068 # define USE_HIDDEN_LINKONCE 0
4071 static int pic_labels_used;
4073 /* Fills in the label name that should be used for a pc thunk for
4074 the given register. */
4077 get_pc_thunk_name (name, regno)
4081 if (USE_HIDDEN_LINKONCE)
4082 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4084 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4088 /* This function generates code for -fpic that loads %ebx with
4089 the return address of the caller and then returns. */
4092 ix86_asm_file_end (file)
4098 for (regno = 0; regno < 8; ++regno)
4102 if (! ((pic_labels_used >> regno) & 1))
4105 get_pc_thunk_name (name, regno);
4107 if (USE_HIDDEN_LINKONCE)
4111 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4113 TREE_PUBLIC (decl) = 1;
4114 TREE_STATIC (decl) = 1;
4115 DECL_ONE_ONLY (decl) = 1;
4117 (*targetm.asm_out.unique_section) (decl, 0);
4118 named_section (decl, NULL, 0);
4120 (*targetm.asm_out.globalize_label) (file, name);
4121 fputs ("\t.hidden\t", file);
4122 assemble_name (file, name);
4124 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4129 ASM_OUTPUT_LABEL (file, name);
4132 xops[0] = gen_rtx_REG (SImode, regno);
4133 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4134 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4135 output_asm_insn ("ret", xops);
4139 /* Emit code for the SET_GOT patterns. */
4142 output_set_got (dest)
4148 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4150 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4152 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4155 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4157 output_asm_insn ("call\t%a2", xops);
4160 /* Output the "canonical" label name ("Lxx$pb") here too. This
4161 is what will be referred to by the Mach-O PIC subsystem. */
4162 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4164 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4165 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4168 output_asm_insn ("pop{l}\t%0", xops);
4173 get_pc_thunk_name (name, REGNO (dest));
4174 pic_labels_used |= 1 << REGNO (dest);
4176 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4177 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4178 output_asm_insn ("call\t%X2", xops);
4181 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4182 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4183 else if (!TARGET_MACHO)
4184 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4189 /* Generate an "push" pattern for input ARG. */
4195 return gen_rtx_SET (VOIDmode,
4197 gen_rtx_PRE_DEC (Pmode,
4198 stack_pointer_rtx)),
4202 /* Return >= 0 if there is an unused call-clobbered register available
4203 for the entire function. */
4206 ix86_select_alt_pic_regnum ()
4208 if (current_function_is_leaf && !current_function_profile)
4211 for (i = 2; i >= 0; --i)
4212 if (!regs_ever_live[i])
4216 return INVALID_REGNUM;
4219 /* Return 1 if we need to save REGNO. */
4221 ix86_save_reg (regno, maybe_eh_return)
4223 int maybe_eh_return;
4225 if (pic_offset_table_rtx
4226 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4227 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4228 || current_function_profile
4229 || current_function_calls_eh_return))
4231 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4236 if (current_function_calls_eh_return && maybe_eh_return)
4241 unsigned test = EH_RETURN_DATA_REGNO (i);
4242 if (test == INVALID_REGNUM)
4249 return (regs_ever_live[regno]
4250 && !call_used_regs[regno]
4251 && !fixed_regs[regno]
4252 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4255 /* Return number of registers to be saved on the stack. */
4263 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4264 if (ix86_save_reg (regno, true))
4269 /* Return the offset between two registers, one to be eliminated, and the other
4270 its replacement, at the start of a routine. */
4273 ix86_initial_elimination_offset (from, to)
4277 struct ix86_frame frame;
4278 ix86_compute_frame_layout (&frame);
4280 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4281 return frame.hard_frame_pointer_offset;
4282 else if (from == FRAME_POINTER_REGNUM
4283 && to == HARD_FRAME_POINTER_REGNUM)
4284 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4287 if (to != STACK_POINTER_REGNUM)
4289 else if (from == ARG_POINTER_REGNUM)
4290 return frame.stack_pointer_offset;
4291 else if (from != FRAME_POINTER_REGNUM)
4294 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4298 /* Fill structure ix86_frame about frame of currently computed function. */
4301 ix86_compute_frame_layout (frame)
4302 struct ix86_frame *frame;
4304 HOST_WIDE_INT total_size;
4305 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4307 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4308 HOST_WIDE_INT size = get_frame_size ();
4310 frame->nregs = ix86_nsaved_regs ();
4313 /* Skip return address and saved base pointer. */
4314 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4316 frame->hard_frame_pointer_offset = offset;
4318 /* Do some sanity checking of stack_alignment_needed and
4319 preferred_alignment, since i386 port is the only using those features
4320 that may break easily. */
4322 if (size && !stack_alignment_needed)
4324 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4326 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4328 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4331 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4332 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4334 /* Register save area */
4335 offset += frame->nregs * UNITS_PER_WORD;
4338 if (ix86_save_varrargs_registers)
4340 offset += X86_64_VARARGS_SIZE;
4341 frame->va_arg_size = X86_64_VARARGS_SIZE;
4344 frame->va_arg_size = 0;
4346 /* Align start of frame for local function. */
4347 frame->padding1 = ((offset + stack_alignment_needed - 1)
4348 & -stack_alignment_needed) - offset;
4350 offset += frame->padding1;
4352 /* Frame pointer points here. */
4353 frame->frame_pointer_offset = offset;
4357 /* Add outgoing arguments area. Can be skipped if we eliminated
4358 all the function calls as dead code. */
4359 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4361 offset += current_function_outgoing_args_size;
4362 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4365 frame->outgoing_arguments_size = 0;
4367 /* Align stack boundary. Only needed if we're calling another function
4369 if (!current_function_is_leaf || current_function_calls_alloca)
4370 frame->padding2 = ((offset + preferred_alignment - 1)
4371 & -preferred_alignment) - offset;
4373 frame->padding2 = 0;
4375 offset += frame->padding2;
4377 /* We've reached end of stack frame. */
4378 frame->stack_pointer_offset = offset;
4380 /* Size prologue needs to allocate. */
4381 frame->to_allocate =
4382 (size + frame->padding1 + frame->padding2
4383 + frame->outgoing_arguments_size + frame->va_arg_size);
4385 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4386 && current_function_is_leaf)
4388 frame->red_zone_size = frame->to_allocate;
4389 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4390 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4393 frame->red_zone_size = 0;
4394 frame->to_allocate -= frame->red_zone_size;
4395 frame->stack_pointer_offset -= frame->red_zone_size;
4397 fprintf (stderr, "nregs: %i\n", frame->nregs);
4398 fprintf (stderr, "size: %i\n", size);
4399 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4400 fprintf (stderr, "padding1: %i\n", frame->padding1);
4401 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4402 fprintf (stderr, "padding2: %i\n", frame->padding2);
4403 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4404 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4405 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4406 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4407 frame->hard_frame_pointer_offset);
4408 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4412 /* Emit code to save registers in the prologue. */
4415 ix86_emit_save_regs ()
4420 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4421 if (ix86_save_reg (regno, true))
4423 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4424 RTX_FRAME_RELATED_P (insn) = 1;
4428 /* Emit code to save registers using MOV insns. First register
4429 is restored from POINTER + OFFSET. */
4431 ix86_emit_save_regs_using_mov (pointer, offset)
4433 HOST_WIDE_INT offset;
4438 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4439 if (ix86_save_reg (regno, true))
4441 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4443 gen_rtx_REG (Pmode, regno));
4444 RTX_FRAME_RELATED_P (insn) = 1;
4445 offset += UNITS_PER_WORD;
4449 /* Expand the prologue into a bunch of separate insns. */
4452 ix86_expand_prologue ()
4456 struct ix86_frame frame;
4458 HOST_WIDE_INT allocate;
4462 use_fast_prologue_epilogue
4463 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4464 if (TARGET_PROLOGUE_USING_MOVE)
4465 use_mov = use_fast_prologue_epilogue;
4467 ix86_compute_frame_layout (&frame);
4469 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4470 slower on all targets. Also sdb doesn't like it. */
4472 if (frame_pointer_needed)
4474 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4475 RTX_FRAME_RELATED_P (insn) = 1;
4477 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4478 RTX_FRAME_RELATED_P (insn) = 1;
4481 allocate = frame.to_allocate;
4482 /* In case we are dealing only with single register and empty frame,
4483 push is equivalent of the mov+add sequence. */
4484 if (allocate == 0 && frame.nregs <= 1)
4488 ix86_emit_save_regs ();
4490 allocate += frame.nregs * UNITS_PER_WORD;
4494 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4496 insn = emit_insn (gen_pro_epilogue_adjust_stack
4497 (stack_pointer_rtx, stack_pointer_rtx,
4498 GEN_INT (-allocate)));
4499 RTX_FRAME_RELATED_P (insn) = 1;
4503 /* ??? Is this only valid for Win32? */
4510 arg0 = gen_rtx_REG (SImode, 0);
4511 emit_move_insn (arg0, GEN_INT (allocate));
4513 sym = gen_rtx_MEM (FUNCTION_MODE,
4514 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4515 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4517 CALL_INSN_FUNCTION_USAGE (insn)
4518 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4519 CALL_INSN_FUNCTION_USAGE (insn));
4523 if (!frame_pointer_needed || !frame.to_allocate)
4524 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4526 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4527 -frame.nregs * UNITS_PER_WORD);
4530 #ifdef SUBTARGET_PROLOGUE
4534 pic_reg_used = false;
4535 if (pic_offset_table_rtx
4536 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4537 || current_function_profile))
4539 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4541 if (alt_pic_reg_used != INVALID_REGNUM)
4542 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4544 pic_reg_used = true;
4549 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4551 /* Even with accurate pre-reload life analysis, we can wind up
4552 deleting all references to the pic register after reload.
4553 Consider if cross-jumping unifies two sides of a branch
4554 controled by a comparison vs the only read from a global.
4555 In which case, allow the set_got to be deleted, though we're
4556 too late to do anything about the ebx save in the prologue. */
4557 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4560 /* Prevent function calls from be scheduled before the call to mcount.
4561 In the pic_reg_used case, make sure that the got load isn't deleted. */
4562 if (current_function_profile)
4563 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4566 /* Emit code to restore saved registers using MOV insns. First register
4567 is restored from POINTER + OFFSET. */
4569 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4572 int maybe_eh_return;
4576 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4577 if (ix86_save_reg (regno, maybe_eh_return))
4579 emit_move_insn (gen_rtx_REG (Pmode, regno),
4580 adjust_address (gen_rtx_MEM (Pmode, pointer),
4582 offset += UNITS_PER_WORD;
4586 /* Restore function stack, frame, and registers. */
4589 ix86_expand_epilogue (style)
4593 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4594 struct ix86_frame frame;
4595 HOST_WIDE_INT offset;
4597 ix86_compute_frame_layout (&frame);
4599 /* Calculate start of saved registers relative to ebp. Special care
4600 must be taken for the normal return case of a function using
4601 eh_return: the eax and edx registers are marked as saved, but not
4602 restored along this path. */
4603 offset = frame.nregs;
4604 if (current_function_calls_eh_return && style != 2)
4606 offset *= -UNITS_PER_WORD;
4608 /* If we're only restoring one register and sp is not valid then
4609 using a move instruction to restore the register since it's
4610 less work than reloading sp and popping the register.
4612 The default code result in stack adjustment using add/lea instruction,
4613 while this code results in LEAVE instruction (or discrete equivalent),
4614 so it is profitable in some other cases as well. Especially when there
4615 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4616 and there is exactly one register to pop. This heruistic may need some
4617 tuning in future. */
4618 if ((!sp_valid && frame.nregs <= 1)
4619 || (TARGET_EPILOGUE_USING_MOVE
4620 && use_fast_prologue_epilogue
4621 && (frame.nregs > 1 || frame.to_allocate))
4622 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4623 || (frame_pointer_needed && TARGET_USE_LEAVE
4624 && use_fast_prologue_epilogue && frame.nregs == 1)
4625 || current_function_calls_eh_return)
4627 /* Restore registers. We can use ebp or esp to address the memory
4628 locations. If both are available, default to ebp, since offsets
4629 are known to be small. Only exception is esp pointing directly to the
4630 end of block of saved registers, where we may simplify addressing
4633 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4634 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4635 frame.to_allocate, style == 2);
4637 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4638 offset, style == 2);
4640 /* eh_return epilogues need %ecx added to the stack pointer. */
4643 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4645 if (frame_pointer_needed)
4647 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4648 tmp = plus_constant (tmp, UNITS_PER_WORD);
4649 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4651 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4652 emit_move_insn (hard_frame_pointer_rtx, tmp);
4654 emit_insn (gen_pro_epilogue_adjust_stack
4655 (stack_pointer_rtx, sa, const0_rtx));
4659 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4660 tmp = plus_constant (tmp, (frame.to_allocate
4661 + frame.nregs * UNITS_PER_WORD));
4662 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4665 else if (!frame_pointer_needed)
4666 emit_insn (gen_pro_epilogue_adjust_stack
4667 (stack_pointer_rtx, stack_pointer_rtx,
4668 GEN_INT (frame.to_allocate
4669 + frame.nregs * UNITS_PER_WORD)));
4670 /* If not an i386, mov & pop is faster than "leave". */
4671 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4672 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4675 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4676 hard_frame_pointer_rtx,
4679 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4681 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4686 /* First step is to deallocate the stack frame so that we can
4687 pop the registers. */
4690 if (!frame_pointer_needed)
4692 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4693 hard_frame_pointer_rtx,
4696 else if (frame.to_allocate)
4697 emit_insn (gen_pro_epilogue_adjust_stack
4698 (stack_pointer_rtx, stack_pointer_rtx,
4699 GEN_INT (frame.to_allocate)));
4701 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4702 if (ix86_save_reg (regno, false))
4705 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4707 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4709 if (frame_pointer_needed)
4711 /* Leave results in shorter dependency chains on CPUs that are
4712 able to grok it fast. */
4713 if (TARGET_USE_LEAVE)
4714 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4715 else if (TARGET_64BIT)
4716 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4718 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4722 /* Sibcall epilogues don't want a return instruction. */
4726 if (current_function_pops_args && current_function_args_size)
4728 rtx popc = GEN_INT (current_function_pops_args);
4730 /* i386 can only pop 64K bytes. If asked to pop more, pop
4731 return address, do explicit add, and jump indirectly to the
4734 if (current_function_pops_args >= 65536)
4736 rtx ecx = gen_rtx_REG (SImode, 2);
4738 /* There are is no "pascal" calling convention in 64bit ABI. */
4742 emit_insn (gen_popsi1 (ecx));
4743 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4744 emit_jump_insn (gen_return_indirect_internal (ecx));
4747 emit_jump_insn (gen_return_pop_internal (popc));
4750 emit_jump_insn (gen_return_internal ());
4753 /* Reset from the function's potential modifications. */
4756 ix86_output_function_epilogue (file, size)
4757 FILE *file ATTRIBUTE_UNUSED;
4758 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
4760 if (pic_offset_table_rtx)
4761 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4764 /* Extract the parts of an RTL expression that is a valid memory address
4765 for an instruction. Return 0 if the structure of the address is
4766 grossly off. Return -1 if the address contains ASHIFT, so it is not
4767 strictly valid, but still used for computing length of lea instruction.
4771 ix86_decompose_address (addr, out)
4773 struct ix86_address *out;
4775 rtx base = NULL_RTX;
4776 rtx index = NULL_RTX;
4777 rtx disp = NULL_RTX;
4778 HOST_WIDE_INT scale = 1;
4779 rtx scale_rtx = NULL_RTX;
4782 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4784 else if (GET_CODE (addr) == PLUS)
4786 rtx op0 = XEXP (addr, 0);
4787 rtx op1 = XEXP (addr, 1);
4788 enum rtx_code code0 = GET_CODE (op0);
4789 enum rtx_code code1 = GET_CODE (op1);
4791 if (code0 == REG || code0 == SUBREG)
4793 if (code1 == REG || code1 == SUBREG)
4794 index = op0, base = op1; /* index + base */
4796 base = op0, disp = op1; /* base + displacement */
4798 else if (code0 == MULT)
4800 index = XEXP (op0, 0);
4801 scale_rtx = XEXP (op0, 1);
4802 if (code1 == REG || code1 == SUBREG)
4803 base = op1; /* index*scale + base */
4805 disp = op1; /* index*scale + disp */
4807 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4809 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4810 scale_rtx = XEXP (XEXP (op0, 0), 1);
4811 base = XEXP (op0, 1);
4814 else if (code0 == PLUS)
4816 index = XEXP (op0, 0); /* index + base + disp */
4817 base = XEXP (op0, 1);
4823 else if (GET_CODE (addr) == MULT)
4825 index = XEXP (addr, 0); /* index*scale */
4826 scale_rtx = XEXP (addr, 1);
4828 else if (GET_CODE (addr) == ASHIFT)
4832 /* We're called for lea too, which implements ashift on occasion. */
4833 index = XEXP (addr, 0);
4834 tmp = XEXP (addr, 1);
4835 if (GET_CODE (tmp) != CONST_INT)
4837 scale = INTVAL (tmp);
4838 if ((unsigned HOST_WIDE_INT) scale > 3)
4844 disp = addr; /* displacement */
4846 /* Extract the integral value of scale. */
4849 if (GET_CODE (scale_rtx) != CONST_INT)
4851 scale = INTVAL (scale_rtx);
4854 /* Allow arg pointer and stack pointer as index if there is not scaling */
4855 if (base && index && scale == 1
4856 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4857 || index == stack_pointer_rtx))
4864 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4865 if ((base == hard_frame_pointer_rtx
4866 || base == frame_pointer_rtx
4867 || base == arg_pointer_rtx) && !disp)
4870 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4871 Avoid this by transforming to [%esi+0]. */
4872 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4873 && base && !index && !disp
4875 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4878 /* Special case: encode reg+reg instead of reg*2. */
4879 if (!base && index && scale && scale == 2)
4880 base = index, scale = 1;
4882 /* Special case: scaling cannot be encoded without base or displacement. */
4883 if (!base && !disp && index && scale != 1)
4894 /* Return cost of the memory address x.
4895 For i386, it is better to use a complex address than let gcc copy
4896 the address into a reg and make a new pseudo. But not if the address
4897 requires to two regs - that would mean more pseudos with longer
4900 ix86_address_cost (x)
4903 struct ix86_address parts;
4906 if (!ix86_decompose_address (x, &parts))
4909 if (parts.base && GET_CODE (parts.base) == SUBREG)
4910 parts.base = SUBREG_REG (parts.base);
4911 if (parts.index && GET_CODE (parts.index) == SUBREG)
4912 parts.index = SUBREG_REG (parts.index);
4914 /* More complex memory references are better. */
4915 if (parts.disp && parts.disp != const0_rtx)
4918 /* Attempt to minimize number of registers in the address. */
4920 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4922 && (!REG_P (parts.index)
4923 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4927 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4929 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4930 && parts.base != parts.index)
4933 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
4934 since it's predecode logic can't detect the length of instructions
4935 and it degenerates to vector decoded. Increase cost of such
4936 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4937 to split such addresses or even refuse such addresses at all.
4939 Following addressing modes are affected:
4944 The first and last case may be avoidable by explicitly coding the zero in
4945 memory address, but I don't have AMD-K6 machine handy to check this
4949 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4950 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4951 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4957 /* If X is a machine specific address (i.e. a symbol or label being
4958 referenced as a displacement from the GOT implemented using an
4959 UNSPEC), then return the base term. Otherwise return X. */
4962 ix86_find_base_term (x)
4969 if (GET_CODE (x) != CONST)
4972 if (GET_CODE (term) == PLUS
4973 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4974 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4975 term = XEXP (term, 0);
4976 if (GET_CODE (term) != UNSPEC
4977 || XINT (term, 1) != UNSPEC_GOTPCREL)
4980 term = XVECEXP (term, 0, 0);
4982 if (GET_CODE (term) != SYMBOL_REF
4983 && GET_CODE (term) != LABEL_REF)
4989 if (GET_CODE (x) != PLUS
4990 || XEXP (x, 0) != pic_offset_table_rtx
4991 || GET_CODE (XEXP (x, 1)) != CONST)
4994 term = XEXP (XEXP (x, 1), 0);
4996 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4997 term = XEXP (term, 0);
4999 if (GET_CODE (term) != UNSPEC
5000 || XINT (term, 1) != UNSPEC_GOTOFF)
5003 term = XVECEXP (term, 0, 0);
5005 if (GET_CODE (term) != SYMBOL_REF
5006 && GET_CODE (term) != LABEL_REF)
5012 /* Determine if a given RTX is a valid constant. We already know this
5013 satisfies CONSTANT_P. */
5016 legitimate_constant_p (x)
5021 switch (GET_CODE (x))
5024 /* TLS symbols are not constant. */
5025 if (tls_symbolic_operand (x, Pmode))
5030 inner = XEXP (x, 0);
5032 /* Offsets of TLS symbols are never valid.
5033 Discourage CSE from creating them. */
5034 if (GET_CODE (inner) == PLUS
5035 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5038 /* Only some unspecs are valid as "constants". */
5039 if (GET_CODE (inner) == UNSPEC)
5040 switch (XINT (inner, 1))
5043 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5053 /* Otherwise we handle everything else in the move patterns. */
5057 /* Determine if a given RTX is a valid constant address. */
5060 constant_address_p (x)
5063 switch (GET_CODE (x))
5070 return TARGET_64BIT;
5073 /* For Mach-O, really believe the CONST. */
5076 /* Otherwise fall through. */
5078 return !flag_pic && legitimate_constant_p (x);
5085 /* Nonzero if the constant value X is a legitimate general operand
5086 when generating PIC code. It is given that flag_pic is on and
5087 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5090 legitimate_pic_operand_p (x)
5095 switch (GET_CODE (x))
5098 inner = XEXP (x, 0);
5100 /* Only some unspecs are valid as "constants". */
5101 if (GET_CODE (inner) == UNSPEC)
5102 switch (XINT (inner, 1))
5105 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5113 return legitimate_pic_address_disp_p (x);
5120 /* Determine if a given CONST RTX is a valid memory displacement
5124 legitimate_pic_address_disp_p (disp)
5129 /* In 64bit mode we can allow direct addresses of symbols and labels
5130 when they are not dynamic symbols. */
5131 if (TARGET_64BIT && local_symbolic_operand (disp, Pmode))
5133 if (GET_CODE (disp) != CONST)
5135 disp = XEXP (disp, 0);
5139 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5140 of GOT tables. We should not need these anyway. */
5141 if (GET_CODE (disp) != UNSPEC
5142 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5145 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5146 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5152 if (GET_CODE (disp) == PLUS)
5154 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5156 disp = XEXP (disp, 0);
5160 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5161 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5163 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5164 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5165 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5167 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5168 if (strstr (sym_name, "$pb") != 0)
5173 if (GET_CODE (disp) != UNSPEC)
5176 switch (XINT (disp, 1))
5181 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5183 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5184 case UNSPEC_GOTTPOFF:
5185 case UNSPEC_GOTNTPOFF:
5186 case UNSPEC_INDNTPOFF:
5189 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5191 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5193 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5199 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5200 memory address for an instruction. The MODE argument is the machine mode
5201 for the MEM expression that wants to use this address.
5203 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5204 convert common non-canonical forms to canonical form so that they will
5208 legitimate_address_p (mode, addr, strict)
5209 enum machine_mode mode;
5213 struct ix86_address parts;
5214 rtx base, index, disp;
5215 HOST_WIDE_INT scale;
5216 const char *reason = NULL;
5217 rtx reason_rtx = NULL_RTX;
5219 if (TARGET_DEBUG_ADDR)
5222 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5223 GET_MODE_NAME (mode), strict);
5227 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5229 if (TARGET_DEBUG_ADDR)
5230 fprintf (stderr, "Success.\n");
5234 if (ix86_decompose_address (addr, &parts) <= 0)
5236 reason = "decomposition failed";
5241 index = parts.index;
5243 scale = parts.scale;
5245 /* Validate base register.
5247 Don't allow SUBREG's here, it can lead to spill failures when the base
5248 is one word out of a two word structure, which is represented internally
5256 if (GET_CODE (base) == SUBREG)
5257 reg = SUBREG_REG (base);
5261 if (GET_CODE (reg) != REG)
5263 reason = "base is not a register";
5267 if (GET_MODE (base) != Pmode)
5269 reason = "base is not in Pmode";
5273 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5274 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5276 reason = "base is not valid";
5281 /* Validate index register.
5283 Don't allow SUBREG's here, it can lead to spill failures when the index
5284 is one word out of a two word structure, which is represented internally
5292 if (GET_CODE (index) == SUBREG)
5293 reg = SUBREG_REG (index);
5297 if (GET_CODE (reg) != REG)
5299 reason = "index is not a register";
5303 if (GET_MODE (index) != Pmode)
5305 reason = "index is not in Pmode";
5309 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5310 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5312 reason = "index is not valid";
5317 /* Validate scale factor. */
5320 reason_rtx = GEN_INT (scale);
5323 reason = "scale without index";
5327 if (scale != 2 && scale != 4 && scale != 8)
5329 reason = "scale is not a valid multiplier";
5334 /* Validate displacement. */
5341 if (!x86_64_sign_extended_value (disp, !(index || base)))
5343 reason = "displacement is out of range";
5349 if (GET_CODE (disp) == CONST_DOUBLE)
5351 reason = "displacement is a const_double";
5356 if (GET_CODE (disp) == CONST
5357 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5358 switch (XINT (XEXP (disp, 0), 1))
5362 case UNSPEC_GOTPCREL:
5365 goto is_legitimate_pic;
5367 case UNSPEC_GOTTPOFF:
5368 case UNSPEC_GOTNTPOFF:
5369 case UNSPEC_INDNTPOFF:
5375 reason = "invalid address unspec";
5379 else if (flag_pic && (SYMBOLIC_CONST (disp)
5381 && !machopic_operand_p (disp)
5386 if (TARGET_64BIT && (index || base))
5388 /* foo@dtpoff(%rX) is ok. */
5389 if (GET_CODE (disp) != CONST
5390 || GET_CODE (XEXP (disp, 0)) != PLUS
5391 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5392 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5393 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5394 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5396 reason = "non-constant pic memory reference";
5400 else if (! legitimate_pic_address_disp_p (disp))
5402 reason = "displacement is an invalid pic construct";
5406 /* This code used to verify that a symbolic pic displacement
5407 includes the pic_offset_table_rtx register.
5409 While this is good idea, unfortunately these constructs may
5410 be created by "adds using lea" optimization for incorrect
5419 This code is nonsensical, but results in addressing
5420 GOT table with pic_offset_table_rtx base. We can't
5421 just refuse it easily, since it gets matched by
5422 "addsi3" pattern, that later gets split to lea in the
5423 case output register differs from input. While this
5424 can be handled by separate addsi pattern for this case
5425 that never results in lea, this seems to be easier and
5426 correct fix for crash to disable this test. */
5428 else if (!CONSTANT_ADDRESS_P (disp))
5430 reason = "displacement is not constant";
5435 /* Everything looks valid. */
5436 if (TARGET_DEBUG_ADDR)
5437 fprintf (stderr, "Success.\n");
5441 if (TARGET_DEBUG_ADDR)
5443 fprintf (stderr, "Error: %s\n", reason);
5444 debug_rtx (reason_rtx);
5449 /* Return an unique alias set for the GOT. */
5451 static HOST_WIDE_INT
5452 ix86_GOT_alias_set ()
5454 static HOST_WIDE_INT set = -1;
5456 set = new_alias_set ();
5460 /* Return a legitimate reference for ORIG (an address) using the
5461 register REG. If REG is 0, a new pseudo is generated.
5463 There are two types of references that must be handled:
5465 1. Global data references must load the address from the GOT, via
5466 the PIC reg. An insn is emitted to do this load, and the reg is
5469 2. Static data references, constant pool addresses, and code labels
5470 compute the address as an offset from the GOT, whose base is in
5471 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5472 differentiate them from global data objects. The returned
5473 address is the PIC reg + an unspec constant.
5475 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5476 reg also appears in the address. */
5479 legitimize_pic_address (orig, reg)
5489 reg = gen_reg_rtx (Pmode);
5490 /* Use the generic Mach-O PIC machinery. */
5491 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5494 if (local_symbolic_operand (addr, Pmode))
5496 /* In 64bit mode we can address such objects directly. */
5501 /* This symbol may be referenced via a displacement from the PIC
5502 base address (@GOTOFF). */
5504 if (reload_in_progress)
5505 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5506 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5507 new = gen_rtx_CONST (Pmode, new);
5508 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5512 emit_move_insn (reg, new);
5517 else if (GET_CODE (addr) == SYMBOL_REF)
5521 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5522 new = gen_rtx_CONST (Pmode, new);
5523 new = gen_rtx_MEM (Pmode, new);
5524 RTX_UNCHANGING_P (new) = 1;
5525 set_mem_alias_set (new, ix86_GOT_alias_set ());
5528 reg = gen_reg_rtx (Pmode);
5529 /* Use directly gen_movsi, otherwise the address is loaded
5530 into register for CSE. We don't want to CSE this addresses,
5531 instead we CSE addresses from the GOT table, so skip this. */
5532 emit_insn (gen_movsi (reg, new));
5537 /* This symbol must be referenced via a load from the
5538 Global Offset Table (@GOT). */
5540 if (reload_in_progress)
5541 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5542 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5543 new = gen_rtx_CONST (Pmode, new);
5544 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5545 new = gen_rtx_MEM (Pmode, new);
5546 RTX_UNCHANGING_P (new) = 1;
5547 set_mem_alias_set (new, ix86_GOT_alias_set ());
5550 reg = gen_reg_rtx (Pmode);
5551 emit_move_insn (reg, new);
5557 if (GET_CODE (addr) == CONST)
5559 addr = XEXP (addr, 0);
5561 /* We must match stuff we generate before. Assume the only
5562 unspecs that can get here are ours. Not that we could do
5563 anything with them anyway... */
5564 if (GET_CODE (addr) == UNSPEC
5565 || (GET_CODE (addr) == PLUS
5566 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5568 if (GET_CODE (addr) != PLUS)
5571 if (GET_CODE (addr) == PLUS)
5573 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5575 /* Check first to see if this is a constant offset from a @GOTOFF
5576 symbol reference. */
5577 if (local_symbolic_operand (op0, Pmode)
5578 && GET_CODE (op1) == CONST_INT)
5582 if (reload_in_progress)
5583 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5584 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5586 new = gen_rtx_PLUS (Pmode, new, op1);
5587 new = gen_rtx_CONST (Pmode, new);
5588 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5592 emit_move_insn (reg, new);
5598 if (INTVAL (op1) < -16*1024*1024
5599 || INTVAL (op1) >= 16*1024*1024)
5600 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
5605 base = legitimize_pic_address (XEXP (addr, 0), reg);
5606 new = legitimize_pic_address (XEXP (addr, 1),
5607 base == reg ? NULL_RTX : reg);
5609 if (GET_CODE (new) == CONST_INT)
5610 new = plus_constant (base, INTVAL (new));
5613 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5615 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5616 new = XEXP (new, 1);
5618 new = gen_rtx_PLUS (Pmode, base, new);
5627 ix86_encode_section_info (decl, first)
5629 int first ATTRIBUTE_UNUSED;
5631 bool local_p = (*targetm.binds_local_p) (decl);
5634 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5635 if (GET_CODE (rtl) != MEM)
5637 symbol = XEXP (rtl, 0);
5638 if (GET_CODE (symbol) != SYMBOL_REF)
5641 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5642 symbol so that we may access it directly in the GOT. */
5645 SYMBOL_REF_FLAG (symbol) = local_p;
5647 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5648 "local dynamic", "initial exec" or "local exec" TLS models
5651 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5653 const char *symbol_str;
5656 enum tls_model kind = decl_tls_model (decl);
5658 if (TARGET_64BIT && ! flag_pic)
5660 /* x86-64 doesn't allow non-pic code for shared libraries,
5661 so don't generate GD/LD TLS models for non-pic code. */
5664 case TLS_MODEL_GLOBAL_DYNAMIC:
5665 kind = TLS_MODEL_INITIAL_EXEC; break;
5666 case TLS_MODEL_LOCAL_DYNAMIC:
5667 kind = TLS_MODEL_LOCAL_EXEC; break;
5673 symbol_str = XSTR (symbol, 0);
5675 if (symbol_str[0] == '%')
5677 if (symbol_str[1] == tls_model_chars[kind])
5681 len = strlen (symbol_str) + 1;
5682 newstr = alloca (len + 2);
5685 newstr[1] = tls_model_chars[kind];
5686 memcpy (newstr + 2, symbol_str, len);
5688 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
5692 /* Undo the above when printing symbol names. */
5695 ix86_strip_name_encoding (str)
5705 /* Load the thread pointer into a register. */
5708 get_thread_pointer ()
5712 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5713 tp = gen_rtx_MEM (Pmode, tp);
5714 RTX_UNCHANGING_P (tp) = 1;
5715 set_mem_alias_set (tp, ix86_GOT_alias_set ());
5716 tp = force_reg (Pmode, tp);
5721 /* Try machine-dependent ways of modifying an illegitimate address
5722 to be legitimate. If we find one, return the new, valid address.
5723 This macro is used in only one place: `memory_address' in explow.c.
5725 OLDX is the address as it was before break_out_memory_refs was called.
5726 In some cases it is useful to look at this to decide what needs to be done.
5728 MODE and WIN are passed so that this macro can use
5729 GO_IF_LEGITIMATE_ADDRESS.
5731 It is always safe for this macro to do nothing. It exists to recognize
5732 opportunities to optimize the output.
5734 For the 80386, we handle X+REG by loading X into a register R and
5735 using R+REG. R will go in a general reg and indexing will be used.
5736 However, if REG is a broken-out memory address or multiplication,
5737 nothing needs to be done because REG can certainly go in a general reg.
5739 When -fpic is used, special handling is needed for symbolic references.
5740 See comments by legitimize_pic_address in i386.c for details. */
5743 legitimize_address (x, oldx, mode)
5745 register rtx oldx ATTRIBUTE_UNUSED;
5746 enum machine_mode mode;
5751 if (TARGET_DEBUG_ADDR)
5753 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5754 GET_MODE_NAME (mode));
5758 log = tls_symbolic_operand (x, mode);
5761 rtx dest, base, off, pic;
5766 case TLS_MODEL_GLOBAL_DYNAMIC:
5767 dest = gen_reg_rtx (Pmode);
5770 rtx rax = gen_rtx_REG (Pmode, 0), insns;
5773 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5774 insns = get_insns ();
5777 emit_libcall_block (insns, dest, rax, x);
5780 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5783 case TLS_MODEL_LOCAL_DYNAMIC:
5784 base = gen_reg_rtx (Pmode);
5787 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5790 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5791 insns = get_insns ();
5794 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5795 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5796 emit_libcall_block (insns, base, rax, note);
5799 emit_insn (gen_tls_local_dynamic_base_32 (base));
5801 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5802 off = gen_rtx_CONST (Pmode, off);
5804 return gen_rtx_PLUS (Pmode, base, off);
5806 case TLS_MODEL_INITIAL_EXEC:
5810 type = UNSPEC_GOTNTPOFF;
5814 if (reload_in_progress)
5815 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5816 pic = pic_offset_table_rtx;
5817 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
5819 else if (!TARGET_GNU_TLS)
5821 pic = gen_reg_rtx (Pmode);
5822 emit_insn (gen_set_got (pic));
5823 type = UNSPEC_GOTTPOFF;
5828 type = UNSPEC_INDNTPOFF;
5831 base = get_thread_pointer ();
5833 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
5834 off = gen_rtx_CONST (Pmode, off);
5836 off = gen_rtx_PLUS (Pmode, pic, off);
5837 off = gen_rtx_MEM (Pmode, off);
5838 RTX_UNCHANGING_P (off) = 1;
5839 set_mem_alias_set (off, ix86_GOT_alias_set ());
5840 dest = gen_reg_rtx (Pmode);
5842 if (TARGET_64BIT || TARGET_GNU_TLS)
5844 emit_move_insn (dest, off);
5845 return gen_rtx_PLUS (Pmode, base, dest);
5848 emit_insn (gen_subsi3 (dest, base, off));
5851 case TLS_MODEL_LOCAL_EXEC:
5852 base = get_thread_pointer ();
5854 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5855 (TARGET_64BIT || TARGET_GNU_TLS)
5856 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5857 off = gen_rtx_CONST (Pmode, off);
5859 if (TARGET_64BIT || TARGET_GNU_TLS)
5860 return gen_rtx_PLUS (Pmode, base, off);
5863 dest = gen_reg_rtx (Pmode);
5864 emit_insn (gen_subsi3 (dest, base, off));
5875 if (flag_pic && SYMBOLIC_CONST (x))
5876 return legitimize_pic_address (x, 0);
5878 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5879 if (GET_CODE (x) == ASHIFT
5880 && GET_CODE (XEXP (x, 1)) == CONST_INT
5881 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5884 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5885 GEN_INT (1 << log));
5888 if (GET_CODE (x) == PLUS)
5890 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5892 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5893 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5894 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5897 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5898 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5899 GEN_INT (1 << log));
5902 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5903 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5904 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5907 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5908 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5909 GEN_INT (1 << log));
5912 /* Put multiply first if it isn't already. */
5913 if (GET_CODE (XEXP (x, 1)) == MULT)
5915 rtx tmp = XEXP (x, 0);
5916 XEXP (x, 0) = XEXP (x, 1);
5921 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5922 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5923 created by virtual register instantiation, register elimination, and
5924 similar optimizations. */
5925 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5928 x = gen_rtx_PLUS (Pmode,
5929 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5930 XEXP (XEXP (x, 1), 0)),
5931 XEXP (XEXP (x, 1), 1));
5935 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5936 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5937 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5938 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5939 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5940 && CONSTANT_P (XEXP (x, 1)))
5943 rtx other = NULL_RTX;
5945 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5947 constant = XEXP (x, 1);
5948 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5950 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5952 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5953 other = XEXP (x, 1);
5961 x = gen_rtx_PLUS (Pmode,
5962 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5963 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5964 plus_constant (other, INTVAL (constant)));
5968 if (changed && legitimate_address_p (mode, x, FALSE))
5971 if (GET_CODE (XEXP (x, 0)) == MULT)
5974 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5977 if (GET_CODE (XEXP (x, 1)) == MULT)
5980 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5984 && GET_CODE (XEXP (x, 1)) == REG
5985 && GET_CODE (XEXP (x, 0)) == REG)
5988 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5991 x = legitimize_pic_address (x, 0);
5994 if (changed && legitimate_address_p (mode, x, FALSE))
5997 if (GET_CODE (XEXP (x, 0)) == REG)
5999 register rtx temp = gen_reg_rtx (Pmode);
6000 register rtx val = force_operand (XEXP (x, 1), temp);
6002 emit_move_insn (temp, val);
6008 else if (GET_CODE (XEXP (x, 1)) == REG)
6010 register rtx temp = gen_reg_rtx (Pmode);
6011 register rtx val = force_operand (XEXP (x, 0), temp);
6013 emit_move_insn (temp, val);
6023 /* Print an integer constant expression in assembler syntax. Addition
6024 and subtraction are the only arithmetic that may appear in these
6025 expressions. FILE is the stdio stream to write to, X is the rtx, and
6026 CODE is the operand print code from the output string. */
6029 output_pic_addr_const (file, x, code)
6036 switch (GET_CODE (x))
6046 assemble_name (file, XSTR (x, 0));
6047 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
6048 fputs ("@PLT", file);
6055 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6056 assemble_name (asm_out_file, buf);
6060 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6064 /* This used to output parentheses around the expression,
6065 but that does not work on the 386 (either ATT or BSD assembler). */
6066 output_pic_addr_const (file, XEXP (x, 0), code);
6070 if (GET_MODE (x) == VOIDmode)
6072 /* We can use %d if the number is <32 bits and positive. */
6073 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6074 fprintf (file, "0x%lx%08lx",
6075 (unsigned long) CONST_DOUBLE_HIGH (x),
6076 (unsigned long) CONST_DOUBLE_LOW (x));
6078 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6081 /* We can't handle floating point constants;
6082 PRINT_OPERAND must handle them. */
6083 output_operand_lossage ("floating constant misused");
6087 /* Some assemblers need integer constants to appear first. */
6088 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6090 output_pic_addr_const (file, XEXP (x, 0), code);
6092 output_pic_addr_const (file, XEXP (x, 1), code);
6094 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6096 output_pic_addr_const (file, XEXP (x, 1), code);
6098 output_pic_addr_const (file, XEXP (x, 0), code);
6106 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6107 output_pic_addr_const (file, XEXP (x, 0), code);
6109 output_pic_addr_const (file, XEXP (x, 1), code);
6111 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6115 if (XVECLEN (x, 0) != 1)
6117 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6118 switch (XINT (x, 1))
6121 fputs ("@GOT", file);
6124 fputs ("@GOTOFF", file);
6126 case UNSPEC_GOTPCREL:
6127 fputs ("@GOTPCREL(%rip)", file);
6129 case UNSPEC_GOTTPOFF:
6130 /* FIXME: This might be @TPOFF in Sun ld too. */
6131 fputs ("@GOTTPOFF", file);
6134 fputs ("@TPOFF", file);
6138 fputs ("@TPOFF", file);
6140 fputs ("@NTPOFF", file);
6143 fputs ("@DTPOFF", file);
6145 case UNSPEC_GOTNTPOFF:
6147 fputs ("@GOTTPOFF(%rip)", file);
6149 fputs ("@GOTNTPOFF", file);
6151 case UNSPEC_INDNTPOFF:
6152 fputs ("@INDNTPOFF", file);
6155 output_operand_lossage ("invalid UNSPEC as operand");
6161 output_operand_lossage ("invalid expression as operand");
6165 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6166 We need to handle our special PIC relocations. */
6169 i386_dwarf_output_addr_const (file, x)
6174 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6178 fprintf (file, "%s", ASM_LONG);
6181 output_pic_addr_const (file, x, '\0');
6183 output_addr_const (file, x);
6187 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6188 We need to emit DTP-relative relocations. */
6191 i386_output_dwarf_dtprel (file, size, x)
6196 fputs (ASM_LONG, file);
6197 output_addr_const (file, x);
6198 fputs ("@DTPOFF", file);
6204 fputs (", 0", file);
6211 /* In the name of slightly smaller debug output, and to cater to
6212 general assembler losage, recognize PIC+GOTOFF and turn it back
6213 into a direct symbol reference. */
6216 i386_simplify_dwarf_addr (orig_x)
6221 if (GET_CODE (x) == MEM)
6226 if (GET_CODE (x) != CONST
6227 || GET_CODE (XEXP (x, 0)) != UNSPEC
6228 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6229 || GET_CODE (orig_x) != MEM)
6231 return XVECEXP (XEXP (x, 0), 0, 0);
6234 if (GET_CODE (x) != PLUS
6235 || GET_CODE (XEXP (x, 1)) != CONST)
6238 if (GET_CODE (XEXP (x, 0)) == REG
6239 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6240 /* %ebx + GOT/GOTOFF */
6242 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6244 /* %ebx + %reg * scale + GOT/GOTOFF */
6246 if (GET_CODE (XEXP (y, 0)) == REG
6247 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6249 else if (GET_CODE (XEXP (y, 1)) == REG
6250 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6254 if (GET_CODE (y) != REG
6255 && GET_CODE (y) != MULT
6256 && GET_CODE (y) != ASHIFT)
6262 x = XEXP (XEXP (x, 1), 0);
6263 if (GET_CODE (x) == UNSPEC
6264 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6265 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6268 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6269 return XVECEXP (x, 0, 0);
6272 if (GET_CODE (x) == PLUS
6273 && GET_CODE (XEXP (x, 0)) == UNSPEC
6274 && GET_CODE (XEXP (x, 1)) == CONST_INT
6275 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6276 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6277 && GET_CODE (orig_x) != MEM)))
6279 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6281 return gen_rtx_PLUS (Pmode, y, x);
6289 put_condition_code (code, mode, reverse, fp, file)
6291 enum machine_mode mode;
6297 if (mode == CCFPmode || mode == CCFPUmode)
6299 enum rtx_code second_code, bypass_code;
6300 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6301 if (bypass_code != NIL || second_code != NIL)
6303 code = ix86_fp_compare_code_to_integer (code);
6307 code = reverse_condition (code);
6318 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6323 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6324 Those same assemblers have the same but opposite losage on cmov. */
6327 suffix = fp ? "nbe" : "a";
6330 if (mode == CCNOmode || mode == CCGOCmode)
6332 else if (mode == CCmode || mode == CCGCmode)
6343 if (mode == CCNOmode || mode == CCGOCmode)
6345 else if (mode == CCmode || mode == CCGCmode)
6354 suffix = fp ? "nb" : "ae";
6357 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6367 suffix = fp ? "u" : "p";
6370 suffix = fp ? "nu" : "np";
6375 fputs (suffix, file);
6379 print_reg (x, code, file)
6384 if (REGNO (x) == ARG_POINTER_REGNUM
6385 || REGNO (x) == FRAME_POINTER_REGNUM
6386 || REGNO (x) == FLAGS_REG
6387 || REGNO (x) == FPSR_REG)
6390 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6393 if (code == 'w' || MMX_REG_P (x))
6395 else if (code == 'b')
6397 else if (code == 'k')
6399 else if (code == 'q')
6401 else if (code == 'y')
6403 else if (code == 'h')
6406 code = GET_MODE_SIZE (GET_MODE (x));
6408 /* Irritatingly, AMD extended registers use different naming convention
6409 from the normal registers. */
6410 if (REX_INT_REG_P (x))
6417 error ("extended registers have no high halves");
6420 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6423 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6426 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6429 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6432 error ("unsupported operand size for extended register");
6440 if (STACK_TOP_P (x))
6442 fputs ("st(0)", file);
6449 if (! ANY_FP_REG_P (x))
6450 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6454 fputs (hi_reg_name[REGNO (x)], file);
6457 fputs (qi_reg_name[REGNO (x)], file);
6460 fputs (qi_high_reg_name[REGNO (x)], file);
6467 /* Locate some local-dynamic symbol still in use by this function
6468 so that we can print its name in some tls_local_dynamic_base
6472 get_some_local_dynamic_name ()
6476 if (cfun->machine->some_ld_name)
6477 return cfun->machine->some_ld_name;
6479 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6481 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6482 return cfun->machine->some_ld_name;
6488 get_some_local_dynamic_name_1 (px, data)
6490 void *data ATTRIBUTE_UNUSED;
6494 if (GET_CODE (x) == SYMBOL_REF
6495 && local_dynamic_symbolic_operand (x, Pmode))
6497 cfun->machine->some_ld_name = XSTR (x, 0);
6505 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6506 C -- print opcode suffix for set/cmov insn.
6507 c -- like C, but print reversed condition
6508 F,f -- likewise, but for floating-point.
6509 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6511 R -- print the prefix for register names.
6512 z -- print the opcode suffix for the size of the current operand.
6513 * -- print a star (in certain assembler syntax)
6514 A -- print an absolute memory reference.
6515 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6516 s -- print a shift double count, followed by the assemblers argument
6518 b -- print the QImode name of the register for the indicated operand.
6519 %b0 would print %al if operands[0] is reg 0.
6520 w -- likewise, print the HImode name of the register.
6521 k -- likewise, print the SImode name of the register.
6522 q -- likewise, print the DImode name of the register.
6523 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6524 y -- print "st(0)" instead of "st" as a register.
6525 D -- print condition for SSE cmp instruction.
6526 P -- if PIC, print an @PLT suffix.
6527 X -- don't print any sort of PIC '@' suffix for a symbol.
6528 & -- print some in-use local-dynamic symbol name.
6532 print_operand (file, x, code)
6542 if (ASSEMBLER_DIALECT == ASM_ATT)
6547 assemble_name (file, get_some_local_dynamic_name ());
6551 if (ASSEMBLER_DIALECT == ASM_ATT)
6553 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6555 /* Intel syntax. For absolute addresses, registers should not
6556 be surrounded by braces. */
6557 if (GET_CODE (x) != REG)
6560 PRINT_OPERAND (file, x, 0);
6568 PRINT_OPERAND (file, x, 0);
6573 if (ASSEMBLER_DIALECT == ASM_ATT)
6578 if (ASSEMBLER_DIALECT == ASM_ATT)
6583 if (ASSEMBLER_DIALECT == ASM_ATT)
6588 if (ASSEMBLER_DIALECT == ASM_ATT)
6593 if (ASSEMBLER_DIALECT == ASM_ATT)
6598 if (ASSEMBLER_DIALECT == ASM_ATT)
6603 /* 387 opcodes don't get size suffixes if the operands are
6605 if (STACK_REG_P (x))
6608 /* Likewise if using Intel opcodes. */
6609 if (ASSEMBLER_DIALECT == ASM_INTEL)
6612 /* This is the size of op from size of operand. */
6613 switch (GET_MODE_SIZE (GET_MODE (x)))
6616 #ifdef HAVE_GAS_FILDS_FISTS
6622 if (GET_MODE (x) == SFmode)
6637 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6639 #ifdef GAS_MNEMONICS
6665 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6667 PRINT_OPERAND (file, x, 0);
6673 /* Little bit of braindamage here. The SSE compare instructions
6674 does use completely different names for the comparisons that the
6675 fp conditional moves. */
6676 switch (GET_CODE (x))
6691 fputs ("unord", file);
6695 fputs ("neq", file);
6699 fputs ("nlt", file);
6703 fputs ("nle", file);
6706 fputs ("ord", file);
6714 #ifdef CMOV_SUN_AS_SYNTAX
6715 if (ASSEMBLER_DIALECT == ASM_ATT)
6717 switch (GET_MODE (x))
6719 case HImode: putc ('w', file); break;
6721 case SFmode: putc ('l', file); break;
6723 case DFmode: putc ('q', file); break;
6731 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6734 #ifdef CMOV_SUN_AS_SYNTAX
6735 if (ASSEMBLER_DIALECT == ASM_ATT)
6738 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6741 /* Like above, but reverse condition */
6743 /* Check to see if argument to %c is really a constant
6744 and not a condition code which needs to be reversed. */
6745 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
6747 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6750 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6753 #ifdef CMOV_SUN_AS_SYNTAX
6754 if (ASSEMBLER_DIALECT == ASM_ATT)
6757 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6763 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6766 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6769 int pred_val = INTVAL (XEXP (x, 0));
6771 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6772 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6774 int taken = pred_val > REG_BR_PROB_BASE / 2;
6775 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6777 /* Emit hints only in the case default branch prediction
6778 heruistics would fail. */
6779 if (taken != cputaken)
6781 /* We use 3e (DS) prefix for taken branches and
6782 2e (CS) prefix for not taken branches. */
6784 fputs ("ds ; ", file);
6786 fputs ("cs ; ", file);
6793 output_operand_lossage ("invalid operand code `%c'", code);
6797 if (GET_CODE (x) == REG)
6799 PRINT_REG (x, code, file);
6802 else if (GET_CODE (x) == MEM)
6804 /* No `byte ptr' prefix for call instructions. */
6805 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6808 switch (GET_MODE_SIZE (GET_MODE (x)))
6810 case 1: size = "BYTE"; break;
6811 case 2: size = "WORD"; break;
6812 case 4: size = "DWORD"; break;
6813 case 8: size = "QWORD"; break;
6814 case 12: size = "XWORD"; break;
6815 case 16: size = "XMMWORD"; break;
6820 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6823 else if (code == 'w')
6825 else if (code == 'k')
6829 fputs (" PTR ", file);
6833 if (flag_pic && CONSTANT_ADDRESS_P (x))
6834 output_pic_addr_const (file, x, code);
6835 /* Avoid (%rip) for call operands. */
6836 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
6837 && GET_CODE (x) != CONST_INT)
6838 output_addr_const (file, x);
6839 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6840 output_operand_lossage ("invalid constraints for operand");
6845 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6850 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6851 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6853 if (ASSEMBLER_DIALECT == ASM_ATT)
6855 fprintf (file, "0x%lx", l);
6858 /* These float cases don't actually occur as immediate operands. */
6859 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6863 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6864 fprintf (file, "%s", dstr);
6867 else if (GET_CODE (x) == CONST_DOUBLE
6868 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
6872 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6873 fprintf (file, "%s", dstr);
6880 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6882 if (ASSEMBLER_DIALECT == ASM_ATT)
6885 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6886 || GET_CODE (x) == LABEL_REF)
6888 if (ASSEMBLER_DIALECT == ASM_ATT)
6891 fputs ("OFFSET FLAT:", file);
6894 if (GET_CODE (x) == CONST_INT)
6895 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6897 output_pic_addr_const (file, x, code);
6899 output_addr_const (file, x);
6903 /* Print a memory operand whose address is ADDR. */
6906 print_operand_address (file, addr)
6910 struct ix86_address parts;
6911 rtx base, index, disp;
6914 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
6916 if (ASSEMBLER_DIALECT == ASM_INTEL)
6917 fputs ("DWORD PTR ", file);
6918 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6921 fputs ("fs:0", file);
6923 fputs ("gs:0", file);
6927 if (! ix86_decompose_address (addr, &parts))
6931 index = parts.index;
6933 scale = parts.scale;
6935 if (!base && !index)
6937 /* Displacement only requires special attention. */
6939 if (GET_CODE (disp) == CONST_INT)
6941 if (ASSEMBLER_DIALECT == ASM_INTEL)
6943 if (USER_LABEL_PREFIX[0] == 0)
6945 fputs ("ds:", file);
6947 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6950 output_pic_addr_const (file, addr, 0);
6952 output_addr_const (file, addr);
6954 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6956 && ((GET_CODE (addr) == SYMBOL_REF
6957 && ! tls_symbolic_operand (addr, GET_MODE (addr)))
6958 || GET_CODE (addr) == LABEL_REF
6959 || (GET_CODE (addr) == CONST
6960 && GET_CODE (XEXP (addr, 0)) == PLUS
6961 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
6962 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
6963 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
6964 fputs ("(%rip)", file);
6968 if (ASSEMBLER_DIALECT == ASM_ATT)
6973 output_pic_addr_const (file, disp, 0);
6974 else if (GET_CODE (disp) == LABEL_REF)
6975 output_asm_label (disp);
6977 output_addr_const (file, disp);
6982 PRINT_REG (base, 0, file);
6986 PRINT_REG (index, 0, file);
6988 fprintf (file, ",%d", scale);
6994 rtx offset = NULL_RTX;
6998 /* Pull out the offset of a symbol; print any symbol itself. */
6999 if (GET_CODE (disp) == CONST
7000 && GET_CODE (XEXP (disp, 0)) == PLUS
7001 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7003 offset = XEXP (XEXP (disp, 0), 1);
7004 disp = gen_rtx_CONST (VOIDmode,
7005 XEXP (XEXP (disp, 0), 0));
7009 output_pic_addr_const (file, disp, 0);
7010 else if (GET_CODE (disp) == LABEL_REF)
7011 output_asm_label (disp);
7012 else if (GET_CODE (disp) == CONST_INT)
7015 output_addr_const (file, disp);
7021 PRINT_REG (base, 0, file);
7024 if (INTVAL (offset) >= 0)
7026 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7030 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7037 PRINT_REG (index, 0, file);
7039 fprintf (file, "*%d", scale);
7047 output_addr_const_extra (file, x)
7053 if (GET_CODE (x) != UNSPEC)
7056 op = XVECEXP (x, 0, 0);
7057 switch (XINT (x, 1))
7059 case UNSPEC_GOTTPOFF:
7060 output_addr_const (file, op);
7061 /* FIXME: This might be @TPOFF in Sun ld. */
7062 fputs ("@GOTTPOFF", file);
7065 output_addr_const (file, op);
7066 fputs ("@TPOFF", file);
7069 output_addr_const (file, op);
7071 fputs ("@TPOFF", file);
7073 fputs ("@NTPOFF", file);
7076 output_addr_const (file, op);
7077 fputs ("@DTPOFF", file);
7079 case UNSPEC_GOTNTPOFF:
7080 output_addr_const (file, op);
7082 fputs ("@GOTTPOFF(%rip)", file);
7084 fputs ("@GOTNTPOFF", file);
7086 case UNSPEC_INDNTPOFF:
7087 output_addr_const (file, op);
7088 fputs ("@INDNTPOFF", file);
7098 /* Split one or more DImode RTL references into pairs of SImode
7099 references. The RTL can be REG, offsettable MEM, integer constant, or
7100 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7101 split and "num" is its length. lo_half and hi_half are output arrays
7102 that parallel "operands". */
7105 split_di (operands, num, lo_half, hi_half)
7108 rtx lo_half[], hi_half[];
7112 rtx op = operands[num];
7114 /* simplify_subreg refuse to split volatile memory addresses,
7115 but we still have to handle it. */
7116 if (GET_CODE (op) == MEM)
7118 lo_half[num] = adjust_address (op, SImode, 0);
7119 hi_half[num] = adjust_address (op, SImode, 4);
7123 lo_half[num] = simplify_gen_subreg (SImode, op,
7124 GET_MODE (op) == VOIDmode
7125 ? DImode : GET_MODE (op), 0);
7126 hi_half[num] = simplify_gen_subreg (SImode, op,
7127 GET_MODE (op) == VOIDmode
7128 ? DImode : GET_MODE (op), 4);
7132 /* Split one or more TImode RTL references into pairs of SImode
7133 references. The RTL can be REG, offsettable MEM, integer constant, or
7134 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7135 split and "num" is its length. lo_half and hi_half are output arrays
7136 that parallel "operands". */
7139 split_ti (operands, num, lo_half, hi_half)
7142 rtx lo_half[], hi_half[];
7146 rtx op = operands[num];
7148 /* simplify_subreg refuse to split volatile memory addresses, but we
7149 still have to handle it. */
7150 if (GET_CODE (op) == MEM)
7152 lo_half[num] = adjust_address (op, DImode, 0);
7153 hi_half[num] = adjust_address (op, DImode, 8);
7157 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7158 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7163 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7164 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7165 is the expression of the binary operation. The output may either be
7166 emitted here, or returned to the caller, like all output_* functions.
7168 There is no guarantee that the operands are the same mode, as they
7169 might be within FLOAT or FLOAT_EXTEND expressions. */
7171 #ifndef SYSV386_COMPAT
7172 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7173 wants to fix the assemblers because that causes incompatibility
7174 with gcc. No-one wants to fix gcc because that causes
7175 incompatibility with assemblers... You can use the option of
7176 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7177 #define SYSV386_COMPAT 1
7181 output_387_binary_op (insn, operands)
7185 static char buf[30];
7188 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7190 #ifdef ENABLE_CHECKING
7191 /* Even if we do not want to check the inputs, this documents input
7192 constraints. Which helps in understanding the following code. */
7193 if (STACK_REG_P (operands[0])
7194 && ((REG_P (operands[1])
7195 && REGNO (operands[0]) == REGNO (operands[1])
7196 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7197 || (REG_P (operands[2])
7198 && REGNO (operands[0]) == REGNO (operands[2])
7199 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7200 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7206 switch (GET_CODE (operands[3]))
7209 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7210 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7218 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7219 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7227 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7228 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7236 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7237 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7251 if (GET_MODE (operands[0]) == SFmode)
7252 strcat (buf, "ss\t{%2, %0|%0, %2}");
7254 strcat (buf, "sd\t{%2, %0|%0, %2}");
7259 switch (GET_CODE (operands[3]))
7263 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7265 rtx temp = operands[2];
7266 operands[2] = operands[1];
7270 /* know operands[0] == operands[1]. */
7272 if (GET_CODE (operands[2]) == MEM)
7278 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7280 if (STACK_TOP_P (operands[0]))
7281 /* How is it that we are storing to a dead operand[2]?
7282 Well, presumably operands[1] is dead too. We can't
7283 store the result to st(0) as st(0) gets popped on this
7284 instruction. Instead store to operands[2] (which I
7285 think has to be st(1)). st(1) will be popped later.
7286 gcc <= 2.8.1 didn't have this check and generated
7287 assembly code that the Unixware assembler rejected. */
7288 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7290 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7294 if (STACK_TOP_P (operands[0]))
7295 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7297 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7302 if (GET_CODE (operands[1]) == MEM)
7308 if (GET_CODE (operands[2]) == MEM)
7314 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7317 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7318 derived assemblers, confusingly reverse the direction of
7319 the operation for fsub{r} and fdiv{r} when the
7320 destination register is not st(0). The Intel assembler
7321 doesn't have this brain damage. Read !SYSV386_COMPAT to
7322 figure out what the hardware really does. */
7323 if (STACK_TOP_P (operands[0]))
7324 p = "{p\t%0, %2|rp\t%2, %0}";
7326 p = "{rp\t%2, %0|p\t%0, %2}";
7328 if (STACK_TOP_P (operands[0]))
7329 /* As above for fmul/fadd, we can't store to st(0). */
7330 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7332 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7337 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7340 if (STACK_TOP_P (operands[0]))
7341 p = "{rp\t%0, %1|p\t%1, %0}";
7343 p = "{p\t%1, %0|rp\t%0, %1}";
7345 if (STACK_TOP_P (operands[0]))
7346 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7348 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7353 if (STACK_TOP_P (operands[0]))
7355 if (STACK_TOP_P (operands[1]))
7356 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7358 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7361 else if (STACK_TOP_P (operands[1]))
7364 p = "{\t%1, %0|r\t%0, %1}";
7366 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7372 p = "{r\t%2, %0|\t%0, %2}";
7374 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7387 /* Output code to initialize control word copies used by
7388 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7389 is set to control word rounding downwards. */
7391 emit_i387_cw_initialization (normal, round_down)
7392 rtx normal, round_down;
7394 rtx reg = gen_reg_rtx (HImode);
7396 emit_insn (gen_x86_fnstcw_1 (normal));
7397 emit_move_insn (reg, normal);
7398 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7400 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7402 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7403 emit_move_insn (round_down, reg);
7406 /* Output code for INSN to convert a float to a signed int. OPERANDS
7407 are the insn operands. The output may be [HSD]Imode and the input
7408 operand may be [SDX]Fmode. */
7411 output_fix_trunc (insn, operands)
7415 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7416 int dimode_p = GET_MODE (operands[0]) == DImode;
7418 /* Jump through a hoop or two for DImode, since the hardware has no
7419 non-popping instruction. We used to do this a different way, but
7420 that was somewhat fragile and broke with post-reload splitters. */
7421 if (dimode_p && !stack_top_dies)
7422 output_asm_insn ("fld\t%y1", operands);
7424 if (!STACK_TOP_P (operands[1]))
7427 if (GET_CODE (operands[0]) != MEM)
7430 output_asm_insn ("fldcw\t%3", operands);
7431 if (stack_top_dies || dimode_p)
7432 output_asm_insn ("fistp%z0\t%0", operands);
7434 output_asm_insn ("fist%z0\t%0", operands);
7435 output_asm_insn ("fldcw\t%2", operands);
7440 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7441 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7442 when fucom should be used. */
7445 output_fp_compare (insn, operands, eflags_p, unordered_p)
7448 int eflags_p, unordered_p;
7451 rtx cmp_op0 = operands[0];
7452 rtx cmp_op1 = operands[1];
7453 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7458 cmp_op1 = operands[2];
7462 if (GET_MODE (operands[0]) == SFmode)
7464 return "ucomiss\t{%1, %0|%0, %1}";
7466 return "comiss\t{%1, %0|%0, %y}";
7469 return "ucomisd\t{%1, %0|%0, %1}";
7471 return "comisd\t{%1, %0|%0, %y}";
7474 if (! STACK_TOP_P (cmp_op0))
7477 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7479 if (STACK_REG_P (cmp_op1)
7481 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7482 && REGNO (cmp_op1) != FIRST_STACK_REG)
7484 /* If both the top of the 387 stack dies, and the other operand
7485 is also a stack register that dies, then this must be a
7486 `fcompp' float compare */
7490 /* There is no double popping fcomi variant. Fortunately,
7491 eflags is immune from the fstp's cc clobbering. */
7493 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7495 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7503 return "fucompp\n\tfnstsw\t%0";
7505 return "fcompp\n\tfnstsw\t%0";
7518 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7520 static const char * const alt[24] =
7532 "fcomi\t{%y1, %0|%0, %y1}",
7533 "fcomip\t{%y1, %0|%0, %y1}",
7534 "fucomi\t{%y1, %0|%0, %y1}",
7535 "fucomip\t{%y1, %0|%0, %y1}",
7542 "fcom%z2\t%y2\n\tfnstsw\t%0",
7543 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7544 "fucom%z2\t%y2\n\tfnstsw\t%0",
7545 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7547 "ficom%z2\t%y2\n\tfnstsw\t%0",
7548 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7556 mask = eflags_p << 3;
7557 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7558 mask |= unordered_p << 1;
7559 mask |= stack_top_dies;
7572 ix86_output_addr_vec_elt (file, value)
7576 const char *directive = ASM_LONG;
7581 directive = ASM_QUAD;
7587 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7591 ix86_output_addr_diff_elt (file, value, rel)
7596 fprintf (file, "%s%s%d-%s%d\n",
7597 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7598 else if (HAVE_AS_GOTOFF_IN_DATA)
7599 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7601 else if (TARGET_MACHO)
7602 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7603 machopic_function_base_name () + 1);
7606 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7607 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7610 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7614 ix86_expand_clear (dest)
7619 /* We play register width games, which are only valid after reload. */
7620 if (!reload_completed)
7623 /* Avoid HImode and its attendant prefix byte. */
7624 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7625 dest = gen_rtx_REG (SImode, REGNO (dest));
7627 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7629 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7630 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7632 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7633 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7639 /* X is an unchanging MEM. If it is a constant pool reference, return
7640 the constant pool rtx, else NULL. */
7643 maybe_get_pool_constant (x)
7648 if (flag_pic && ! TARGET_64BIT)
7650 if (GET_CODE (x) != PLUS)
7652 if (XEXP (x, 0) != pic_offset_table_rtx)
7655 if (GET_CODE (x) != CONST)
7658 if (GET_CODE (x) != UNSPEC)
7660 if (XINT (x, 1) != UNSPEC_GOTOFF)
7662 x = XVECEXP (x, 0, 0);
7665 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7666 return get_pool_constant (x);
7672 ix86_expand_move (mode, operands)
7673 enum machine_mode mode;
7676 int strict = (reload_in_progress || reload_completed);
7677 rtx insn, op0, op1, tmp;
7682 /* ??? We have a slight problem. We need to say that tls symbols are
7683 not legitimate constants so that reload does not helpfully reload
7684 these constants from a REG_EQUIV, which we cannot handle. (Recall
7685 that general- and local-dynamic address resolution requires a
7688 However, if we say that tls symbols are not legitimate constants,
7689 then emit_move_insn helpfully drop them into the constant pool.
7691 It is far easier to work around emit_move_insn than reload. Recognize
7692 the MEM that we would have created and extract the symbol_ref. */
7695 && GET_CODE (op1) == MEM
7696 && RTX_UNCHANGING_P (op1))
7698 tmp = maybe_get_pool_constant (op1);
7699 /* Note that we only care about symbolic constants here, which
7700 unlike CONST_INT will always have a proper mode. */
7701 if (tmp && GET_MODE (tmp) == Pmode)
7705 if (tls_symbolic_operand (op1, Pmode))
7707 op1 = legitimize_address (op1, op1, VOIDmode);
7708 if (GET_CODE (op0) == MEM)
7710 tmp = gen_reg_rtx (mode);
7711 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7715 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7720 rtx temp = ((reload_in_progress
7721 || ((op0 && GET_CODE (op0) == REG)
7723 ? op0 : gen_reg_rtx (Pmode));
7724 op1 = machopic_indirect_data_reference (op1, temp);
7725 op1 = machopic_legitimize_pic_address (op1, mode,
7726 temp == op1 ? 0 : temp);
7730 if (MACHOPIC_INDIRECT)
7731 op1 = machopic_indirect_data_reference (op1, 0);
7735 insn = gen_rtx_SET (VOIDmode, op0, op1);
7739 #endif /* TARGET_MACHO */
7740 if (GET_CODE (op0) == MEM)
7741 op1 = force_reg (Pmode, op1);
7745 if (GET_CODE (temp) != REG)
7746 temp = gen_reg_rtx (Pmode);
7747 temp = legitimize_pic_address (op1, temp);
7755 if (GET_CODE (op0) == MEM
7756 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7757 || !push_operand (op0, mode))
7758 && GET_CODE (op1) == MEM)
7759 op1 = force_reg (mode, op1);
7761 if (push_operand (op0, mode)
7762 && ! general_no_elim_operand (op1, mode))
7763 op1 = copy_to_mode_reg (mode, op1);
7765 /* Force large constants in 64bit compilation into register
7766 to get them CSEed. */
7767 if (TARGET_64BIT && mode == DImode
7768 && immediate_operand (op1, mode)
7769 && !x86_64_zero_extended_value (op1)
7770 && !register_operand (op0, mode)
7771 && optimize && !reload_completed && !reload_in_progress)
7772 op1 = copy_to_mode_reg (mode, op1);
7774 if (FLOAT_MODE_P (mode))
7776 /* If we are loading a floating point constant to a register,
7777 force the value to memory now, since we'll get better code
7778 out the back end. */
7782 else if (GET_CODE (op1) == CONST_DOUBLE
7783 && register_operand (op0, mode))
7784 op1 = validize_mem (force_const_mem (mode, op1));
7788 insn = gen_rtx_SET (VOIDmode, op0, op1);
7794 ix86_expand_vector_move (mode, operands)
7795 enum machine_mode mode;
7798 /* Force constants other than zero into memory. We do not know how
7799 the instructions used to build constants modify the upper 64 bits
7800 of the register, once we have that information we may be able
7801 to handle some of them more efficiently. */
7802 if ((reload_in_progress | reload_completed) == 0
7803 && register_operand (operands[0], mode)
7804 && CONSTANT_P (operands[1]))
7806 rtx addr = gen_reg_rtx (Pmode);
7807 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
7808 operands[1] = gen_rtx_MEM (mode, addr);
7811 /* Make operand1 a register if it isn't already. */
7812 if ((reload_in_progress | reload_completed) == 0
7813 && !register_operand (operands[0], mode)
7814 && !register_operand (operands[1], mode))
7816 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
7817 emit_move_insn (operands[0], temp);
7821 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
7824 /* Attempt to expand a binary operator. Make the expansion closer to the
7825 actual machine, then just general_operand, which will allow 3 separate
7826 memory references (one output, two input) in a single insn. */
7829 ix86_expand_binary_operator (code, mode, operands)
7831 enum machine_mode mode;
7834 int matching_memory;
7835 rtx src1, src2, dst, op, clob;
7841 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7842 if (GET_RTX_CLASS (code) == 'c'
7843 && (rtx_equal_p (dst, src2)
7844 || immediate_operand (src1, mode)))
7851 /* If the destination is memory, and we do not have matching source
7852 operands, do things in registers. */
7853 matching_memory = 0;
7854 if (GET_CODE (dst) == MEM)
7856 if (rtx_equal_p (dst, src1))
7857 matching_memory = 1;
7858 else if (GET_RTX_CLASS (code) == 'c'
7859 && rtx_equal_p (dst, src2))
7860 matching_memory = 2;
7862 dst = gen_reg_rtx (mode);
7865 /* Both source operands cannot be in memory. */
7866 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7868 if (matching_memory != 2)
7869 src2 = force_reg (mode, src2);
7871 src1 = force_reg (mode, src1);
7874 /* If the operation is not commutable, source 1 cannot be a constant
7875 or non-matching memory. */
7876 if ((CONSTANT_P (src1)
7877 || (!matching_memory && GET_CODE (src1) == MEM))
7878 && GET_RTX_CLASS (code) != 'c')
7879 src1 = force_reg (mode, src1);
7881 /* If optimizing, copy to regs to improve CSE */
7882 if (optimize && ! no_new_pseudos)
7884 if (GET_CODE (dst) == MEM)
7885 dst = gen_reg_rtx (mode);
7886 if (GET_CODE (src1) == MEM)
7887 src1 = force_reg (mode, src1);
7888 if (GET_CODE (src2) == MEM)
7889 src2 = force_reg (mode, src2);
7892 /* Emit the instruction. */
7894 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7895 if (reload_in_progress)
7897 /* Reload doesn't know about the flags register, and doesn't know that
7898 it doesn't want to clobber it. We can only do this with PLUS. */
7905 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7906 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7909 /* Fix up the destination if needed. */
7910 if (dst != operands[0])
7911 emit_move_insn (operands[0], dst);
7914 /* Return TRUE or FALSE depending on whether the binary operator meets the
7915 appropriate constraints. */
7918 ix86_binary_operator_ok (code, mode, operands)
7920 enum machine_mode mode ATTRIBUTE_UNUSED;
7923 /* Both source operands cannot be in memory. */
7924 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7926 /* If the operation is not commutable, source 1 cannot be a constant. */
7927 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
7929 /* If the destination is memory, we must have a matching source operand. */
7930 if (GET_CODE (operands[0]) == MEM
7931 && ! (rtx_equal_p (operands[0], operands[1])
7932 || (GET_RTX_CLASS (code) == 'c'
7933 && rtx_equal_p (operands[0], operands[2]))))
7935 /* If the operation is not commutable and the source 1 is memory, we must
7936 have a matching destination. */
7937 if (GET_CODE (operands[1]) == MEM
7938 && GET_RTX_CLASS (code) != 'c'
7939 && ! rtx_equal_p (operands[0], operands[1]))
7944 /* Attempt to expand a unary operator. Make the expansion closer to the
7945 actual machine, then just general_operand, which will allow 2 separate
7946 memory references (one output, one input) in a single insn. */
7949 ix86_expand_unary_operator (code, mode, operands)
7951 enum machine_mode mode;
7954 int matching_memory;
7955 rtx src, dst, op, clob;
7960 /* If the destination is memory, and we do not have matching source
7961 operands, do things in registers. */
7962 matching_memory = 0;
7963 if (GET_CODE (dst) == MEM)
7965 if (rtx_equal_p (dst, src))
7966 matching_memory = 1;
7968 dst = gen_reg_rtx (mode);
7971 /* When source operand is memory, destination must match. */
7972 if (!matching_memory && GET_CODE (src) == MEM)
7973 src = force_reg (mode, src);
7975 /* If optimizing, copy to regs to improve CSE */
7976 if (optimize && ! no_new_pseudos)
7978 if (GET_CODE (dst) == MEM)
7979 dst = gen_reg_rtx (mode);
7980 if (GET_CODE (src) == MEM)
7981 src = force_reg (mode, src);
7984 /* Emit the instruction. */
7986 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7987 if (reload_in_progress || code == NOT)
7989 /* Reload doesn't know about the flags register, and doesn't know that
7990 it doesn't want to clobber it. */
7997 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7998 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8001 /* Fix up the destination if needed. */
8002 if (dst != operands[0])
8003 emit_move_insn (operands[0], dst);
8006 /* Return TRUE or FALSE depending on whether the unary operator meets the
8007 appropriate constraints. */
8010 ix86_unary_operator_ok (code, mode, operands)
8011 enum rtx_code code ATTRIBUTE_UNUSED;
8012 enum machine_mode mode ATTRIBUTE_UNUSED;
8013 rtx operands[2] ATTRIBUTE_UNUSED;
8015 /* If one of operands is memory, source and destination must match. */
8016 if ((GET_CODE (operands[0]) == MEM
8017 || GET_CODE (operands[1]) == MEM)
8018 && ! rtx_equal_p (operands[0], operands[1]))
8023 /* Return TRUE or FALSE depending on whether the first SET in INSN
8024 has source and destination with matching CC modes, and that the
8025 CC mode is at least as constrained as REQ_MODE. */
8028 ix86_match_ccmode (insn, req_mode)
8030 enum machine_mode req_mode;
8033 enum machine_mode set_mode;
8035 set = PATTERN (insn);
8036 if (GET_CODE (set) == PARALLEL)
8037 set = XVECEXP (set, 0, 0);
8038 if (GET_CODE (set) != SET)
8040 if (GET_CODE (SET_SRC (set)) != COMPARE)
8043 set_mode = GET_MODE (SET_DEST (set));
8047 if (req_mode != CCNOmode
8048 && (req_mode != CCmode
8049 || XEXP (SET_SRC (set), 1) != const0_rtx))
8053 if (req_mode == CCGCmode)
8057 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8061 if (req_mode == CCZmode)
8071 return (GET_MODE (SET_SRC (set)) == set_mode);
8074 /* Generate insn patterns to do an integer compare of OPERANDS. */
8077 ix86_expand_int_compare (code, op0, op1)
8081 enum machine_mode cmpmode;
8084 cmpmode = SELECT_CC_MODE (code, op0, op1);
8085 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8087 /* This is very simple, but making the interface the same as in the
8088 FP case makes the rest of the code easier. */
8089 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8090 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8092 /* Return the test that should be put into the flags user, i.e.
8093 the bcc, scc, or cmov instruction. */
8094 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8097 /* Figure out whether to use ordered or unordered fp comparisons.
8098 Return the appropriate mode to use. */
8101 ix86_fp_compare_mode (code)
8102 enum rtx_code code ATTRIBUTE_UNUSED;
8104 /* ??? In order to make all comparisons reversible, we do all comparisons
8105 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8106 all forms trapping and nontrapping comparisons, we can make inequality
8107 comparisons trapping again, since it results in better code when using
8108 FCOM based compares. */
8109 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8113 ix86_cc_mode (code, op0, op1)
8117 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8118 return ix86_fp_compare_mode (code);
8121 /* Only zero flag is needed. */
8123 case NE: /* ZF!=0 */
8125 /* Codes needing carry flag. */
8126 case GEU: /* CF=0 */
8127 case GTU: /* CF=0 & ZF=0 */
8128 case LTU: /* CF=1 */
8129 case LEU: /* CF=1 | ZF=1 */
8131 /* Codes possibly doable only with sign flag when
8132 comparing against zero. */
8133 case GE: /* SF=OF or SF=0 */
8134 case LT: /* SF<>OF or SF=1 */
8135 if (op1 == const0_rtx)
8138 /* For other cases Carry flag is not required. */
8140 /* Codes doable only with sign flag when comparing
8141 against zero, but we miss jump instruction for it
8142 so we need to use relational tests agains overflow
8143 that thus needs to be zero. */
8144 case GT: /* ZF=0 & SF=OF */
8145 case LE: /* ZF=1 | SF<>OF */
8146 if (op1 == const0_rtx)
8150 /* strcmp pattern do (use flags) and combine may ask us for proper
8159 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8162 ix86_use_fcomi_compare (code)
8163 enum rtx_code code ATTRIBUTE_UNUSED;
8165 enum rtx_code swapped_code = swap_condition (code);
8166 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8167 || (ix86_fp_comparison_cost (swapped_code)
8168 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8171 /* Swap, force into registers, or otherwise massage the two operands
8172 to a fp comparison. The operands are updated in place; the new
8173 comparsion code is returned. */
8175 static enum rtx_code
8176 ix86_prepare_fp_compare_args (code, pop0, pop1)
8180 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8181 rtx op0 = *pop0, op1 = *pop1;
8182 enum machine_mode op_mode = GET_MODE (op0);
8183 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8185 /* All of the unordered compare instructions only work on registers.
8186 The same is true of the XFmode compare instructions. The same is
8187 true of the fcomi compare instructions. */
8190 && (fpcmp_mode == CCFPUmode
8191 || op_mode == XFmode
8192 || op_mode == TFmode
8193 || ix86_use_fcomi_compare (code)))
8195 op0 = force_reg (op_mode, op0);
8196 op1 = force_reg (op_mode, op1);
8200 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8201 things around if they appear profitable, otherwise force op0
8204 if (standard_80387_constant_p (op0) == 0
8205 || (GET_CODE (op0) == MEM
8206 && ! (standard_80387_constant_p (op1) == 0
8207 || GET_CODE (op1) == MEM)))
8210 tmp = op0, op0 = op1, op1 = tmp;
8211 code = swap_condition (code);
8214 if (GET_CODE (op0) != REG)
8215 op0 = force_reg (op_mode, op0);
8217 if (CONSTANT_P (op1))
8219 if (standard_80387_constant_p (op1))
8220 op1 = force_reg (op_mode, op1);
8222 op1 = validize_mem (force_const_mem (op_mode, op1));
8226 /* Try to rearrange the comparison to make it cheaper. */
8227 if (ix86_fp_comparison_cost (code)
8228 > ix86_fp_comparison_cost (swap_condition (code))
8229 && (GET_CODE (op1) == REG || !no_new_pseudos))
8232 tmp = op0, op0 = op1, op1 = tmp;
8233 code = swap_condition (code);
8234 if (GET_CODE (op0) != REG)
8235 op0 = force_reg (op_mode, op0);
8243 /* Convert comparison codes we use to represent FP comparison to integer
8244 code that will result in proper branch. Return UNKNOWN if no such code
8246 static enum rtx_code
8247 ix86_fp_compare_code_to_integer (code)
8277 /* Split comparison code CODE into comparisons we can do using branch
8278 instructions. BYPASS_CODE is comparison code for branch that will
8279 branch around FIRST_CODE and SECOND_CODE. If some of branches
8280 is not required, set value to NIL.
8281 We never require more than two branches. */
8283 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8284 enum rtx_code code, *bypass_code, *first_code, *second_code;
8290 /* The fcomi comparison sets flags as follows:
8300 case GT: /* GTU - CF=0 & ZF=0 */
8301 case GE: /* GEU - CF=0 */
8302 case ORDERED: /* PF=0 */
8303 case UNORDERED: /* PF=1 */
8304 case UNEQ: /* EQ - ZF=1 */
8305 case UNLT: /* LTU - CF=1 */
8306 case UNLE: /* LEU - CF=1 | ZF=1 */
8307 case LTGT: /* EQ - ZF=0 */
8309 case LT: /* LTU - CF=1 - fails on unordered */
8311 *bypass_code = UNORDERED;
8313 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8315 *bypass_code = UNORDERED;
8317 case EQ: /* EQ - ZF=1 - fails on unordered */
8319 *bypass_code = UNORDERED;
8321 case NE: /* NE - ZF=0 - fails on unordered */
8323 *second_code = UNORDERED;
8325 case UNGE: /* GEU - CF=0 - fails on unordered */
8327 *second_code = UNORDERED;
8329 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8331 *second_code = UNORDERED;
8336 if (!TARGET_IEEE_FP)
8343 /* Return cost of comparison done fcom + arithmetics operations on AX.
8344 All following functions do use number of instructions as an cost metrics.
8345 In future this should be tweaked to compute bytes for optimize_size and
8346 take into account performance of various instructions on various CPUs. */
8348 ix86_fp_comparison_arithmetics_cost (code)
8351 if (!TARGET_IEEE_FP)
8353 /* The cost of code output by ix86_expand_fp_compare. */
8381 /* Return cost of comparison done using fcomi operation.
8382 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8384 ix86_fp_comparison_fcomi_cost (code)
8387 enum rtx_code bypass_code, first_code, second_code;
8388 /* Return arbitarily high cost when instruction is not supported - this
8389 prevents gcc from using it. */
8392 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8393 return (bypass_code != NIL || second_code != NIL) + 2;
8396 /* Return cost of comparison done using sahf operation.
8397 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8399 ix86_fp_comparison_sahf_cost (code)
8402 enum rtx_code bypass_code, first_code, second_code;
8403 /* Return arbitarily high cost when instruction is not preferred - this
8404 avoids gcc from using it. */
8405 if (!TARGET_USE_SAHF && !optimize_size)
8407 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8408 return (bypass_code != NIL || second_code != NIL) + 3;
8411 /* Compute cost of the comparison done using any method.
8412 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8414 ix86_fp_comparison_cost (code)
8417 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8420 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8421 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8423 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8424 if (min > sahf_cost)
8426 if (min > fcomi_cost)
8431 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8434 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8436 rtx op0, op1, scratch;
8440 enum machine_mode fpcmp_mode, intcmp_mode;
8442 int cost = ix86_fp_comparison_cost (code);
8443 enum rtx_code bypass_code, first_code, second_code;
8445 fpcmp_mode = ix86_fp_compare_mode (code);
8446 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8449 *second_test = NULL_RTX;
8451 *bypass_test = NULL_RTX;
8453 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8455 /* Do fcomi/sahf based test when profitable. */
8456 if ((bypass_code == NIL || bypass_test)
8457 && (second_code == NIL || second_test)
8458 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8462 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8463 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8469 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8470 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8472 scratch = gen_reg_rtx (HImode);
8473 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8474 emit_insn (gen_x86_sahf_1 (scratch));
8477 /* The FP codes work out to act like unsigned. */
8478 intcmp_mode = fpcmp_mode;
8480 if (bypass_code != NIL)
8481 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8482 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8484 if (second_code != NIL)
8485 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8486 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8491 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8492 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8493 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8495 scratch = gen_reg_rtx (HImode);
8496 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8498 /* In the unordered case, we have to check C2 for NaN's, which
8499 doesn't happen to work out to anything nice combination-wise.
8500 So do some bit twiddling on the value we've got in AH to come
8501 up with an appropriate set of condition codes. */
8503 intcmp_mode = CCNOmode;
8508 if (code == GT || !TARGET_IEEE_FP)
8510 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8515 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8516 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8517 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8518 intcmp_mode = CCmode;
8524 if (code == LT && TARGET_IEEE_FP)
8526 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8527 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8528 intcmp_mode = CCmode;
8533 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8539 if (code == GE || !TARGET_IEEE_FP)
8541 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8546 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8547 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8554 if (code == LE && TARGET_IEEE_FP)
8556 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8557 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8558 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8559 intcmp_mode = CCmode;
8564 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8570 if (code == EQ && TARGET_IEEE_FP)
8572 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8573 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8574 intcmp_mode = CCmode;
8579 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8586 if (code == NE && TARGET_IEEE_FP)
8588 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8589 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8595 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8601 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8605 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8614 /* Return the test that should be put into the flags user, i.e.
8615 the bcc, scc, or cmov instruction. */
8616 return gen_rtx_fmt_ee (code, VOIDmode,
8617 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8622 ix86_expand_compare (code, second_test, bypass_test)
8624 rtx *second_test, *bypass_test;
8627 op0 = ix86_compare_op0;
8628 op1 = ix86_compare_op1;
8631 *second_test = NULL_RTX;
8633 *bypass_test = NULL_RTX;
8635 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8636 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8637 second_test, bypass_test);
8639 ret = ix86_expand_int_compare (code, op0, op1);
8644 /* Return true if the CODE will result in nontrivial jump sequence. */
8646 ix86_fp_jump_nontrivial_p (code)
8649 enum rtx_code bypass_code, first_code, second_code;
8652 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8653 return bypass_code != NIL || second_code != NIL;
8657 ix86_expand_branch (code, label)
8663 switch (GET_MODE (ix86_compare_op0))
8669 tmp = ix86_expand_compare (code, NULL, NULL);
8670 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8671 gen_rtx_LABEL_REF (VOIDmode, label),
8673 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8683 enum rtx_code bypass_code, first_code, second_code;
8685 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8688 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8690 /* Check whether we will use the natural sequence with one jump. If
8691 so, we can expand jump early. Otherwise delay expansion by
8692 creating compound insn to not confuse optimizers. */
8693 if (bypass_code == NIL && second_code == NIL
8696 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8697 gen_rtx_LABEL_REF (VOIDmode, label),
8702 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8703 ix86_compare_op0, ix86_compare_op1);
8704 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8705 gen_rtx_LABEL_REF (VOIDmode, label),
8707 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8709 use_fcomi = ix86_use_fcomi_compare (code);
8710 vec = rtvec_alloc (3 + !use_fcomi);
8711 RTVEC_ELT (vec, 0) = tmp;
8713 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8715 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8718 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8720 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8728 /* Expand DImode branch into multiple compare+branch. */
8730 rtx lo[2], hi[2], label2;
8731 enum rtx_code code1, code2, code3;
8733 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8735 tmp = ix86_compare_op0;
8736 ix86_compare_op0 = ix86_compare_op1;
8737 ix86_compare_op1 = tmp;
8738 code = swap_condition (code);
8740 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8741 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8743 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8744 avoid two branches. This costs one extra insn, so disable when
8745 optimizing for size. */
8747 if ((code == EQ || code == NE)
8749 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8754 if (hi[1] != const0_rtx)
8755 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8756 NULL_RTX, 0, OPTAB_WIDEN);
8759 if (lo[1] != const0_rtx)
8760 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8761 NULL_RTX, 0, OPTAB_WIDEN);
8763 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8764 NULL_RTX, 0, OPTAB_WIDEN);
8766 ix86_compare_op0 = tmp;
8767 ix86_compare_op1 = const0_rtx;
8768 ix86_expand_branch (code, label);
8772 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8773 op1 is a constant and the low word is zero, then we can just
8774 examine the high word. */
8776 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8779 case LT: case LTU: case GE: case GEU:
8780 ix86_compare_op0 = hi[0];
8781 ix86_compare_op1 = hi[1];
8782 ix86_expand_branch (code, label);
8788 /* Otherwise, we need two or three jumps. */
8790 label2 = gen_label_rtx ();
8793 code2 = swap_condition (code);
8794 code3 = unsigned_condition (code);
8798 case LT: case GT: case LTU: case GTU:
8801 case LE: code1 = LT; code2 = GT; break;
8802 case GE: code1 = GT; code2 = LT; break;
8803 case LEU: code1 = LTU; code2 = GTU; break;
8804 case GEU: code1 = GTU; code2 = LTU; break;
8806 case EQ: code1 = NIL; code2 = NE; break;
8807 case NE: code2 = NIL; break;
8815 * if (hi(a) < hi(b)) goto true;
8816 * if (hi(a) > hi(b)) goto false;
8817 * if (lo(a) < lo(b)) goto true;
8821 ix86_compare_op0 = hi[0];
8822 ix86_compare_op1 = hi[1];
8825 ix86_expand_branch (code1, label);
8827 ix86_expand_branch (code2, label2);
8829 ix86_compare_op0 = lo[0];
8830 ix86_compare_op1 = lo[1];
8831 ix86_expand_branch (code3, label);
8834 emit_label (label2);
8843 /* Split branch based on floating point condition. */
8845 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
8847 rtx op1, op2, target1, target2, tmp;
8850 rtx label = NULL_RTX;
8852 int bypass_probability = -1, second_probability = -1, probability = -1;
8855 if (target2 != pc_rtx)
8858 code = reverse_condition_maybe_unordered (code);
8863 condition = ix86_expand_fp_compare (code, op1, op2,
8864 tmp, &second, &bypass);
8866 if (split_branch_probability >= 0)
8868 /* Distribute the probabilities across the jumps.
8869 Assume the BYPASS and SECOND to be always test
8871 probability = split_branch_probability;
8873 /* Value of 1 is low enough to make no need for probability
8874 to be updated. Later we may run some experiments and see
8875 if unordered values are more frequent in practice. */
8877 bypass_probability = 1;
8879 second_probability = 1;
8881 if (bypass != NULL_RTX)
8883 label = gen_label_rtx ();
8884 i = emit_jump_insn (gen_rtx_SET
8886 gen_rtx_IF_THEN_ELSE (VOIDmode,
8888 gen_rtx_LABEL_REF (VOIDmode,
8891 if (bypass_probability >= 0)
8893 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8894 GEN_INT (bypass_probability),
8897 i = emit_jump_insn (gen_rtx_SET
8899 gen_rtx_IF_THEN_ELSE (VOIDmode,
8900 condition, target1, target2)));
8901 if (probability >= 0)
8903 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8904 GEN_INT (probability),
8906 if (second != NULL_RTX)
8908 i = emit_jump_insn (gen_rtx_SET
8910 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8912 if (second_probability >= 0)
8914 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8915 GEN_INT (second_probability),
8918 if (label != NULL_RTX)
8923 ix86_expand_setcc (code, dest)
8927 rtx ret, tmp, tmpreg;
8928 rtx second_test, bypass_test;
8930 if (GET_MODE (ix86_compare_op0) == DImode
8932 return 0; /* FAIL */
8934 if (GET_MODE (dest) != QImode)
8937 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8938 PUT_MODE (ret, QImode);
8943 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
8944 if (bypass_test || second_test)
8946 rtx test = second_test;
8948 rtx tmp2 = gen_reg_rtx (QImode);
8955 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8957 PUT_MODE (test, QImode);
8958 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8961 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8963 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8966 return 1; /* DONE */
8970 ix86_expand_int_movcc (operands)
8973 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8974 rtx compare_seq, compare_op;
8975 rtx second_test, bypass_test;
8976 enum machine_mode mode = GET_MODE (operands[0]);
8978 /* When the compare code is not LTU or GEU, we can not use sbbl case.
8979 In case comparsion is done with immediate, we can convert it to LTU or
8980 GEU by altering the integer. */
8982 if ((code == LEU || code == GTU)
8983 && GET_CODE (ix86_compare_op1) == CONST_INT
8985 && INTVAL (ix86_compare_op1) != -1
8986 /* For x86-64, the immediate field in the instruction is 32-bit
8987 signed, so we can't increment a DImode value above 0x7fffffff. */
8989 || GET_MODE (ix86_compare_op0) != DImode
8990 || INTVAL (ix86_compare_op1) != 0x7fffffff)
8991 && GET_CODE (operands[2]) == CONST_INT
8992 && GET_CODE (operands[3]) == CONST_INT)
8998 ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8999 GET_MODE (ix86_compare_op0));
9003 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9004 compare_seq = get_insns ();
9007 compare_code = GET_CODE (compare_op);
9009 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9010 HImode insns, we'd be swallowed in word prefix ops. */
9013 && (mode != DImode || TARGET_64BIT)
9014 && GET_CODE (operands[2]) == CONST_INT
9015 && GET_CODE (operands[3]) == CONST_INT)
9017 rtx out = operands[0];
9018 HOST_WIDE_INT ct = INTVAL (operands[2]);
9019 HOST_WIDE_INT cf = INTVAL (operands[3]);
9022 if ((compare_code == LTU || compare_code == GEU)
9023 && !second_test && !bypass_test)
9025 /* Detect overlap between destination and compare sources. */
9028 /* To simplify rest of code, restrict to the GEU case. */
9029 if (compare_code == LTU)
9034 compare_code = reverse_condition (compare_code);
9035 code = reverse_condition (code);
9039 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9040 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9041 tmp = gen_reg_rtx (mode);
9043 emit_insn (compare_seq);
9045 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
9047 emit_insn (gen_x86_movsicc_0_m1 (tmp));
9059 tmp = expand_simple_binop (mode, PLUS,
9061 tmp, 1, OPTAB_DIRECT);
9072 tmp = expand_simple_binop (mode, IOR,
9074 tmp, 1, OPTAB_DIRECT);
9076 else if (diff == -1 && ct)
9086 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
9088 tmp = expand_simple_binop (mode, PLUS,
9090 tmp, 1, OPTAB_DIRECT);
9098 * andl cf - ct, dest
9108 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
9111 tmp = expand_simple_binop (mode, AND,
9113 gen_int_mode (cf - ct, mode),
9114 tmp, 1, OPTAB_DIRECT);
9116 tmp = expand_simple_binop (mode, PLUS,
9118 tmp, 1, OPTAB_DIRECT);
9122 emit_move_insn (out, tmp);
9124 return 1; /* DONE */
9131 tmp = ct, ct = cf, cf = tmp;
9133 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9135 /* We may be reversing unordered compare to normal compare, that
9136 is not valid in general (we may convert non-trapping condition
9137 to trapping one), however on i386 we currently emit all
9138 comparisons unordered. */
9139 compare_code = reverse_condition_maybe_unordered (compare_code);
9140 code = reverse_condition_maybe_unordered (code);
9144 compare_code = reverse_condition (compare_code);
9145 code = reverse_condition (code);
9150 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9151 && GET_CODE (ix86_compare_op1) == CONST_INT)
9153 if (ix86_compare_op1 == const0_rtx
9154 && (code == LT || code == GE))
9155 compare_code = code;
9156 else if (ix86_compare_op1 == constm1_rtx)
9160 else if (code == GT)
9165 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9166 if (compare_code != NIL
9167 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9168 && (cf == -1 || ct == -1))
9170 /* If lea code below could be used, only optimize
9171 if it results in a 2 insn sequence. */
9173 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9174 || diff == 3 || diff == 5 || diff == 9)
9175 || (compare_code == LT && ct == -1)
9176 || (compare_code == GE && cf == -1))
9179 * notl op1 (if necessary)
9187 code = reverse_condition (code);
9190 out = emit_store_flag (out, code, ix86_compare_op0,
9191 ix86_compare_op1, VOIDmode, 0, -1);
9193 out = expand_simple_binop (mode, IOR,
9195 out, 1, OPTAB_DIRECT);
9196 if (out != operands[0])
9197 emit_move_insn (operands[0], out);
9199 return 1; /* DONE */
9203 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9204 || diff == 3 || diff == 5 || diff == 9)
9205 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf), 0)))
9211 * lea cf(dest*(ct-cf)),dest
9215 * This also catches the degenerate setcc-only case.
9221 out = emit_store_flag (out, code, ix86_compare_op0,
9222 ix86_compare_op1, VOIDmode, 0, 1);
9225 /* On x86_64 the lea instruction operates on Pmode, so we need
9226 to get arithmetics done in proper mode to match. */
9233 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9237 tmp = gen_rtx_PLUS (mode, tmp, out1);
9243 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9247 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
9253 clob = gen_rtx_REG (CCmode, FLAGS_REG);
9254 clob = gen_rtx_CLOBBER (VOIDmode, clob);
9256 tmp = gen_rtx_SET (VOIDmode, out, tmp);
9257 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9261 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
9263 if (out != operands[0])
9264 emit_move_insn (operands[0], copy_rtx (out));
9266 return 1; /* DONE */
9270 * General case: Jumpful:
9271 * xorl dest,dest cmpl op1, op2
9272 * cmpl op1, op2 movl ct, dest
9274 * decl dest movl cf, dest
9275 * andl (cf-ct),dest 1:
9280 * This is reasonably steep, but branch mispredict costs are
9281 * high on modern cpus, so consider failing only if optimizing
9284 * %%% Parameterize branch_cost on the tuning architecture, then
9285 * use that. The 80386 couldn't care less about mispredicts.
9288 if (!optimize_size && !TARGET_CMOVE)
9294 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9295 /* We may be reversing unordered compare to normal compare,
9296 that is not valid in general (we may convert non-trapping
9297 condition to trapping one), however on i386 we currently
9298 emit all comparisons unordered. */
9299 code = reverse_condition_maybe_unordered (code);
9302 code = reverse_condition (code);
9303 if (compare_code != NIL)
9304 compare_code = reverse_condition (compare_code);
9308 if (compare_code != NIL)
9310 /* notl op1 (if needed)
9315 For x < 0 (resp. x <= -1) there will be no notl,
9316 so if possible swap the constants to get rid of the
9318 True/false will be -1/0 while code below (store flag
9319 followed by decrement) is 0/-1, so the constants need
9320 to be exchanged once more. */
9322 if (compare_code == GE || !cf)
9324 code = reverse_condition (code);
9329 HOST_WIDE_INT tmp = cf;
9334 out = emit_store_flag (out, code, ix86_compare_op0,
9335 ix86_compare_op1, VOIDmode, 0, -1);
9339 out = emit_store_flag (out, code, ix86_compare_op0,
9340 ix86_compare_op1, VOIDmode, 0, 1);
9342 out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
9343 out, 1, OPTAB_DIRECT);
9346 out = expand_simple_binop (mode, AND, out,
9347 gen_int_mode (cf - ct, mode),
9348 out, 1, OPTAB_DIRECT);
9350 out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
9351 out, 1, OPTAB_DIRECT);
9352 if (out != operands[0])
9353 emit_move_insn (operands[0], out);
9355 return 1; /* DONE */
9361 /* Try a few things more with specific constants and a variable. */
9364 rtx var, orig_out, out, tmp;
9367 return 0; /* FAIL */
9369 /* If one of the two operands is an interesting constant, load a
9370 constant with the above and mask it in with a logical operation. */
9372 if (GET_CODE (operands[2]) == CONST_INT)
9375 if (INTVAL (operands[2]) == 0)
9376 operands[3] = constm1_rtx, op = and_optab;
9377 else if (INTVAL (operands[2]) == -1)
9378 operands[3] = const0_rtx, op = ior_optab;
9380 return 0; /* FAIL */
9382 else if (GET_CODE (operands[3]) == CONST_INT)
9385 if (INTVAL (operands[3]) == 0)
9386 operands[2] = constm1_rtx, op = and_optab;
9387 else if (INTVAL (operands[3]) == -1)
9388 operands[2] = const0_rtx, op = ior_optab;
9390 return 0; /* FAIL */
9393 return 0; /* FAIL */
9395 orig_out = operands[0];
9396 tmp = gen_reg_rtx (mode);
9399 /* Recurse to get the constant loaded. */
9400 if (ix86_expand_int_movcc (operands) == 0)
9401 return 0; /* FAIL */
9403 /* Mask in the interesting variable. */
9404 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9406 if (out != orig_out)
9407 emit_move_insn (orig_out, out);
9409 return 1; /* DONE */
9413 * For comparison with above,
9423 if (! nonimmediate_operand (operands[2], mode))
9424 operands[2] = force_reg (mode, operands[2]);
9425 if (! nonimmediate_operand (operands[3], mode))
9426 operands[3] = force_reg (mode, operands[3]);
9428 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9430 rtx tmp = gen_reg_rtx (mode);
9431 emit_move_insn (tmp, operands[3]);
9434 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9436 rtx tmp = gen_reg_rtx (mode);
9437 emit_move_insn (tmp, operands[2]);
9440 if (! register_operand (operands[2], VOIDmode)
9441 && ! register_operand (operands[3], VOIDmode))
9442 operands[2] = force_reg (mode, operands[2]);
9444 emit_insn (compare_seq);
9445 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9446 gen_rtx_IF_THEN_ELSE (mode,
9447 compare_op, operands[2],
9450 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9451 gen_rtx_IF_THEN_ELSE (mode,
9456 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9457 gen_rtx_IF_THEN_ELSE (mode,
9462 return 1; /* DONE */
9466 ix86_expand_fp_movcc (operands)
9471 rtx compare_op, second_test, bypass_test;
9473 /* For SF/DFmode conditional moves based on comparisons
9474 in same mode, we may want to use SSE min/max instructions. */
9475 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9476 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9477 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9478 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
9480 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9481 /* We may be called from the post-reload splitter. */
9482 && (!REG_P (operands[0])
9483 || SSE_REG_P (operands[0])
9484 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9486 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9487 code = GET_CODE (operands[1]);
9489 /* See if we have (cross) match between comparison operands and
9490 conditional move operands. */
9491 if (rtx_equal_p (operands[2], op1))
9496 code = reverse_condition_maybe_unordered (code);
9498 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9500 /* Check for min operation. */
9503 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9504 if (memory_operand (op0, VOIDmode))
9505 op0 = force_reg (GET_MODE (operands[0]), op0);
9506 if (GET_MODE (operands[0]) == SFmode)
9507 emit_insn (gen_minsf3 (operands[0], op0, op1));
9509 emit_insn (gen_mindf3 (operands[0], op0, op1));
9512 /* Check for max operation. */
9515 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9516 if (memory_operand (op0, VOIDmode))
9517 op0 = force_reg (GET_MODE (operands[0]), op0);
9518 if (GET_MODE (operands[0]) == SFmode)
9519 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9521 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9525 /* Manage condition to be sse_comparison_operator. In case we are
9526 in non-ieee mode, try to canonicalize the destination operand
9527 to be first in the comparison - this helps reload to avoid extra
9529 if (!sse_comparison_operator (operands[1], VOIDmode)
9530 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9532 rtx tmp = ix86_compare_op0;
9533 ix86_compare_op0 = ix86_compare_op1;
9534 ix86_compare_op1 = tmp;
9535 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9536 VOIDmode, ix86_compare_op0,
9539 /* Similary try to manage result to be first operand of conditional
9540 move. We also don't support the NE comparison on SSE, so try to
9542 if ((rtx_equal_p (operands[0], operands[3])
9543 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9544 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9546 rtx tmp = operands[2];
9547 operands[2] = operands[3];
9549 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9550 (GET_CODE (operands[1])),
9551 VOIDmode, ix86_compare_op0,
9554 if (GET_MODE (operands[0]) == SFmode)
9555 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9556 operands[2], operands[3],
9557 ix86_compare_op0, ix86_compare_op1));
9559 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9560 operands[2], operands[3],
9561 ix86_compare_op0, ix86_compare_op1));
9565 /* The floating point conditional move instructions don't directly
9566 support conditions resulting from a signed integer comparison. */
9568 code = GET_CODE (operands[1]);
9569 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9571 /* The floating point conditional move instructions don't directly
9572 support signed integer comparisons. */
9574 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9576 if (second_test != NULL || bypass_test != NULL)
9578 tmp = gen_reg_rtx (QImode);
9579 ix86_expand_setcc (code, tmp);
9581 ix86_compare_op0 = tmp;
9582 ix86_compare_op1 = const0_rtx;
9583 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9585 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9587 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9588 emit_move_insn (tmp, operands[3]);
9591 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9593 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9594 emit_move_insn (tmp, operands[2]);
9598 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9599 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9604 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9605 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9610 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9611 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9619 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9620 works for floating pointer parameters and nonoffsetable memories.
9621 For pushes, it returns just stack offsets; the values will be saved
9622 in the right order. Maximally three parts are generated. */
9625 ix86_split_to_parts (operand, parts, mode)
9628 enum machine_mode mode;
9633 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9635 size = (GET_MODE_SIZE (mode) + 4) / 8;
9637 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9639 if (size < 2 || size > 3)
9642 /* Optimize constant pool reference to immediates. This is used by fp
9643 moves, that force all constants to memory to allow combining. */
9644 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9646 rtx tmp = maybe_get_pool_constant (operand);
9651 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9653 /* The only non-offsetable memories we handle are pushes. */
9654 if (! push_operand (operand, VOIDmode))
9657 operand = copy_rtx (operand);
9658 PUT_MODE (operand, Pmode);
9659 parts[0] = parts[1] = parts[2] = operand;
9661 else if (!TARGET_64BIT)
9664 split_di (&operand, 1, &parts[0], &parts[1]);
9667 if (REG_P (operand))
9669 if (!reload_completed)
9671 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9672 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9674 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9676 else if (offsettable_memref_p (operand))
9678 operand = adjust_address (operand, SImode, 0);
9680 parts[1] = adjust_address (operand, SImode, 4);
9682 parts[2] = adjust_address (operand, SImode, 8);
9684 else if (GET_CODE (operand) == CONST_DOUBLE)
9689 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9694 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9695 parts[2] = gen_int_mode (l[2], SImode);
9698 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9703 parts[1] = gen_int_mode (l[1], SImode);
9704 parts[0] = gen_int_mode (l[0], SImode);
9713 split_ti (&operand, 1, &parts[0], &parts[1]);
9714 if (mode == XFmode || mode == TFmode)
9716 if (REG_P (operand))
9718 if (!reload_completed)
9720 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9721 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9723 else if (offsettable_memref_p (operand))
9725 operand = adjust_address (operand, DImode, 0);
9727 parts[1] = adjust_address (operand, SImode, 8);
9729 else if (GET_CODE (operand) == CONST_DOUBLE)
9734 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9735 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9736 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9737 if (HOST_BITS_PER_WIDE_INT >= 64)
9740 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9741 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9744 parts[0] = immed_double_const (l[0], l[1], DImode);
9745 parts[1] = gen_int_mode (l[2], SImode);
9755 /* Emit insns to perform a move or push of DI, DF, and XF values.
9756 Return false when normal moves are needed; true when all required
9757 insns have been emitted. Operands 2-4 contain the input values
9758 int the correct order; operands 5-7 contain the output values. */
9761 ix86_split_long_move (operands)
9768 enum machine_mode mode = GET_MODE (operands[0]);
9770 /* The DFmode expanders may ask us to move double.
9771 For 64bit target this is single move. By hiding the fact
9772 here we simplify i386.md splitters. */
9773 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9775 /* Optimize constant pool reference to immediates. This is used by
9776 fp moves, that force all constants to memory to allow combining. */
9778 if (GET_CODE (operands[1]) == MEM
9779 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9780 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9781 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9782 if (push_operand (operands[0], VOIDmode))
9784 operands[0] = copy_rtx (operands[0]);
9785 PUT_MODE (operands[0], Pmode);
9788 operands[0] = gen_lowpart (DImode, operands[0]);
9789 operands[1] = gen_lowpart (DImode, operands[1]);
9790 emit_move_insn (operands[0], operands[1]);
9794 /* The only non-offsettable memory we handle is push. */
9795 if (push_operand (operands[0], VOIDmode))
9797 else if (GET_CODE (operands[0]) == MEM
9798 && ! offsettable_memref_p (operands[0]))
9801 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9802 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9804 /* When emitting push, take care for source operands on the stack. */
9805 if (push && GET_CODE (operands[1]) == MEM
9806 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9809 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9810 XEXP (part[1][2], 0));
9811 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9812 XEXP (part[1][1], 0));
9815 /* We need to do copy in the right order in case an address register
9816 of the source overlaps the destination. */
9817 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9819 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9821 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9824 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9827 /* Collision in the middle part can be handled by reordering. */
9828 if (collisions == 1 && nparts == 3
9829 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9832 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9833 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9836 /* If there are more collisions, we can't handle it by reordering.
9837 Do an lea to the last part and use only one colliding move. */
9838 else if (collisions > 1)
9841 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
9842 XEXP (part[1][0], 0)));
9843 part[1][0] = change_address (part[1][0],
9844 TARGET_64BIT ? DImode : SImode,
9845 part[0][nparts - 1]);
9846 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
9848 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
9858 /* We use only first 12 bytes of TFmode value, but for pushing we
9859 are required to adjust stack as if we were pushing real 16byte
9861 if (mode == TFmode && !TARGET_64BIT)
9862 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
9864 emit_move_insn (part[0][2], part[1][2]);
9869 /* In 64bit mode we don't have 32bit push available. In case this is
9870 register, it is OK - we will just use larger counterpart. We also
9871 retype memory - these comes from attempt to avoid REX prefix on
9872 moving of second half of TFmode value. */
9873 if (GET_MODE (part[1][1]) == SImode)
9875 if (GET_CODE (part[1][1]) == MEM)
9876 part[1][1] = adjust_address (part[1][1], DImode, 0);
9877 else if (REG_P (part[1][1]))
9878 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9881 if (GET_MODE (part[1][0]) == SImode)
9882 part[1][0] = part[1][1];
9885 emit_move_insn (part[0][1], part[1][1]);
9886 emit_move_insn (part[0][0], part[1][0]);
9890 /* Choose correct order to not overwrite the source before it is copied. */
9891 if ((REG_P (part[0][0])
9892 && REG_P (part[1][1])
9893 && (REGNO (part[0][0]) == REGNO (part[1][1])
9895 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9897 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9901 operands[2] = part[0][2];
9902 operands[3] = part[0][1];
9903 operands[4] = part[0][0];
9904 operands[5] = part[1][2];
9905 operands[6] = part[1][1];
9906 operands[7] = part[1][0];
9910 operands[2] = part[0][1];
9911 operands[3] = part[0][0];
9912 operands[5] = part[1][1];
9913 operands[6] = part[1][0];
9920 operands[2] = part[0][0];
9921 operands[3] = part[0][1];
9922 operands[4] = part[0][2];
9923 operands[5] = part[1][0];
9924 operands[6] = part[1][1];
9925 operands[7] = part[1][2];
9929 operands[2] = part[0][0];
9930 operands[3] = part[0][1];
9931 operands[5] = part[1][0];
9932 operands[6] = part[1][1];
9935 emit_move_insn (operands[2], operands[5]);
9936 emit_move_insn (operands[3], operands[6]);
9938 emit_move_insn (operands[4], operands[7]);
9944 ix86_split_ashldi (operands, scratch)
9945 rtx *operands, scratch;
9947 rtx low[2], high[2];
9950 if (GET_CODE (operands[2]) == CONST_INT)
9952 split_di (operands, 2, low, high);
9953 count = INTVAL (operands[2]) & 63;
9957 emit_move_insn (high[0], low[1]);
9958 emit_move_insn (low[0], const0_rtx);
9961 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
9965 if (!rtx_equal_p (operands[0], operands[1]))
9966 emit_move_insn (operands[0], operands[1]);
9967 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9968 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
9973 if (!rtx_equal_p (operands[0], operands[1]))
9974 emit_move_insn (operands[0], operands[1]);
9976 split_di (operands, 1, low, high);
9978 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9979 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
9981 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9983 if (! no_new_pseudos)
9984 scratch = force_reg (SImode, const0_rtx);
9986 emit_move_insn (scratch, const0_rtx);
9988 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9992 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
9997 ix86_split_ashrdi (operands, scratch)
9998 rtx *operands, scratch;
10000 rtx low[2], high[2];
10003 if (GET_CODE (operands[2]) == CONST_INT)
10005 split_di (operands, 2, low, high);
10006 count = INTVAL (operands[2]) & 63;
10010 emit_move_insn (low[0], high[1]);
10012 if (! reload_completed)
10013 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10016 emit_move_insn (high[0], low[0]);
10017 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10021 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10025 if (!rtx_equal_p (operands[0], operands[1]))
10026 emit_move_insn (operands[0], operands[1]);
10027 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10028 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10033 if (!rtx_equal_p (operands[0], operands[1]))
10034 emit_move_insn (operands[0], operands[1]);
10036 split_di (operands, 1, low, high);
10038 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10039 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10041 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10043 if (! no_new_pseudos)
10044 scratch = gen_reg_rtx (SImode);
10045 emit_move_insn (scratch, high[0]);
10046 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10047 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10051 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10056 ix86_split_lshrdi (operands, scratch)
10057 rtx *operands, scratch;
10059 rtx low[2], high[2];
10062 if (GET_CODE (operands[2]) == CONST_INT)
10064 split_di (operands, 2, low, high);
10065 count = INTVAL (operands[2]) & 63;
10069 emit_move_insn (low[0], high[1]);
10070 emit_move_insn (high[0], const0_rtx);
10073 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10077 if (!rtx_equal_p (operands[0], operands[1]))
10078 emit_move_insn (operands[0], operands[1]);
10079 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10080 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10085 if (!rtx_equal_p (operands[0], operands[1]))
10086 emit_move_insn (operands[0], operands[1]);
10088 split_di (operands, 1, low, high);
10090 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10091 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10093 /* Heh. By reversing the arguments, we can reuse this pattern. */
10094 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10096 if (! no_new_pseudos)
10097 scratch = force_reg (SImode, const0_rtx);
10099 emit_move_insn (scratch, const0_rtx);
10101 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10105 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10109 /* Helper function for the string operations below. Dest VARIABLE whether
10110 it is aligned to VALUE bytes. If true, jump to the label. */
10112 ix86_expand_aligntest (variable, value)
10116 rtx label = gen_label_rtx ();
10117 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10118 if (GET_MODE (variable) == DImode)
10119 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10121 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10122 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10127 /* Adjust COUNTER by the VALUE. */
10129 ix86_adjust_counter (countreg, value)
10131 HOST_WIDE_INT value;
10133 if (GET_MODE (countreg) == DImode)
10134 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10136 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10139 /* Zero extend possibly SImode EXP to Pmode register. */
10141 ix86_zero_extend_to_Pmode (exp)
10145 if (GET_MODE (exp) == VOIDmode)
10146 return force_reg (Pmode, exp);
10147 if (GET_MODE (exp) == Pmode)
10148 return copy_to_mode_reg (Pmode, exp);
10149 r = gen_reg_rtx (Pmode);
10150 emit_insn (gen_zero_extendsidi2 (r, exp));
10154 /* Expand string move (memcpy) operation. Use i386 string operations when
10155 profitable. expand_clrstr contains similar code. */
10157 ix86_expand_movstr (dst, src, count_exp, align_exp)
10158 rtx dst, src, count_exp, align_exp;
10160 rtx srcreg, destreg, countreg;
10161 enum machine_mode counter_mode;
10162 HOST_WIDE_INT align = 0;
10163 unsigned HOST_WIDE_INT count = 0;
10168 if (GET_CODE (align_exp) == CONST_INT)
10169 align = INTVAL (align_exp);
10171 /* This simple hack avoids all inlining code and simplifies code below. */
10172 if (!TARGET_ALIGN_STRINGOPS)
10175 if (GET_CODE (count_exp) == CONST_INT)
10176 count = INTVAL (count_exp);
10178 /* Figure out proper mode for counter. For 32bits it is always SImode,
10179 for 64bits use SImode when possible, otherwise DImode.
10180 Set count to number of bytes copied when known at compile time. */
10181 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10182 || x86_64_zero_extended_value (count_exp))
10183 counter_mode = SImode;
10185 counter_mode = DImode;
10187 if (counter_mode != SImode && counter_mode != DImode)
10190 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10191 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10193 emit_insn (gen_cld ());
10195 /* When optimizing for size emit simple rep ; movsb instruction for
10196 counts not divisible by 4. */
10198 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10200 countreg = ix86_zero_extend_to_Pmode (count_exp);
10202 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10203 destreg, srcreg, countreg));
10205 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10206 destreg, srcreg, countreg));
10209 /* For constant aligned (or small unaligned) copies use rep movsl
10210 followed by code copying the rest. For PentiumPro ensure 8 byte
10211 alignment to allow rep movsl acceleration. */
10213 else if (count != 0
10215 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10216 || optimize_size || count < (unsigned int) 64))
10218 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10219 if (count & ~(size - 1))
10221 countreg = copy_to_mode_reg (counter_mode,
10222 GEN_INT ((count >> (size == 4 ? 2 : 3))
10223 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10224 countreg = ix86_zero_extend_to_Pmode (countreg);
10228 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10229 destreg, srcreg, countreg));
10231 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10232 destreg, srcreg, countreg));
10235 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10236 destreg, srcreg, countreg));
10238 if (size == 8 && (count & 0x04))
10239 emit_insn (gen_strmovsi (destreg, srcreg));
10241 emit_insn (gen_strmovhi (destreg, srcreg));
10243 emit_insn (gen_strmovqi (destreg, srcreg));
10245 /* The generic code based on the glibc implementation:
10246 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10247 allowing accelerated copying there)
10248 - copy the data using rep movsl
10249 - copy the rest. */
10254 int desired_alignment = (TARGET_PENTIUMPRO
10255 && (count == 0 || count >= (unsigned int) 260)
10256 ? 8 : UNITS_PER_WORD);
10258 /* In case we don't know anything about the alignment, default to
10259 library version, since it is usually equally fast and result in
10261 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10267 if (TARGET_SINGLE_STRINGOP)
10268 emit_insn (gen_cld ());
10270 countreg2 = gen_reg_rtx (Pmode);
10271 countreg = copy_to_mode_reg (counter_mode, count_exp);
10273 /* We don't use loops to align destination and to copy parts smaller
10274 than 4 bytes, because gcc is able to optimize such code better (in
10275 the case the destination or the count really is aligned, gcc is often
10276 able to predict the branches) and also it is friendlier to the
10277 hardware branch prediction.
10279 Using loops is benefical for generic case, because we can
10280 handle small counts using the loops. Many CPUs (such as Athlon)
10281 have large REP prefix setup costs.
10283 This is quite costy. Maybe we can revisit this decision later or
10284 add some customizability to this code. */
10286 if (count == 0 && align < desired_alignment)
10288 label = gen_label_rtx ();
10289 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10290 LEU, 0, counter_mode, 1, label);
10294 rtx label = ix86_expand_aligntest (destreg, 1);
10295 emit_insn (gen_strmovqi (destreg, srcreg));
10296 ix86_adjust_counter (countreg, 1);
10297 emit_label (label);
10298 LABEL_NUSES (label) = 1;
10302 rtx label = ix86_expand_aligntest (destreg, 2);
10303 emit_insn (gen_strmovhi (destreg, srcreg));
10304 ix86_adjust_counter (countreg, 2);
10305 emit_label (label);
10306 LABEL_NUSES (label) = 1;
10308 if (align <= 4 && desired_alignment > 4)
10310 rtx label = ix86_expand_aligntest (destreg, 4);
10311 emit_insn (gen_strmovsi (destreg, srcreg));
10312 ix86_adjust_counter (countreg, 4);
10313 emit_label (label);
10314 LABEL_NUSES (label) = 1;
10317 if (label && desired_alignment > 4 && !TARGET_64BIT)
10319 emit_label (label);
10320 LABEL_NUSES (label) = 1;
10323 if (!TARGET_SINGLE_STRINGOP)
10324 emit_insn (gen_cld ());
10327 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10329 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10330 destreg, srcreg, countreg2));
10334 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10335 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10336 destreg, srcreg, countreg2));
10341 emit_label (label);
10342 LABEL_NUSES (label) = 1;
10344 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10345 emit_insn (gen_strmovsi (destreg, srcreg));
10346 if ((align <= 4 || count == 0) && TARGET_64BIT)
10348 rtx label = ix86_expand_aligntest (countreg, 4);
10349 emit_insn (gen_strmovsi (destreg, srcreg));
10350 emit_label (label);
10351 LABEL_NUSES (label) = 1;
10353 if (align > 2 && count != 0 && (count & 2))
10354 emit_insn (gen_strmovhi (destreg, srcreg));
10355 if (align <= 2 || count == 0)
10357 rtx label = ix86_expand_aligntest (countreg, 2);
10358 emit_insn (gen_strmovhi (destreg, srcreg));
10359 emit_label (label);
10360 LABEL_NUSES (label) = 1;
10362 if (align > 1 && count != 0 && (count & 1))
10363 emit_insn (gen_strmovqi (destreg, srcreg));
10364 if (align <= 1 || count == 0)
10366 rtx label = ix86_expand_aligntest (countreg, 1);
10367 emit_insn (gen_strmovqi (destreg, srcreg));
10368 emit_label (label);
10369 LABEL_NUSES (label) = 1;
10373 insns = get_insns ();
10376 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10381 /* Expand string clear operation (bzero). Use i386 string operations when
10382 profitable. expand_movstr contains similar code. */
10384 ix86_expand_clrstr (src, count_exp, align_exp)
10385 rtx src, count_exp, align_exp;
10387 rtx destreg, zeroreg, countreg;
10388 enum machine_mode counter_mode;
10389 HOST_WIDE_INT align = 0;
10390 unsigned HOST_WIDE_INT count = 0;
10392 if (GET_CODE (align_exp) == CONST_INT)
10393 align = INTVAL (align_exp);
10395 /* This simple hack avoids all inlining code and simplifies code below. */
10396 if (!TARGET_ALIGN_STRINGOPS)
10399 if (GET_CODE (count_exp) == CONST_INT)
10400 count = INTVAL (count_exp);
10401 /* Figure out proper mode for counter. For 32bits it is always SImode,
10402 for 64bits use SImode when possible, otherwise DImode.
10403 Set count to number of bytes copied when known at compile time. */
10404 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10405 || x86_64_zero_extended_value (count_exp))
10406 counter_mode = SImode;
10408 counter_mode = DImode;
10410 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10412 emit_insn (gen_cld ());
10414 /* When optimizing for size emit simple rep ; movsb instruction for
10415 counts not divisible by 4. */
10417 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10419 countreg = ix86_zero_extend_to_Pmode (count_exp);
10420 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10422 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10423 destreg, countreg));
10425 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10426 destreg, countreg));
10428 else if (count != 0
10430 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10431 || optimize_size || count < (unsigned int) 64))
10433 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10434 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10435 if (count & ~(size - 1))
10437 countreg = copy_to_mode_reg (counter_mode,
10438 GEN_INT ((count >> (size == 4 ? 2 : 3))
10439 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10440 countreg = ix86_zero_extend_to_Pmode (countreg);
10444 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10445 destreg, countreg));
10447 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10448 destreg, countreg));
10451 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10452 destreg, countreg));
10454 if (size == 8 && (count & 0x04))
10455 emit_insn (gen_strsetsi (destreg,
10456 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10458 emit_insn (gen_strsethi (destreg,
10459 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10461 emit_insn (gen_strsetqi (destreg,
10462 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10468 /* Compute desired alignment of the string operation. */
10469 int desired_alignment = (TARGET_PENTIUMPRO
10470 && (count == 0 || count >= (unsigned int) 260)
10471 ? 8 : UNITS_PER_WORD);
10473 /* In case we don't know anything about the alignment, default to
10474 library version, since it is usually equally fast and result in
10476 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10479 if (TARGET_SINGLE_STRINGOP)
10480 emit_insn (gen_cld ());
10482 countreg2 = gen_reg_rtx (Pmode);
10483 countreg = copy_to_mode_reg (counter_mode, count_exp);
10484 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10486 if (count == 0 && align < desired_alignment)
10488 label = gen_label_rtx ();
10489 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10490 LEU, 0, counter_mode, 1, label);
10494 rtx label = ix86_expand_aligntest (destreg, 1);
10495 emit_insn (gen_strsetqi (destreg,
10496 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10497 ix86_adjust_counter (countreg, 1);
10498 emit_label (label);
10499 LABEL_NUSES (label) = 1;
10503 rtx label = ix86_expand_aligntest (destreg, 2);
10504 emit_insn (gen_strsethi (destreg,
10505 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10506 ix86_adjust_counter (countreg, 2);
10507 emit_label (label);
10508 LABEL_NUSES (label) = 1;
10510 if (align <= 4 && desired_alignment > 4)
10512 rtx label = ix86_expand_aligntest (destreg, 4);
10513 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10514 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10516 ix86_adjust_counter (countreg, 4);
10517 emit_label (label);
10518 LABEL_NUSES (label) = 1;
10521 if (label && desired_alignment > 4 && !TARGET_64BIT)
10523 emit_label (label);
10524 LABEL_NUSES (label) = 1;
10528 if (!TARGET_SINGLE_STRINGOP)
10529 emit_insn (gen_cld ());
10532 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10534 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10535 destreg, countreg2));
10539 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10540 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10541 destreg, countreg2));
10545 emit_label (label);
10546 LABEL_NUSES (label) = 1;
10549 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10550 emit_insn (gen_strsetsi (destreg,
10551 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10552 if (TARGET_64BIT && (align <= 4 || count == 0))
10554 rtx label = ix86_expand_aligntest (countreg, 4);
10555 emit_insn (gen_strsetsi (destreg,
10556 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10557 emit_label (label);
10558 LABEL_NUSES (label) = 1;
10560 if (align > 2 && count != 0 && (count & 2))
10561 emit_insn (gen_strsethi (destreg,
10562 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10563 if (align <= 2 || count == 0)
10565 rtx label = ix86_expand_aligntest (countreg, 2);
10566 emit_insn (gen_strsethi (destreg,
10567 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10568 emit_label (label);
10569 LABEL_NUSES (label) = 1;
10571 if (align > 1 && count != 0 && (count & 1))
10572 emit_insn (gen_strsetqi (destreg,
10573 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10574 if (align <= 1 || count == 0)
10576 rtx label = ix86_expand_aligntest (countreg, 1);
10577 emit_insn (gen_strsetqi (destreg,
10578 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10579 emit_label (label);
10580 LABEL_NUSES (label) = 1;
10585 /* Expand strlen. */
10587 ix86_expand_strlen (out, src, eoschar, align)
10588 rtx out, src, eoschar, align;
10590 rtx addr, scratch1, scratch2, scratch3, scratch4;
10592 /* The generic case of strlen expander is long. Avoid it's
10593 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
10595 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10596 && !TARGET_INLINE_ALL_STRINGOPS
10598 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10601 addr = force_reg (Pmode, XEXP (src, 0));
10602 scratch1 = gen_reg_rtx (Pmode);
10604 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10607 /* Well it seems that some optimizer does not combine a call like
10608 foo(strlen(bar), strlen(bar));
10609 when the move and the subtraction is done here. It does calculate
10610 the length just once when these instructions are done inside of
10611 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10612 often used and I use one fewer register for the lifetime of
10613 output_strlen_unroll() this is better. */
10615 emit_move_insn (out, addr);
10617 ix86_expand_strlensi_unroll_1 (out, align);
10619 /* strlensi_unroll_1 returns the address of the zero at the end of
10620 the string, like memchr(), so compute the length by subtracting
10621 the start address. */
10623 emit_insn (gen_subdi3 (out, out, addr));
10625 emit_insn (gen_subsi3 (out, out, addr));
10629 scratch2 = gen_reg_rtx (Pmode);
10630 scratch3 = gen_reg_rtx (Pmode);
10631 scratch4 = force_reg (Pmode, constm1_rtx);
10633 emit_move_insn (scratch3, addr);
10634 eoschar = force_reg (QImode, eoschar);
10636 emit_insn (gen_cld ());
10639 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10640 align, scratch4, scratch3));
10641 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10642 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10646 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10647 align, scratch4, scratch3));
10648 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10649 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10655 /* Expand the appropriate insns for doing strlen if not just doing
10658 out = result, initialized with the start address
10659 align_rtx = alignment of the address.
10660 scratch = scratch register, initialized with the startaddress when
10661 not aligned, otherwise undefined
10663 This is just the body. It needs the initialisations mentioned above and
10664 some address computing at the end. These things are done in i386.md. */
10667 ix86_expand_strlensi_unroll_1 (out, align_rtx)
10668 rtx out, align_rtx;
10672 rtx align_2_label = NULL_RTX;
10673 rtx align_3_label = NULL_RTX;
10674 rtx align_4_label = gen_label_rtx ();
10675 rtx end_0_label = gen_label_rtx ();
10677 rtx tmpreg = gen_reg_rtx (SImode);
10678 rtx scratch = gen_reg_rtx (SImode);
10681 if (GET_CODE (align_rtx) == CONST_INT)
10682 align = INTVAL (align_rtx);
10684 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10686 /* Is there a known alignment and is it less than 4? */
10689 rtx scratch1 = gen_reg_rtx (Pmode);
10690 emit_move_insn (scratch1, out);
10691 /* Is there a known alignment and is it not 2? */
10694 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10695 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10697 /* Leave just the 3 lower bits. */
10698 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10699 NULL_RTX, 0, OPTAB_WIDEN);
10701 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10702 Pmode, 1, align_4_label);
10703 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
10704 Pmode, 1, align_2_label);
10705 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
10706 Pmode, 1, align_3_label);
10710 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10711 check if is aligned to 4 - byte. */
10713 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
10714 NULL_RTX, 0, OPTAB_WIDEN);
10716 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10717 Pmode, 1, align_4_label);
10720 mem = gen_rtx_MEM (QImode, out);
10722 /* Now compare the bytes. */
10724 /* Compare the first n unaligned byte on a byte per byte basis. */
10725 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
10726 QImode, 1, end_0_label);
10728 /* Increment the address. */
10730 emit_insn (gen_adddi3 (out, out, const1_rtx));
10732 emit_insn (gen_addsi3 (out, out, const1_rtx));
10734 /* Not needed with an alignment of 2 */
10737 emit_label (align_2_label);
10739 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10743 emit_insn (gen_adddi3 (out, out, const1_rtx));
10745 emit_insn (gen_addsi3 (out, out, const1_rtx));
10747 emit_label (align_3_label);
10750 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10754 emit_insn (gen_adddi3 (out, out, const1_rtx));
10756 emit_insn (gen_addsi3 (out, out, const1_rtx));
10759 /* Generate loop to check 4 bytes at a time. It is not a good idea to
10760 align this loop. It gives only huge programs, but does not help to
10762 emit_label (align_4_label);
10764 mem = gen_rtx_MEM (SImode, out);
10765 emit_move_insn (scratch, mem);
10767 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10769 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
10771 /* This formula yields a nonzero result iff one of the bytes is zero.
10772 This saves three branches inside loop and many cycles. */
10774 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10775 emit_insn (gen_one_cmplsi2 (scratch, scratch));
10776 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
10777 emit_insn (gen_andsi3 (tmpreg, tmpreg,
10778 gen_int_mode (0x80808080, SImode)));
10779 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
10784 rtx reg = gen_reg_rtx (SImode);
10785 rtx reg2 = gen_reg_rtx (Pmode);
10786 emit_move_insn (reg, tmpreg);
10787 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10789 /* If zero is not in the first two bytes, move two bytes forward. */
10790 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10791 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10792 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10793 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10794 gen_rtx_IF_THEN_ELSE (SImode, tmp,
10797 /* Emit lea manually to avoid clobbering of flags. */
10798 emit_insn (gen_rtx_SET (SImode, reg2,
10799 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
10801 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10802 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10803 emit_insn (gen_rtx_SET (VOIDmode, out,
10804 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
10811 rtx end_2_label = gen_label_rtx ();
10812 /* Is zero in the first two bytes? */
10814 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10815 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10816 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10817 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10818 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10820 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10821 JUMP_LABEL (tmp) = end_2_label;
10823 /* Not in the first two. Move two bytes forward. */
10824 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
10826 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
10828 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
10830 emit_label (end_2_label);
10834 /* Avoid branch in fixing the byte. */
10835 tmpreg = gen_lowpart (QImode, tmpreg);
10836 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
10838 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
10840 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
10842 emit_label (end_0_label);
10846 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
10847 rtx retval, fnaddr, callarg1, callarg2, pop;
10849 rtx use = NULL, call;
10851 if (pop == const0_rtx)
10853 if (TARGET_64BIT && pop)
10857 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
10858 fnaddr = machopic_indirect_call_target (fnaddr);
10860 /* Static functions and indirect calls don't need the pic register. */
10861 if (! TARGET_64BIT && flag_pic
10862 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
10863 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
10864 use_reg (&use, pic_offset_table_rtx);
10866 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
10868 rtx al = gen_rtx_REG (QImode, 0);
10869 emit_move_insn (al, callarg2);
10870 use_reg (&use, al);
10872 #endif /* TARGET_MACHO */
10874 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
10876 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10877 fnaddr = gen_rtx_MEM (QImode, fnaddr);
10880 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
10882 call = gen_rtx_SET (VOIDmode, retval, call);
10885 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
10886 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
10887 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
10890 call = emit_call_insn (call);
10892 CALL_INSN_FUNCTION_USAGE (call) = use;
10896 /* Clear stack slot assignments remembered from previous functions.
10897 This is called from INIT_EXPANDERS once before RTL is emitted for each
10900 static struct machine_function *
10901 ix86_init_machine_status ()
10903 return ggc_alloc_cleared (sizeof (struct machine_function));
10906 /* Return a MEM corresponding to a stack slot with mode MODE.
10907 Allocate a new slot if necessary.
10909 The RTL for a function can have several slots available: N is
10910 which slot to use. */
10913 assign_386_stack_local (mode, n)
10914 enum machine_mode mode;
10917 if (n < 0 || n >= MAX_386_STACK_LOCALS)
10920 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
10921 ix86_stack_locals[(int) mode][n]
10922 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10924 return ix86_stack_locals[(int) mode][n];
10927 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10929 static GTY(()) rtx ix86_tls_symbol;
10931 ix86_tls_get_addr ()
10934 if (!ix86_tls_symbol)
10936 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
10937 (TARGET_GNU_TLS && !TARGET_64BIT)
10938 ? "___tls_get_addr"
10939 : "__tls_get_addr");
10942 return ix86_tls_symbol;
10945 /* Calculate the length of the memory address in the instruction
10946 encoding. Does not include the one-byte modrm, opcode, or prefix. */
10949 memory_address_length (addr)
10952 struct ix86_address parts;
10953 rtx base, index, disp;
10956 if (GET_CODE (addr) == PRE_DEC
10957 || GET_CODE (addr) == POST_INC
10958 || GET_CODE (addr) == PRE_MODIFY
10959 || GET_CODE (addr) == POST_MODIFY)
10962 if (! ix86_decompose_address (addr, &parts))
10966 index = parts.index;
10970 /* Register Indirect. */
10971 if (base && !index && !disp)
10973 /* Special cases: ebp and esp need the two-byte modrm form. */
10974 if (addr == stack_pointer_rtx
10975 || addr == arg_pointer_rtx
10976 || addr == frame_pointer_rtx
10977 || addr == hard_frame_pointer_rtx)
10981 /* Direct Addressing. */
10982 else if (disp && !base && !index)
10987 /* Find the length of the displacement constant. */
10990 if (GET_CODE (disp) == CONST_INT
10991 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
10997 /* An index requires the two-byte modrm form. */
11005 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11006 is set, expect that insn have 8bit immediate alternative. */
11008 ix86_attr_length_immediate_default (insn, shortform)
11014 extract_insn_cached (insn);
11015 for (i = recog_data.n_operands - 1; i >= 0; --i)
11016 if (CONSTANT_P (recog_data.operand[i]))
11021 && GET_CODE (recog_data.operand[i]) == CONST_INT
11022 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11026 switch (get_attr_mode (insn))
11037 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11042 fatal_insn ("unknown insn mode", insn);
11048 /* Compute default value for "length_address" attribute. */
11050 ix86_attr_length_address_default (insn)
11054 extract_insn_cached (insn);
11055 for (i = recog_data.n_operands - 1; i >= 0; --i)
11056 if (GET_CODE (recog_data.operand[i]) == MEM)
11058 return memory_address_length (XEXP (recog_data.operand[i], 0));
11064 /* Return the maximum number of instructions a cpu can issue. */
11071 case PROCESSOR_PENTIUM:
11075 case PROCESSOR_PENTIUMPRO:
11076 case PROCESSOR_PENTIUM4:
11077 case PROCESSOR_ATHLON:
11085 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11086 by DEP_INSN and nothing set by DEP_INSN. */
11089 ix86_flags_dependant (insn, dep_insn, insn_type)
11090 rtx insn, dep_insn;
11091 enum attr_type insn_type;
11095 /* Simplify the test for uninteresting insns. */
11096 if (insn_type != TYPE_SETCC
11097 && insn_type != TYPE_ICMOV
11098 && insn_type != TYPE_FCMOV
11099 && insn_type != TYPE_IBR)
11102 if ((set = single_set (dep_insn)) != 0)
11104 set = SET_DEST (set);
11107 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11108 && XVECLEN (PATTERN (dep_insn), 0) == 2
11109 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11110 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11112 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11113 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11118 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11121 /* This test is true if the dependent insn reads the flags but
11122 not any other potentially set register. */
11123 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11126 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11132 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11133 address with operands set by DEP_INSN. */
11136 ix86_agi_dependant (insn, dep_insn, insn_type)
11137 rtx insn, dep_insn;
11138 enum attr_type insn_type;
11142 if (insn_type == TYPE_LEA
11145 addr = PATTERN (insn);
11146 if (GET_CODE (addr) == SET)
11148 else if (GET_CODE (addr) == PARALLEL
11149 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11150 addr = XVECEXP (addr, 0, 0);
11153 addr = SET_SRC (addr);
11158 extract_insn_cached (insn);
11159 for (i = recog_data.n_operands - 1; i >= 0; --i)
11160 if (GET_CODE (recog_data.operand[i]) == MEM)
11162 addr = XEXP (recog_data.operand[i], 0);
11169 return modified_in_p (addr, dep_insn);
11173 ix86_adjust_cost (insn, link, dep_insn, cost)
11174 rtx insn, link, dep_insn;
11177 enum attr_type insn_type, dep_insn_type;
11178 enum attr_memory memory, dep_memory;
11180 int dep_insn_code_number;
11182 /* Anti and output depenancies have zero cost on all CPUs. */
11183 if (REG_NOTE_KIND (link) != 0)
11186 dep_insn_code_number = recog_memoized (dep_insn);
11188 /* If we can't recognize the insns, we can't really do anything. */
11189 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11192 insn_type = get_attr_type (insn);
11193 dep_insn_type = get_attr_type (dep_insn);
11197 case PROCESSOR_PENTIUM:
11198 /* Address Generation Interlock adds a cycle of latency. */
11199 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11202 /* ??? Compares pair with jump/setcc. */
11203 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11206 /* Floating point stores require value to be ready one cycle ealier. */
11207 if (insn_type == TYPE_FMOV
11208 && get_attr_memory (insn) == MEMORY_STORE
11209 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11213 case PROCESSOR_PENTIUMPRO:
11214 memory = get_attr_memory (insn);
11215 dep_memory = get_attr_memory (dep_insn);
11217 /* Since we can't represent delayed latencies of load+operation,
11218 increase the cost here for non-imov insns. */
11219 if (dep_insn_type != TYPE_IMOV
11220 && dep_insn_type != TYPE_FMOV
11221 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11224 /* INT->FP conversion is expensive. */
11225 if (get_attr_fp_int_src (dep_insn))
11228 /* There is one cycle extra latency between an FP op and a store. */
11229 if (insn_type == TYPE_FMOV
11230 && (set = single_set (dep_insn)) != NULL_RTX
11231 && (set2 = single_set (insn)) != NULL_RTX
11232 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11233 && GET_CODE (SET_DEST (set2)) == MEM)
11236 /* Show ability of reorder buffer to hide latency of load by executing
11237 in parallel with previous instruction in case
11238 previous instruction is not needed to compute the address. */
11239 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11240 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11242 /* Claim moves to take one cycle, as core can issue one load
11243 at time and the next load can start cycle later. */
11244 if (dep_insn_type == TYPE_IMOV
11245 || dep_insn_type == TYPE_FMOV)
11253 memory = get_attr_memory (insn);
11254 dep_memory = get_attr_memory (dep_insn);
11255 /* The esp dependency is resolved before the instruction is really
11257 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11258 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11261 /* Since we can't represent delayed latencies of load+operation,
11262 increase the cost here for non-imov insns. */
11263 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11264 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11266 /* INT->FP conversion is expensive. */
11267 if (get_attr_fp_int_src (dep_insn))
11270 /* Show ability of reorder buffer to hide latency of load by executing
11271 in parallel with previous instruction in case
11272 previous instruction is not needed to compute the address. */
11273 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11274 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11276 /* Claim moves to take one cycle, as core can issue one load
11277 at time and the next load can start cycle later. */
11278 if (dep_insn_type == TYPE_IMOV
11279 || dep_insn_type == TYPE_FMOV)
11288 case PROCESSOR_ATHLON:
11289 memory = get_attr_memory (insn);
11290 dep_memory = get_attr_memory (dep_insn);
11292 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11294 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
11299 /* Show ability of reorder buffer to hide latency of load by executing
11300 in parallel with previous instruction in case
11301 previous instruction is not needed to compute the address. */
11302 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11303 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11305 /* Claim moves to take one cycle, as core can issue one load
11306 at time and the next load can start cycle later. */
11307 if (dep_insn_type == TYPE_IMOV
11308 || dep_insn_type == TYPE_FMOV)
11310 else if (cost >= 3)
11325 struct ppro_sched_data
11328 int issued_this_cycle;
11332 static enum attr_ppro_uops
11333 ix86_safe_ppro_uops (insn)
11336 if (recog_memoized (insn) >= 0)
11337 return get_attr_ppro_uops (insn);
11339 return PPRO_UOPS_MANY;
11343 ix86_dump_ppro_packet (dump)
11346 if (ix86_sched_data.ppro.decode[0])
11348 fprintf (dump, "PPRO packet: %d",
11349 INSN_UID (ix86_sched_data.ppro.decode[0]));
11350 if (ix86_sched_data.ppro.decode[1])
11351 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11352 if (ix86_sched_data.ppro.decode[2])
11353 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11354 fputc ('\n', dump);
11358 /* We're beginning a new block. Initialize data structures as necessary. */
11361 ix86_sched_init (dump, sched_verbose, veclen)
11362 FILE *dump ATTRIBUTE_UNUSED;
11363 int sched_verbose ATTRIBUTE_UNUSED;
11364 int veclen ATTRIBUTE_UNUSED;
11366 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11369 /* Shift INSN to SLOT, and shift everything else down. */
11372 ix86_reorder_insn (insnp, slot)
11379 insnp[0] = insnp[1];
11380 while (++insnp != slot);
11386 ix86_sched_reorder_ppro (ready, e_ready)
11391 enum attr_ppro_uops cur_uops;
11392 int issued_this_cycle;
11396 /* At this point .ppro.decode contains the state of the three
11397 decoders from last "cycle". That is, those insns that were
11398 actually independent. But here we're scheduling for the
11399 decoder, and we may find things that are decodable in the
11402 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11403 issued_this_cycle = 0;
11406 cur_uops = ix86_safe_ppro_uops (*insnp);
11408 /* If the decoders are empty, and we've a complex insn at the
11409 head of the priority queue, let it issue without complaint. */
11410 if (decode[0] == NULL)
11412 if (cur_uops == PPRO_UOPS_MANY)
11414 decode[0] = *insnp;
11418 /* Otherwise, search for a 2-4 uop unsn to issue. */
11419 while (cur_uops != PPRO_UOPS_FEW)
11421 if (insnp == ready)
11423 cur_uops = ix86_safe_ppro_uops (*--insnp);
11426 /* If so, move it to the head of the line. */
11427 if (cur_uops == PPRO_UOPS_FEW)
11428 ix86_reorder_insn (insnp, e_ready);
11430 /* Issue the head of the queue. */
11431 issued_this_cycle = 1;
11432 decode[0] = *e_ready--;
11435 /* Look for simple insns to fill in the other two slots. */
11436 for (i = 1; i < 3; ++i)
11437 if (decode[i] == NULL)
11439 if (ready > e_ready)
11443 cur_uops = ix86_safe_ppro_uops (*insnp);
11444 while (cur_uops != PPRO_UOPS_ONE)
11446 if (insnp == ready)
11448 cur_uops = ix86_safe_ppro_uops (*--insnp);
11451 /* Found one. Move it to the head of the queue and issue it. */
11452 if (cur_uops == PPRO_UOPS_ONE)
11454 ix86_reorder_insn (insnp, e_ready);
11455 decode[i] = *e_ready--;
11456 issued_this_cycle++;
11460 /* ??? Didn't find one. Ideally, here we would do a lazy split
11461 of 2-uop insns, issue one and queue the other. */
11465 if (issued_this_cycle == 0)
11466 issued_this_cycle = 1;
11467 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11470 /* We are about to being issuing insns for this clock cycle.
11471 Override the default sort algorithm to better slot instructions. */
11473 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11474 FILE *dump ATTRIBUTE_UNUSED;
11475 int sched_verbose ATTRIBUTE_UNUSED;
11478 int clock_var ATTRIBUTE_UNUSED;
11480 int n_ready = *n_readyp;
11481 rtx *e_ready = ready + n_ready - 1;
11483 /* Make sure to go ahead and initialize key items in
11484 ix86_sched_data if we are not going to bother trying to
11485 reorder the ready queue. */
11488 ix86_sched_data.ppro.issued_this_cycle = 1;
11497 case PROCESSOR_PENTIUMPRO:
11498 ix86_sched_reorder_ppro (ready, e_ready);
11503 return ix86_issue_rate ();
11506 /* We are about to issue INSN. Return the number of insns left on the
11507 ready queue that can be issued this cycle. */
11510 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11514 int can_issue_more;
11520 return can_issue_more - 1;
11522 case PROCESSOR_PENTIUMPRO:
11524 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
11526 if (uops == PPRO_UOPS_MANY)
11529 ix86_dump_ppro_packet (dump);
11530 ix86_sched_data.ppro.decode[0] = insn;
11531 ix86_sched_data.ppro.decode[1] = NULL;
11532 ix86_sched_data.ppro.decode[2] = NULL;
11534 ix86_dump_ppro_packet (dump);
11535 ix86_sched_data.ppro.decode[0] = NULL;
11537 else if (uops == PPRO_UOPS_FEW)
11540 ix86_dump_ppro_packet (dump);
11541 ix86_sched_data.ppro.decode[0] = insn;
11542 ix86_sched_data.ppro.decode[1] = NULL;
11543 ix86_sched_data.ppro.decode[2] = NULL;
11547 for (i = 0; i < 3; ++i)
11548 if (ix86_sched_data.ppro.decode[i] == NULL)
11550 ix86_sched_data.ppro.decode[i] = insn;
11558 ix86_dump_ppro_packet (dump);
11559 ix86_sched_data.ppro.decode[0] = NULL;
11560 ix86_sched_data.ppro.decode[1] = NULL;
11561 ix86_sched_data.ppro.decode[2] = NULL;
11565 return --ix86_sched_data.ppro.issued_this_cycle;
11570 ia32_use_dfa_pipeline_interface ()
11572 if (ix86_cpu == PROCESSOR_PENTIUM)
11577 /* How many alternative schedules to try. This should be as wide as the
11578 scheduling freedom in the DFA, but no wider. Making this value too
11579 large results extra work for the scheduler. */
11582 ia32_multipass_dfa_lookahead ()
11584 if (ix86_cpu == PROCESSOR_PENTIUM)
11591 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11592 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
11596 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11598 rtx dstref, srcref, dstreg, srcreg;
11602 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11604 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11608 /* Subroutine of above to actually do the updating by recursively walking
11612 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11614 rtx dstref, srcref, dstreg, srcreg;
11616 enum rtx_code code = GET_CODE (x);
11617 const char *format_ptr = GET_RTX_FORMAT (code);
11620 if (code == MEM && XEXP (x, 0) == dstreg)
11621 MEM_COPY_ATTRIBUTES (x, dstref);
11622 else if (code == MEM && XEXP (x, 0) == srcreg)
11623 MEM_COPY_ATTRIBUTES (x, srcref);
11625 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11627 if (*format_ptr == 'e')
11628 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11630 else if (*format_ptr == 'E')
11631 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11632 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
11637 /* Compute the alignment given to a constant that is being placed in memory.
11638 EXP is the constant and ALIGN is the alignment that the object would
11640 The value of this function is used instead of that alignment to align
11644 ix86_constant_alignment (exp, align)
11648 if (TREE_CODE (exp) == REAL_CST)
11650 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11652 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11655 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
11662 /* Compute the alignment for a static variable.
11663 TYPE is the data type, and ALIGN is the alignment that
11664 the object would ordinarily have. The value of this function is used
11665 instead of that alignment to align the object. */
11668 ix86_data_alignment (type, align)
11672 if (AGGREGATE_TYPE_P (type)
11673 && TYPE_SIZE (type)
11674 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11675 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11676 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11679 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11680 to 16byte boundary. */
11683 if (AGGREGATE_TYPE_P (type)
11684 && TYPE_SIZE (type)
11685 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11686 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11687 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11691 if (TREE_CODE (type) == ARRAY_TYPE)
11693 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11695 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11698 else if (TREE_CODE (type) == COMPLEX_TYPE)
11701 if (TYPE_MODE (type) == DCmode && align < 64)
11703 if (TYPE_MODE (type) == XCmode && align < 128)
11706 else if ((TREE_CODE (type) == RECORD_TYPE
11707 || TREE_CODE (type) == UNION_TYPE
11708 || TREE_CODE (type) == QUAL_UNION_TYPE)
11709 && TYPE_FIELDS (type))
11711 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11713 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11716 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11717 || TREE_CODE (type) == INTEGER_TYPE)
11719 if (TYPE_MODE (type) == DFmode && align < 64)
11721 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11728 /* Compute the alignment for a local variable.
11729 TYPE is the data type, and ALIGN is the alignment that
11730 the object would ordinarily have. The value of this macro is used
11731 instead of that alignment to align the object. */
11734 ix86_local_alignment (type, align)
11738 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11739 to 16byte boundary. */
11742 if (AGGREGATE_TYPE_P (type)
11743 && TYPE_SIZE (type)
11744 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11745 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11746 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11749 if (TREE_CODE (type) == ARRAY_TYPE)
11751 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11753 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11756 else if (TREE_CODE (type) == COMPLEX_TYPE)
11758 if (TYPE_MODE (type) == DCmode && align < 64)
11760 if (TYPE_MODE (type) == XCmode && align < 128)
11763 else if ((TREE_CODE (type) == RECORD_TYPE
11764 || TREE_CODE (type) == UNION_TYPE
11765 || TREE_CODE (type) == QUAL_UNION_TYPE)
11766 && TYPE_FIELDS (type))
11768 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11770 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11773 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11774 || TREE_CODE (type) == INTEGER_TYPE)
11777 if (TYPE_MODE (type) == DFmode && align < 64)
11779 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11785 /* Emit RTL insns to initialize the variable parts of a trampoline.
11786 FNADDR is an RTX for the address of the function's pure code.
11787 CXT is an RTX for the static chain value for the function. */
11789 x86_initialize_trampoline (tramp, fnaddr, cxt)
11790 rtx tramp, fnaddr, cxt;
11794 /* Compute offset from the end of the jmp to the target function. */
11795 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11796 plus_constant (tramp, 10),
11797 NULL_RTX, 1, OPTAB_DIRECT);
11798 emit_move_insn (gen_rtx_MEM (QImode, tramp),
11799 gen_int_mode (0xb9, QImode));
11800 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11801 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
11802 gen_int_mode (0xe9, QImode));
11803 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11808 /* Try to load address using shorter movl instead of movabs.
11809 We may want to support movq for kernel mode, but kernel does not use
11810 trampolines at the moment. */
11811 if (x86_64_zero_extended_value (fnaddr))
11813 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11814 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11815 gen_int_mode (0xbb41, HImode));
11816 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11817 gen_lowpart (SImode, fnaddr));
11822 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11823 gen_int_mode (0xbb49, HImode));
11824 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11828 /* Load static chain using movabs to r10. */
11829 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11830 gen_int_mode (0xba49, HImode));
11831 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11834 /* Jump to the r11 */
11835 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11836 gen_int_mode (0xff49, HImode));
11837 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
11838 gen_int_mode (0xe3, QImode));
11840 if (offset > TRAMPOLINE_SIZE)
11844 #ifdef TRANSFER_FROM_TRAMPOLINE
11845 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
11846 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
11850 #define def_builtin(MASK, NAME, TYPE, CODE) \
11852 if ((MASK) & target_flags) \
11853 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11854 NULL, NULL_TREE); \
11857 struct builtin_description
11859 const unsigned int mask;
11860 const enum insn_code icode;
11861 const char *const name;
11862 const enum ix86_builtins code;
11863 const enum rtx_code comparison;
11864 const unsigned int flag;
11867 /* Used for builtins that are enabled both by -msse and -msse2. */
11868 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
11870 static const struct builtin_description bdesc_comi[] =
11872 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
11873 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
11874 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
11875 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
11876 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
11877 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
11878 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
11879 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
11880 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
11881 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
11882 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
11883 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
11884 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
11885 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
11886 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
11887 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
11888 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
11889 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
11890 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
11891 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
11892 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
11893 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
11894 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
11895 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
11898 static const struct builtin_description bdesc_2arg[] =
11901 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11902 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11903 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11904 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11905 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11906 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11907 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11908 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11910 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11911 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11912 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11913 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11914 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11915 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11916 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11917 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11918 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11919 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11920 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11921 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11922 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11923 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11924 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11925 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11926 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11927 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11928 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11929 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11931 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11932 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11933 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11934 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11936 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
11937 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
11938 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
11939 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
11941 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11942 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11943 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11944 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11945 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
11948 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11949 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11950 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11951 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11952 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11953 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11955 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11956 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11957 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11958 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11959 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11960 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11961 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11962 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11964 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11965 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
11966 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
11968 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11969 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11970 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11971 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11973 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11974 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
11976 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11977 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11978 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11979 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11980 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11981 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11983 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11984 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11985 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11986 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
11988 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11989 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11990 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11991 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11992 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11993 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
11996 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11997 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11998 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12000 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12001 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12003 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12004 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12005 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12006 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12007 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12008 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12010 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12011 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12012 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12013 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12014 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12015 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12017 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12018 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12019 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12020 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12022 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12023 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12026 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12027 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12028 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12029 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12030 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12031 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12032 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12033 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12035 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12036 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12037 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12038 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12039 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12040 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12041 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12042 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12043 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12044 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12045 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12046 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12047 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12048 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12049 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12050 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12051 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12052 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12053 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12054 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12056 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12057 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12058 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12059 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12061 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12062 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12063 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12064 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12066 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12067 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12068 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12071 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12072 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12073 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12074 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12075 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12076 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12077 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12078 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12080 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12081 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12082 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12083 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12084 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12085 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12086 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12087 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12089 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12090 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12091 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12092 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12094 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12095 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12096 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12097 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12099 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12100 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12102 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12103 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12104 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12105 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12106 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12107 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12109 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12110 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12111 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12112 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12114 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12115 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12116 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12117 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12118 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12119 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12121 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12122 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12123 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12125 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12126 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12128 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12129 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12130 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12131 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12132 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12133 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12135 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12136 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12137 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12138 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12139 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12140 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12142 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12143 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12144 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12145 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12147 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12149 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12150 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12151 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
12154 static const struct builtin_description bdesc_1arg[] =
12156 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12157 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12159 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12160 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12161 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12163 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12164 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12165 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12166 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12168 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12169 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12170 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12172 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12174 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12175 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12177 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12178 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12179 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12180 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12181 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12183 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12185 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12186 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12188 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12189 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12190 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
12194 ix86_init_builtins ()
12197 ix86_init_mmx_sse_builtins ();
12200 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12201 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12204 ix86_init_mmx_sse_builtins ()
12206 const struct builtin_description * d;
12209 tree pchar_type_node = build_pointer_type (char_type_node);
12210 tree pfloat_type_node = build_pointer_type (float_type_node);
12211 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12212 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12213 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12216 tree int_ftype_v4sf_v4sf
12217 = build_function_type_list (integer_type_node,
12218 V4SF_type_node, V4SF_type_node, NULL_TREE);
12219 tree v4si_ftype_v4sf_v4sf
12220 = build_function_type_list (V4SI_type_node,
12221 V4SF_type_node, V4SF_type_node, NULL_TREE);
12222 /* MMX/SSE/integer conversions. */
12223 tree int_ftype_v4sf
12224 = build_function_type_list (integer_type_node,
12225 V4SF_type_node, NULL_TREE);
12226 tree int_ftype_v8qi
12227 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12228 tree v4sf_ftype_v4sf_int
12229 = build_function_type_list (V4SF_type_node,
12230 V4SF_type_node, integer_type_node, NULL_TREE);
12231 tree v4sf_ftype_v4sf_v2si
12232 = build_function_type_list (V4SF_type_node,
12233 V4SF_type_node, V2SI_type_node, NULL_TREE);
12234 tree int_ftype_v4hi_int
12235 = build_function_type_list (integer_type_node,
12236 V4HI_type_node, integer_type_node, NULL_TREE);
12237 tree v4hi_ftype_v4hi_int_int
12238 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12239 integer_type_node, integer_type_node,
12241 /* Miscellaneous. */
12242 tree v8qi_ftype_v4hi_v4hi
12243 = build_function_type_list (V8QI_type_node,
12244 V4HI_type_node, V4HI_type_node, NULL_TREE);
12245 tree v4hi_ftype_v2si_v2si
12246 = build_function_type_list (V4HI_type_node,
12247 V2SI_type_node, V2SI_type_node, NULL_TREE);
12248 tree v4sf_ftype_v4sf_v4sf_int
12249 = build_function_type_list (V4SF_type_node,
12250 V4SF_type_node, V4SF_type_node,
12251 integer_type_node, NULL_TREE);
12252 tree v2si_ftype_v4hi_v4hi
12253 = build_function_type_list (V2SI_type_node,
12254 V4HI_type_node, V4HI_type_node, NULL_TREE);
12255 tree v4hi_ftype_v4hi_int
12256 = build_function_type_list (V4HI_type_node,
12257 V4HI_type_node, integer_type_node, NULL_TREE);
12258 tree v4hi_ftype_v4hi_di
12259 = build_function_type_list (V4HI_type_node,
12260 V4HI_type_node, long_long_unsigned_type_node,
12262 tree v2si_ftype_v2si_di
12263 = build_function_type_list (V2SI_type_node,
12264 V2SI_type_node, long_long_unsigned_type_node,
12266 tree void_ftype_void
12267 = build_function_type (void_type_node, void_list_node);
12268 tree void_ftype_unsigned
12269 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12270 tree unsigned_ftype_void
12271 = build_function_type (unsigned_type_node, void_list_node);
12273 = build_function_type (long_long_unsigned_type_node, void_list_node);
12274 tree v4sf_ftype_void
12275 = build_function_type (V4SF_type_node, void_list_node);
12276 tree v2si_ftype_v4sf
12277 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12278 /* Loads/stores. */
12279 tree void_ftype_v8qi_v8qi_pchar
12280 = build_function_type_list (void_type_node,
12281 V8QI_type_node, V8QI_type_node,
12282 pchar_type_node, NULL_TREE);
12283 tree v4sf_ftype_pfloat
12284 = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
12285 /* @@@ the type is bogus */
12286 tree v4sf_ftype_v4sf_pv2si
12287 = build_function_type_list (V4SF_type_node,
12288 V4SF_type_node, pv2di_type_node, NULL_TREE);
12289 tree void_ftype_pv2si_v4sf
12290 = build_function_type_list (void_type_node,
12291 pv2di_type_node, V4SF_type_node, NULL_TREE);
12292 tree void_ftype_pfloat_v4sf
12293 = build_function_type_list (void_type_node,
12294 pfloat_type_node, V4SF_type_node, NULL_TREE);
12295 tree void_ftype_pdi_di
12296 = build_function_type_list (void_type_node,
12297 pdi_type_node, long_long_unsigned_type_node,
12299 tree void_ftype_pv2di_v2di
12300 = build_function_type_list (void_type_node,
12301 pv2di_type_node, V2DI_type_node, NULL_TREE);
12302 /* Normal vector unops. */
12303 tree v4sf_ftype_v4sf
12304 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12306 /* Normal vector binops. */
12307 tree v4sf_ftype_v4sf_v4sf
12308 = build_function_type_list (V4SF_type_node,
12309 V4SF_type_node, V4SF_type_node, NULL_TREE);
12310 tree v8qi_ftype_v8qi_v8qi
12311 = build_function_type_list (V8QI_type_node,
12312 V8QI_type_node, V8QI_type_node, NULL_TREE);
12313 tree v4hi_ftype_v4hi_v4hi
12314 = build_function_type_list (V4HI_type_node,
12315 V4HI_type_node, V4HI_type_node, NULL_TREE);
12316 tree v2si_ftype_v2si_v2si
12317 = build_function_type_list (V2SI_type_node,
12318 V2SI_type_node, V2SI_type_node, NULL_TREE);
12319 tree di_ftype_di_di
12320 = build_function_type_list (long_long_unsigned_type_node,
12321 long_long_unsigned_type_node,
12322 long_long_unsigned_type_node, NULL_TREE);
12324 tree v2si_ftype_v2sf
12325 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12326 tree v2sf_ftype_v2si
12327 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12328 tree v2si_ftype_v2si
12329 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12330 tree v2sf_ftype_v2sf
12331 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12332 tree v2sf_ftype_v2sf_v2sf
12333 = build_function_type_list (V2SF_type_node,
12334 V2SF_type_node, V2SF_type_node, NULL_TREE);
12335 tree v2si_ftype_v2sf_v2sf
12336 = build_function_type_list (V2SI_type_node,
12337 V2SF_type_node, V2SF_type_node, NULL_TREE);
12338 tree pint_type_node = build_pointer_type (integer_type_node);
12339 tree pdouble_type_node = build_pointer_type (double_type_node);
12340 tree int_ftype_v2df_v2df
12341 = build_function_type_list (integer_type_node,
12342 V2DF_type_node, V2DF_type_node, NULL_TREE);
12345 = build_function_type (intTI_type_node, void_list_node);
12346 tree ti_ftype_ti_ti
12347 = build_function_type_list (intTI_type_node,
12348 intTI_type_node, intTI_type_node, NULL_TREE);
12349 tree void_ftype_pvoid
12350 = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
12352 = build_function_type_list (V2DI_type_node,
12353 long_long_unsigned_type_node, NULL_TREE);
12354 tree v4sf_ftype_v4si
12355 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12356 tree v4si_ftype_v4sf
12357 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12358 tree v2df_ftype_v4si
12359 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12360 tree v4si_ftype_v2df
12361 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12362 tree v2si_ftype_v2df
12363 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12364 tree v4sf_ftype_v2df
12365 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12366 tree v2df_ftype_v2si
12367 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12368 tree v2df_ftype_v4sf
12369 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12370 tree int_ftype_v2df
12371 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12372 tree v2df_ftype_v2df_int
12373 = build_function_type_list (V2DF_type_node,
12374 V2DF_type_node, integer_type_node, NULL_TREE);
12375 tree v4sf_ftype_v4sf_v2df
12376 = build_function_type_list (V4SF_type_node,
12377 V4SF_type_node, V2DF_type_node, NULL_TREE);
12378 tree v2df_ftype_v2df_v4sf
12379 = build_function_type_list (V2DF_type_node,
12380 V2DF_type_node, V4SF_type_node, NULL_TREE);
12381 tree v2df_ftype_v2df_v2df_int
12382 = build_function_type_list (V2DF_type_node,
12383 V2DF_type_node, V2DF_type_node,
12386 tree v2df_ftype_v2df_pv2si
12387 = build_function_type_list (V2DF_type_node,
12388 V2DF_type_node, pv2si_type_node, NULL_TREE);
12389 tree void_ftype_pv2si_v2df
12390 = build_function_type_list (void_type_node,
12391 pv2si_type_node, V2DF_type_node, NULL_TREE);
12392 tree void_ftype_pdouble_v2df
12393 = build_function_type_list (void_type_node,
12394 pdouble_type_node, V2DF_type_node, NULL_TREE);
12395 tree void_ftype_pint_int
12396 = build_function_type_list (void_type_node,
12397 pint_type_node, integer_type_node, NULL_TREE);
12398 tree void_ftype_v16qi_v16qi_pchar
12399 = build_function_type_list (void_type_node,
12400 V16QI_type_node, V16QI_type_node,
12401 pchar_type_node, NULL_TREE);
12402 tree v2df_ftype_pdouble
12403 = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
12404 tree v2df_ftype_v2df_v2df
12405 = build_function_type_list (V2DF_type_node,
12406 V2DF_type_node, V2DF_type_node, NULL_TREE);
12407 tree v16qi_ftype_v16qi_v16qi
12408 = build_function_type_list (V16QI_type_node,
12409 V16QI_type_node, V16QI_type_node, NULL_TREE);
12410 tree v8hi_ftype_v8hi_v8hi
12411 = build_function_type_list (V8HI_type_node,
12412 V8HI_type_node, V8HI_type_node, NULL_TREE);
12413 tree v4si_ftype_v4si_v4si
12414 = build_function_type_list (V4SI_type_node,
12415 V4SI_type_node, V4SI_type_node, NULL_TREE);
12416 tree v2di_ftype_v2di_v2di
12417 = build_function_type_list (V2DI_type_node,
12418 V2DI_type_node, V2DI_type_node, NULL_TREE);
12419 tree v2di_ftype_v2df_v2df
12420 = build_function_type_list (V2DI_type_node,
12421 V2DF_type_node, V2DF_type_node, NULL_TREE);
12422 tree v2df_ftype_v2df
12423 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12424 tree v2df_ftype_double
12425 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12426 tree v2df_ftype_double_double
12427 = build_function_type_list (V2DF_type_node,
12428 double_type_node, double_type_node, NULL_TREE);
12429 tree int_ftype_v8hi_int
12430 = build_function_type_list (integer_type_node,
12431 V8HI_type_node, integer_type_node, NULL_TREE);
12432 tree v8hi_ftype_v8hi_int_int
12433 = build_function_type_list (V8HI_type_node,
12434 V8HI_type_node, integer_type_node,
12435 integer_type_node, NULL_TREE);
12436 tree v2di_ftype_v2di_int
12437 = build_function_type_list (V2DI_type_node,
12438 V2DI_type_node, integer_type_node, NULL_TREE);
12439 tree v4si_ftype_v4si_int
12440 = build_function_type_list (V4SI_type_node,
12441 V4SI_type_node, integer_type_node, NULL_TREE);
12442 tree v8hi_ftype_v8hi_int
12443 = build_function_type_list (V8HI_type_node,
12444 V8HI_type_node, integer_type_node, NULL_TREE);
12445 tree v8hi_ftype_v8hi_v2di
12446 = build_function_type_list (V8HI_type_node,
12447 V8HI_type_node, V2DI_type_node, NULL_TREE);
12448 tree v4si_ftype_v4si_v2di
12449 = build_function_type_list (V4SI_type_node,
12450 V4SI_type_node, V2DI_type_node, NULL_TREE);
12451 tree v4si_ftype_v8hi_v8hi
12452 = build_function_type_list (V4SI_type_node,
12453 V8HI_type_node, V8HI_type_node, NULL_TREE);
12454 tree di_ftype_v8qi_v8qi
12455 = build_function_type_list (long_long_unsigned_type_node,
12456 V8QI_type_node, V8QI_type_node, NULL_TREE);
12457 tree v2di_ftype_v16qi_v16qi
12458 = build_function_type_list (V2DI_type_node,
12459 V16QI_type_node, V16QI_type_node, NULL_TREE);
12460 tree int_ftype_v16qi
12461 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12463 /* Add all builtins that are more or less simple operations on two
12465 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12467 /* Use one of the operands; the target can have a different mode for
12468 mask-generating compares. */
12469 enum machine_mode mode;
12474 mode = insn_data[d->icode].operand[1].mode;
12479 type = v16qi_ftype_v16qi_v16qi;
12482 type = v8hi_ftype_v8hi_v8hi;
12485 type = v4si_ftype_v4si_v4si;
12488 type = v2di_ftype_v2di_v2di;
12491 type = v2df_ftype_v2df_v2df;
12494 type = ti_ftype_ti_ti;
12497 type = v4sf_ftype_v4sf_v4sf;
12500 type = v8qi_ftype_v8qi_v8qi;
12503 type = v4hi_ftype_v4hi_v4hi;
12506 type = v2si_ftype_v2si_v2si;
12509 type = di_ftype_di_di;
12516 /* Override for comparisons. */
12517 if (d->icode == CODE_FOR_maskcmpv4sf3
12518 || d->icode == CODE_FOR_maskncmpv4sf3
12519 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12520 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12521 type = v4si_ftype_v4sf_v4sf;
12523 if (d->icode == CODE_FOR_maskcmpv2df3
12524 || d->icode == CODE_FOR_maskncmpv2df3
12525 || d->icode == CODE_FOR_vmmaskcmpv2df3
12526 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12527 type = v2di_ftype_v2df_v2df;
12529 def_builtin (d->mask, d->name, type, d->code);
12532 /* Add the remaining MMX insns with somewhat more complicated types. */
12533 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12534 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12535 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12536 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12537 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12538 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12539 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12541 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12542 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12543 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12545 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12546 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12548 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12549 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12551 /* comi/ucomi insns. */
12552 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12553 if (d->mask == MASK_SSE2)
12554 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12556 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12558 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12559 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12560 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12562 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12563 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12564 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12565 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12566 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12567 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12569 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12570 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12572 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12574 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12575 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12576 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12577 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12578 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12579 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12581 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12582 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12583 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12584 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12586 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12587 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12588 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12589 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12591 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12593 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12595 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12596 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12597 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12598 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12599 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12600 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12602 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12604 /* Original 3DNow! */
12605 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12606 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12607 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12608 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12609 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12610 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12611 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12612 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12613 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12614 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12615 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12616 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12617 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12618 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12619 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12620 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12621 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12622 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12623 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12624 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12626 /* 3DNow! extension as used in the Athlon CPU. */
12627 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12628 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12629 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12630 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12631 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12632 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12634 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12637 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12638 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12640 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12641 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12643 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
12644 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
12645 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
12646 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12647 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12648 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12650 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12651 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12652 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12653 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12655 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12656 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12657 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12658 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12659 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12661 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12662 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12663 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12664 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12666 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12667 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12669 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12671 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12672 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12674 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12675 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12676 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12677 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12678 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12680 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12682 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12683 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12685 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12686 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12687 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12689 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12690 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12691 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12693 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12694 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12695 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12696 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
12697 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
12698 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12699 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12701 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
12702 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12703 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
12705 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12706 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12707 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12709 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12710 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12711 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12713 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12714 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12716 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
12717 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12718 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12719 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12721 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
12722 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12723 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12724 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12726 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12727 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12729 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
12732 /* Errors in the source file can cause expand_expr to return const0_rtx
12733 where we expect a vector. To avoid crashing, use one of the vector
12734 clear instructions. */
12736 safe_vector_operand (x, mode)
12738 enum machine_mode mode;
12740 if (x != const0_rtx)
12742 x = gen_reg_rtx (mode);
12744 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
12745 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12746 : gen_rtx_SUBREG (DImode, x, 0)));
12748 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12749 : gen_rtx_SUBREG (V4SFmode, x, 0)));
12753 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
12756 ix86_expand_binop_builtin (icode, arglist, target)
12757 enum insn_code icode;
12762 tree arg0 = TREE_VALUE (arglist);
12763 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12764 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12765 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12766 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12767 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12768 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12770 if (VECTOR_MODE_P (mode0))
12771 op0 = safe_vector_operand (op0, mode0);
12772 if (VECTOR_MODE_P (mode1))
12773 op1 = safe_vector_operand (op1, mode1);
12776 || GET_MODE (target) != tmode
12777 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12778 target = gen_reg_rtx (tmode);
12780 /* In case the insn wants input operands in modes different from
12781 the result, abort. */
12782 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
12785 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12786 op0 = copy_to_mode_reg (mode0, op0);
12787 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12788 op1 = copy_to_mode_reg (mode1, op1);
12790 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12791 yet one of the two must not be a memory. This is normally enforced
12792 by expanders, but we didn't bother to create one here. */
12793 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12794 op0 = copy_to_mode_reg (mode0, op0);
12796 pat = GEN_FCN (icode) (target, op0, op1);
12803 /* Subroutine of ix86_expand_builtin to take care of stores. */
12806 ix86_expand_store_builtin (icode, arglist)
12807 enum insn_code icode;
12811 tree arg0 = TREE_VALUE (arglist);
12812 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12813 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12814 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12815 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12816 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12818 if (VECTOR_MODE_P (mode1))
12819 op1 = safe_vector_operand (op1, mode1);
12821 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12823 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12824 op1 = copy_to_mode_reg (mode1, op1);
12826 pat = GEN_FCN (icode) (op0, op1);
12832 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12835 ix86_expand_unop_builtin (icode, arglist, target, do_load)
12836 enum insn_code icode;
12842 tree arg0 = TREE_VALUE (arglist);
12843 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12844 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12845 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12848 || GET_MODE (target) != tmode
12849 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12850 target = gen_reg_rtx (tmode);
12852 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12855 if (VECTOR_MODE_P (mode0))
12856 op0 = safe_vector_operand (op0, mode0);
12858 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12859 op0 = copy_to_mode_reg (mode0, op0);
12862 pat = GEN_FCN (icode) (target, op0);
12869 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12870 sqrtss, rsqrtss, rcpss. */
12873 ix86_expand_unop1_builtin (icode, arglist, target)
12874 enum insn_code icode;
12879 tree arg0 = TREE_VALUE (arglist);
12880 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12881 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12882 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12885 || GET_MODE (target) != tmode
12886 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12887 target = gen_reg_rtx (tmode);
12889 if (VECTOR_MODE_P (mode0))
12890 op0 = safe_vector_operand (op0, mode0);
12892 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12893 op0 = copy_to_mode_reg (mode0, op0);
12896 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12897 op1 = copy_to_mode_reg (mode0, op1);
12899 pat = GEN_FCN (icode) (target, op0, op1);
12906 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12909 ix86_expand_sse_compare (d, arglist, target)
12910 const struct builtin_description *d;
12915 tree arg0 = TREE_VALUE (arglist);
12916 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12917 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12918 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12920 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12921 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12922 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12923 enum rtx_code comparison = d->comparison;
12925 if (VECTOR_MODE_P (mode0))
12926 op0 = safe_vector_operand (op0, mode0);
12927 if (VECTOR_MODE_P (mode1))
12928 op1 = safe_vector_operand (op1, mode1);
12930 /* Swap operands if we have a comparison that isn't available in
12934 rtx tmp = gen_reg_rtx (mode1);
12935 emit_move_insn (tmp, op1);
12941 || GET_MODE (target) != tmode
12942 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
12943 target = gen_reg_rtx (tmode);
12945 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12946 op0 = copy_to_mode_reg (mode0, op0);
12947 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12948 op1 = copy_to_mode_reg (mode1, op1);
12950 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12951 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12958 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
12961 ix86_expand_sse_comi (d, arglist, target)
12962 const struct builtin_description *d;
12967 tree arg0 = TREE_VALUE (arglist);
12968 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12969 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12970 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12972 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
12973 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
12974 enum rtx_code comparison = d->comparison;
12976 if (VECTOR_MODE_P (mode0))
12977 op0 = safe_vector_operand (op0, mode0);
12978 if (VECTOR_MODE_P (mode1))
12979 op1 = safe_vector_operand (op1, mode1);
12981 /* Swap operands if we have a comparison that isn't available in
12990 target = gen_reg_rtx (SImode);
12991 emit_move_insn (target, const0_rtx);
12992 target = gen_rtx_SUBREG (QImode, target, 0);
12994 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12995 op0 = copy_to_mode_reg (mode0, op0);
12996 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12997 op1 = copy_to_mode_reg (mode1, op1);
12999 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13000 pat = GEN_FCN (d->icode) (op0, op1);
13004 emit_insn (gen_rtx_SET (VOIDmode,
13005 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13006 gen_rtx_fmt_ee (comparison, QImode,
13010 return SUBREG_REG (target);
13013 /* Expand an expression EXP that calls a built-in function,
13014 with result going to TARGET if that's convenient
13015 (and in mode MODE if that's convenient).
13016 SUBTARGET may be used as the target for computing one of EXP's operands.
13017 IGNORE is nonzero if the value is to be ignored. */
13020 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
13023 rtx subtarget ATTRIBUTE_UNUSED;
13024 enum machine_mode mode ATTRIBUTE_UNUSED;
13025 int ignore ATTRIBUTE_UNUSED;
13027 const struct builtin_description *d;
13029 enum insn_code icode;
13030 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13031 tree arglist = TREE_OPERAND (exp, 1);
13032 tree arg0, arg1, arg2;
13033 rtx op0, op1, op2, pat;
13034 enum machine_mode tmode, mode0, mode1, mode2;
13035 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13039 case IX86_BUILTIN_EMMS:
13040 emit_insn (gen_emms ());
13043 case IX86_BUILTIN_SFENCE:
13044 emit_insn (gen_sfence ());
13047 case IX86_BUILTIN_PEXTRW:
13048 case IX86_BUILTIN_PEXTRW128:
13049 icode = (fcode == IX86_BUILTIN_PEXTRW
13050 ? CODE_FOR_mmx_pextrw
13051 : CODE_FOR_sse2_pextrw);
13052 arg0 = TREE_VALUE (arglist);
13053 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13054 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13055 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13056 tmode = insn_data[icode].operand[0].mode;
13057 mode0 = insn_data[icode].operand[1].mode;
13058 mode1 = insn_data[icode].operand[2].mode;
13060 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13061 op0 = copy_to_mode_reg (mode0, op0);
13062 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13064 /* @@@ better error message */
13065 error ("selector must be an immediate");
13066 return gen_reg_rtx (tmode);
13069 || GET_MODE (target) != tmode
13070 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13071 target = gen_reg_rtx (tmode);
13072 pat = GEN_FCN (icode) (target, op0, op1);
13078 case IX86_BUILTIN_PINSRW:
13079 case IX86_BUILTIN_PINSRW128:
13080 icode = (fcode == IX86_BUILTIN_PINSRW
13081 ? CODE_FOR_mmx_pinsrw
13082 : CODE_FOR_sse2_pinsrw);
13083 arg0 = TREE_VALUE (arglist);
13084 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13085 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13086 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13087 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13088 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13089 tmode = insn_data[icode].operand[0].mode;
13090 mode0 = insn_data[icode].operand[1].mode;
13091 mode1 = insn_data[icode].operand[2].mode;
13092 mode2 = insn_data[icode].operand[3].mode;
13094 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13095 op0 = copy_to_mode_reg (mode0, op0);
13096 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13097 op1 = copy_to_mode_reg (mode1, op1);
13098 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13100 /* @@@ better error message */
13101 error ("selector must be an immediate");
13105 || GET_MODE (target) != tmode
13106 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13107 target = gen_reg_rtx (tmode);
13108 pat = GEN_FCN (icode) (target, op0, op1, op2);
13114 case IX86_BUILTIN_MASKMOVQ:
13115 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13116 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13117 : CODE_FOR_sse2_maskmovdqu);
13118 /* Note the arg order is different from the operand order. */
13119 arg1 = TREE_VALUE (arglist);
13120 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13121 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13122 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13123 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13124 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13125 mode0 = insn_data[icode].operand[0].mode;
13126 mode1 = insn_data[icode].operand[1].mode;
13127 mode2 = insn_data[icode].operand[2].mode;
13129 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13130 op0 = copy_to_mode_reg (mode0, op0);
13131 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13132 op1 = copy_to_mode_reg (mode1, op1);
13133 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13134 op2 = copy_to_mode_reg (mode2, op2);
13135 pat = GEN_FCN (icode) (op0, op1, op2);
13141 case IX86_BUILTIN_SQRTSS:
13142 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13143 case IX86_BUILTIN_RSQRTSS:
13144 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13145 case IX86_BUILTIN_RCPSS:
13146 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13148 case IX86_BUILTIN_LOADAPS:
13149 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13151 case IX86_BUILTIN_LOADUPS:
13152 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13154 case IX86_BUILTIN_STOREAPS:
13155 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13156 case IX86_BUILTIN_STOREUPS:
13157 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13159 case IX86_BUILTIN_LOADSS:
13160 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13162 case IX86_BUILTIN_STORESS:
13163 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13165 case IX86_BUILTIN_LOADHPS:
13166 case IX86_BUILTIN_LOADLPS:
13167 case IX86_BUILTIN_LOADHPD:
13168 case IX86_BUILTIN_LOADLPD:
13169 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13170 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13171 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13172 : CODE_FOR_sse2_movlpd);
13173 arg0 = TREE_VALUE (arglist);
13174 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13175 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13176 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13177 tmode = insn_data[icode].operand[0].mode;
13178 mode0 = insn_data[icode].operand[1].mode;
13179 mode1 = insn_data[icode].operand[2].mode;
13181 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13182 op0 = copy_to_mode_reg (mode0, op0);
13183 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13185 || GET_MODE (target) != tmode
13186 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13187 target = gen_reg_rtx (tmode);
13188 pat = GEN_FCN (icode) (target, op0, op1);
13194 case IX86_BUILTIN_STOREHPS:
13195 case IX86_BUILTIN_STORELPS:
13196 case IX86_BUILTIN_STOREHPD:
13197 case IX86_BUILTIN_STORELPD:
13198 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13199 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13200 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13201 : CODE_FOR_sse2_movlpd);
13202 arg0 = TREE_VALUE (arglist);
13203 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13204 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13205 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13206 mode0 = insn_data[icode].operand[1].mode;
13207 mode1 = insn_data[icode].operand[2].mode;
13209 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13210 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13211 op1 = copy_to_mode_reg (mode1, op1);
13213 pat = GEN_FCN (icode) (op0, op0, op1);
13219 case IX86_BUILTIN_MOVNTPS:
13220 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13221 case IX86_BUILTIN_MOVNTQ:
13222 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13224 case IX86_BUILTIN_LDMXCSR:
13225 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13226 target = assign_386_stack_local (SImode, 0);
13227 emit_move_insn (target, op0);
13228 emit_insn (gen_ldmxcsr (target));
13231 case IX86_BUILTIN_STMXCSR:
13232 target = assign_386_stack_local (SImode, 0);
13233 emit_insn (gen_stmxcsr (target));
13234 return copy_to_mode_reg (SImode, target);
13236 case IX86_BUILTIN_SHUFPS:
13237 case IX86_BUILTIN_SHUFPD:
13238 icode = (fcode == IX86_BUILTIN_SHUFPS
13239 ? CODE_FOR_sse_shufps
13240 : CODE_FOR_sse2_shufpd);
13241 arg0 = TREE_VALUE (arglist);
13242 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13243 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13244 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13245 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13246 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13247 tmode = insn_data[icode].operand[0].mode;
13248 mode0 = insn_data[icode].operand[1].mode;
13249 mode1 = insn_data[icode].operand[2].mode;
13250 mode2 = insn_data[icode].operand[3].mode;
13252 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13253 op0 = copy_to_mode_reg (mode0, op0);
13254 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13255 op1 = copy_to_mode_reg (mode1, op1);
13256 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13258 /* @@@ better error message */
13259 error ("mask must be an immediate");
13260 return gen_reg_rtx (tmode);
13263 || GET_MODE (target) != tmode
13264 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13265 target = gen_reg_rtx (tmode);
13266 pat = GEN_FCN (icode) (target, op0, op1, op2);
13272 case IX86_BUILTIN_PSHUFW:
13273 case IX86_BUILTIN_PSHUFD:
13274 case IX86_BUILTIN_PSHUFHW:
13275 case IX86_BUILTIN_PSHUFLW:
13276 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13277 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13278 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13279 : CODE_FOR_mmx_pshufw);
13280 arg0 = TREE_VALUE (arglist);
13281 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13282 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13283 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13284 tmode = insn_data[icode].operand[0].mode;
13285 mode1 = insn_data[icode].operand[1].mode;
13286 mode2 = insn_data[icode].operand[2].mode;
13288 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13289 op0 = copy_to_mode_reg (mode1, op0);
13290 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13292 /* @@@ better error message */
13293 error ("mask must be an immediate");
13297 || GET_MODE (target) != tmode
13298 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13299 target = gen_reg_rtx (tmode);
13300 pat = GEN_FCN (icode) (target, op0, op1);
13306 case IX86_BUILTIN_PSLLDQI128:
13307 case IX86_BUILTIN_PSRLDQI128:
13308 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13309 : CODE_FOR_sse2_lshrti3);
13310 arg0 = TREE_VALUE (arglist);
13311 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13312 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13313 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13314 tmode = insn_data[icode].operand[0].mode;
13315 mode1 = insn_data[icode].operand[1].mode;
13316 mode2 = insn_data[icode].operand[2].mode;
13318 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13320 op0 = copy_to_reg (op0);
13321 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13323 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13325 error ("shift must be an immediate");
13328 target = gen_reg_rtx (V2DImode);
13329 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
13335 case IX86_BUILTIN_FEMMS:
13336 emit_insn (gen_femms ());
13339 case IX86_BUILTIN_PAVGUSB:
13340 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13342 case IX86_BUILTIN_PF2ID:
13343 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13345 case IX86_BUILTIN_PFACC:
13346 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13348 case IX86_BUILTIN_PFADD:
13349 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13351 case IX86_BUILTIN_PFCMPEQ:
13352 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13354 case IX86_BUILTIN_PFCMPGE:
13355 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13357 case IX86_BUILTIN_PFCMPGT:
13358 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13360 case IX86_BUILTIN_PFMAX:
13361 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13363 case IX86_BUILTIN_PFMIN:
13364 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13366 case IX86_BUILTIN_PFMUL:
13367 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13369 case IX86_BUILTIN_PFRCP:
13370 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13372 case IX86_BUILTIN_PFRCPIT1:
13373 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13375 case IX86_BUILTIN_PFRCPIT2:
13376 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13378 case IX86_BUILTIN_PFRSQIT1:
13379 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13381 case IX86_BUILTIN_PFRSQRT:
13382 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13384 case IX86_BUILTIN_PFSUB:
13385 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13387 case IX86_BUILTIN_PFSUBR:
13388 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13390 case IX86_BUILTIN_PI2FD:
13391 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13393 case IX86_BUILTIN_PMULHRW:
13394 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13396 case IX86_BUILTIN_PF2IW:
13397 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13399 case IX86_BUILTIN_PFNACC:
13400 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13402 case IX86_BUILTIN_PFPNACC:
13403 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13405 case IX86_BUILTIN_PI2FW:
13406 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13408 case IX86_BUILTIN_PSWAPDSI:
13409 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13411 case IX86_BUILTIN_PSWAPDSF:
13412 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13414 case IX86_BUILTIN_SSE_ZERO:
13415 target = gen_reg_rtx (V4SFmode);
13416 emit_insn (gen_sse_clrv4sf (target));
13419 case IX86_BUILTIN_MMX_ZERO:
13420 target = gen_reg_rtx (DImode);
13421 emit_insn (gen_mmx_clrdi (target));
13424 case IX86_BUILTIN_SQRTSD:
13425 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13426 case IX86_BUILTIN_LOADAPD:
13427 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13428 case IX86_BUILTIN_LOADUPD:
13429 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13431 case IX86_BUILTIN_STOREAPD:
13432 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13433 case IX86_BUILTIN_STOREUPD:
13434 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13436 case IX86_BUILTIN_LOADSD:
13437 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13439 case IX86_BUILTIN_STORESD:
13440 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13442 case IX86_BUILTIN_SETPD1:
13443 target = assign_386_stack_local (DFmode, 0);
13444 arg0 = TREE_VALUE (arglist);
13445 emit_move_insn (adjust_address (target, DFmode, 0),
13446 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13447 op0 = gen_reg_rtx (V2DFmode);
13448 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13449 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13452 case IX86_BUILTIN_SETPD:
13453 target = assign_386_stack_local (V2DFmode, 0);
13454 arg0 = TREE_VALUE (arglist);
13455 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13456 emit_move_insn (adjust_address (target, DFmode, 0),
13457 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13458 emit_move_insn (adjust_address (target, DFmode, 8),
13459 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13460 op0 = gen_reg_rtx (V2DFmode);
13461 emit_insn (gen_sse2_movapd (op0, target));
13464 case IX86_BUILTIN_LOADRPD:
13465 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13466 gen_reg_rtx (V2DFmode), 1);
13467 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13470 case IX86_BUILTIN_LOADPD1:
13471 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13472 gen_reg_rtx (V2DFmode), 1);
13473 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13476 case IX86_BUILTIN_STOREPD1:
13477 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13478 case IX86_BUILTIN_STORERPD:
13479 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13481 case IX86_BUILTIN_CLRPD:
13482 target = gen_reg_rtx (V2DFmode);
13483 emit_insn (gen_sse_clrv2df (target));
13486 case IX86_BUILTIN_MFENCE:
13487 emit_insn (gen_sse2_mfence ());
13489 case IX86_BUILTIN_LFENCE:
13490 emit_insn (gen_sse2_lfence ());
13493 case IX86_BUILTIN_CLFLUSH:
13494 arg0 = TREE_VALUE (arglist);
13495 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13496 icode = CODE_FOR_sse2_clflush;
13497 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
13498 op0 = copy_to_mode_reg (Pmode, op0);
13500 emit_insn (gen_sse2_clflush (op0));
13503 case IX86_BUILTIN_MOVNTPD:
13504 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13505 case IX86_BUILTIN_MOVNTDQ:
13506 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13507 case IX86_BUILTIN_MOVNTI:
13508 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13514 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13515 if (d->code == fcode)
13517 /* Compares are treated specially. */
13518 if (d->icode == CODE_FOR_maskcmpv4sf3
13519 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13520 || d->icode == CODE_FOR_maskncmpv4sf3
13521 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13522 || d->icode == CODE_FOR_maskcmpv2df3
13523 || d->icode == CODE_FOR_vmmaskcmpv2df3
13524 || d->icode == CODE_FOR_maskncmpv2df3
13525 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13526 return ix86_expand_sse_compare (d, arglist, target);
13528 return ix86_expand_binop_builtin (d->icode, arglist, target);
13531 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13532 if (d->code == fcode)
13533 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13535 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13536 if (d->code == fcode)
13537 return ix86_expand_sse_comi (d, arglist, target);
13539 /* @@@ Should really do something sensible here. */
13543 /* Store OPERAND to the memory after reload is completed. This means
13544 that we can't easily use assign_stack_local. */
13546 ix86_force_to_memory (mode, operand)
13547 enum machine_mode mode;
13551 if (!reload_completed)
13553 if (TARGET_64BIT && TARGET_RED_ZONE)
13555 result = gen_rtx_MEM (mode,
13556 gen_rtx_PLUS (Pmode,
13558 GEN_INT (-RED_ZONE_SIZE)));
13559 emit_move_insn (result, operand);
13561 else if (TARGET_64BIT && !TARGET_RED_ZONE)
13567 operand = gen_lowpart (DImode, operand);
13571 gen_rtx_SET (VOIDmode,
13572 gen_rtx_MEM (DImode,
13573 gen_rtx_PRE_DEC (DImode,
13574 stack_pointer_rtx)),
13580 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13589 split_di (&operand, 1, operands, operands + 1);
13591 gen_rtx_SET (VOIDmode,
13592 gen_rtx_MEM (SImode,
13593 gen_rtx_PRE_DEC (Pmode,
13594 stack_pointer_rtx)),
13597 gen_rtx_SET (VOIDmode,
13598 gen_rtx_MEM (SImode,
13599 gen_rtx_PRE_DEC (Pmode,
13600 stack_pointer_rtx)),
13605 /* It is better to store HImodes as SImodes. */
13606 if (!TARGET_PARTIAL_REG_STALL)
13607 operand = gen_lowpart (SImode, operand);
13611 gen_rtx_SET (VOIDmode,
13612 gen_rtx_MEM (GET_MODE (operand),
13613 gen_rtx_PRE_DEC (SImode,
13614 stack_pointer_rtx)),
13620 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13625 /* Free operand from the memory. */
13627 ix86_free_from_memory (mode)
13628 enum machine_mode mode;
13630 if (!TARGET_64BIT || !TARGET_RED_ZONE)
13634 if (mode == DImode || TARGET_64BIT)
13636 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13640 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13641 to pop or add instruction if registers are available. */
13642 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13643 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13648 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13649 QImode must go into class Q_REGS.
13650 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
13651 movdf to do mem-to-mem moves through integer regs. */
13653 ix86_preferred_reload_class (x, class)
13655 enum reg_class class;
13657 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
13659 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13661 /* SSE can't load any constant directly yet. */
13662 if (SSE_CLASS_P (class))
13664 /* Floats can load 0 and 1. */
13665 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13667 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13668 if (MAYBE_SSE_CLASS_P (class))
13669 return (reg_class_subset_p (class, GENERAL_REGS)
13670 ? GENERAL_REGS : FLOAT_REGS);
13674 /* General regs can load everything. */
13675 if (reg_class_subset_p (class, GENERAL_REGS))
13676 return GENERAL_REGS;
13677 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13678 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13681 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13683 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13688 /* If we are copying between general and FP registers, we need a memory
13689 location. The same is true for SSE and MMX registers.
13691 The macro can't work reliably when one of the CLASSES is class containing
13692 registers from multiple units (SSE, MMX, integer). We avoid this by never
13693 combining those units in single alternative in the machine description.
13694 Ensure that this constraint holds to avoid unexpected surprises.
13696 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13697 enforce these sanity checks. */
13699 ix86_secondary_memory_needed (class1, class2, mode, strict)
13700 enum reg_class class1, class2;
13701 enum machine_mode mode;
13704 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13705 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13706 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13707 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13708 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13709 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
13716 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13717 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13718 && (mode) != SImode)
13719 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13720 && (mode) != SImode));
13722 /* Return the cost of moving data from a register in class CLASS1 to
13723 one in class CLASS2.
13725 It is not required that the cost always equal 2 when FROM is the same as TO;
13726 on some machines it is expensive to move between registers if they are not
13727 general registers. */
13729 ix86_register_move_cost (mode, class1, class2)
13730 enum machine_mode mode;
13731 enum reg_class class1, class2;
13733 /* In case we require secondary memory, compute cost of the store followed
13734 by load. In order to avoid bad register allocation choices, we need
13735 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
13737 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
13741 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
13742 MEMORY_MOVE_COST (mode, class1, 1));
13743 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
13744 MEMORY_MOVE_COST (mode, class2, 1));
13746 /* In case of copying from general_purpose_register we may emit multiple
13747 stores followed by single load causing memory size mismatch stall.
13748 Count this as arbitarily high cost of 20. */
13749 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
13752 /* In the case of FP/MMX moves, the registers actually overlap, and we
13753 have to switch modes in order to treat them differently. */
13754 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
13755 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
13761 /* Moves between SSE/MMX and integer unit are expensive. */
13762 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13763 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
13764 return ix86_cost->mmxsse_to_integer;
13765 if (MAYBE_FLOAT_CLASS_P (class1))
13766 return ix86_cost->fp_move;
13767 if (MAYBE_SSE_CLASS_P (class1))
13768 return ix86_cost->sse_move;
13769 if (MAYBE_MMX_CLASS_P (class1))
13770 return ix86_cost->mmx_move;
13774 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13776 ix86_hard_regno_mode_ok (regno, mode)
13778 enum machine_mode mode;
13780 /* Flags and only flags can only hold CCmode values. */
13781 if (CC_REGNO_P (regno))
13782 return GET_MODE_CLASS (mode) == MODE_CC;
13783 if (GET_MODE_CLASS (mode) == MODE_CC
13784 || GET_MODE_CLASS (mode) == MODE_RANDOM
13785 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
13787 if (FP_REGNO_P (regno))
13788 return VALID_FP_MODE_P (mode);
13789 if (SSE_REGNO_P (regno))
13790 return VALID_SSE_REG_MODE (mode);
13791 if (MMX_REGNO_P (regno))
13792 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
13793 /* We handle both integer and floats in the general purpose registers.
13794 In future we should be able to handle vector modes as well. */
13795 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13797 /* Take care for QImode values - they can be in non-QI regs, but then
13798 they do cause partial register stalls. */
13799 if (regno < 4 || mode != QImode || TARGET_64BIT)
13801 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13804 /* Return the cost of moving data of mode M between a
13805 register and memory. A value of 2 is the default; this cost is
13806 relative to those in `REGISTER_MOVE_COST'.
13808 If moving between registers and memory is more expensive than
13809 between two registers, you should define this macro to express the
13812 Model also increased moving costs of QImode registers in non
13816 ix86_memory_move_cost (mode, class, in)
13817 enum machine_mode mode;
13818 enum reg_class class;
13821 if (FLOAT_CLASS_P (class))
13839 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
13841 if (SSE_CLASS_P (class))
13844 switch (GET_MODE_SIZE (mode))
13858 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
13860 if (MMX_CLASS_P (class))
13863 switch (GET_MODE_SIZE (mode))
13874 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
13876 switch (GET_MODE_SIZE (mode))
13880 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13881 : ix86_cost->movzbl_load);
13883 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13884 : ix86_cost->int_store[0] + 4);
13887 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13889 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
13890 if (mode == TFmode)
13892 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
13893 * ((int) GET_MODE_SIZE (mode)
13894 + UNITS_PER_WORD -1 ) / UNITS_PER_WORD);
13898 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
13900 ix86_svr3_asm_out_constructor (symbol, priority)
13902 int priority ATTRIBUTE_UNUSED;
13905 fputs ("\tpushl $", asm_out_file);
13906 assemble_name (asm_out_file, XSTR (symbol, 0));
13907 fputc ('\n', asm_out_file);
13913 static int current_machopic_label_num;
13915 /* Given a symbol name and its associated stub, write out the
13916 definition of the stub. */
13919 machopic_output_stub (file, symb, stub)
13921 const char *symb, *stub;
13923 unsigned int length;
13924 char *binder_name, *symbol_name, lazy_ptr_name[32];
13925 int label = ++current_machopic_label_num;
13927 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
13928 symb = (*targetm.strip_name_encoding) (symb);
13930 length = strlen (stub);
13931 binder_name = alloca (length + 32);
13932 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
13934 length = strlen (symb);
13935 symbol_name = alloca (length + 32);
13936 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
13938 sprintf (lazy_ptr_name, "L%d$lz", label);
13941 machopic_picsymbol_stub_section ();
13943 machopic_symbol_stub_section ();
13945 fprintf (file, "%s:\n", stub);
13946 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
13950 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
13951 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
13952 fprintf (file, "\tjmp %%edx\n");
13955 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
13957 fprintf (file, "%s:\n", binder_name);
13961 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
13962 fprintf (file, "\tpushl %%eax\n");
13965 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
13967 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
13969 machopic_lazy_symbol_ptr_section ();
13970 fprintf (file, "%s:\n", lazy_ptr_name);
13971 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
13972 fprintf (file, "\t.long %s\n", binder_name);
13974 #endif /* TARGET_MACHO */
13976 /* Order the registers for register allocator. */
13979 x86_order_regs_for_local_alloc ()
13984 /* First allocate the local general purpose registers. */
13985 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13986 if (GENERAL_REGNO_P (i) && call_used_regs[i])
13987 reg_alloc_order [pos++] = i;
13989 /* Global general purpose registers. */
13990 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13991 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
13992 reg_alloc_order [pos++] = i;
13994 /* x87 registers come first in case we are doing FP math
13996 if (!TARGET_SSE_MATH)
13997 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13998 reg_alloc_order [pos++] = i;
14000 /* SSE registers. */
14001 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14002 reg_alloc_order [pos++] = i;
14003 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14004 reg_alloc_order [pos++] = i;
14006 /* x87 registerts. */
14007 if (TARGET_SSE_MATH)
14008 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14009 reg_alloc_order [pos++] = i;
14011 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14012 reg_alloc_order [pos++] = i;
14014 /* Initialize the rest of array as we do not allocate some registers
14016 while (pos < FIRST_PSEUDO_REGISTER)
14017 reg_alloc_order [pos++] = 0;
14020 /* Returns an expression indicating where the this parameter is
14021 located on entry to the FUNCTION. */
14024 ia32_this_parameter (function)
14027 tree type = TREE_TYPE (function);
14029 if (ix86_fntype_regparm (type) > 0)
14033 parm = TYPE_ARG_TYPES (type);
14034 /* Figure out whether or not the function has a variable number of
14036 for (; parm; parm = TREE_CHAIN (parm))\
14037 if (TREE_VALUE (parm) == void_type_node)
14039 /* If not, the this parameter is in %eax. */
14041 return gen_rtx_REG (SImode, 0);
14044 if (aggregate_value_p (TREE_TYPE (type)))
14045 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
14047 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
14052 x86_output_mi_vcall_thunk (file, thunk, delta, vcall_index, function)
14054 tree thunk ATTRIBUTE_UNUSED;
14055 HOST_WIDE_INT delta;
14056 HOST_WIDE_INT vcall_index;
14063 int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
14064 xops[0] = GEN_INT (delta);
14065 xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
14066 output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
14069 fprintf (file, "\tjmp *");
14070 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
14071 fprintf (file, "@GOTPCREL(%%rip)\n");
14075 fprintf (file, "\tjmp ");
14076 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
14077 fprintf (file, "\n");
14082 /* Adjust the this parameter by a fixed constant. */
14085 xops[0] = GEN_INT (delta);
14086 xops[1] = ia32_this_parameter (function);
14087 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14090 /* Adjust the this parameter by a value stored in the vtable. */
14095 /* Put the this parameter into %eax. */
14096 this_parm = ia32_this_parameter (function);
14097 if (!REG_P (this_parm))
14099 xops[0] = this_parm;
14100 xops[1] = gen_rtx_REG (Pmode, 0);
14101 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14103 /* Load the virtual table pointer into %edx. */
14104 if (ix86_fntype_regparm (TREE_TYPE (function)) > 2)
14105 error ("virtual function `%D' cannot have more than two register parameters",
14107 xops[0] = gen_rtx_MEM (Pmode,
14108 gen_rtx_REG (Pmode, 0));
14109 xops[1] = gen_rtx_REG (Pmode, 1);
14110 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14111 /* Adjust the this parameter. */
14112 xops[0] = gen_rtx_MEM (SImode,
14113 plus_constant (gen_rtx_REG (Pmode, 1),
14115 xops[1] = gen_rtx_REG (Pmode, 0);
14116 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14117 /* Put the this parameter back where it came from. */
14118 if (!REG_P (this_parm))
14120 xops[0] = gen_rtx_REG (Pmode, 0);
14121 xops[1] = ia32_this_parameter (function);
14122 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14128 xops[0] = pic_offset_table_rtx;
14129 xops[1] = gen_label_rtx ();
14130 xops[2] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
14132 if (ix86_regparm > 2)
14134 output_asm_insn ("push{l}\t%0", xops);
14135 output_asm_insn ("call\t%P1", xops);
14136 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
14137 output_asm_insn ("pop{l}\t%0", xops);
14139 ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
14140 xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
14142 ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
14143 asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
14144 asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
14148 fprintf (file, "\tjmp\t");
14149 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
14150 fprintf (file, "\n");
14156 x86_output_mi_thunk (file, thunk, delta, function)
14159 HOST_WIDE_INT delta;
14162 x86_output_mi_vcall_thunk (file, thunk, delta, /*vcall_index=*/0,
14167 x86_field_alignment (field, computed)
14171 enum machine_mode mode;
14172 tree type = TREE_TYPE (field);
14174 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
14176 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
14177 ? get_inner_array_type (type) : type);
14178 if (mode == DFmode || mode == DCmode
14179 || GET_MODE_CLASS (mode) == MODE_INT
14180 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
14181 return MIN (32, computed);
14185 /* Implement machine specific optimizations.
14186 At the moment we implement single transformation: AMD Athlon works faster
14187 when RET is not destination of conditional jump or directly preceeded
14188 by other jump instruction. We avoid the penalty by inserting NOP just
14189 before the RET instructions in such cases. */
14191 x86_machine_dependent_reorg (first)
14192 rtx first ATTRIBUTE_UNUSED;
14196 if (!TARGET_ATHLON || !optimize || optimize_size)
14198 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
14200 basic_block bb = e->src;
14203 bool insert = false;
14205 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
14207 prev = prev_nonnote_insn (ret);
14208 if (prev && GET_CODE (prev) == CODE_LABEL)
14211 for (e = bb->pred; e; e = e->pred_next)
14212 if (EDGE_FREQUENCY (e) && e->src->index > 0
14213 && !(e->flags & EDGE_FALLTHRU))
14218 prev = prev_real_insn (ret);
14219 if (prev && GET_CODE (prev) == JUMP_INSN
14220 && any_condjump_p (prev))
14224 emit_insn_before (gen_nop (), ret);
14228 #include "gt-i386.h"