1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #ifndef CHECK_STACK_LIMIT
53 #define CHECK_STACK_LIMIT (-1)
56 /* Return index of given mode in mult and division cost tables. */
57 #define MODE_INDEX(mode) \
58 ((mode) == QImode ? 0 \
59 : (mode) == HImode ? 1 \
60 : (mode) == SImode ? 2 \
61 : (mode) == DImode ? 3 \
64 /* Processor costs (relative to an add) */
66 struct processor_costs size_cost = { /* costs for tunning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
86 {2, 2, 2}, /* cost of loading integer registers */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
109 /* Processor costs (relative to an add) */
111 struct processor_costs i386_cost = { /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {8, 8, 8}, /* cost of loading integer registers */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
155 struct processor_costs i486_cost = { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of loading integer registers */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
199 struct processor_costs pentium_cost = {
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {4, 4, 6}, /* cost of loading integer registers */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
243 struct processor_costs pentiumpro_cost = {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
258 Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
263 {4, 4, 6}, /* cost of loading integer registers */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
287 struct processor_costs k6_cost = {
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 4}, /* cost of loading integer registers */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
331 struct processor_costs athlon_cost = {
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
345 in QImode, HImode and SImode.
346 Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
350 in SFmode, DFmode and XFmode */
351 {6, 6, 8}, /* cost of loading integer registers */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
354 in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
359 in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
361 in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
375 struct processor_costs k8_cost = {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {6, 6, 8}, /* cost of loading integer registers */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
419 struct processor_costs pentium4_cost = {
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
439 {4, 4, 6}, /* cost of loading integer registers */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
463 struct processor_costs nocona_cost = {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
483 {4, 4, 4}, /* cost of loading integer registers */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
506 const struct processor_costs *ix86_cost = &pentium_cost;
508 /* Processor feature/optimization bitmasks. */
509 #define m_386 (1<<PROCESSOR_I386)
510 #define m_486 (1<<PROCESSOR_I486)
511 #define m_PENT (1<<PROCESSOR_PENTIUM)
512 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
513 #define m_K6 (1<<PROCESSOR_K6)
514 #define m_ATHLON (1<<PROCESSOR_ATHLON)
515 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
516 #define m_K8 (1<<PROCESSOR_K8)
517 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
518 #define m_NOCONA (1<<PROCESSOR_NOCONA)
520 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
521 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
522 const int x86_zero_extend_with_and = m_486 | m_PENT;
523 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
524 const int x86_double_with_add = ~m_386;
525 const int x86_use_bit_test = m_386;
526 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
527 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
528 const int x86_fisttp = m_NOCONA;
529 const int x86_3dnow_a = m_ATHLON_K8;
530 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
531 /* Branch hints were put in P4 based on simulation result. But
532 after P4 was made, no performance benefit was observed with
533 branch hints. It also increases the code size. As the result,
534 icc never generates branch hints. */
535 const int x86_branch_hints = 0;
536 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
537 const int x86_partial_reg_stall = m_PPRO;
538 const int x86_use_loop = m_K6;
539 const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
540 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
541 const int x86_use_mov0 = m_K6;
542 const int x86_use_cltd = ~(m_PENT | m_K6);
543 const int x86_read_modify_write = ~m_PENT;
544 const int x86_read_modify = ~(m_PENT | m_PPRO);
545 const int x86_split_long_moves = m_PPRO;
546 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
547 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
548 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
549 const int x86_qimode_math = ~(0);
550 const int x86_promote_qi_regs = 0;
551 const int x86_himode_math = ~(m_PPRO);
552 const int x86_promote_hi_regs = m_PPRO;
553 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
554 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
555 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
556 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
557 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
558 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
559 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
560 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
561 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
562 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
563 const int x86_decompose_lea = m_PENT4 | m_NOCONA;
564 const int x86_shift1 = ~m_486;
565 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
566 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
567 /* Set for machines where the type and dependencies are resolved on SSE
568 register parts instead of whole registers, so we may maintain just
569 lower part of scalar values in proper format leaving the upper part
571 const int x86_sse_split_regs = m_ATHLON_K8;
572 const int x86_sse_typeless_stores = m_ATHLON_K8;
573 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
574 const int x86_use_ffreep = m_ATHLON_K8;
575 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
577 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
578 integer data in xmm registers. Which results in pretty abysmal code. */
579 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
581 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
582 /* Some CPU cores are not able to predict more than 4 branch instructions in
583 the 16 byte window. */
584 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
585 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
586 const int x86_use_bt = m_ATHLON_K8;
588 /* In case the average insn count for single function invocation is
589 lower than this constant, emit fast (but longer) prologue and
591 #define FAST_PROLOGUE_INSN_COUNT 20
593 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
594 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
595 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
596 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
598 /* Array of the smallest class containing reg number REGNO, indexed by
599 REGNO. Used by REGNO_REG_CLASS in i386.h. */
601 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
604 AREG, DREG, CREG, BREG,
606 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
608 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
609 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
612 /* flags, fpsr, dirflag, frame */
613 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
614 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
616 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
618 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
619 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
620 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
624 /* The "default" register map used in 32bit mode. */
626 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
628 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
629 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
630 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
631 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
632 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
633 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
634 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
637 static int const x86_64_int_parameter_registers[6] =
639 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
640 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
643 static int const x86_64_int_return_registers[4] =
645 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
648 /* The "default" register map used in 64bit mode. */
649 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
651 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
652 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
653 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
654 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
655 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
656 8,9,10,11,12,13,14,15, /* extended integer registers */
657 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
660 /* Define the register numbers to be used in Dwarf debugging information.
661 The SVR4 reference port C compiler uses the following register numbers
662 in its Dwarf output code:
663 0 for %eax (gcc regno = 0)
664 1 for %ecx (gcc regno = 2)
665 2 for %edx (gcc regno = 1)
666 3 for %ebx (gcc regno = 3)
667 4 for %esp (gcc regno = 7)
668 5 for %ebp (gcc regno = 6)
669 6 for %esi (gcc regno = 4)
670 7 for %edi (gcc regno = 5)
671 The following three DWARF register numbers are never generated by
672 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
673 believes these numbers have these meanings.
674 8 for %eip (no gcc equivalent)
675 9 for %eflags (gcc regno = 17)
676 10 for %trapno (no gcc equivalent)
677 It is not at all clear how we should number the FP stack registers
678 for the x86 architecture. If the version of SDB on x86/svr4 were
679 a bit less brain dead with respect to floating-point then we would
680 have a precedent to follow with respect to DWARF register numbers
681 for x86 FP registers, but the SDB on x86/svr4 is so completely
682 broken with respect to FP registers that it is hardly worth thinking
683 of it as something to strive for compatibility with.
684 The version of x86/svr4 SDB I have at the moment does (partially)
685 seem to believe that DWARF register number 11 is associated with
686 the x86 register %st(0), but that's about all. Higher DWARF
687 register numbers don't seem to be associated with anything in
688 particular, and even for DWARF regno 11, SDB only seems to under-
689 stand that it should say that a variable lives in %st(0) (when
690 asked via an `=' command) if we said it was in DWARF regno 11,
691 but SDB still prints garbage when asked for the value of the
692 variable in question (via a `/' command).
693 (Also note that the labels SDB prints for various FP stack regs
694 when doing an `x' command are all wrong.)
695 Note that these problems generally don't affect the native SVR4
696 C compiler because it doesn't allow the use of -O with -g and
697 because when it is *not* optimizing, it allocates a memory
698 location for each floating-point variable, and the memory
699 location is what gets described in the DWARF AT_location
700 attribute for the variable in question.
701 Regardless of the severe mental illness of the x86/svr4 SDB, we
702 do something sensible here and we use the following DWARF
703 register numbers. Note that these are all stack-top-relative
705 11 for %st(0) (gcc regno = 8)
706 12 for %st(1) (gcc regno = 9)
707 13 for %st(2) (gcc regno = 10)
708 14 for %st(3) (gcc regno = 11)
709 15 for %st(4) (gcc regno = 12)
710 16 for %st(5) (gcc regno = 13)
711 17 for %st(6) (gcc regno = 14)
712 18 for %st(7) (gcc regno = 15)
714 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
716 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
717 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
718 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
719 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
720 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
721 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
722 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
725 /* Test and compare insns in i386.md store the information needed to
726 generate branch and scc insns here. */
728 rtx ix86_compare_op0 = NULL_RTX;
729 rtx ix86_compare_op1 = NULL_RTX;
731 #define MAX_386_STACK_LOCALS 3
732 /* Size of the register save area. */
733 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
735 /* Define the structure for the machine field in struct function. */
737 struct stack_local_entry GTY(())
742 struct stack_local_entry *next;
745 /* Structure describing stack frame layout.
746 Stack grows downward:
752 saved frame pointer if frame_pointer_needed
753 <- HARD_FRAME_POINTER
759 > to_allocate <- FRAME_POINTER
771 int outgoing_arguments_size;
774 HOST_WIDE_INT to_allocate;
775 /* The offsets relative to ARG_POINTER. */
776 HOST_WIDE_INT frame_pointer_offset;
777 HOST_WIDE_INT hard_frame_pointer_offset;
778 HOST_WIDE_INT stack_pointer_offset;
780 /* When save_regs_using_mov is set, emit prologue using
781 move instead of push instructions. */
782 bool save_regs_using_mov;
785 /* Used to enable/disable debugging features. */
786 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
787 /* Code model option as passed by user. */
788 const char *ix86_cmodel_string;
790 enum cmodel ix86_cmodel;
792 const char *ix86_asm_string;
793 enum asm_dialect ix86_asm_dialect = ASM_ATT;
795 const char *ix86_tls_dialect_string;
796 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
798 /* Which unit we are generating floating point math for. */
799 enum fpmath_unit ix86_fpmath;
801 /* Which cpu are we scheduling for. */
802 enum processor_type ix86_tune;
803 /* Which instruction set architecture to use. */
804 enum processor_type ix86_arch;
806 /* Strings to hold which cpu and instruction set architecture to use. */
807 const char *ix86_tune_string; /* for -mtune=<xxx> */
808 const char *ix86_arch_string; /* for -march=<xxx> */
809 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
811 /* # of registers to use to pass arguments. */
812 const char *ix86_regparm_string;
814 /* true if sse prefetch instruction is not NOOP. */
815 int x86_prefetch_sse;
817 /* ix86_regparm_string as a number */
820 /* Alignment to use for loops and jumps: */
822 /* Power of two alignment for loops. */
823 const char *ix86_align_loops_string;
825 /* Power of two alignment for non-loop jumps. */
826 const char *ix86_align_jumps_string;
828 /* Power of two alignment for stack boundary in bytes. */
829 const char *ix86_preferred_stack_boundary_string;
831 /* Preferred alignment for stack boundary in bits. */
832 unsigned int ix86_preferred_stack_boundary;
834 /* Values 1-5: see jump.c */
835 int ix86_branch_cost;
836 const char *ix86_branch_cost_string;
838 /* Power of two alignment for functions. */
839 const char *ix86_align_funcs_string;
841 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
842 char internal_label_prefix[16];
843 int internal_label_prefix_len;
845 static void output_pic_addr_const (FILE *, rtx, int);
846 static void put_condition_code (enum rtx_code, enum machine_mode,
848 static const char *get_some_local_dynamic_name (void);
849 static int get_some_local_dynamic_name_1 (rtx *, void *);
850 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
851 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
853 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
854 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
856 static rtx get_thread_pointer (int);
857 static rtx legitimize_tls_address (rtx, enum tls_model, int);
858 static void get_pc_thunk_name (char [32], unsigned int);
859 static rtx gen_push (rtx);
860 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
861 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
862 static struct machine_function * ix86_init_machine_status (void);
863 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
864 static int ix86_nsaved_regs (void);
865 static void ix86_emit_save_regs (void);
866 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
867 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
868 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
869 static HOST_WIDE_INT ix86_GOT_alias_set (void);
870 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
871 static rtx ix86_expand_aligntest (rtx, int);
872 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
873 static int ix86_issue_rate (void);
874 static int ix86_adjust_cost (rtx, rtx, rtx, int);
875 static int ia32_multipass_dfa_lookahead (void);
876 static void ix86_init_mmx_sse_builtins (void);
877 static rtx x86_this_parameter (tree);
878 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
879 HOST_WIDE_INT, tree);
880 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
881 static void x86_file_start (void);
882 static void ix86_reorg (void);
883 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
884 static tree ix86_build_builtin_va_list (void);
885 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
887 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
888 static bool ix86_vector_mode_supported_p (enum machine_mode);
890 static int ix86_address_cost (rtx);
891 static bool ix86_cannot_force_const_mem (rtx);
892 static rtx ix86_delegitimize_address (rtx);
894 struct builtin_description;
895 static rtx ix86_expand_sse_comi (const struct builtin_description *,
897 static rtx ix86_expand_sse_compare (const struct builtin_description *,
899 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
900 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
901 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
902 static rtx ix86_expand_store_builtin (enum insn_code, tree);
903 static rtx safe_vector_operand (rtx, enum machine_mode);
904 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
905 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
906 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
907 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
908 static int ix86_fp_comparison_cost (enum rtx_code code);
909 static unsigned int ix86_select_alt_pic_regnum (void);
910 static int ix86_save_reg (unsigned int, int);
911 static void ix86_compute_frame_layout (struct ix86_frame *);
912 static int ix86_comp_type_attributes (tree, tree);
913 static int ix86_function_regparm (tree, tree);
914 const struct attribute_spec ix86_attribute_table[];
915 static bool ix86_function_ok_for_sibcall (tree, tree);
916 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
917 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
918 static int ix86_value_regno (enum machine_mode, tree);
919 static bool contains_128bit_aligned_vector_p (tree);
920 static rtx ix86_struct_value_rtx (tree, int);
921 static bool ix86_ms_bitfield_layout_p (tree);
922 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
923 static int extended_reg_mentioned_1 (rtx *, void *);
924 static bool ix86_rtx_costs (rtx, int, int, int *);
925 static int min_insn_size (rtx);
926 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
927 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
928 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
930 static void ix86_init_builtins (void);
931 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
933 /* This function is only used on Solaris. */
934 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
937 /* Register class used for passing given 64bit part of the argument.
938 These represent classes as documented by the PS ABI, with the exception
939 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
940 use SF or DFmode move instead of DImode to avoid reformatting penalties.
942 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
943 whenever possible (upper half does contain padding).
945 enum x86_64_reg_class
948 X86_64_INTEGER_CLASS,
949 X86_64_INTEGERSI_CLASS,
956 X86_64_COMPLEX_X87_CLASS,
959 static const char * const x86_64_reg_class_name[] = {
960 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
961 "sseup", "x87", "x87up", "cplx87", "no"
964 #define MAX_CLASSES 4
966 /* Table of constants used by fldpi, fldln2, etc.... */
967 static REAL_VALUE_TYPE ext_80387_constants_table [5];
968 static bool ext_80387_constants_init = 0;
969 static void init_ext_80387_constants (void);
971 /* Initialize the GCC target structure. */
972 #undef TARGET_ATTRIBUTE_TABLE
973 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
974 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
975 # undef TARGET_MERGE_DECL_ATTRIBUTES
976 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
979 #undef TARGET_COMP_TYPE_ATTRIBUTES
980 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
982 #undef TARGET_INIT_BUILTINS
983 #define TARGET_INIT_BUILTINS ix86_init_builtins
984 #undef TARGET_EXPAND_BUILTIN
985 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
987 #undef TARGET_ASM_FUNCTION_EPILOGUE
988 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
990 #undef TARGET_ASM_OPEN_PAREN
991 #define TARGET_ASM_OPEN_PAREN ""
992 #undef TARGET_ASM_CLOSE_PAREN
993 #define TARGET_ASM_CLOSE_PAREN ""
995 #undef TARGET_ASM_ALIGNED_HI_OP
996 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
997 #undef TARGET_ASM_ALIGNED_SI_OP
998 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1000 #undef TARGET_ASM_ALIGNED_DI_OP
1001 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1004 #undef TARGET_ASM_UNALIGNED_HI_OP
1005 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1006 #undef TARGET_ASM_UNALIGNED_SI_OP
1007 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1008 #undef TARGET_ASM_UNALIGNED_DI_OP
1009 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1011 #undef TARGET_SCHED_ADJUST_COST
1012 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1013 #undef TARGET_SCHED_ISSUE_RATE
1014 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1015 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1016 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1017 ia32_multipass_dfa_lookahead
1019 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1020 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1023 #undef TARGET_HAVE_TLS
1024 #define TARGET_HAVE_TLS true
1026 #undef TARGET_CANNOT_FORCE_CONST_MEM
1027 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1029 #undef TARGET_DELEGITIMIZE_ADDRESS
1030 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1032 #undef TARGET_MS_BITFIELD_LAYOUT_P
1033 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1035 #undef TARGET_ASM_OUTPUT_MI_THUNK
1036 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1037 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1038 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1040 #undef TARGET_ASM_FILE_START
1041 #define TARGET_ASM_FILE_START x86_file_start
1043 #undef TARGET_RTX_COSTS
1044 #define TARGET_RTX_COSTS ix86_rtx_costs
1045 #undef TARGET_ADDRESS_COST
1046 #define TARGET_ADDRESS_COST ix86_address_cost
1048 #undef TARGET_FIXED_CONDITION_CODE_REGS
1049 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1050 #undef TARGET_CC_MODES_COMPATIBLE
1051 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1053 #undef TARGET_MACHINE_DEPENDENT_REORG
1054 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1056 #undef TARGET_BUILD_BUILTIN_VA_LIST
1057 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1059 #undef TARGET_MD_ASM_CLOBBERS
1060 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1062 #undef TARGET_PROMOTE_PROTOTYPES
1063 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1064 #undef TARGET_STRUCT_VALUE_RTX
1065 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1066 #undef TARGET_SETUP_INCOMING_VARARGS
1067 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1068 #undef TARGET_MUST_PASS_IN_STACK
1069 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1070 #undef TARGET_PASS_BY_REFERENCE
1071 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1073 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1074 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1076 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1077 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1079 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1080 #undef TARGET_INSERT_ATTRIBUTES
1081 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1084 struct gcc_target targetm = TARGET_INITIALIZER;
1087 /* The svr4 ABI for the i386 says that records and unions are returned
1089 #ifndef DEFAULT_PCC_STRUCT_RETURN
1090 #define DEFAULT_PCC_STRUCT_RETURN 1
1093 /* Sometimes certain combinations of command options do not make
1094 sense on a particular target machine. You can define a macro
1095 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1096 defined, is executed once just after all the command options have
1099 Don't use this macro to turn on various extra optimizations for
1100 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1103 override_options (void)
1106 int ix86_tune_defaulted = 0;
1108 /* Comes from final.c -- no real reason to change it. */
1109 #define MAX_CODE_ALIGN 16
1113 const struct processor_costs *cost; /* Processor costs */
1114 const int target_enable; /* Target flags to enable. */
1115 const int target_disable; /* Target flags to disable. */
1116 const int align_loop; /* Default alignments. */
1117 const int align_loop_max_skip;
1118 const int align_jump;
1119 const int align_jump_max_skip;
1120 const int align_func;
1122 const processor_target_table[PROCESSOR_max] =
1124 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1125 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1126 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1127 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1128 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1129 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1130 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1131 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1132 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
1135 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1138 const char *const name; /* processor name or nickname. */
1139 const enum processor_type processor;
1140 const enum pta_flags
1146 PTA_PREFETCH_SSE = 16,
1152 const processor_alias_table[] =
1154 {"i386", PROCESSOR_I386, 0},
1155 {"i486", PROCESSOR_I486, 0},
1156 {"i586", PROCESSOR_PENTIUM, 0},
1157 {"pentium", PROCESSOR_PENTIUM, 0},
1158 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1159 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1160 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1161 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1162 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1163 {"i686", PROCESSOR_PENTIUMPRO, 0},
1164 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1165 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1166 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1167 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1168 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1169 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1170 | PTA_MMX | PTA_PREFETCH_SSE},
1171 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1172 | PTA_MMX | PTA_PREFETCH_SSE},
1173 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1174 | PTA_MMX | PTA_PREFETCH_SSE},
1175 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1176 | PTA_MMX | PTA_PREFETCH_SSE},
1177 {"k6", PROCESSOR_K6, PTA_MMX},
1178 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1179 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1180 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1182 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1183 | PTA_3DNOW | PTA_3DNOW_A},
1184 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1185 | PTA_3DNOW_A | PTA_SSE},
1186 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1187 | PTA_3DNOW_A | PTA_SSE},
1188 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1189 | PTA_3DNOW_A | PTA_SSE},
1190 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1191 | PTA_SSE | PTA_SSE2 },
1192 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1193 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1194 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1195 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1196 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1197 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1198 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1199 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1202 int const pta_size = ARRAY_SIZE (processor_alias_table);
1204 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1205 SUBTARGET_OVERRIDE_OPTIONS;
1208 /* Set the default values for switches whose default depends on TARGET_64BIT
1209 in case they weren't overwritten by command line options. */
1212 if (flag_omit_frame_pointer == 2)
1213 flag_omit_frame_pointer = 1;
1214 if (flag_asynchronous_unwind_tables == 2)
1215 flag_asynchronous_unwind_tables = 1;
1216 if (flag_pcc_struct_return == 2)
1217 flag_pcc_struct_return = 0;
1221 if (flag_omit_frame_pointer == 2)
1222 flag_omit_frame_pointer = 0;
1223 if (flag_asynchronous_unwind_tables == 2)
1224 flag_asynchronous_unwind_tables = 0;
1225 if (flag_pcc_struct_return == 2)
1226 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1229 if (!ix86_tune_string && ix86_arch_string)
1230 ix86_tune_string = ix86_arch_string;
1231 if (!ix86_tune_string)
1233 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1234 ix86_tune_defaulted = 1;
1236 if (!ix86_arch_string)
1237 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1239 if (ix86_cmodel_string != 0)
1241 if (!strcmp (ix86_cmodel_string, "small"))
1242 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1244 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1245 else if (!strcmp (ix86_cmodel_string, "32"))
1246 ix86_cmodel = CM_32;
1247 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1248 ix86_cmodel = CM_KERNEL;
1249 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1250 ix86_cmodel = CM_MEDIUM;
1251 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1252 ix86_cmodel = CM_LARGE;
1254 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1258 ix86_cmodel = CM_32;
1260 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1262 if (ix86_asm_string != 0)
1264 if (!strcmp (ix86_asm_string, "intel"))
1265 ix86_asm_dialect = ASM_INTEL;
1266 else if (!strcmp (ix86_asm_string, "att"))
1267 ix86_asm_dialect = ASM_ATT;
1269 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1271 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1272 error ("code model %qs not supported in the %s bit mode",
1273 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1274 if (ix86_cmodel == CM_LARGE)
1275 sorry ("code model %<large%> not supported yet");
1276 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1277 sorry ("%i-bit mode not compiled in",
1278 (target_flags & MASK_64BIT) ? 64 : 32);
1280 for (i = 0; i < pta_size; i++)
1281 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1283 ix86_arch = processor_alias_table[i].processor;
1284 /* Default cpu tuning to the architecture. */
1285 ix86_tune = ix86_arch;
1286 if (processor_alias_table[i].flags & PTA_MMX
1287 && !(target_flags_explicit & MASK_MMX))
1288 target_flags |= MASK_MMX;
1289 if (processor_alias_table[i].flags & PTA_3DNOW
1290 && !(target_flags_explicit & MASK_3DNOW))
1291 target_flags |= MASK_3DNOW;
1292 if (processor_alias_table[i].flags & PTA_3DNOW_A
1293 && !(target_flags_explicit & MASK_3DNOW_A))
1294 target_flags |= MASK_3DNOW_A;
1295 if (processor_alias_table[i].flags & PTA_SSE
1296 && !(target_flags_explicit & MASK_SSE))
1297 target_flags |= MASK_SSE;
1298 if (processor_alias_table[i].flags & PTA_SSE2
1299 && !(target_flags_explicit & MASK_SSE2))
1300 target_flags |= MASK_SSE2;
1301 if (processor_alias_table[i].flags & PTA_SSE3
1302 && !(target_flags_explicit & MASK_SSE3))
1303 target_flags |= MASK_SSE3;
1304 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1305 x86_prefetch_sse = true;
1306 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1307 error ("CPU you selected does not support x86-64 "
1313 error ("bad value (%s) for -march= switch", ix86_arch_string);
1315 for (i = 0; i < pta_size; i++)
1316 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1318 ix86_tune = processor_alias_table[i].processor;
1319 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1321 if (ix86_tune_defaulted)
1323 ix86_tune_string = "x86-64";
1324 for (i = 0; i < pta_size; i++)
1325 if (! strcmp (ix86_tune_string,
1326 processor_alias_table[i].name))
1328 ix86_tune = processor_alias_table[i].processor;
1331 error ("CPU you selected does not support x86-64 "
1334 /* Intel CPUs have always interpreted SSE prefetch instructions as
1335 NOPs; so, we can enable SSE prefetch instructions even when
1336 -mtune (rather than -march) points us to a processor that has them.
1337 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1338 higher processors. */
1339 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1340 x86_prefetch_sse = true;
1344 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1347 ix86_cost = &size_cost;
1349 ix86_cost = processor_target_table[ix86_tune].cost;
1350 target_flags |= processor_target_table[ix86_tune].target_enable;
1351 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1353 /* Arrange to set up i386_stack_locals for all functions. */
1354 init_machine_status = ix86_init_machine_status;
1356 /* Validate -mregparm= value. */
1357 if (ix86_regparm_string)
1359 i = atoi (ix86_regparm_string);
1360 if (i < 0 || i > REGPARM_MAX)
1361 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1367 ix86_regparm = REGPARM_MAX;
1369 /* If the user has provided any of the -malign-* options,
1370 warn and use that value only if -falign-* is not set.
1371 Remove this code in GCC 3.2 or later. */
1372 if (ix86_align_loops_string)
1374 warning ("-malign-loops is obsolete, use -falign-loops");
1375 if (align_loops == 0)
1377 i = atoi (ix86_align_loops_string);
1378 if (i < 0 || i > MAX_CODE_ALIGN)
1379 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1381 align_loops = 1 << i;
1385 if (ix86_align_jumps_string)
1387 warning ("-malign-jumps is obsolete, use -falign-jumps");
1388 if (align_jumps == 0)
1390 i = atoi (ix86_align_jumps_string);
1391 if (i < 0 || i > MAX_CODE_ALIGN)
1392 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1394 align_jumps = 1 << i;
1398 if (ix86_align_funcs_string)
1400 warning ("-malign-functions is obsolete, use -falign-functions");
1401 if (align_functions == 0)
1403 i = atoi (ix86_align_funcs_string);
1404 if (i < 0 || i > MAX_CODE_ALIGN)
1405 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1407 align_functions = 1 << i;
1411 /* Default align_* from the processor table. */
1412 if (align_loops == 0)
1414 align_loops = processor_target_table[ix86_tune].align_loop;
1415 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1417 if (align_jumps == 0)
1419 align_jumps = processor_target_table[ix86_tune].align_jump;
1420 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1422 if (align_functions == 0)
1424 align_functions = processor_target_table[ix86_tune].align_func;
1427 /* Validate -mpreferred-stack-boundary= value, or provide default.
1428 The default of 128 bits is for Pentium III's SSE __m128, but we
1429 don't want additional code to keep the stack aligned when
1430 optimizing for code size. */
1431 ix86_preferred_stack_boundary = (optimize_size
1432 ? TARGET_64BIT ? 128 : 32
1434 if (ix86_preferred_stack_boundary_string)
1436 i = atoi (ix86_preferred_stack_boundary_string);
1437 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1438 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1439 TARGET_64BIT ? 4 : 2);
1441 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1444 /* Validate -mbranch-cost= value, or provide default. */
1445 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1446 if (ix86_branch_cost_string)
1448 i = atoi (ix86_branch_cost_string);
1450 error ("-mbranch-cost=%d is not between 0 and 5", i);
1452 ix86_branch_cost = i;
1455 if (ix86_tls_dialect_string)
1457 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1458 ix86_tls_dialect = TLS_DIALECT_GNU;
1459 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1460 ix86_tls_dialect = TLS_DIALECT_SUN;
1462 error ("bad value (%s) for -mtls-dialect= switch",
1463 ix86_tls_dialect_string);
1466 /* Keep nonleaf frame pointers. */
1467 if (flag_omit_frame_pointer)
1468 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1469 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1470 flag_omit_frame_pointer = 1;
1472 /* If we're doing fast math, we don't care about comparison order
1473 wrt NaNs. This lets us use a shorter comparison sequence. */
1474 if (flag_unsafe_math_optimizations)
1475 target_flags &= ~MASK_IEEE_FP;
1477 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1478 since the insns won't need emulation. */
1479 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1480 target_flags &= ~MASK_NO_FANCY_MATH_387;
1482 /* Likewise, if the target doesn't have a 387, or we've specified
1483 software floating point, don't use 387 inline instrinsics. */
1485 target_flags |= MASK_NO_FANCY_MATH_387;
1487 /* Turn on SSE2 builtins for -msse3. */
1489 target_flags |= MASK_SSE2;
1491 /* Turn on SSE builtins for -msse2. */
1493 target_flags |= MASK_SSE;
1495 /* Turn on MMX builtins for -msse. */
1498 target_flags |= MASK_MMX & ~target_flags_explicit;
1499 x86_prefetch_sse = true;
1502 /* Turn on MMX builtins for 3Dnow. */
1504 target_flags |= MASK_MMX;
1508 if (TARGET_ALIGN_DOUBLE)
1509 error ("-malign-double makes no sense in the 64bit mode");
1511 error ("-mrtd calling convention not supported in the 64bit mode");
1513 /* Enable by default the SSE and MMX builtins. Do allow the user to
1514 explicitly disable any of these. In particular, disabling SSE and
1515 MMX for kernel code is extremely useful. */
1517 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1518 & ~target_flags_explicit);
1521 ix86_fpmath = FPMATH_SSE;
1525 ix86_fpmath = FPMATH_387;
1526 /* i386 ABI does not specify red zone. It still makes sense to use it
1527 when programmer takes care to stack from being destroyed. */
1528 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1529 target_flags |= MASK_NO_RED_ZONE;
1532 if (ix86_fpmath_string != 0)
1534 if (! strcmp (ix86_fpmath_string, "387"))
1535 ix86_fpmath = FPMATH_387;
1536 else if (! strcmp (ix86_fpmath_string, "sse"))
1540 warning ("SSE instruction set disabled, using 387 arithmetics");
1541 ix86_fpmath = FPMATH_387;
1544 ix86_fpmath = FPMATH_SSE;
1546 else if (! strcmp (ix86_fpmath_string, "387,sse")
1547 || ! strcmp (ix86_fpmath_string, "sse,387"))
1551 warning ("SSE instruction set disabled, using 387 arithmetics");
1552 ix86_fpmath = FPMATH_387;
1554 else if (!TARGET_80387)
1556 warning ("387 instruction set disabled, using SSE arithmetics");
1557 ix86_fpmath = FPMATH_SSE;
1560 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1563 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1566 /* If the i387 is disabled, then do not return values in it. */
1568 target_flags &= ~MASK_FLOAT_RETURNS;
1570 if ((x86_accumulate_outgoing_args & TUNEMASK)
1571 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1573 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1575 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1578 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1579 p = strchr (internal_label_prefix, 'X');
1580 internal_label_prefix_len = p - internal_label_prefix;
1584 /* When scheduling description is not available, disable scheduler pass
1585 so it won't slow down the compilation and make x87 code slower. */
1586 if (!TARGET_SCHEDULE)
1587 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
1591 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1593 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1594 make the problem with not enough registers even worse. */
1595 #ifdef INSN_SCHEDULING
1597 flag_schedule_insns = 0;
1600 /* The default values of these switches depend on the TARGET_64BIT
1601 that is not known at this moment. Mark these values with 2 and
1602 let user the to override these. In case there is no command line option
1603 specifying them, we will set the defaults in override_options. */
1605 flag_omit_frame_pointer = 2;
1606 flag_pcc_struct_return = 2;
1607 flag_asynchronous_unwind_tables = 2;
1608 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
1609 SUBTARGET_OPTIMIZATION_OPTIONS;
1613 /* Table of valid machine attributes. */
1614 const struct attribute_spec ix86_attribute_table[] =
1616 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1617 /* Stdcall attribute says callee is responsible for popping arguments
1618 if they are not variable. */
1619 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1620 /* Fastcall attribute says callee is responsible for popping arguments
1621 if they are not variable. */
1622 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1623 /* Cdecl attribute says the callee is a normal C declaration */
1624 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1625 /* Regparm attribute specifies how many integer arguments are to be
1626 passed in registers. */
1627 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1628 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1629 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
1630 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
1631 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1633 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1634 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1635 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1636 SUBTARGET_ATTRIBUTE_TABLE,
1638 { NULL, 0, 0, false, false, false, NULL }
1641 /* Decide whether we can make a sibling call to a function. DECL is the
1642 declaration of the function being targeted by the call and EXP is the
1643 CALL_EXPR representing the call. */
1646 ix86_function_ok_for_sibcall (tree decl, tree exp)
1650 /* If we are generating position-independent code, we cannot sibcall
1651 optimize any indirect call, or a direct call to a global function,
1652 as the PLT requires %ebx be live. */
1653 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1661 /* If we are returning floats on the 80387 register stack, we cannot
1662 make a sibcall from a function that doesn't return a float to a
1663 function that does or, conversely, from a function that does return
1664 a float to a function that doesn't; the necessary stack adjustment
1665 would not be executed. */
1666 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp), func))
1667 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
1671 /* If this call is indirect, we'll need to be able to use a call-clobbered
1672 register for the address of the target function. Make sure that all
1673 such registers are not used for passing parameters. */
1674 if (!decl && !TARGET_64BIT)
1678 /* We're looking at the CALL_EXPR, we need the type of the function. */
1679 type = TREE_OPERAND (exp, 0); /* pointer expression */
1680 type = TREE_TYPE (type); /* pointer type */
1681 type = TREE_TYPE (type); /* function type */
1683 if (ix86_function_regparm (type, NULL) >= 3)
1685 /* ??? Need to count the actual number of registers to be used,
1686 not the possible number of registers. Fix later. */
1691 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1692 /* Dllimport'd functions are also called indirectly. */
1693 if (decl && lookup_attribute ("dllimport", DECL_ATTRIBUTES (decl))
1694 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
1698 /* Otherwise okay. That also includes certain types of indirect calls. */
1702 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1703 arguments as in struct attribute_spec.handler. */
1705 ix86_handle_cdecl_attribute (tree *node, tree name,
1706 tree args ATTRIBUTE_UNUSED,
1707 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1709 if (TREE_CODE (*node) != FUNCTION_TYPE
1710 && TREE_CODE (*node) != METHOD_TYPE
1711 && TREE_CODE (*node) != FIELD_DECL
1712 && TREE_CODE (*node) != TYPE_DECL)
1714 warning ("%qs attribute only applies to functions",
1715 IDENTIFIER_POINTER (name));
1716 *no_add_attrs = true;
1720 if (is_attribute_p ("fastcall", name))
1722 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1724 error ("fastcall and stdcall attributes are not compatible");
1726 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1728 error ("fastcall and regparm attributes are not compatible");
1731 else if (is_attribute_p ("stdcall", name))
1733 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1735 error ("fastcall and stdcall attributes are not compatible");
1742 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
1743 *no_add_attrs = true;
1749 /* Handle a "regparm" attribute;
1750 arguments as in struct attribute_spec.handler. */
1752 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1753 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1755 if (TREE_CODE (*node) != FUNCTION_TYPE
1756 && TREE_CODE (*node) != METHOD_TYPE
1757 && TREE_CODE (*node) != FIELD_DECL
1758 && TREE_CODE (*node) != TYPE_DECL)
1760 warning ("%qs attribute only applies to functions",
1761 IDENTIFIER_POINTER (name));
1762 *no_add_attrs = true;
1768 cst = TREE_VALUE (args);
1769 if (TREE_CODE (cst) != INTEGER_CST)
1771 warning ("%qs attribute requires an integer constant argument",
1772 IDENTIFIER_POINTER (name));
1773 *no_add_attrs = true;
1775 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1777 warning ("argument to %qs attribute larger than %d",
1778 IDENTIFIER_POINTER (name), REGPARM_MAX);
1779 *no_add_attrs = true;
1782 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1784 error ("fastcall and regparm attributes are not compatible");
1791 /* Return 0 if the attributes for two types are incompatible, 1 if they
1792 are compatible, and 2 if they are nearly compatible (which causes a
1793 warning to be generated). */
1796 ix86_comp_type_attributes (tree type1, tree type2)
1798 /* Check for mismatch of non-default calling convention. */
1799 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1801 if (TREE_CODE (type1) != FUNCTION_TYPE)
1804 /* Check for mismatched fastcall types */
1805 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1806 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1809 /* Check for mismatched return types (cdecl vs stdcall). */
1810 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1811 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1813 if (ix86_function_regparm (type1, NULL)
1814 != ix86_function_regparm (type2, NULL))
1819 /* Return the regparm value for a fuctio with the indicated TYPE and DECL.
1820 DECL may be NULL when calling function indirectly
1821 or considering a libcall. */
1824 ix86_function_regparm (tree type, tree decl)
1827 int regparm = ix86_regparm;
1828 bool user_convention = false;
1832 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1835 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1836 user_convention = true;
1839 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1842 user_convention = true;
1845 /* Use register calling convention for local functions when possible. */
1846 if (!TARGET_64BIT && !user_convention && decl
1847 && flag_unit_at_a_time && !profile_flag)
1849 struct cgraph_local_info *i = cgraph_local_info (decl);
1852 /* We can't use regparm(3) for nested functions as these use
1853 static chain pointer in third argument. */
1854 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1864 /* Return true if EAX is live at the start of the function. Used by
1865 ix86_expand_prologue to determine if we need special help before
1866 calling allocate_stack_worker. */
1869 ix86_eax_live_at_start_p (void)
1871 /* Cheat. Don't bother working forward from ix86_function_regparm
1872 to the function type to whether an actual argument is located in
1873 eax. Instead just look at cfg info, which is still close enough
1874 to correct at this point. This gives false positives for broken
1875 functions that might use uninitialized data that happens to be
1876 allocated in eax, but who cares? */
1877 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1880 /* Value is the number of bytes of arguments automatically
1881 popped when returning from a subroutine call.
1882 FUNDECL is the declaration node of the function (as a tree),
1883 FUNTYPE is the data type of the function (as a tree),
1884 or for a library call it is an identifier node for the subroutine name.
1885 SIZE is the number of bytes of arguments passed on the stack.
1887 On the 80386, the RTD insn may be used to pop them if the number
1888 of args is fixed, but if the number is variable then the caller
1889 must pop them all. RTD can't be used for library calls now
1890 because the library is compiled with the Unix compiler.
1891 Use of RTD is a selectable option, since it is incompatible with
1892 standard Unix calling sequences. If the option is not selected,
1893 the caller must always pop the args.
1895 The attribute stdcall is equivalent to RTD on a per module basis. */
1898 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1900 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1902 /* Cdecl functions override -mrtd, and never pop the stack. */
1903 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1905 /* Stdcall and fastcall functions will pop the stack if not
1907 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1908 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1912 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1913 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1914 == void_type_node)))
1918 /* Lose any fake structure return argument if it is passed on the stack. */
1919 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1921 && !KEEP_AGGREGATE_RETURN_POINTER)
1923 int nregs = ix86_function_regparm (funtype, fundecl);
1926 return GET_MODE_SIZE (Pmode);
1932 /* Argument support functions. */
1934 /* Return true when register may be used to pass function parameters. */
1936 ix86_function_arg_regno_p (int regno)
1940 return (regno < REGPARM_MAX
1941 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1942 if (SSE_REGNO_P (regno) && TARGET_SSE)
1944 /* RAX is used as hidden argument to va_arg functions. */
1947 for (i = 0; i < REGPARM_MAX; i++)
1948 if (regno == x86_64_int_parameter_registers[i])
1953 /* Return if we do not know how to pass TYPE solely in registers. */
1956 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
1958 if (must_pass_in_stack_var_size_or_pad (mode, type))
1961 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1962 The layout_type routine is crafty and tries to trick us into passing
1963 currently unsupported vector types on the stack by using TImode. */
1964 return (!TARGET_64BIT && mode == TImode
1965 && type && TREE_CODE (type) != VECTOR_TYPE);
1968 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1969 for a call to a function whose data type is FNTYPE.
1970 For a library call, FNTYPE is 0. */
1973 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1974 tree fntype, /* tree ptr for function decl */
1975 rtx libname, /* SYMBOL_REF of library name or 0 */
1978 static CUMULATIVE_ARGS zero_cum;
1979 tree param, next_param;
1981 if (TARGET_DEBUG_ARG)
1983 fprintf (stderr, "\ninit_cumulative_args (");
1985 fprintf (stderr, "fntype code = %s, ret code = %s",
1986 tree_code_name[(int) TREE_CODE (fntype)],
1987 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1989 fprintf (stderr, "no fntype");
1992 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1997 /* Set up the number of registers to use for passing arguments. */
1999 cum->nregs = ix86_function_regparm (fntype, fndecl);
2001 cum->nregs = ix86_regparm;
2003 cum->sse_nregs = SSE_REGPARM_MAX;
2005 cum->mmx_nregs = MMX_REGPARM_MAX;
2006 cum->warn_sse = true;
2007 cum->warn_mmx = true;
2008 cum->maybe_vaarg = false;
2010 /* Use ecx and edx registers if function has fastcall attribute */
2011 if (fntype && !TARGET_64BIT)
2013 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2020 /* Determine if this function has variable arguments. This is
2021 indicated by the last argument being 'void_type_mode' if there
2022 are no variable arguments. If there are variable arguments, then
2023 we won't pass anything in registers in 32-bit mode. */
2025 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2027 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2028 param != 0; param = next_param)
2030 next_param = TREE_CHAIN (param);
2031 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2042 cum->maybe_vaarg = true;
2046 if ((!fntype && !libname)
2047 || (fntype && !TYPE_ARG_TYPES (fntype)))
2048 cum->maybe_vaarg = true;
2050 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2051 in SSE registers even for 32-bit mode and not just 3, but up to
2052 8 SSE arguments in registers. */
2053 if (!TARGET_64BIT && !cum->maybe_vaarg && !cum->fastcall
2054 && cum->sse_nregs == SSE_REGPARM_MAX && fndecl
2055 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2057 struct cgraph_local_info *i = cgraph_local_info (fndecl);
2061 cum->float_in_sse = true;
2065 if (TARGET_DEBUG_ARG)
2066 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2071 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2072 But in the case of vector types, it is some vector mode.
2074 When we have only some of our vector isa extensions enabled, then there
2075 are some modes for which vector_mode_supported_p is false. For these
2076 modes, the generic vector support in gcc will choose some non-vector mode
2077 in order to implement the type. By computing the natural mode, we'll
2078 select the proper ABI location for the operand and not depend on whatever
2079 the middle-end decides to do with these vector types. */
2081 static enum machine_mode
2082 type_natural_mode (tree type)
2084 enum machine_mode mode = TYPE_MODE (type);
2086 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2088 HOST_WIDE_INT size = int_size_in_bytes (type);
2089 if ((size == 8 || size == 16)
2090 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2091 && TYPE_VECTOR_SUBPARTS (type) > 1)
2093 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2095 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2096 mode = MIN_MODE_VECTOR_FLOAT;
2098 mode = MIN_MODE_VECTOR_INT;
2100 /* Get the mode which has this inner mode and number of units. */
2101 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2102 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2103 && GET_MODE_INNER (mode) == innermode)
2113 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2114 this may not agree with the mode that the type system has chosen for the
2115 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2116 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2119 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2124 if (orig_mode != BLKmode)
2125 tmp = gen_rtx_REG (orig_mode, regno);
2128 tmp = gen_rtx_REG (mode, regno);
2129 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2130 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2136 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2137 of this code is to classify each 8bytes of incoming argument by the register
2138 class and assign registers accordingly. */
2140 /* Return the union class of CLASS1 and CLASS2.
2141 See the x86-64 PS ABI for details. */
2143 static enum x86_64_reg_class
2144 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2146 /* Rule #1: If both classes are equal, this is the resulting class. */
2147 if (class1 == class2)
2150 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2152 if (class1 == X86_64_NO_CLASS)
2154 if (class2 == X86_64_NO_CLASS)
2157 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2158 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2159 return X86_64_MEMORY_CLASS;
2161 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2162 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2163 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2164 return X86_64_INTEGERSI_CLASS;
2165 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2166 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2167 return X86_64_INTEGER_CLASS;
2169 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2171 if (class1 == X86_64_X87_CLASS
2172 || class1 == X86_64_X87UP_CLASS
2173 || class1 == X86_64_COMPLEX_X87_CLASS
2174 || class2 == X86_64_X87_CLASS
2175 || class2 == X86_64_X87UP_CLASS
2176 || class2 == X86_64_COMPLEX_X87_CLASS)
2177 return X86_64_MEMORY_CLASS;
2179 /* Rule #6: Otherwise class SSE is used. */
2180 return X86_64_SSE_CLASS;
2183 /* Classify the argument of type TYPE and mode MODE.
2184 CLASSES will be filled by the register class used to pass each word
2185 of the operand. The number of words is returned. In case the parameter
2186 should be passed in memory, 0 is returned. As a special case for zero
2187 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2189 BIT_OFFSET is used internally for handling records and specifies offset
2190 of the offset in bits modulo 256 to avoid overflow cases.
2192 See the x86-64 PS ABI for details.
2196 classify_argument (enum machine_mode mode, tree type,
2197 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2199 HOST_WIDE_INT bytes =
2200 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2201 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2203 /* Variable sized entities are always passed/returned in memory. */
2207 if (mode != VOIDmode
2208 && targetm.calls.must_pass_in_stack (mode, type))
2211 if (type && AGGREGATE_TYPE_P (type))
2215 enum x86_64_reg_class subclasses[MAX_CLASSES];
2217 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2221 for (i = 0; i < words; i++)
2222 classes[i] = X86_64_NO_CLASS;
2224 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2225 signalize memory class, so handle it as special case. */
2228 classes[0] = X86_64_NO_CLASS;
2232 /* Classify each field of record and merge classes. */
2233 if (TREE_CODE (type) == RECORD_TYPE)
2235 /* For classes first merge in the field of the subclasses. */
2236 if (TYPE_BINFO (type))
2238 tree binfo, base_binfo;
2241 for (binfo = TYPE_BINFO (type), basenum = 0;
2242 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2245 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2246 tree type = BINFO_TYPE (base_binfo);
2248 num = classify_argument (TYPE_MODE (type),
2250 (offset + bit_offset) % 256);
2253 for (i = 0; i < num; i++)
2255 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2257 merge_classes (subclasses[i], classes[i + pos]);
2261 /* And now merge the fields of structure. */
2262 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2264 if (TREE_CODE (field) == FIELD_DECL)
2268 /* Bitfields are always classified as integer. Handle them
2269 early, since later code would consider them to be
2270 misaligned integers. */
2271 if (DECL_BIT_FIELD (field))
2273 for (i = int_bit_position (field) / 8 / 8;
2274 i < (int_bit_position (field)
2275 + tree_low_cst (DECL_SIZE (field), 0)
2278 merge_classes (X86_64_INTEGER_CLASS,
2283 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2284 TREE_TYPE (field), subclasses,
2285 (int_bit_position (field)
2286 + bit_offset) % 256);
2289 for (i = 0; i < num; i++)
2292 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2294 merge_classes (subclasses[i], classes[i + pos]);
2300 /* Arrays are handled as small records. */
2301 else if (TREE_CODE (type) == ARRAY_TYPE)
2304 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2305 TREE_TYPE (type), subclasses, bit_offset);
2309 /* The partial classes are now full classes. */
2310 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2311 subclasses[0] = X86_64_SSE_CLASS;
2312 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2313 subclasses[0] = X86_64_INTEGER_CLASS;
2315 for (i = 0; i < words; i++)
2316 classes[i] = subclasses[i % num];
2318 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2319 else if (TREE_CODE (type) == UNION_TYPE
2320 || TREE_CODE (type) == QUAL_UNION_TYPE)
2322 /* For classes first merge in the field of the subclasses. */
2323 if (TYPE_BINFO (type))
2325 tree binfo, base_binfo;
2328 for (binfo = TYPE_BINFO (type), basenum = 0;
2329 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2332 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2333 tree type = BINFO_TYPE (base_binfo);
2335 num = classify_argument (TYPE_MODE (type),
2337 (offset + (bit_offset % 64)) % 256);
2340 for (i = 0; i < num; i++)
2342 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2344 merge_classes (subclasses[i], classes[i + pos]);
2348 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2350 if (TREE_CODE (field) == FIELD_DECL)
2353 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2354 TREE_TYPE (field), subclasses,
2358 for (i = 0; i < num; i++)
2359 classes[i] = merge_classes (subclasses[i], classes[i]);
2366 /* Final merger cleanup. */
2367 for (i = 0; i < words; i++)
2369 /* If one class is MEMORY, everything should be passed in
2371 if (classes[i] == X86_64_MEMORY_CLASS)
2374 /* The X86_64_SSEUP_CLASS should be always preceded by
2375 X86_64_SSE_CLASS. */
2376 if (classes[i] == X86_64_SSEUP_CLASS
2377 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2378 classes[i] = X86_64_SSE_CLASS;
2380 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2381 if (classes[i] == X86_64_X87UP_CLASS
2382 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2383 classes[i] = X86_64_SSE_CLASS;
2388 /* Compute alignment needed. We align all types to natural boundaries with
2389 exception of XFmode that is aligned to 64bits. */
2390 if (mode != VOIDmode && mode != BLKmode)
2392 int mode_alignment = GET_MODE_BITSIZE (mode);
2395 mode_alignment = 128;
2396 else if (mode == XCmode)
2397 mode_alignment = 256;
2398 if (COMPLEX_MODE_P (mode))
2399 mode_alignment /= 2;
2400 /* Misaligned fields are always returned in memory. */
2401 if (bit_offset % mode_alignment)
2405 /* for V1xx modes, just use the base mode */
2406 if (VECTOR_MODE_P (mode)
2407 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2408 mode = GET_MODE_INNER (mode);
2410 /* Classification of atomic types. */
2420 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2421 classes[0] = X86_64_INTEGERSI_CLASS;
2423 classes[0] = X86_64_INTEGER_CLASS;
2427 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2432 if (!(bit_offset % 64))
2433 classes[0] = X86_64_SSESF_CLASS;
2435 classes[0] = X86_64_SSE_CLASS;
2438 classes[0] = X86_64_SSEDF_CLASS;
2441 classes[0] = X86_64_X87_CLASS;
2442 classes[1] = X86_64_X87UP_CLASS;
2445 classes[0] = X86_64_SSE_CLASS;
2446 classes[1] = X86_64_SSEUP_CLASS;
2449 classes[0] = X86_64_SSE_CLASS;
2452 classes[0] = X86_64_SSEDF_CLASS;
2453 classes[1] = X86_64_SSEDF_CLASS;
2456 classes[0] = X86_64_COMPLEX_X87_CLASS;
2459 /* This modes is larger than 16 bytes. */
2467 classes[0] = X86_64_SSE_CLASS;
2468 classes[1] = X86_64_SSEUP_CLASS;
2474 classes[0] = X86_64_SSE_CLASS;
2480 if (VECTOR_MODE_P (mode))
2484 if (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT)
2486 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2487 classes[0] = X86_64_INTEGERSI_CLASS;
2489 classes[0] = X86_64_INTEGER_CLASS;
2490 classes[1] = X86_64_INTEGER_CLASS;
2491 return 1 + (bytes > 8);
2498 /* Examine the argument and return set number of register required in each
2499 class. Return 0 iff parameter should be passed in memory. */
2501 examine_argument (enum machine_mode mode, tree type, int in_return,
2502 int *int_nregs, int *sse_nregs)
2504 enum x86_64_reg_class class[MAX_CLASSES];
2505 int n = classify_argument (mode, type, class, 0);
2511 for (n--; n >= 0; n--)
2514 case X86_64_INTEGER_CLASS:
2515 case X86_64_INTEGERSI_CLASS:
2518 case X86_64_SSE_CLASS:
2519 case X86_64_SSESF_CLASS:
2520 case X86_64_SSEDF_CLASS:
2523 case X86_64_NO_CLASS:
2524 case X86_64_SSEUP_CLASS:
2526 case X86_64_X87_CLASS:
2527 case X86_64_X87UP_CLASS:
2531 case X86_64_COMPLEX_X87_CLASS:
2532 return in_return ? 2 : 0;
2533 case X86_64_MEMORY_CLASS:
2539 /* Construct container for the argument used by GCC interface. See
2540 FUNCTION_ARG for the detailed description. */
2543 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
2544 tree type, int in_return, int nintregs, int nsseregs,
2545 const int *intreg, int sse_regno)
2547 enum machine_mode tmpmode;
2549 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2550 enum x86_64_reg_class class[MAX_CLASSES];
2554 int needed_sseregs, needed_intregs;
2555 rtx exp[MAX_CLASSES];
2558 n = classify_argument (mode, type, class, 0);
2559 if (TARGET_DEBUG_ARG)
2562 fprintf (stderr, "Memory class\n");
2565 fprintf (stderr, "Classes:");
2566 for (i = 0; i < n; i++)
2568 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2570 fprintf (stderr, "\n");
2575 if (!examine_argument (mode, type, in_return, &needed_intregs,
2578 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2581 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2582 some less clueful developer tries to use floating-point anyway. */
2583 if (needed_sseregs && !TARGET_SSE)
2585 static bool issued_error;
2588 issued_error = true;
2590 error ("SSE register return with SSE disabled");
2592 error ("SSE register argument with SSE disabled");
2597 /* First construct simple cases. Avoid SCmode, since we want to use
2598 single register to pass this type. */
2599 if (n == 1 && mode != SCmode)
2602 case X86_64_INTEGER_CLASS:
2603 case X86_64_INTEGERSI_CLASS:
2604 return gen_rtx_REG (mode, intreg[0]);
2605 case X86_64_SSE_CLASS:
2606 case X86_64_SSESF_CLASS:
2607 case X86_64_SSEDF_CLASS:
2608 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
2609 case X86_64_X87_CLASS:
2610 case X86_64_COMPLEX_X87_CLASS:
2611 return gen_rtx_REG (mode, FIRST_STACK_REG);
2612 case X86_64_NO_CLASS:
2613 /* Zero sized array, struct or class. */
2618 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2620 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2622 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2623 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2624 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2625 && class[1] == X86_64_INTEGER_CLASS
2626 && (mode == CDImode || mode == TImode || mode == TFmode)
2627 && intreg[0] + 1 == intreg[1])
2628 return gen_rtx_REG (mode, intreg[0]);
2630 /* Otherwise figure out the entries of the PARALLEL. */
2631 for (i = 0; i < n; i++)
2635 case X86_64_NO_CLASS:
2637 case X86_64_INTEGER_CLASS:
2638 case X86_64_INTEGERSI_CLASS:
2639 /* Merge TImodes on aligned occasions here too. */
2640 if (i * 8 + 8 > bytes)
2641 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2642 else if (class[i] == X86_64_INTEGERSI_CLASS)
2646 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2647 if (tmpmode == BLKmode)
2649 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2650 gen_rtx_REG (tmpmode, *intreg),
2654 case X86_64_SSESF_CLASS:
2655 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2656 gen_rtx_REG (SFmode,
2657 SSE_REGNO (sse_regno)),
2661 case X86_64_SSEDF_CLASS:
2662 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2663 gen_rtx_REG (DFmode,
2664 SSE_REGNO (sse_regno)),
2668 case X86_64_SSE_CLASS:
2669 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2673 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2674 gen_rtx_REG (tmpmode,
2675 SSE_REGNO (sse_regno)),
2677 if (tmpmode == TImode)
2685 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2686 for (i = 0; i < nexps; i++)
2687 XVECEXP (ret, 0, i) = exp [i];
2691 /* Update the data in CUM to advance over an argument
2692 of mode MODE and data type TYPE.
2693 (TYPE is null for libcalls where that information may not be available.) */
2696 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2697 tree type, int named)
2700 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2701 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2704 mode = type_natural_mode (type);
2706 if (TARGET_DEBUG_ARG)
2707 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
2708 "mode=%s, named=%d)\n\n",
2709 words, cum->words, cum->nregs, cum->sse_nregs,
2710 GET_MODE_NAME (mode), named);
2714 int int_nregs, sse_nregs;
2715 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2716 cum->words += words;
2717 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2719 cum->nregs -= int_nregs;
2720 cum->sse_nregs -= sse_nregs;
2721 cum->regno += int_nregs;
2722 cum->sse_regno += sse_nregs;
2725 cum->words += words;
2743 cum->words += words;
2744 cum->nregs -= words;
2745 cum->regno += words;
2747 if (cum->nregs <= 0)
2758 if (!cum->float_in_sse)
2769 if (!type || !AGGREGATE_TYPE_P (type))
2771 cum->sse_words += words;
2772 cum->sse_nregs -= 1;
2773 cum->sse_regno += 1;
2774 if (cum->sse_nregs <= 0)
2786 if (!type || !AGGREGATE_TYPE_P (type))
2788 cum->mmx_words += words;
2789 cum->mmx_nregs -= 1;
2790 cum->mmx_regno += 1;
2791 if (cum->mmx_nregs <= 0)
2802 /* Define where to put the arguments to a function.
2803 Value is zero to push the argument on the stack,
2804 or a hard register in which to store the argument.
2806 MODE is the argument's machine mode.
2807 TYPE is the data type of the argument (as a tree).
2808 This is null for libcalls where that information may
2810 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2811 the preceding args and about the function being called.
2812 NAMED is nonzero if this argument is a named parameter
2813 (otherwise it is an extra parameter matching an ellipsis). */
2816 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
2817 tree type, int named)
2819 enum machine_mode mode = orig_mode;
2822 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2823 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2824 static bool warnedsse, warnedmmx;
2826 /* To simplify the code below, represent vector types with a vector mode
2827 even if MMX/SSE are not active. */
2828 if (type && TREE_CODE (type) == VECTOR_TYPE)
2829 mode = type_natural_mode (type);
2831 /* Handle a hidden AL argument containing number of registers for varargs
2832 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2834 if (mode == VOIDmode)
2837 return GEN_INT (cum->maybe_vaarg
2838 ? (cum->sse_nregs < 0
2846 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
2848 &x86_64_int_parameter_registers [cum->regno],
2853 /* For now, pass fp/complex values on the stack. */
2865 if (words <= cum->nregs)
2867 int regno = cum->regno;
2869 /* Fastcall allocates the first two DWORD (SImode) or
2870 smaller arguments to ECX and EDX. */
2873 if (mode == BLKmode || mode == DImode)
2876 /* ECX not EAX is the first allocated register. */
2880 ret = gen_rtx_REG (mode, regno);
2887 if (!cum->float_in_sse)
2897 if (!type || !AGGREGATE_TYPE_P (type))
2899 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2902 warning ("SSE vector argument without SSE enabled "
2906 ret = gen_reg_or_parallel (mode, orig_mode,
2907 cum->sse_regno + FIRST_SSE_REG);
2914 if (!type || !AGGREGATE_TYPE_P (type))
2916 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2919 warning ("MMX vector argument without MMX enabled "
2923 ret = gen_reg_or_parallel (mode, orig_mode,
2924 cum->mmx_regno + FIRST_MMX_REG);
2929 if (TARGET_DEBUG_ARG)
2932 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2933 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2936 print_simple_rtl (stderr, ret);
2938 fprintf (stderr, ", stack");
2940 fprintf (stderr, " )\n");
2946 /* A C expression that indicates when an argument must be passed by
2947 reference. If nonzero for an argument, a copy of that argument is
2948 made in memory and a pointer to the argument is passed instead of
2949 the argument itself. The pointer is passed in whatever way is
2950 appropriate for passing a pointer to that type. */
2953 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2954 enum machine_mode mode ATTRIBUTE_UNUSED,
2955 tree type, bool named ATTRIBUTE_UNUSED)
2960 if (type && int_size_in_bytes (type) == -1)
2962 if (TARGET_DEBUG_ARG)
2963 fprintf (stderr, "function_arg_pass_by_reference\n");
2970 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2971 ABI. Only called if TARGET_SSE. */
2973 contains_128bit_aligned_vector_p (tree type)
2975 enum machine_mode mode = TYPE_MODE (type);
2976 if (SSE_REG_MODE_P (mode)
2977 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2979 if (TYPE_ALIGN (type) < 128)
2982 if (AGGREGATE_TYPE_P (type))
2984 /* Walk the aggregates recursively. */
2985 if (TREE_CODE (type) == RECORD_TYPE
2986 || TREE_CODE (type) == UNION_TYPE
2987 || TREE_CODE (type) == QUAL_UNION_TYPE)
2991 if (TYPE_BINFO (type))
2993 tree binfo, base_binfo;
2996 for (binfo = TYPE_BINFO (type), i = 0;
2997 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2998 if (contains_128bit_aligned_vector_p (BINFO_TYPE (base_binfo)))
3001 /* And now merge the fields of structure. */
3002 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3004 if (TREE_CODE (field) == FIELD_DECL
3005 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3009 /* Just for use if some languages passes arrays by value. */
3010 else if (TREE_CODE (type) == ARRAY_TYPE)
3012 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3021 /* Gives the alignment boundary, in bits, of an argument with the
3022 specified mode and type. */
3025 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3029 align = TYPE_ALIGN (type);
3031 align = GET_MODE_ALIGNMENT (mode);
3032 if (align < PARM_BOUNDARY)
3033 align = PARM_BOUNDARY;
3036 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3037 make an exception for SSE modes since these require 128bit
3040 The handling here differs from field_alignment. ICC aligns MMX
3041 arguments to 4 byte boundaries, while structure fields are aligned
3042 to 8 byte boundaries. */
3044 align = PARM_BOUNDARY;
3047 if (!SSE_REG_MODE_P (mode))
3048 align = PARM_BOUNDARY;
3052 if (!contains_128bit_aligned_vector_p (type))
3053 align = PARM_BOUNDARY;
3061 /* Return true if N is a possible register number of function value. */
3063 ix86_function_value_regno_p (int regno)
3067 return ((regno) == 0
3068 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3069 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
3071 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
3072 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
3073 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
3076 /* Define how to find the value returned by a function.
3077 VALTYPE is the data type of the value (as a tree).
3078 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3079 otherwise, FUNC is 0. */
3081 ix86_function_value (tree valtype, tree func)
3083 enum machine_mode natmode = type_natural_mode (valtype);
3087 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3088 1, REGPARM_MAX, SSE_REGPARM_MAX,
3089 x86_64_int_return_registers, 0);
3090 /* For zero sized structures, construct_container return NULL, but we
3091 need to keep rest of compiler happy by returning meaningful value. */
3093 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3097 return gen_rtx_REG (TYPE_MODE (valtype), ix86_value_regno (natmode, func));
3100 /* Return false iff type is returned in memory. */
3102 ix86_return_in_memory (tree type)
3104 int needed_intregs, needed_sseregs, size;
3105 enum machine_mode mode = type_natural_mode (type);
3108 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3110 if (mode == BLKmode)
3113 size = int_size_in_bytes (type);
3115 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3118 if (VECTOR_MODE_P (mode) || mode == TImode)
3120 /* User-created vectors small enough to fit in EAX. */
3124 /* MMX/3dNow values are returned on the stack, since we've
3125 got to EMMS/FEMMS before returning. */
3129 /* SSE values are returned in XMM0, except when it doesn't exist. */
3131 return (TARGET_SSE ? 0 : 1);
3142 /* When returning SSE vector types, we have a choice of either
3143 (1) being abi incompatible with a -march switch, or
3144 (2) generating an error.
3145 Given no good solution, I think the safest thing is one warning.
3146 The user won't be able to use -Werror, but....
3148 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3149 called in response to actually generating a caller or callee that
3150 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3151 via aggregate_value_p for general type probing from tree-ssa. */
3154 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3158 if (!TARGET_SSE && type && !warned)
3160 /* Look at the return type of the function, not the function type. */
3161 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3164 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3167 warning ("SSE vector return without SSE enabled changes the ABI");
3174 /* Define how to find the value returned by a library function
3175 assuming the value has mode MODE. */
3177 ix86_libcall_value (enum machine_mode mode)
3188 return gen_rtx_REG (mode, FIRST_SSE_REG);
3191 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3195 return gen_rtx_REG (mode, 0);
3199 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL));
3202 /* Given a mode, return the register to use for a return value. */
3205 ix86_value_regno (enum machine_mode mode, tree func)
3207 gcc_assert (!TARGET_64BIT);
3209 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3210 we prevent this case when sse is not available. */
3211 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3212 return FIRST_SSE_REG;
3214 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
3215 if (GET_MODE_CLASS (mode) != MODE_FLOAT || !TARGET_FLOAT_RETURNS_IN_80387)
3218 /* Floating point return values in %st(0), except for local functions when
3219 SSE math is enabled. */
3220 if (func && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH
3221 && flag_unit_at_a_time)
3223 struct cgraph_local_info *i = cgraph_local_info (func);
3225 return FIRST_SSE_REG;
3228 return FIRST_FLOAT_REG;
3231 /* Create the va_list data type. */
3234 ix86_build_builtin_va_list (void)
3236 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3238 /* For i386 we use plain pointer to argument area. */
3240 return build_pointer_type (char_type_node);
3242 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3243 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3245 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3246 unsigned_type_node);
3247 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3248 unsigned_type_node);
3249 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3251 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3254 DECL_FIELD_CONTEXT (f_gpr) = record;
3255 DECL_FIELD_CONTEXT (f_fpr) = record;
3256 DECL_FIELD_CONTEXT (f_ovf) = record;
3257 DECL_FIELD_CONTEXT (f_sav) = record;
3259 TREE_CHAIN (record) = type_decl;
3260 TYPE_NAME (record) = type_decl;
3261 TYPE_FIELDS (record) = f_gpr;
3262 TREE_CHAIN (f_gpr) = f_fpr;
3263 TREE_CHAIN (f_fpr) = f_ovf;
3264 TREE_CHAIN (f_ovf) = f_sav;
3266 layout_type (record);
3268 /* The correct type is an array type of one element. */
3269 return build_array_type (record, build_index_type (size_zero_node));
3272 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3275 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3276 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3279 CUMULATIVE_ARGS next_cum;
3280 rtx save_area = NULL_RTX, mem;
3293 /* Indicate to allocate space on the stack for varargs save area. */
3294 ix86_save_varrargs_registers = 1;
3296 cfun->stack_alignment_needed = 128;
3298 fntype = TREE_TYPE (current_function_decl);
3299 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3300 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3301 != void_type_node));
3303 /* For varargs, we do not want to skip the dummy va_dcl argument.
3304 For stdargs, we do want to skip the last named argument. */
3307 function_arg_advance (&next_cum, mode, type, 1);
3310 save_area = frame_pointer_rtx;
3312 set = get_varargs_alias_set ();
3314 for (i = next_cum.regno; i < ix86_regparm; i++)
3316 mem = gen_rtx_MEM (Pmode,
3317 plus_constant (save_area, i * UNITS_PER_WORD));
3318 set_mem_alias_set (mem, set);
3319 emit_move_insn (mem, gen_rtx_REG (Pmode,
3320 x86_64_int_parameter_registers[i]));
3323 if (next_cum.sse_nregs)
3325 /* Now emit code to save SSE registers. The AX parameter contains number
3326 of SSE parameter registers used to call this function. We use
3327 sse_prologue_save insn template that produces computed jump across
3328 SSE saves. We need some preparation work to get this working. */
3330 label = gen_label_rtx ();
3331 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3333 /* Compute address to jump to :
3334 label - 5*eax + nnamed_sse_arguments*5 */
3335 tmp_reg = gen_reg_rtx (Pmode);
3336 nsse_reg = gen_reg_rtx (Pmode);
3337 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3338 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3339 gen_rtx_MULT (Pmode, nsse_reg,
3341 if (next_cum.sse_regno)
3344 gen_rtx_CONST (DImode,
3345 gen_rtx_PLUS (DImode,
3347 GEN_INT (next_cum.sse_regno * 4))));
3349 emit_move_insn (nsse_reg, label_ref);
3350 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3352 /* Compute address of memory block we save into. We always use pointer
3353 pointing 127 bytes after first byte to store - this is needed to keep
3354 instruction size limited by 4 bytes. */
3355 tmp_reg = gen_reg_rtx (Pmode);
3356 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3357 plus_constant (save_area,
3358 8 * REGPARM_MAX + 127)));
3359 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3360 set_mem_alias_set (mem, set);
3361 set_mem_align (mem, BITS_PER_WORD);
3363 /* And finally do the dirty job! */
3364 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3365 GEN_INT (next_cum.sse_regno), label));
3370 /* Implement va_start. */
3373 ix86_va_start (tree valist, rtx nextarg)
3375 HOST_WIDE_INT words, n_gpr, n_fpr;
3376 tree f_gpr, f_fpr, f_ovf, f_sav;
3377 tree gpr, fpr, ovf, sav, t;
3379 /* Only 64bit target needs something special. */
3382 std_expand_builtin_va_start (valist, nextarg);
3386 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3387 f_fpr = TREE_CHAIN (f_gpr);
3388 f_ovf = TREE_CHAIN (f_fpr);
3389 f_sav = TREE_CHAIN (f_ovf);
3391 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3392 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3393 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3394 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3395 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3397 /* Count number of gp and fp argument registers used. */
3398 words = current_function_args_info.words;
3399 n_gpr = current_function_args_info.regno;
3400 n_fpr = current_function_args_info.sse_regno;
3402 if (TARGET_DEBUG_ARG)
3403 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3404 (int) words, (int) n_gpr, (int) n_fpr);
3406 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3407 build_int_cst (NULL_TREE, n_gpr * 8));
3408 TREE_SIDE_EFFECTS (t) = 1;
3409 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3411 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3412 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
3413 TREE_SIDE_EFFECTS (t) = 1;
3414 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3416 /* Find the overflow area. */
3417 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3419 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3420 build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
3421 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3422 TREE_SIDE_EFFECTS (t) = 1;
3423 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3425 /* Find the register save area.
3426 Prologue of the function save it right above stack frame. */
3427 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3428 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3429 TREE_SIDE_EFFECTS (t) = 1;
3430 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3433 /* Implement va_arg. */
3436 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3438 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3439 tree f_gpr, f_fpr, f_ovf, f_sav;
3440 tree gpr, fpr, ovf, sav, t;
3442 tree lab_false, lab_over = NULL_TREE;
3447 enum machine_mode nat_mode;
3449 /* Only 64bit target needs something special. */
3451 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3453 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3454 f_fpr = TREE_CHAIN (f_gpr);
3455 f_ovf = TREE_CHAIN (f_fpr);
3456 f_sav = TREE_CHAIN (f_ovf);
3458 valist = build_va_arg_indirect_ref (valist);
3459 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3460 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3461 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3462 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3464 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3466 type = build_pointer_type (type);
3467 size = int_size_in_bytes (type);
3468 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3470 nat_mode = type_natural_mode (type);
3471 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
3472 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3474 /* Pull the value out of the saved registers. */
3476 addr = create_tmp_var (ptr_type_node, "addr");
3477 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3481 int needed_intregs, needed_sseregs;
3483 tree int_addr, sse_addr;
3485 lab_false = create_artificial_label ();
3486 lab_over = create_artificial_label ();
3488 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
3490 need_temp = (!REG_P (container)
3491 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3492 || TYPE_ALIGN (type) > 128));
3494 /* In case we are passing structure, verify that it is consecutive block
3495 on the register save area. If not we need to do moves. */
3496 if (!need_temp && !REG_P (container))
3498 /* Verify that all registers are strictly consecutive */
3499 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3503 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3505 rtx slot = XVECEXP (container, 0, i);
3506 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3507 || INTVAL (XEXP (slot, 1)) != i * 16)
3515 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3517 rtx slot = XVECEXP (container, 0, i);
3518 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3519 || INTVAL (XEXP (slot, 1)) != i * 8)
3531 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3532 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3533 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3534 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3537 /* First ensure that we fit completely in registers. */
3540 t = build_int_cst (TREE_TYPE (gpr),
3541 (REGPARM_MAX - needed_intregs + 1) * 8);
3542 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3543 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3544 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3545 gimplify_and_add (t, pre_p);
3549 t = build_int_cst (TREE_TYPE (fpr),
3550 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3552 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3553 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3554 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3555 gimplify_and_add (t, pre_p);
3558 /* Compute index to start of area used for integer regs. */
3561 /* int_addr = gpr + sav; */
3562 t = fold_convert (ptr_type_node, gpr);
3563 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3564 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3565 gimplify_and_add (t, pre_p);
3569 /* sse_addr = fpr + sav; */
3570 t = fold_convert (ptr_type_node, fpr);
3571 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3572 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3573 gimplify_and_add (t, pre_p);
3578 tree temp = create_tmp_var (type, "va_arg_tmp");
3581 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3582 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3583 gimplify_and_add (t, pre_p);
3585 for (i = 0; i < XVECLEN (container, 0); i++)
3587 rtx slot = XVECEXP (container, 0, i);
3588 rtx reg = XEXP (slot, 0);
3589 enum machine_mode mode = GET_MODE (reg);
3590 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3591 tree addr_type = build_pointer_type (piece_type);
3594 tree dest_addr, dest;
3596 if (SSE_REGNO_P (REGNO (reg)))
3598 src_addr = sse_addr;
3599 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3603 src_addr = int_addr;
3604 src_offset = REGNO (reg) * 8;
3606 src_addr = fold_convert (addr_type, src_addr);
3607 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3608 size_int (src_offset)));
3609 src = build_va_arg_indirect_ref (src_addr);
3611 dest_addr = fold_convert (addr_type, addr);
3612 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3613 size_int (INTVAL (XEXP (slot, 1)))));
3614 dest = build_va_arg_indirect_ref (dest_addr);
3616 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3617 gimplify_and_add (t, pre_p);
3623 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3624 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
3625 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3626 gimplify_and_add (t, pre_p);
3630 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3631 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
3632 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3633 gimplify_and_add (t, pre_p);
3636 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3637 gimplify_and_add (t, pre_p);
3639 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3640 append_to_statement_list (t, pre_p);
3643 /* ... otherwise out of the overflow area. */
3645 /* Care for on-stack alignment if needed. */
3646 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3650 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3651 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
3652 build_int_cst (TREE_TYPE (ovf), align - 1));
3653 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3654 build_int_cst (TREE_TYPE (t), -align));
3656 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3658 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3659 gimplify_and_add (t2, pre_p);
3661 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3662 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
3663 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3664 gimplify_and_add (t, pre_p);
3668 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3669 append_to_statement_list (t, pre_p);
3672 ptrtype = build_pointer_type (type);
3673 addr = fold_convert (ptrtype, addr);
3676 addr = build_va_arg_indirect_ref (addr);
3677 return build_va_arg_indirect_ref (addr);
3680 /* Return nonzero if OPNUM's MEM should be matched
3681 in movabs* patterns. */
3684 ix86_check_movabs (rtx insn, int opnum)
3688 set = PATTERN (insn);
3689 if (GET_CODE (set) == PARALLEL)
3690 set = XVECEXP (set, 0, 0);
3691 if (GET_CODE (set) != SET)
3693 mem = XEXP (set, opnum);
3694 while (GET_CODE (mem) == SUBREG)
3695 mem = SUBREG_REG (mem);
3696 if (GET_CODE (mem) != MEM)
3698 return (volatile_ok || !MEM_VOLATILE_P (mem));
3701 /* Initialize the table of extra 80387 mathematical constants. */
3704 init_ext_80387_constants (void)
3706 static const char * cst[5] =
3708 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
3709 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
3710 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
3711 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
3712 "3.1415926535897932385128089594061862044", /* 4: fldpi */
3716 for (i = 0; i < 5; i++)
3718 real_from_string (&ext_80387_constants_table[i], cst[i]);
3719 /* Ensure each constant is rounded to XFmode precision. */
3720 real_convert (&ext_80387_constants_table[i],
3721 XFmode, &ext_80387_constants_table[i]);
3724 ext_80387_constants_init = 1;
3727 /* Return true if the constant is something that can be loaded with
3728 a special instruction. */
3731 standard_80387_constant_p (rtx x)
3733 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3736 if (x == CONST0_RTX (GET_MODE (x)))
3738 if (x == CONST1_RTX (GET_MODE (x)))
3741 /* For XFmode constants, try to find a special 80387 instruction when
3742 optimizing for size or on those CPUs that benefit from them. */
3743 if (GET_MODE (x) == XFmode
3744 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
3749 if (! ext_80387_constants_init)
3750 init_ext_80387_constants ();
3752 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3753 for (i = 0; i < 5; i++)
3754 if (real_identical (&r, &ext_80387_constants_table[i]))
3761 /* Return the opcode of the special instruction to be used to load
3765 standard_80387_constant_opcode (rtx x)
3767 switch (standard_80387_constant_p (x))
3787 /* Return the CONST_DOUBLE representing the 80387 constant that is
3788 loaded by the specified special instruction. The argument IDX
3789 matches the return value from standard_80387_constant_p. */
3792 standard_80387_constant_rtx (int idx)
3796 if (! ext_80387_constants_init)
3797 init_ext_80387_constants ();
3813 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
3817 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3820 standard_sse_constant_p (rtx x)
3822 if (x == const0_rtx)
3824 return (x == CONST0_RTX (GET_MODE (x)));
3827 /* Returns 1 if OP contains a symbol reference */
3830 symbolic_reference_mentioned_p (rtx op)
3835 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3838 fmt = GET_RTX_FORMAT (GET_CODE (op));
3839 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3845 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3846 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3850 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3857 /* Return 1 if it is appropriate to emit `ret' instructions in the
3858 body of a function. Do this only if the epilogue is simple, needing a
3859 couple of insns. Prior to reloading, we can't tell how many registers
3860 must be saved, so return 0 then. Return 0 if there is no frame
3861 marker to de-allocate. */
3864 ix86_can_use_return_insn_p (void)
3866 struct ix86_frame frame;
3868 if (! reload_completed || frame_pointer_needed)
3871 /* Don't allow more than 32 pop, since that's all we can do
3872 with one instruction. */
3873 if (current_function_pops_args
3874 && current_function_args_size >= 32768)
3877 ix86_compute_frame_layout (&frame);
3878 return frame.to_allocate == 0 && frame.nregs == 0;
3881 /* Value should be nonzero if functions must have frame pointers.
3882 Zero means the frame pointer need not be set up (and parms may
3883 be accessed via the stack pointer) in functions that seem suitable. */
3886 ix86_frame_pointer_required (void)
3888 /* If we accessed previous frames, then the generated code expects
3889 to be able to access the saved ebp value in our frame. */
3890 if (cfun->machine->accesses_prev_frame)
3893 /* Several x86 os'es need a frame pointer for other reasons,
3894 usually pertaining to setjmp. */
3895 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3898 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3899 the frame pointer by default. Turn it back on now if we've not
3900 got a leaf function. */
3901 if (TARGET_OMIT_LEAF_FRAME_POINTER
3902 && (!current_function_is_leaf))
3905 if (current_function_profile)
3911 /* Record that the current function accesses previous call frames. */
3914 ix86_setup_frame_addresses (void)
3916 cfun->machine->accesses_prev_frame = 1;
3919 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3920 # define USE_HIDDEN_LINKONCE 1
3922 # define USE_HIDDEN_LINKONCE 0
3925 static int pic_labels_used;
3927 /* Fills in the label name that should be used for a pc thunk for
3928 the given register. */
3931 get_pc_thunk_name (char name[32], unsigned int regno)
3933 if (USE_HIDDEN_LINKONCE)
3934 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3936 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3940 /* This function generates code for -fpic that loads %ebx with
3941 the return address of the caller and then returns. */
3944 ix86_file_end (void)
3949 for (regno = 0; regno < 8; ++regno)
3953 if (! ((pic_labels_used >> regno) & 1))
3956 get_pc_thunk_name (name, regno);
3958 if (USE_HIDDEN_LINKONCE)
3962 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3964 TREE_PUBLIC (decl) = 1;
3965 TREE_STATIC (decl) = 1;
3966 DECL_ONE_ONLY (decl) = 1;
3968 (*targetm.asm_out.unique_section) (decl, 0);
3969 named_section (decl, NULL, 0);
3971 (*targetm.asm_out.globalize_label) (asm_out_file, name);
3972 fputs ("\t.hidden\t", asm_out_file);
3973 assemble_name (asm_out_file, name);
3974 fputc ('\n', asm_out_file);
3975 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
3980 ASM_OUTPUT_LABEL (asm_out_file, name);
3983 xops[0] = gen_rtx_REG (SImode, regno);
3984 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3985 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3986 output_asm_insn ("ret", xops);
3989 if (NEED_INDICATE_EXEC_STACK)
3990 file_end_indicate_exec_stack ();
3993 /* Emit code for the SET_GOT patterns. */
3996 output_set_got (rtx dest)
4001 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4003 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4005 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4008 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4010 output_asm_insn ("call\t%a2", xops);
4013 /* Output the "canonical" label name ("Lxx$pb") here too. This
4014 is what will be referred to by the Mach-O PIC subsystem. */
4015 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4017 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4018 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4021 output_asm_insn ("pop{l}\t%0", xops);
4026 get_pc_thunk_name (name, REGNO (dest));
4027 pic_labels_used |= 1 << REGNO (dest);
4029 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4030 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4031 output_asm_insn ("call\t%X2", xops);
4034 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4035 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4036 else if (!TARGET_MACHO)
4037 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4042 /* Generate an "push" pattern for input ARG. */
4047 return gen_rtx_SET (VOIDmode,
4049 gen_rtx_PRE_DEC (Pmode,
4050 stack_pointer_rtx)),
4054 /* Return >= 0 if there is an unused call-clobbered register available
4055 for the entire function. */
4058 ix86_select_alt_pic_regnum (void)
4060 if (current_function_is_leaf && !current_function_profile)
4063 for (i = 2; i >= 0; --i)
4064 if (!regs_ever_live[i])
4068 return INVALID_REGNUM;
4071 /* Return 1 if we need to save REGNO. */
4073 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4075 if (pic_offset_table_rtx
4076 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4077 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4078 || current_function_profile
4079 || current_function_calls_eh_return
4080 || current_function_uses_const_pool))
4082 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4087 if (current_function_calls_eh_return && maybe_eh_return)
4092 unsigned test = EH_RETURN_DATA_REGNO (i);
4093 if (test == INVALID_REGNUM)
4100 return (regs_ever_live[regno]
4101 && !call_used_regs[regno]
4102 && !fixed_regs[regno]
4103 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4106 /* Return number of registers to be saved on the stack. */
4109 ix86_nsaved_regs (void)
4114 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4115 if (ix86_save_reg (regno, true))
4120 /* Return the offset between two registers, one to be eliminated, and the other
4121 its replacement, at the start of a routine. */
4124 ix86_initial_elimination_offset (int from, int to)
4126 struct ix86_frame frame;
4127 ix86_compute_frame_layout (&frame);
4129 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4130 return frame.hard_frame_pointer_offset;
4131 else if (from == FRAME_POINTER_REGNUM
4132 && to == HARD_FRAME_POINTER_REGNUM)
4133 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4136 if (to != STACK_POINTER_REGNUM)
4138 else if (from == ARG_POINTER_REGNUM)
4139 return frame.stack_pointer_offset;
4140 else if (from != FRAME_POINTER_REGNUM)
4143 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4147 /* Fill structure ix86_frame about frame of currently computed function. */
4150 ix86_compute_frame_layout (struct ix86_frame *frame)
4152 HOST_WIDE_INT total_size;
4153 unsigned int stack_alignment_needed;
4154 HOST_WIDE_INT offset;
4155 unsigned int preferred_alignment;
4156 HOST_WIDE_INT size = get_frame_size ();
4158 frame->nregs = ix86_nsaved_regs ();
4161 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4162 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4164 /* During reload iteration the amount of registers saved can change.
4165 Recompute the value as needed. Do not recompute when amount of registers
4166 didn't change as reload does multiple calls to the function and does not
4167 expect the decision to change within single iteration. */
4169 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4171 int count = frame->nregs;
4173 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4174 /* The fast prologue uses move instead of push to save registers. This
4175 is significantly longer, but also executes faster as modern hardware
4176 can execute the moves in parallel, but can't do that for push/pop.
4178 Be careful about choosing what prologue to emit: When function takes
4179 many instructions to execute we may use slow version as well as in
4180 case function is known to be outside hot spot (this is known with
4181 feedback only). Weight the size of function by number of registers
4182 to save as it is cheap to use one or two push instructions but very
4183 slow to use many of them. */
4185 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4186 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4187 || (flag_branch_probabilities
4188 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4189 cfun->machine->use_fast_prologue_epilogue = false;
4191 cfun->machine->use_fast_prologue_epilogue
4192 = !expensive_function_p (count);
4194 if (TARGET_PROLOGUE_USING_MOVE
4195 && cfun->machine->use_fast_prologue_epilogue)
4196 frame->save_regs_using_mov = true;
4198 frame->save_regs_using_mov = false;
4201 /* Skip return address and saved base pointer. */
4202 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4204 frame->hard_frame_pointer_offset = offset;
4206 /* Do some sanity checking of stack_alignment_needed and
4207 preferred_alignment, since i386 port is the only using those features
4208 that may break easily. */
4210 if (size && !stack_alignment_needed)
4212 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4214 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4216 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4219 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4220 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4222 /* Register save area */
4223 offset += frame->nregs * UNITS_PER_WORD;
4226 if (ix86_save_varrargs_registers)
4228 offset += X86_64_VARARGS_SIZE;
4229 frame->va_arg_size = X86_64_VARARGS_SIZE;
4232 frame->va_arg_size = 0;
4234 /* Align start of frame for local function. */
4235 frame->padding1 = ((offset + stack_alignment_needed - 1)
4236 & -stack_alignment_needed) - offset;
4238 offset += frame->padding1;
4240 /* Frame pointer points here. */
4241 frame->frame_pointer_offset = offset;
4245 /* Add outgoing arguments area. Can be skipped if we eliminated
4246 all the function calls as dead code.
4247 Skipping is however impossible when function calls alloca. Alloca
4248 expander assumes that last current_function_outgoing_args_size
4249 of stack frame are unused. */
4250 if (ACCUMULATE_OUTGOING_ARGS
4251 && (!current_function_is_leaf || current_function_calls_alloca))
4253 offset += current_function_outgoing_args_size;
4254 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4257 frame->outgoing_arguments_size = 0;
4259 /* Align stack boundary. Only needed if we're calling another function
4261 if (!current_function_is_leaf || current_function_calls_alloca)
4262 frame->padding2 = ((offset + preferred_alignment - 1)
4263 & -preferred_alignment) - offset;
4265 frame->padding2 = 0;
4267 offset += frame->padding2;
4269 /* We've reached end of stack frame. */
4270 frame->stack_pointer_offset = offset;
4272 /* Size prologue needs to allocate. */
4273 frame->to_allocate =
4274 (size + frame->padding1 + frame->padding2
4275 + frame->outgoing_arguments_size + frame->va_arg_size);
4277 if ((!frame->to_allocate && frame->nregs <= 1)
4278 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
4279 frame->save_regs_using_mov = false;
4281 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
4282 && current_function_is_leaf)
4284 frame->red_zone_size = frame->to_allocate;
4285 if (frame->save_regs_using_mov)
4286 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
4287 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4288 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4291 frame->red_zone_size = 0;
4292 frame->to_allocate -= frame->red_zone_size;
4293 frame->stack_pointer_offset -= frame->red_zone_size;
4295 fprintf (stderr, "nregs: %i\n", frame->nregs);
4296 fprintf (stderr, "size: %i\n", size);
4297 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4298 fprintf (stderr, "padding1: %i\n", frame->padding1);
4299 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4300 fprintf (stderr, "padding2: %i\n", frame->padding2);
4301 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4302 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4303 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4304 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4305 frame->hard_frame_pointer_offset);
4306 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4310 /* Emit code to save registers in the prologue. */
4313 ix86_emit_save_regs (void)
4318 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4319 if (ix86_save_reg (regno, true))
4321 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4322 RTX_FRAME_RELATED_P (insn) = 1;
4326 /* Emit code to save registers using MOV insns. First register
4327 is restored from POINTER + OFFSET. */
4329 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
4334 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4335 if (ix86_save_reg (regno, true))
4337 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4339 gen_rtx_REG (Pmode, regno));
4340 RTX_FRAME_RELATED_P (insn) = 1;
4341 offset += UNITS_PER_WORD;
4345 /* Expand prologue or epilogue stack adjustment.
4346 The pattern exist to put a dependency on all ebp-based memory accesses.
4347 STYLE should be negative if instructions should be marked as frame related,
4348 zero if %r11 register is live and cannot be freely used and positive
4352 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
4357 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
4358 else if (x86_64_immediate_operand (offset, DImode))
4359 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
4363 /* r11 is used by indirect sibcall return as well, set before the
4364 epilogue and used after the epilogue. ATM indirect sibcall
4365 shouldn't be used together with huge frame sizes in one
4366 function because of the frame_size check in sibcall.c. */
4369 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4370 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
4372 RTX_FRAME_RELATED_P (insn) = 1;
4373 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
4377 RTX_FRAME_RELATED_P (insn) = 1;
4380 /* Expand the prologue into a bunch of separate insns. */
4383 ix86_expand_prologue (void)
4387 struct ix86_frame frame;
4388 HOST_WIDE_INT allocate;
4390 ix86_compute_frame_layout (&frame);
4392 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4393 slower on all targets. Also sdb doesn't like it. */
4395 if (frame_pointer_needed)
4397 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4398 RTX_FRAME_RELATED_P (insn) = 1;
4400 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4401 RTX_FRAME_RELATED_P (insn) = 1;
4404 allocate = frame.to_allocate;
4406 if (!frame.save_regs_using_mov)
4407 ix86_emit_save_regs ();
4409 allocate += frame.nregs * UNITS_PER_WORD;
4411 /* When using red zone we may start register saving before allocating
4412 the stack frame saving one cycle of the prologue. */
4413 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
4414 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
4415 : stack_pointer_rtx,
4416 -frame.nregs * UNITS_PER_WORD);
4420 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4421 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4422 GEN_INT (-allocate), -1);
4425 /* Only valid for Win32. */
4426 rtx eax = gen_rtx_REG (SImode, 0);
4427 bool eax_live = ix86_eax_live_at_start_p ();
4435 emit_insn (gen_push (eax));
4439 emit_move_insn (eax, GEN_INT (allocate));
4441 insn = emit_insn (gen_allocate_stack_worker (eax));
4442 RTX_FRAME_RELATED_P (insn) = 1;
4443 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
4444 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
4445 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
4446 t, REG_NOTES (insn));
4450 if (frame_pointer_needed)
4451 t = plus_constant (hard_frame_pointer_rtx,
4454 - frame.nregs * UNITS_PER_WORD);
4456 t = plus_constant (stack_pointer_rtx, allocate);
4457 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
4461 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
4463 if (!frame_pointer_needed || !frame.to_allocate)
4464 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4466 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4467 -frame.nregs * UNITS_PER_WORD);
4470 pic_reg_used = false;
4471 if (pic_offset_table_rtx
4472 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4473 || current_function_profile))
4475 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4477 if (alt_pic_reg_used != INVALID_REGNUM)
4478 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4480 pic_reg_used = true;
4485 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4487 /* Even with accurate pre-reload life analysis, we can wind up
4488 deleting all references to the pic register after reload.
4489 Consider if cross-jumping unifies two sides of a branch
4490 controlled by a comparison vs the only read from a global.
4491 In which case, allow the set_got to be deleted, though we're
4492 too late to do anything about the ebx save in the prologue. */
4493 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4496 /* Prevent function calls from be scheduled before the call to mcount.
4497 In the pic_reg_used case, make sure that the got load isn't deleted. */
4498 if (current_function_profile)
4499 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4502 /* Emit code to restore saved registers using MOV insns. First register
4503 is restored from POINTER + OFFSET. */
4505 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
4506 int maybe_eh_return)
4509 rtx base_address = gen_rtx_MEM (Pmode, pointer);
4511 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4512 if (ix86_save_reg (regno, maybe_eh_return))
4514 /* Ensure that adjust_address won't be forced to produce pointer
4515 out of range allowed by x86-64 instruction set. */
4516 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
4520 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4521 emit_move_insn (r11, GEN_INT (offset));
4522 emit_insn (gen_adddi3 (r11, r11, pointer));
4523 base_address = gen_rtx_MEM (Pmode, r11);
4526 emit_move_insn (gen_rtx_REG (Pmode, regno),
4527 adjust_address (base_address, Pmode, offset));
4528 offset += UNITS_PER_WORD;
4532 /* Restore function stack, frame, and registers. */
4535 ix86_expand_epilogue (int style)
4538 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4539 struct ix86_frame frame;
4540 HOST_WIDE_INT offset;
4542 ix86_compute_frame_layout (&frame);
4544 /* Calculate start of saved registers relative to ebp. Special care
4545 must be taken for the normal return case of a function using
4546 eh_return: the eax and edx registers are marked as saved, but not
4547 restored along this path. */
4548 offset = frame.nregs;
4549 if (current_function_calls_eh_return && style != 2)
4551 offset *= -UNITS_PER_WORD;
4553 /* If we're only restoring one register and sp is not valid then
4554 using a move instruction to restore the register since it's
4555 less work than reloading sp and popping the register.
4557 The default code result in stack adjustment using add/lea instruction,
4558 while this code results in LEAVE instruction (or discrete equivalent),
4559 so it is profitable in some other cases as well. Especially when there
4560 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4561 and there is exactly one register to pop. This heuristic may need some
4562 tuning in future. */
4563 if ((!sp_valid && frame.nregs <= 1)
4564 || (TARGET_EPILOGUE_USING_MOVE
4565 && cfun->machine->use_fast_prologue_epilogue
4566 && (frame.nregs > 1 || frame.to_allocate))
4567 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4568 || (frame_pointer_needed && TARGET_USE_LEAVE
4569 && cfun->machine->use_fast_prologue_epilogue
4570 && frame.nregs == 1)
4571 || current_function_calls_eh_return)
4573 /* Restore registers. We can use ebp or esp to address the memory
4574 locations. If both are available, default to ebp, since offsets
4575 are known to be small. Only exception is esp pointing directly to the
4576 end of block of saved registers, where we may simplify addressing
4579 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4580 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4581 frame.to_allocate, style == 2);
4583 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4584 offset, style == 2);
4586 /* eh_return epilogues need %ecx added to the stack pointer. */
4589 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4591 if (frame_pointer_needed)
4593 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4594 tmp = plus_constant (tmp, UNITS_PER_WORD);
4595 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4597 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4598 emit_move_insn (hard_frame_pointer_rtx, tmp);
4600 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
4605 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4606 tmp = plus_constant (tmp, (frame.to_allocate
4607 + frame.nregs * UNITS_PER_WORD));
4608 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4611 else if (!frame_pointer_needed)
4612 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4613 GEN_INT (frame.to_allocate
4614 + frame.nregs * UNITS_PER_WORD),
4616 /* If not an i386, mov & pop is faster than "leave". */
4617 else if (TARGET_USE_LEAVE || optimize_size
4618 || !cfun->machine->use_fast_prologue_epilogue)
4619 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4622 pro_epilogue_adjust_stack (stack_pointer_rtx,
4623 hard_frame_pointer_rtx,
4626 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4628 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4633 /* First step is to deallocate the stack frame so that we can
4634 pop the registers. */
4637 if (!frame_pointer_needed)
4639 pro_epilogue_adjust_stack (stack_pointer_rtx,
4640 hard_frame_pointer_rtx,
4641 GEN_INT (offset), style);
4643 else if (frame.to_allocate)
4644 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4645 GEN_INT (frame.to_allocate), style);
4647 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4648 if (ix86_save_reg (regno, false))
4651 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4653 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4655 if (frame_pointer_needed)
4657 /* Leave results in shorter dependency chains on CPUs that are
4658 able to grok it fast. */
4659 if (TARGET_USE_LEAVE)
4660 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4661 else if (TARGET_64BIT)
4662 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4664 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4668 /* Sibcall epilogues don't want a return instruction. */
4672 if (current_function_pops_args && current_function_args_size)
4674 rtx popc = GEN_INT (current_function_pops_args);
4676 /* i386 can only pop 64K bytes. If asked to pop more, pop
4677 return address, do explicit add, and jump indirectly to the
4680 if (current_function_pops_args >= 65536)
4682 rtx ecx = gen_rtx_REG (SImode, 2);
4684 /* There is no "pascal" calling convention in 64bit ABI. */
4688 emit_insn (gen_popsi1 (ecx));
4689 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4690 emit_jump_insn (gen_return_indirect_internal (ecx));
4693 emit_jump_insn (gen_return_pop_internal (popc));
4696 emit_jump_insn (gen_return_internal ());
4699 /* Reset from the function's potential modifications. */
4702 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4703 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4705 if (pic_offset_table_rtx)
4706 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4709 /* Extract the parts of an RTL expression that is a valid memory address
4710 for an instruction. Return 0 if the structure of the address is
4711 grossly off. Return -1 if the address contains ASHIFT, so it is not
4712 strictly valid, but still used for computing length of lea instruction. */
4715 ix86_decompose_address (rtx addr, struct ix86_address *out)
4717 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
4718 rtx base_reg, index_reg;
4719 HOST_WIDE_INT scale = 1;
4720 rtx scale_rtx = NULL_RTX;
4722 enum ix86_address_seg seg = SEG_DEFAULT;
4724 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4726 else if (GET_CODE (addr) == PLUS)
4736 addends[n++] = XEXP (op, 1);
4739 while (GET_CODE (op) == PLUS);
4744 for (i = n; i >= 0; --i)
4747 switch (GET_CODE (op))
4752 index = XEXP (op, 0);
4753 scale_rtx = XEXP (op, 1);
4757 if (XINT (op, 1) == UNSPEC_TP
4758 && TARGET_TLS_DIRECT_SEG_REFS
4759 && seg == SEG_DEFAULT)
4760 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
4789 else if (GET_CODE (addr) == MULT)
4791 index = XEXP (addr, 0); /* index*scale */
4792 scale_rtx = XEXP (addr, 1);
4794 else if (GET_CODE (addr) == ASHIFT)
4798 /* We're called for lea too, which implements ashift on occasion. */
4799 index = XEXP (addr, 0);
4800 tmp = XEXP (addr, 1);
4801 if (GET_CODE (tmp) != CONST_INT)
4803 scale = INTVAL (tmp);
4804 if ((unsigned HOST_WIDE_INT) scale > 3)
4810 disp = addr; /* displacement */
4812 /* Extract the integral value of scale. */
4815 if (GET_CODE (scale_rtx) != CONST_INT)
4817 scale = INTVAL (scale_rtx);
4820 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
4821 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
4823 /* Allow arg pointer and stack pointer as index if there is not scaling. */
4824 if (base_reg && index_reg && scale == 1
4825 && (index_reg == arg_pointer_rtx
4826 || index_reg == frame_pointer_rtx
4827 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
4830 tmp = base, base = index, index = tmp;
4831 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
4834 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4835 if ((base_reg == hard_frame_pointer_rtx
4836 || base_reg == frame_pointer_rtx
4837 || base_reg == arg_pointer_rtx) && !disp)
4840 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4841 Avoid this by transforming to [%esi+0]. */
4842 if (ix86_tune == PROCESSOR_K6 && !optimize_size
4843 && base_reg && !index_reg && !disp
4845 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
4848 /* Special case: encode reg+reg instead of reg*2. */
4849 if (!base && index && scale && scale == 2)
4850 base = index, base_reg = index_reg, scale = 1;
4852 /* Special case: scaling cannot be encoded without base or displacement. */
4853 if (!base && !disp && index && scale != 1)
4865 /* Return cost of the memory address x.
4866 For i386, it is better to use a complex address than let gcc copy
4867 the address into a reg and make a new pseudo. But not if the address
4868 requires to two regs - that would mean more pseudos with longer
4871 ix86_address_cost (rtx x)
4873 struct ix86_address parts;
4876 if (!ix86_decompose_address (x, &parts))
4879 if (parts.base && GET_CODE (parts.base) == SUBREG)
4880 parts.base = SUBREG_REG (parts.base);
4881 if (parts.index && GET_CODE (parts.index) == SUBREG)
4882 parts.index = SUBREG_REG (parts.index);
4884 /* More complex memory references are better. */
4885 if (parts.disp && parts.disp != const0_rtx)
4887 if (parts.seg != SEG_DEFAULT)
4890 /* Attempt to minimize number of registers in the address. */
4892 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4894 && (!REG_P (parts.index)
4895 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4899 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4901 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4902 && parts.base != parts.index)
4905 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
4906 since it's predecode logic can't detect the length of instructions
4907 and it degenerates to vector decoded. Increase cost of such
4908 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4909 to split such addresses or even refuse such addresses at all.
4911 Following addressing modes are affected:
4916 The first and last case may be avoidable by explicitly coding the zero in
4917 memory address, but I don't have AMD-K6 machine handy to check this
4921 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4922 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4923 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4929 /* If X is a machine specific address (i.e. a symbol or label being
4930 referenced as a displacement from the GOT implemented using an
4931 UNSPEC), then return the base term. Otherwise return X. */
4934 ix86_find_base_term (rtx x)
4940 if (GET_CODE (x) != CONST)
4943 if (GET_CODE (term) == PLUS
4944 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4945 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4946 term = XEXP (term, 0);
4947 if (GET_CODE (term) != UNSPEC
4948 || XINT (term, 1) != UNSPEC_GOTPCREL)
4951 term = XVECEXP (term, 0, 0);
4953 if (GET_CODE (term) != SYMBOL_REF
4954 && GET_CODE (term) != LABEL_REF)
4960 term = ix86_delegitimize_address (x);
4962 if (GET_CODE (term) != SYMBOL_REF
4963 && GET_CODE (term) != LABEL_REF)
4969 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
4970 this is used for to form addresses to local data when -fPIC is in
4974 darwin_local_data_pic (rtx disp)
4976 if (GET_CODE (disp) == MINUS)
4978 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
4979 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
4980 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
4982 const char *sym_name = XSTR (XEXP (disp, 1), 0);
4983 if (! strcmp (sym_name, "<pic base>"))
4991 /* Determine if a given RTX is a valid constant. We already know this
4992 satisfies CONSTANT_P. */
4995 legitimate_constant_p (rtx x)
4997 switch (GET_CODE (x))
5002 if (GET_CODE (x) == PLUS)
5004 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5009 if (TARGET_MACHO && darwin_local_data_pic (x))
5012 /* Only some unspecs are valid as "constants". */
5013 if (GET_CODE (x) == UNSPEC)
5014 switch (XINT (x, 1))
5018 return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
5020 return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
5025 /* We must have drilled down to a symbol. */
5026 if (!symbolic_operand (x, Pmode))
5031 /* TLS symbols are never valid. */
5032 if (tls_symbolic_operand (x, Pmode))
5040 /* Otherwise we handle everything else in the move patterns. */
5044 /* Determine if it's legal to put X into the constant pool. This
5045 is not possible for the address of thread-local symbols, which
5046 is checked above. */
5049 ix86_cannot_force_const_mem (rtx x)
5051 return !legitimate_constant_p (x);
5054 /* Determine if a given RTX is a valid constant address. */
5057 constant_address_p (rtx x)
5059 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5062 /* Nonzero if the constant value X is a legitimate general operand
5063 when generating PIC code. It is given that flag_pic is on and
5064 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5067 legitimate_pic_operand_p (rtx x)
5071 switch (GET_CODE (x))
5074 inner = XEXP (x, 0);
5076 /* Only some unspecs are valid as "constants". */
5077 if (GET_CODE (inner) == UNSPEC)
5078 switch (XINT (inner, 1))
5081 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5089 return legitimate_pic_address_disp_p (x);
5096 /* Determine if a given CONST RTX is a valid memory displacement
5100 legitimate_pic_address_disp_p (rtx disp)
5104 /* In 64bit mode we can allow direct addresses of symbols and labels
5105 when they are not dynamic symbols. */
5108 /* TLS references should always be enclosed in UNSPEC. */
5109 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5111 if (GET_CODE (disp) == SYMBOL_REF
5112 && ix86_cmodel == CM_SMALL_PIC
5113 && SYMBOL_REF_LOCAL_P (disp))
5115 if (GET_CODE (disp) == LABEL_REF)
5117 if (GET_CODE (disp) == CONST
5118 && GET_CODE (XEXP (disp, 0)) == PLUS)
5120 rtx op0 = XEXP (XEXP (disp, 0), 0);
5121 rtx op1 = XEXP (XEXP (disp, 0), 1);
5123 /* TLS references should always be enclosed in UNSPEC. */
5124 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5126 if (((GET_CODE (op0) == SYMBOL_REF
5127 && ix86_cmodel == CM_SMALL_PIC
5128 && SYMBOL_REF_LOCAL_P (op0))
5129 || GET_CODE (op0) == LABEL_REF)
5130 && GET_CODE (op1) == CONST_INT
5131 && INTVAL (op1) < 16*1024*1024
5132 && INTVAL (op1) >= -16*1024*1024)
5136 if (GET_CODE (disp) != CONST)
5138 disp = XEXP (disp, 0);
5142 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5143 of GOT tables. We should not need these anyway. */
5144 if (GET_CODE (disp) != UNSPEC
5145 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5148 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5149 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5155 if (GET_CODE (disp) == PLUS)
5157 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5159 disp = XEXP (disp, 0);
5163 if (TARGET_MACHO && darwin_local_data_pic (disp))
5166 if (GET_CODE (disp) != UNSPEC)
5169 switch (XINT (disp, 1))
5174 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5176 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5177 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5178 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5180 case UNSPEC_GOTTPOFF:
5181 case UNSPEC_GOTNTPOFF:
5182 case UNSPEC_INDNTPOFF:
5185 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5187 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5189 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5195 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5196 memory address for an instruction. The MODE argument is the machine mode
5197 for the MEM expression that wants to use this address.
5199 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5200 convert common non-canonical forms to canonical form so that they will
5204 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5206 struct ix86_address parts;
5207 rtx base, index, disp;
5208 HOST_WIDE_INT scale;
5209 const char *reason = NULL;
5210 rtx reason_rtx = NULL_RTX;
5212 if (TARGET_DEBUG_ADDR)
5215 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5216 GET_MODE_NAME (mode), strict);
5220 if (ix86_decompose_address (addr, &parts) <= 0)
5222 reason = "decomposition failed";
5227 index = parts.index;
5229 scale = parts.scale;
5231 /* Validate base register.
5233 Don't allow SUBREG's that span more than a word here. It can lead to spill
5234 failures when the base is one word out of a two word structure, which is
5235 represented internally as a DImode int. */
5244 else if (GET_CODE (base) == SUBREG
5245 && REG_P (SUBREG_REG (base))
5246 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
5248 reg = SUBREG_REG (base);
5251 reason = "base is not a register";
5255 if (GET_MODE (base) != Pmode)
5257 reason = "base is not in Pmode";
5261 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5262 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5264 reason = "base is not valid";
5269 /* Validate index register.
5271 Don't allow SUBREG's that span more than a word here -- same as above. */
5280 else if (GET_CODE (index) == SUBREG
5281 && REG_P (SUBREG_REG (index))
5282 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
5284 reg = SUBREG_REG (index);
5287 reason = "index is not a register";
5291 if (GET_MODE (index) != Pmode)
5293 reason = "index is not in Pmode";
5297 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5298 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5300 reason = "index is not valid";
5305 /* Validate scale factor. */
5308 reason_rtx = GEN_INT (scale);
5311 reason = "scale without index";
5315 if (scale != 2 && scale != 4 && scale != 8)
5317 reason = "scale is not a valid multiplier";
5322 /* Validate displacement. */
5327 if (GET_CODE (disp) == CONST
5328 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5329 switch (XINT (XEXP (disp, 0), 1))
5333 case UNSPEC_GOTPCREL:
5336 goto is_legitimate_pic;
5338 case UNSPEC_GOTTPOFF:
5339 case UNSPEC_GOTNTPOFF:
5340 case UNSPEC_INDNTPOFF:
5346 reason = "invalid address unspec";
5350 else if (flag_pic && (SYMBOLIC_CONST (disp)
5352 && !machopic_operand_p (disp)
5357 if (TARGET_64BIT && (index || base))
5359 /* foo@dtpoff(%rX) is ok. */
5360 if (GET_CODE (disp) != CONST
5361 || GET_CODE (XEXP (disp, 0)) != PLUS
5362 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5363 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5364 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5365 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5367 reason = "non-constant pic memory reference";
5371 else if (! legitimate_pic_address_disp_p (disp))
5373 reason = "displacement is an invalid pic construct";
5377 /* This code used to verify that a symbolic pic displacement
5378 includes the pic_offset_table_rtx register.
5380 While this is good idea, unfortunately these constructs may
5381 be created by "adds using lea" optimization for incorrect
5390 This code is nonsensical, but results in addressing
5391 GOT table with pic_offset_table_rtx base. We can't
5392 just refuse it easily, since it gets matched by
5393 "addsi3" pattern, that later gets split to lea in the
5394 case output register differs from input. While this
5395 can be handled by separate addsi pattern for this case
5396 that never results in lea, this seems to be easier and
5397 correct fix for crash to disable this test. */
5399 else if (GET_CODE (disp) != LABEL_REF
5400 && GET_CODE (disp) != CONST_INT
5401 && (GET_CODE (disp) != CONST
5402 || !legitimate_constant_p (disp))
5403 && (GET_CODE (disp) != SYMBOL_REF
5404 || !legitimate_constant_p (disp)))
5406 reason = "displacement is not constant";
5409 else if (TARGET_64BIT
5410 && !x86_64_immediate_operand (disp, VOIDmode))
5412 reason = "displacement is out of range";
5417 /* Everything looks valid. */
5418 if (TARGET_DEBUG_ADDR)
5419 fprintf (stderr, "Success.\n");
5423 if (TARGET_DEBUG_ADDR)
5425 fprintf (stderr, "Error: %s\n", reason);
5426 debug_rtx (reason_rtx);
5431 /* Return an unique alias set for the GOT. */
5433 static HOST_WIDE_INT
5434 ix86_GOT_alias_set (void)
5436 static HOST_WIDE_INT set = -1;
5438 set = new_alias_set ();
5442 /* Return a legitimate reference for ORIG (an address) using the
5443 register REG. If REG is 0, a new pseudo is generated.
5445 There are two types of references that must be handled:
5447 1. Global data references must load the address from the GOT, via
5448 the PIC reg. An insn is emitted to do this load, and the reg is
5451 2. Static data references, constant pool addresses, and code labels
5452 compute the address as an offset from the GOT, whose base is in
5453 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5454 differentiate them from global data objects. The returned
5455 address is the PIC reg + an unspec constant.
5457 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5458 reg also appears in the address. */
5461 legitimize_pic_address (rtx orig, rtx reg)
5469 reg = gen_reg_rtx (Pmode);
5470 /* Use the generic Mach-O PIC machinery. */
5471 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5474 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5476 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5478 /* This symbol may be referenced via a displacement from the PIC
5479 base address (@GOTOFF). */
5481 if (reload_in_progress)
5482 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5483 if (GET_CODE (addr) == CONST)
5484 addr = XEXP (addr, 0);
5485 if (GET_CODE (addr) == PLUS)
5487 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
5488 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
5491 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5492 new = gen_rtx_CONST (Pmode, new);
5493 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5497 emit_move_insn (reg, new);
5501 else if (GET_CODE (addr) == SYMBOL_REF)
5505 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5506 new = gen_rtx_CONST (Pmode, new);
5507 new = gen_const_mem (Pmode, new);
5508 set_mem_alias_set (new, ix86_GOT_alias_set ());
5511 reg = gen_reg_rtx (Pmode);
5512 /* Use directly gen_movsi, otherwise the address is loaded
5513 into register for CSE. We don't want to CSE this addresses,
5514 instead we CSE addresses from the GOT table, so skip this. */
5515 emit_insn (gen_movsi (reg, new));
5520 /* This symbol must be referenced via a load from the
5521 Global Offset Table (@GOT). */
5523 if (reload_in_progress)
5524 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5525 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5526 new = gen_rtx_CONST (Pmode, new);
5527 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5528 new = gen_const_mem (Pmode, new);
5529 set_mem_alias_set (new, ix86_GOT_alias_set ());
5532 reg = gen_reg_rtx (Pmode);
5533 emit_move_insn (reg, new);
5539 if (GET_CODE (addr) == CONST)
5541 addr = XEXP (addr, 0);
5543 /* We must match stuff we generate before. Assume the only
5544 unspecs that can get here are ours. Not that we could do
5545 anything with them anyway.... */
5546 if (GET_CODE (addr) == UNSPEC
5547 || (GET_CODE (addr) == PLUS
5548 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5550 if (GET_CODE (addr) != PLUS)
5553 if (GET_CODE (addr) == PLUS)
5555 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5557 /* Check first to see if this is a constant offset from a @GOTOFF
5558 symbol reference. */
5559 if (local_symbolic_operand (op0, Pmode)
5560 && GET_CODE (op1) == CONST_INT)
5564 if (reload_in_progress)
5565 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5566 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5568 new = gen_rtx_PLUS (Pmode, new, op1);
5569 new = gen_rtx_CONST (Pmode, new);
5570 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5574 emit_move_insn (reg, new);
5580 if (INTVAL (op1) < -16*1024*1024
5581 || INTVAL (op1) >= 16*1024*1024)
5582 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
5587 base = legitimize_pic_address (XEXP (addr, 0), reg);
5588 new = legitimize_pic_address (XEXP (addr, 1),
5589 base == reg ? NULL_RTX : reg);
5591 if (GET_CODE (new) == CONST_INT)
5592 new = plus_constant (base, INTVAL (new));
5595 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5597 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5598 new = XEXP (new, 1);
5600 new = gen_rtx_PLUS (Pmode, base, new);
5608 /* Load the thread pointer. If TO_REG is true, force it into a register. */
5611 get_thread_pointer (int to_reg)
5615 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5619 reg = gen_reg_rtx (Pmode);
5620 insn = gen_rtx_SET (VOIDmode, reg, tp);
5621 insn = emit_insn (insn);
5626 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
5627 false if we expect this to be used for a memory address and true if
5628 we expect to load the address into a register. */
5631 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
5633 rtx dest, base, off, pic;
5638 case TLS_MODEL_GLOBAL_DYNAMIC:
5639 dest = gen_reg_rtx (Pmode);
5642 rtx rax = gen_rtx_REG (Pmode, 0), insns;
5645 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5646 insns = get_insns ();
5649 emit_libcall_block (insns, dest, rax, x);
5652 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5655 case TLS_MODEL_LOCAL_DYNAMIC:
5656 base = gen_reg_rtx (Pmode);
5659 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5662 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5663 insns = get_insns ();
5666 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5667 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5668 emit_libcall_block (insns, base, rax, note);
5671 emit_insn (gen_tls_local_dynamic_base_32 (base));
5673 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5674 off = gen_rtx_CONST (Pmode, off);
5676 return gen_rtx_PLUS (Pmode, base, off);
5678 case TLS_MODEL_INITIAL_EXEC:
5682 type = UNSPEC_GOTNTPOFF;
5686 if (reload_in_progress)
5687 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5688 pic = pic_offset_table_rtx;
5689 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
5691 else if (!TARGET_GNU_TLS)
5693 pic = gen_reg_rtx (Pmode);
5694 emit_insn (gen_set_got (pic));
5695 type = UNSPEC_GOTTPOFF;
5700 type = UNSPEC_INDNTPOFF;
5703 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
5704 off = gen_rtx_CONST (Pmode, off);
5706 off = gen_rtx_PLUS (Pmode, pic, off);
5707 off = gen_const_mem (Pmode, off);
5708 set_mem_alias_set (off, ix86_GOT_alias_set ());
5710 if (TARGET_64BIT || TARGET_GNU_TLS)
5712 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5713 off = force_reg (Pmode, off);
5714 return gen_rtx_PLUS (Pmode, base, off);
5718 base = get_thread_pointer (true);
5719 dest = gen_reg_rtx (Pmode);
5720 emit_insn (gen_subsi3 (dest, base, off));
5724 case TLS_MODEL_LOCAL_EXEC:
5725 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5726 (TARGET_64BIT || TARGET_GNU_TLS)
5727 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5728 off = gen_rtx_CONST (Pmode, off);
5730 if (TARGET_64BIT || TARGET_GNU_TLS)
5732 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5733 return gen_rtx_PLUS (Pmode, base, off);
5737 base = get_thread_pointer (true);
5738 dest = gen_reg_rtx (Pmode);
5739 emit_insn (gen_subsi3 (dest, base, off));
5750 /* Try machine-dependent ways of modifying an illegitimate address
5751 to be legitimate. If we find one, return the new, valid address.
5752 This macro is used in only one place: `memory_address' in explow.c.
5754 OLDX is the address as it was before break_out_memory_refs was called.
5755 In some cases it is useful to look at this to decide what needs to be done.
5757 MODE and WIN are passed so that this macro can use
5758 GO_IF_LEGITIMATE_ADDRESS.
5760 It is always safe for this macro to do nothing. It exists to recognize
5761 opportunities to optimize the output.
5763 For the 80386, we handle X+REG by loading X into a register R and
5764 using R+REG. R will go in a general reg and indexing will be used.
5765 However, if REG is a broken-out memory address or multiplication,
5766 nothing needs to be done because REG can certainly go in a general reg.
5768 When -fpic is used, special handling is needed for symbolic references.
5769 See comments by legitimize_pic_address in i386.c for details. */
5772 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
5777 if (TARGET_DEBUG_ADDR)
5779 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5780 GET_MODE_NAME (mode));
5784 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
5786 return legitimize_tls_address (x, log, false);
5787 if (GET_CODE (x) == CONST
5788 && GET_CODE (XEXP (x, 0)) == PLUS
5789 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
5790 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
5792 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
5793 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
5796 if (flag_pic && SYMBOLIC_CONST (x))
5797 return legitimize_pic_address (x, 0);
5799 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5800 if (GET_CODE (x) == ASHIFT
5801 && GET_CODE (XEXP (x, 1)) == CONST_INT
5802 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5805 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5806 GEN_INT (1 << log));
5809 if (GET_CODE (x) == PLUS)
5811 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5813 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5814 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5815 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5818 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5819 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5820 GEN_INT (1 << log));
5823 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5824 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5825 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5828 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5829 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5830 GEN_INT (1 << log));
5833 /* Put multiply first if it isn't already. */
5834 if (GET_CODE (XEXP (x, 1)) == MULT)
5836 rtx tmp = XEXP (x, 0);
5837 XEXP (x, 0) = XEXP (x, 1);
5842 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5843 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5844 created by virtual register instantiation, register elimination, and
5845 similar optimizations. */
5846 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5849 x = gen_rtx_PLUS (Pmode,
5850 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5851 XEXP (XEXP (x, 1), 0)),
5852 XEXP (XEXP (x, 1), 1));
5856 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5857 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5858 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5859 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5860 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5861 && CONSTANT_P (XEXP (x, 1)))
5864 rtx other = NULL_RTX;
5866 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5868 constant = XEXP (x, 1);
5869 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5871 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5873 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5874 other = XEXP (x, 1);
5882 x = gen_rtx_PLUS (Pmode,
5883 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5884 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5885 plus_constant (other, INTVAL (constant)));
5889 if (changed && legitimate_address_p (mode, x, FALSE))
5892 if (GET_CODE (XEXP (x, 0)) == MULT)
5895 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5898 if (GET_CODE (XEXP (x, 1)) == MULT)
5901 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5905 && GET_CODE (XEXP (x, 1)) == REG
5906 && GET_CODE (XEXP (x, 0)) == REG)
5909 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5912 x = legitimize_pic_address (x, 0);
5915 if (changed && legitimate_address_p (mode, x, FALSE))
5918 if (GET_CODE (XEXP (x, 0)) == REG)
5920 rtx temp = gen_reg_rtx (Pmode);
5921 rtx val = force_operand (XEXP (x, 1), temp);
5923 emit_move_insn (temp, val);
5929 else if (GET_CODE (XEXP (x, 1)) == REG)
5931 rtx temp = gen_reg_rtx (Pmode);
5932 rtx val = force_operand (XEXP (x, 0), temp);
5934 emit_move_insn (temp, val);
5944 /* Print an integer constant expression in assembler syntax. Addition
5945 and subtraction are the only arithmetic that may appear in these
5946 expressions. FILE is the stdio stream to write to, X is the rtx, and
5947 CODE is the operand print code from the output string. */
5950 output_pic_addr_const (FILE *file, rtx x, int code)
5954 switch (GET_CODE (x))
5964 assemble_name (file, XSTR (x, 0));
5965 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
5966 fputs ("@PLT", file);
5973 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5974 assemble_name (asm_out_file, buf);
5978 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5982 /* This used to output parentheses around the expression,
5983 but that does not work on the 386 (either ATT or BSD assembler). */
5984 output_pic_addr_const (file, XEXP (x, 0), code);
5988 if (GET_MODE (x) == VOIDmode)
5990 /* We can use %d if the number is <32 bits and positive. */
5991 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5992 fprintf (file, "0x%lx%08lx",
5993 (unsigned long) CONST_DOUBLE_HIGH (x),
5994 (unsigned long) CONST_DOUBLE_LOW (x));
5996 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5999 /* We can't handle floating point constants;
6000 PRINT_OPERAND must handle them. */
6001 output_operand_lossage ("floating constant misused");
6005 /* Some assemblers need integer constants to appear first. */
6006 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6008 output_pic_addr_const (file, XEXP (x, 0), code);
6010 output_pic_addr_const (file, XEXP (x, 1), code);
6012 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6014 output_pic_addr_const (file, XEXP (x, 1), code);
6016 output_pic_addr_const (file, XEXP (x, 0), code);
6024 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6025 output_pic_addr_const (file, XEXP (x, 0), code);
6027 output_pic_addr_const (file, XEXP (x, 1), code);
6029 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6033 if (XVECLEN (x, 0) != 1)
6035 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6036 switch (XINT (x, 1))
6039 fputs ("@GOT", file);
6042 fputs ("@GOTOFF", file);
6044 case UNSPEC_GOTPCREL:
6045 fputs ("@GOTPCREL(%rip)", file);
6047 case UNSPEC_GOTTPOFF:
6048 /* FIXME: This might be @TPOFF in Sun ld too. */
6049 fputs ("@GOTTPOFF", file);
6052 fputs ("@TPOFF", file);
6056 fputs ("@TPOFF", file);
6058 fputs ("@NTPOFF", file);
6061 fputs ("@DTPOFF", file);
6063 case UNSPEC_GOTNTPOFF:
6065 fputs ("@GOTTPOFF(%rip)", file);
6067 fputs ("@GOTNTPOFF", file);
6069 case UNSPEC_INDNTPOFF:
6070 fputs ("@INDNTPOFF", file);
6073 output_operand_lossage ("invalid UNSPEC as operand");
6079 output_operand_lossage ("invalid expression as operand");
6083 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6084 We need to emit DTP-relative relocations. */
6087 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6089 fputs (ASM_LONG, file);
6090 output_addr_const (file, x);
6091 fputs ("@DTPOFF", file);
6097 fputs (", 0", file);
6104 /* In the name of slightly smaller debug output, and to cater to
6105 general assembler lossage, recognize PIC+GOTOFF and turn it back
6106 into a direct symbol reference. */
6109 ix86_delegitimize_address (rtx orig_x)
6113 if (GET_CODE (x) == MEM)
6118 if (GET_CODE (x) != CONST
6119 || GET_CODE (XEXP (x, 0)) != UNSPEC
6120 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6121 || GET_CODE (orig_x) != MEM)
6123 return XVECEXP (XEXP (x, 0), 0, 0);
6126 if (GET_CODE (x) != PLUS
6127 || GET_CODE (XEXP (x, 1)) != CONST)
6130 if (GET_CODE (XEXP (x, 0)) == REG
6131 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6132 /* %ebx + GOT/GOTOFF */
6134 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6136 /* %ebx + %reg * scale + GOT/GOTOFF */
6138 if (GET_CODE (XEXP (y, 0)) == REG
6139 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6141 else if (GET_CODE (XEXP (y, 1)) == REG
6142 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6146 if (GET_CODE (y) != REG
6147 && GET_CODE (y) != MULT
6148 && GET_CODE (y) != ASHIFT)
6154 x = XEXP (XEXP (x, 1), 0);
6155 if (GET_CODE (x) == UNSPEC
6156 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6157 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6160 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6161 return XVECEXP (x, 0, 0);
6164 if (GET_CODE (x) == PLUS
6165 && GET_CODE (XEXP (x, 0)) == UNSPEC
6166 && GET_CODE (XEXP (x, 1)) == CONST_INT
6167 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6168 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6169 && GET_CODE (orig_x) != MEM)))
6171 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6173 return gen_rtx_PLUS (Pmode, y, x);
6181 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6186 if (mode == CCFPmode || mode == CCFPUmode)
6188 enum rtx_code second_code, bypass_code;
6189 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6190 if (bypass_code != UNKNOWN || second_code != UNKNOWN)
6192 code = ix86_fp_compare_code_to_integer (code);
6196 code = reverse_condition (code);
6207 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6212 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6213 Those same assemblers have the same but opposite lossage on cmov. */
6216 suffix = fp ? "nbe" : "a";
6219 if (mode == CCNOmode || mode == CCGOCmode)
6221 else if (mode == CCmode || mode == CCGCmode)
6232 if (mode == CCNOmode || mode == CCGOCmode)
6234 else if (mode == CCmode || mode == CCGCmode)
6243 suffix = fp ? "nb" : "ae";
6246 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6256 suffix = fp ? "u" : "p";
6259 suffix = fp ? "nu" : "np";
6264 fputs (suffix, file);
6267 /* Print the name of register X to FILE based on its machine mode and number.
6268 If CODE is 'w', pretend the mode is HImode.
6269 If CODE is 'b', pretend the mode is QImode.
6270 If CODE is 'k', pretend the mode is SImode.
6271 If CODE is 'q', pretend the mode is DImode.
6272 If CODE is 'h', pretend the reg is the `high' byte register.
6273 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
6276 print_reg (rtx x, int code, FILE *file)
6278 if (REGNO (x) == ARG_POINTER_REGNUM
6279 || REGNO (x) == FRAME_POINTER_REGNUM
6280 || REGNO (x) == FLAGS_REG
6281 || REGNO (x) == FPSR_REG)
6284 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6287 if (code == 'w' || MMX_REG_P (x))
6289 else if (code == 'b')
6291 else if (code == 'k')
6293 else if (code == 'q')
6295 else if (code == 'y')
6297 else if (code == 'h')
6300 code = GET_MODE_SIZE (GET_MODE (x));
6302 /* Irritatingly, AMD extended registers use different naming convention
6303 from the normal registers. */
6304 if (REX_INT_REG_P (x))
6311 error ("extended registers have no high halves");
6314 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6317 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6320 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6323 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6326 error ("unsupported operand size for extended register");
6334 if (STACK_TOP_P (x))
6336 fputs ("st(0)", file);
6343 if (! ANY_FP_REG_P (x))
6344 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6349 fputs (hi_reg_name[REGNO (x)], file);
6352 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
6354 fputs (qi_reg_name[REGNO (x)], file);
6357 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
6359 fputs (qi_high_reg_name[REGNO (x)], file);
6366 /* Locate some local-dynamic symbol still in use by this function
6367 so that we can print its name in some tls_local_dynamic_base
6371 get_some_local_dynamic_name (void)
6375 if (cfun->machine->some_ld_name)
6376 return cfun->machine->some_ld_name;
6378 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6380 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6381 return cfun->machine->some_ld_name;
6387 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
6391 if (GET_CODE (x) == SYMBOL_REF
6392 && local_dynamic_symbolic_operand (x, Pmode))
6394 cfun->machine->some_ld_name = XSTR (x, 0);
6402 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6403 C -- print opcode suffix for set/cmov insn.
6404 c -- like C, but print reversed condition
6405 F,f -- likewise, but for floating-point.
6406 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6408 R -- print the prefix for register names.
6409 z -- print the opcode suffix for the size of the current operand.
6410 * -- print a star (in certain assembler syntax)
6411 A -- print an absolute memory reference.
6412 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6413 s -- print a shift double count, followed by the assemblers argument
6415 b -- print the QImode name of the register for the indicated operand.
6416 %b0 would print %al if operands[0] is reg 0.
6417 w -- likewise, print the HImode name of the register.
6418 k -- likewise, print the SImode name of the register.
6419 q -- likewise, print the DImode name of the register.
6420 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6421 y -- print "st(0)" instead of "st" as a register.
6422 D -- print condition for SSE cmp instruction.
6423 P -- if PIC, print an @PLT suffix.
6424 X -- don't print any sort of PIC '@' suffix for a symbol.
6425 & -- print some in-use local-dynamic symbol name.
6426 H -- print a memory address offset by 8; used for sse high-parts
6430 print_operand (FILE *file, rtx x, int code)
6437 if (ASSEMBLER_DIALECT == ASM_ATT)
6442 assemble_name (file, get_some_local_dynamic_name ());
6446 if (ASSEMBLER_DIALECT == ASM_ATT)
6448 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6450 /* Intel syntax. For absolute addresses, registers should not
6451 be surrounded by braces. */
6452 if (GET_CODE (x) != REG)
6455 PRINT_OPERAND (file, x, 0);
6463 PRINT_OPERAND (file, x, 0);
6468 if (ASSEMBLER_DIALECT == ASM_ATT)
6473 if (ASSEMBLER_DIALECT == ASM_ATT)
6478 if (ASSEMBLER_DIALECT == ASM_ATT)
6483 if (ASSEMBLER_DIALECT == ASM_ATT)
6488 if (ASSEMBLER_DIALECT == ASM_ATT)
6493 if (ASSEMBLER_DIALECT == ASM_ATT)
6498 /* 387 opcodes don't get size suffixes if the operands are
6500 if (STACK_REG_P (x))
6503 /* Likewise if using Intel opcodes. */
6504 if (ASSEMBLER_DIALECT == ASM_INTEL)
6507 /* This is the size of op from size of operand. */
6508 switch (GET_MODE_SIZE (GET_MODE (x)))
6511 #ifdef HAVE_GAS_FILDS_FISTS
6517 if (GET_MODE (x) == SFmode)
6532 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6534 #ifdef GAS_MNEMONICS
6560 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6562 PRINT_OPERAND (file, x, 0);
6568 /* Little bit of braindamage here. The SSE compare instructions
6569 does use completely different names for the comparisons that the
6570 fp conditional moves. */
6571 switch (GET_CODE (x))
6586 fputs ("unord", file);
6590 fputs ("neq", file);
6594 fputs ("nlt", file);
6598 fputs ("nle", file);
6601 fputs ("ord", file);
6609 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6610 if (ASSEMBLER_DIALECT == ASM_ATT)
6612 switch (GET_MODE (x))
6614 case HImode: putc ('w', file); break;
6616 case SFmode: putc ('l', file); break;
6618 case DFmode: putc ('q', file); break;
6626 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6629 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6630 if (ASSEMBLER_DIALECT == ASM_ATT)
6633 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6636 /* Like above, but reverse condition */
6638 /* Check to see if argument to %c is really a constant
6639 and not a condition code which needs to be reversed. */
6640 if (!COMPARISON_P (x))
6642 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6645 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6648 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6649 if (ASSEMBLER_DIALECT == ASM_ATT)
6652 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6656 /* It doesn't actually matter what mode we use here, as we're
6657 only going to use this for printing. */
6658 x = adjust_address_nv (x, DImode, 8);
6665 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6668 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6671 int pred_val = INTVAL (XEXP (x, 0));
6673 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6674 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6676 int taken = pred_val > REG_BR_PROB_BASE / 2;
6677 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6679 /* Emit hints only in the case default branch prediction
6680 heuristics would fail. */
6681 if (taken != cputaken)
6683 /* We use 3e (DS) prefix for taken branches and
6684 2e (CS) prefix for not taken branches. */
6686 fputs ("ds ; ", file);
6688 fputs ("cs ; ", file);
6695 output_operand_lossage ("invalid operand code '%c'", code);
6699 if (GET_CODE (x) == REG)
6700 print_reg (x, code, file);
6702 else if (GET_CODE (x) == MEM)
6704 /* No `byte ptr' prefix for call instructions. */
6705 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6708 switch (GET_MODE_SIZE (GET_MODE (x)))
6710 case 1: size = "BYTE"; break;
6711 case 2: size = "WORD"; break;
6712 case 4: size = "DWORD"; break;
6713 case 8: size = "QWORD"; break;
6714 case 12: size = "XWORD"; break;
6715 case 16: size = "XMMWORD"; break;
6720 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6723 else if (code == 'w')
6725 else if (code == 'k')
6729 fputs (" PTR ", file);
6733 /* Avoid (%rip) for call operands. */
6734 if (CONSTANT_ADDRESS_P (x) && code == 'P'
6735 && GET_CODE (x) != CONST_INT)
6736 output_addr_const (file, x);
6737 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6738 output_operand_lossage ("invalid constraints for operand");
6743 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6748 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6749 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6751 if (ASSEMBLER_DIALECT == ASM_ATT)
6753 fprintf (file, "0x%08lx", l);
6756 /* These float cases don't actually occur as immediate operands. */
6757 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6761 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6762 fprintf (file, "%s", dstr);
6765 else if (GET_CODE (x) == CONST_DOUBLE
6766 && GET_MODE (x) == XFmode)
6770 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6771 fprintf (file, "%s", dstr);
6778 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6780 if (ASSEMBLER_DIALECT == ASM_ATT)
6783 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6784 || GET_CODE (x) == LABEL_REF)
6786 if (ASSEMBLER_DIALECT == ASM_ATT)
6789 fputs ("OFFSET FLAT:", file);
6792 if (GET_CODE (x) == CONST_INT)
6793 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6795 output_pic_addr_const (file, x, code);
6797 output_addr_const (file, x);
6801 /* Print a memory operand whose address is ADDR. */
6804 print_operand_address (FILE *file, rtx addr)
6806 struct ix86_address parts;
6807 rtx base, index, disp;
6810 if (! ix86_decompose_address (addr, &parts))
6814 index = parts.index;
6816 scale = parts.scale;
6824 if (USER_LABEL_PREFIX[0] == 0)
6826 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
6832 if (!base && !index)
6834 /* Displacement only requires special attention. */
6836 if (GET_CODE (disp) == CONST_INT)
6838 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
6840 if (USER_LABEL_PREFIX[0] == 0)
6842 fputs ("ds:", file);
6844 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
6847 output_pic_addr_const (file, disp, 0);
6849 output_addr_const (file, disp);
6851 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6853 && ((GET_CODE (disp) == SYMBOL_REF
6854 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
6855 || GET_CODE (disp) == LABEL_REF
6856 || (GET_CODE (disp) == CONST
6857 && GET_CODE (XEXP (disp, 0)) == PLUS
6858 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
6859 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
6860 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
6861 fputs ("(%rip)", file);
6865 if (ASSEMBLER_DIALECT == ASM_ATT)
6870 output_pic_addr_const (file, disp, 0);
6871 else if (GET_CODE (disp) == LABEL_REF)
6872 output_asm_label (disp);
6874 output_addr_const (file, disp);
6879 print_reg (base, 0, file);
6883 print_reg (index, 0, file);
6885 fprintf (file, ",%d", scale);
6891 rtx offset = NULL_RTX;
6895 /* Pull out the offset of a symbol; print any symbol itself. */
6896 if (GET_CODE (disp) == CONST
6897 && GET_CODE (XEXP (disp, 0)) == PLUS
6898 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6900 offset = XEXP (XEXP (disp, 0), 1);
6901 disp = gen_rtx_CONST (VOIDmode,
6902 XEXP (XEXP (disp, 0), 0));
6906 output_pic_addr_const (file, disp, 0);
6907 else if (GET_CODE (disp) == LABEL_REF)
6908 output_asm_label (disp);
6909 else if (GET_CODE (disp) == CONST_INT)
6912 output_addr_const (file, disp);
6918 print_reg (base, 0, file);
6921 if (INTVAL (offset) >= 0)
6923 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6927 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6934 print_reg (index, 0, file);
6936 fprintf (file, "*%d", scale);
6944 output_addr_const_extra (FILE *file, rtx x)
6948 if (GET_CODE (x) != UNSPEC)
6951 op = XVECEXP (x, 0, 0);
6952 switch (XINT (x, 1))
6954 case UNSPEC_GOTTPOFF:
6955 output_addr_const (file, op);
6956 /* FIXME: This might be @TPOFF in Sun ld. */
6957 fputs ("@GOTTPOFF", file);
6960 output_addr_const (file, op);
6961 fputs ("@TPOFF", file);
6964 output_addr_const (file, op);
6966 fputs ("@TPOFF", file);
6968 fputs ("@NTPOFF", file);
6971 output_addr_const (file, op);
6972 fputs ("@DTPOFF", file);
6974 case UNSPEC_GOTNTPOFF:
6975 output_addr_const (file, op);
6977 fputs ("@GOTTPOFF(%rip)", file);
6979 fputs ("@GOTNTPOFF", file);
6981 case UNSPEC_INDNTPOFF:
6982 output_addr_const (file, op);
6983 fputs ("@INDNTPOFF", file);
6993 /* Split one or more DImode RTL references into pairs of SImode
6994 references. The RTL can be REG, offsettable MEM, integer constant, or
6995 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6996 split and "num" is its length. lo_half and hi_half are output arrays
6997 that parallel "operands". */
7000 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7004 rtx op = operands[num];
7006 /* simplify_subreg refuse to split volatile memory addresses,
7007 but we still have to handle it. */
7008 if (GET_CODE (op) == MEM)
7010 lo_half[num] = adjust_address (op, SImode, 0);
7011 hi_half[num] = adjust_address (op, SImode, 4);
7015 lo_half[num] = simplify_gen_subreg (SImode, op,
7016 GET_MODE (op) == VOIDmode
7017 ? DImode : GET_MODE (op), 0);
7018 hi_half[num] = simplify_gen_subreg (SImode, op,
7019 GET_MODE (op) == VOIDmode
7020 ? DImode : GET_MODE (op), 4);
7024 /* Split one or more TImode RTL references into pairs of SImode
7025 references. The RTL can be REG, offsettable MEM, integer constant, or
7026 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7027 split and "num" is its length. lo_half and hi_half are output arrays
7028 that parallel "operands". */
7031 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7035 rtx op = operands[num];
7037 /* simplify_subreg refuse to split volatile memory addresses, but we
7038 still have to handle it. */
7039 if (GET_CODE (op) == MEM)
7041 lo_half[num] = adjust_address (op, DImode, 0);
7042 hi_half[num] = adjust_address (op, DImode, 8);
7046 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7047 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7052 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7053 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7054 is the expression of the binary operation. The output may either be
7055 emitted here, or returned to the caller, like all output_* functions.
7057 There is no guarantee that the operands are the same mode, as they
7058 might be within FLOAT or FLOAT_EXTEND expressions. */
7060 #ifndef SYSV386_COMPAT
7061 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7062 wants to fix the assemblers because that causes incompatibility
7063 with gcc. No-one wants to fix gcc because that causes
7064 incompatibility with assemblers... You can use the option of
7065 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7066 #define SYSV386_COMPAT 1
7070 output_387_binary_op (rtx insn, rtx *operands)
7072 static char buf[30];
7075 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
7077 #ifdef ENABLE_CHECKING
7078 /* Even if we do not want to check the inputs, this documents input
7079 constraints. Which helps in understanding the following code. */
7080 if (STACK_REG_P (operands[0])
7081 && ((REG_P (operands[1])
7082 && REGNO (operands[0]) == REGNO (operands[1])
7083 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7084 || (REG_P (operands[2])
7085 && REGNO (operands[0]) == REGNO (operands[2])
7086 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7087 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7093 switch (GET_CODE (operands[3]))
7096 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7097 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7105 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7106 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7114 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7115 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7123 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7124 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7138 if (GET_MODE (operands[0]) == SFmode)
7139 strcat (buf, "ss\t{%2, %0|%0, %2}");
7141 strcat (buf, "sd\t{%2, %0|%0, %2}");
7146 switch (GET_CODE (operands[3]))
7150 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7152 rtx temp = operands[2];
7153 operands[2] = operands[1];
7157 /* know operands[0] == operands[1]. */
7159 if (GET_CODE (operands[2]) == MEM)
7165 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7167 if (STACK_TOP_P (operands[0]))
7168 /* How is it that we are storing to a dead operand[2]?
7169 Well, presumably operands[1] is dead too. We can't
7170 store the result to st(0) as st(0) gets popped on this
7171 instruction. Instead store to operands[2] (which I
7172 think has to be st(1)). st(1) will be popped later.
7173 gcc <= 2.8.1 didn't have this check and generated
7174 assembly code that the Unixware assembler rejected. */
7175 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7177 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7181 if (STACK_TOP_P (operands[0]))
7182 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7184 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7189 if (GET_CODE (operands[1]) == MEM)
7195 if (GET_CODE (operands[2]) == MEM)
7201 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7204 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7205 derived assemblers, confusingly reverse the direction of
7206 the operation for fsub{r} and fdiv{r} when the
7207 destination register is not st(0). The Intel assembler
7208 doesn't have this brain damage. Read !SYSV386_COMPAT to
7209 figure out what the hardware really does. */
7210 if (STACK_TOP_P (operands[0]))
7211 p = "{p\t%0, %2|rp\t%2, %0}";
7213 p = "{rp\t%2, %0|p\t%0, %2}";
7215 if (STACK_TOP_P (operands[0]))
7216 /* As above for fmul/fadd, we can't store to st(0). */
7217 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7219 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7224 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7227 if (STACK_TOP_P (operands[0]))
7228 p = "{rp\t%0, %1|p\t%1, %0}";
7230 p = "{p\t%1, %0|rp\t%0, %1}";
7232 if (STACK_TOP_P (operands[0]))
7233 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7235 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7240 if (STACK_TOP_P (operands[0]))
7242 if (STACK_TOP_P (operands[1]))
7243 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7245 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7248 else if (STACK_TOP_P (operands[1]))
7251 p = "{\t%1, %0|r\t%0, %1}";
7253 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7259 p = "{r\t%2, %0|\t%0, %2}";
7261 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7274 /* Output code to initialize control word copies used by trunc?f?i and
7275 rounding patterns. CURRENT_MODE is set to current control word,
7276 while NEW_MODE is set to new control word. */
7279 emit_i387_cw_initialization (rtx current_mode, rtx new_mode, int mode)
7281 rtx reg = gen_reg_rtx (HImode);
7283 emit_insn (gen_x86_fnstcw_1 (current_mode));
7284 emit_move_insn (reg, current_mode);
7286 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7292 /* round down toward -oo */
7293 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
7297 /* round up toward +oo */
7298 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
7302 /* round toward zero (truncate) */
7303 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7306 case I387_CW_MASK_PM:
7307 /* mask precision exception for nearbyint() */
7308 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7320 /* round down toward -oo */
7321 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7322 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
7326 /* round up toward +oo */
7327 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7328 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
7332 /* round toward zero (truncate) */
7333 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
7336 case I387_CW_MASK_PM:
7337 /* mask precision exception for nearbyint() */
7338 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7346 emit_move_insn (new_mode, reg);
7349 /* Output code for INSN to convert a float to a signed int. OPERANDS
7350 are the insn operands. The output may be [HSD]Imode and the input
7351 operand may be [SDX]Fmode. */
7354 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
7356 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7357 int dimode_p = GET_MODE (operands[0]) == DImode;
7358 int round_mode = get_attr_i387_cw (insn);
7360 /* Jump through a hoop or two for DImode, since the hardware has no
7361 non-popping instruction. We used to do this a different way, but
7362 that was somewhat fragile and broke with post-reload splitters. */
7363 if ((dimode_p || fisttp) && !stack_top_dies)
7364 output_asm_insn ("fld\t%y1", operands);
7366 if (!STACK_TOP_P (operands[1]))
7369 if (GET_CODE (operands[0]) != MEM)
7373 output_asm_insn ("fisttp%z0\t%0", operands);
7376 if (round_mode != I387_CW_ANY)
7377 output_asm_insn ("fldcw\t%3", operands);
7378 if (stack_top_dies || dimode_p)
7379 output_asm_insn ("fistp%z0\t%0", operands);
7381 output_asm_insn ("fist%z0\t%0", operands);
7382 if (round_mode != I387_CW_ANY)
7383 output_asm_insn ("fldcw\t%2", operands);
7389 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7390 should be used. UNORDERED_P is true when fucom should be used. */
7393 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
7396 rtx cmp_op0, cmp_op1;
7397 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
7401 cmp_op0 = operands[0];
7402 cmp_op1 = operands[1];
7406 cmp_op0 = operands[1];
7407 cmp_op1 = operands[2];
7412 if (GET_MODE (operands[0]) == SFmode)
7414 return "ucomiss\t{%1, %0|%0, %1}";
7416 return "comiss\t{%1, %0|%0, %1}";
7419 return "ucomisd\t{%1, %0|%0, %1}";
7421 return "comisd\t{%1, %0|%0, %1}";
7424 if (! STACK_TOP_P (cmp_op0))
7427 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7429 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
7433 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
7434 return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
7437 return "ftst\n\tfnstsw\t%0";
7440 if (STACK_REG_P (cmp_op1)
7442 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7443 && REGNO (cmp_op1) != FIRST_STACK_REG)
7445 /* If both the top of the 387 stack dies, and the other operand
7446 is also a stack register that dies, then this must be a
7447 `fcompp' float compare */
7451 /* There is no double popping fcomi variant. Fortunately,
7452 eflags is immune from the fstp's cc clobbering. */
7454 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7456 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7457 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
7462 return "fucompp\n\tfnstsw\t%0";
7464 return "fcompp\n\tfnstsw\t%0";
7469 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7471 static const char * const alt[16] =
7473 "fcom%z2\t%y2\n\tfnstsw\t%0",
7474 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7475 "fucom%z2\t%y2\n\tfnstsw\t%0",
7476 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7478 "ficom%z2\t%y2\n\tfnstsw\t%0",
7479 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7483 "fcomi\t{%y1, %0|%0, %y1}",
7484 "fcomip\t{%y1, %0|%0, %y1}",
7485 "fucomi\t{%y1, %0|%0, %y1}",
7486 "fucomip\t{%y1, %0|%0, %y1}",
7497 mask = eflags_p << 3;
7498 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
7499 mask |= unordered_p << 1;
7500 mask |= stack_top_dies;
7513 ix86_output_addr_vec_elt (FILE *file, int value)
7515 const char *directive = ASM_LONG;
7520 directive = ASM_QUAD;
7526 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7530 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
7533 fprintf (file, "%s%s%d-%s%d\n",
7534 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7535 else if (HAVE_AS_GOTOFF_IN_DATA)
7536 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7538 else if (TARGET_MACHO)
7540 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
7541 machopic_output_function_base_name (file);
7542 fprintf(file, "\n");
7546 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7547 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7550 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7554 ix86_expand_clear (rtx dest)
7558 /* We play register width games, which are only valid after reload. */
7559 if (!reload_completed)
7562 /* Avoid HImode and its attendant prefix byte. */
7563 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7564 dest = gen_rtx_REG (SImode, REGNO (dest));
7566 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7568 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7569 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7571 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7572 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7578 /* X is an unchanging MEM. If it is a constant pool reference, return
7579 the constant pool rtx, else NULL. */
7582 maybe_get_pool_constant (rtx x)
7584 x = ix86_delegitimize_address (XEXP (x, 0));
7586 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7587 return get_pool_constant (x);
7593 ix86_expand_move (enum machine_mode mode, rtx operands[])
7595 int strict = (reload_in_progress || reload_completed);
7597 enum tls_model model;
7602 if (GET_CODE (op1) == SYMBOL_REF)
7604 model = SYMBOL_REF_TLS_MODEL (op1);
7607 op1 = legitimize_tls_address (op1, model, true);
7608 op1 = force_operand (op1, op0);
7613 else if (GET_CODE (op1) == CONST
7614 && GET_CODE (XEXP (op1, 0)) == PLUS
7615 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
7617 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
7620 rtx addend = XEXP (XEXP (op1, 0), 1);
7621 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
7622 op1 = force_operand (op1, NULL);
7623 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
7624 op0, 1, OPTAB_DIRECT);
7630 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7635 rtx temp = ((reload_in_progress
7636 || ((op0 && GET_CODE (op0) == REG)
7638 ? op0 : gen_reg_rtx (Pmode));
7639 op1 = machopic_indirect_data_reference (op1, temp);
7640 op1 = machopic_legitimize_pic_address (op1, mode,
7641 temp == op1 ? 0 : temp);
7643 else if (MACHOPIC_INDIRECT)
7644 op1 = machopic_indirect_data_reference (op1, 0);
7648 if (GET_CODE (op0) == MEM)
7649 op1 = force_reg (Pmode, op1);
7651 op1 = legitimize_address (op1, op1, Pmode);
7652 #endif /* TARGET_MACHO */
7656 if (GET_CODE (op0) == MEM
7657 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7658 || !push_operand (op0, mode))
7659 && GET_CODE (op1) == MEM)
7660 op1 = force_reg (mode, op1);
7662 if (push_operand (op0, mode)
7663 && ! general_no_elim_operand (op1, mode))
7664 op1 = copy_to_mode_reg (mode, op1);
7666 /* Force large constants in 64bit compilation into register
7667 to get them CSEed. */
7668 if (TARGET_64BIT && mode == DImode
7669 && immediate_operand (op1, mode)
7670 && !x86_64_zext_immediate_operand (op1, VOIDmode)
7671 && !register_operand (op0, mode)
7672 && optimize && !reload_completed && !reload_in_progress)
7673 op1 = copy_to_mode_reg (mode, op1);
7675 if (FLOAT_MODE_P (mode))
7677 /* If we are loading a floating point constant to a register,
7678 force the value to memory now, since we'll get better code
7679 out the back end. */
7683 else if (GET_CODE (op1) == CONST_DOUBLE)
7685 op1 = validize_mem (force_const_mem (mode, op1));
7686 if (!register_operand (op0, mode))
7688 rtx temp = gen_reg_rtx (mode);
7689 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
7690 emit_move_insn (op0, temp);
7697 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7701 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
7703 rtx op0 = operands[0], op1 = operands[1];
7705 /* Force constants other than zero into memory. We do not know how
7706 the instructions used to build constants modify the upper 64 bits
7707 of the register, once we have that information we may be able
7708 to handle some of them more efficiently. */
7709 if ((reload_in_progress | reload_completed) == 0
7710 && register_operand (op0, mode)
7711 && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
7712 op1 = validize_mem (force_const_mem (mode, op1));
7714 /* Make operand1 a register if it isn't already. */
7716 && !register_operand (op0, mode)
7717 && !register_operand (op1, mode))
7719 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
7723 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7726 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
7727 straight to ix86_expand_vector_move. */
7730 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
7739 /* If we're optimizing for size, movups is the smallest. */
7742 op0 = gen_lowpart (V4SFmode, op0);
7743 op1 = gen_lowpart (V4SFmode, op1);
7744 emit_insn (gen_sse_movups (op0, op1));
7748 /* ??? If we have typed data, then it would appear that using
7749 movdqu is the only way to get unaligned data loaded with
7751 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
7753 op0 = gen_lowpart (V16QImode, op0);
7754 op1 = gen_lowpart (V16QImode, op1);
7755 emit_insn (gen_sse2_movdqu (op0, op1));
7759 if (TARGET_SSE2 && mode == V2DFmode)
7763 /* When SSE registers are split into halves, we can avoid
7764 writing to the top half twice. */
7765 if (TARGET_SSE_SPLIT_REGS)
7767 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
7772 /* ??? Not sure about the best option for the Intel chips.
7773 The following would seem to satisfy; the register is
7774 entirely cleared, breaking the dependency chain. We
7775 then store to the upper half, with a dependency depth
7776 of one. A rumor has it that Intel recommends two movsd
7777 followed by an unpacklpd, but this is unconfirmed. And
7778 given that the dependency depth of the unpacklpd would
7779 still be one, I'm not sure why this would be better. */
7780 zero = CONST0_RTX (V2DFmode);
7783 m = adjust_address (op1, DFmode, 0);
7784 emit_insn (gen_sse2_loadlpd (op0, zero, m));
7785 m = adjust_address (op1, DFmode, 8);
7786 emit_insn (gen_sse2_loadhpd (op0, op0, m));
7790 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
7791 emit_move_insn (op0, CONST0_RTX (mode));
7793 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
7795 if (mode != V4SFmode)
7796 op0 = gen_lowpart (V4SFmode, op0);
7797 m = adjust_address (op1, V2SFmode, 0);
7798 emit_insn (gen_sse_loadlps (op0, op0, m));
7799 m = adjust_address (op1, V2SFmode, 8);
7800 emit_insn (gen_sse_loadhps (op0, op0, m));
7803 else if (MEM_P (op0))
7805 /* If we're optimizing for size, movups is the smallest. */
7808 op0 = gen_lowpart (V4SFmode, op0);
7809 op1 = gen_lowpart (V4SFmode, op1);
7810 emit_insn (gen_sse_movups (op0, op1));
7814 /* ??? Similar to above, only less clear because of quote
7815 typeless stores unquote. */
7816 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
7817 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
7819 op0 = gen_lowpart (V16QImode, op0);
7820 op1 = gen_lowpart (V16QImode, op1);
7821 emit_insn (gen_sse2_movdqu (op0, op1));
7825 if (TARGET_SSE2 && mode == V2DFmode)
7827 m = adjust_address (op0, DFmode, 0);
7828 emit_insn (gen_sse2_storelpd (m, op1));
7829 m = adjust_address (op0, DFmode, 8);
7830 emit_insn (gen_sse2_storehpd (m, op1));
7834 if (mode != V4SFmode)
7835 op1 = gen_lowpart (V4SFmode, op1);
7836 m = adjust_address (op0, V2SFmode, 0);
7837 emit_insn (gen_sse_storelps (m, op1));
7838 m = adjust_address (op0, V2SFmode, 8);
7839 emit_insn (gen_sse_storehps (m, op1));
7846 /* Expand a push in MODE. This is some mode for which we do not support
7847 proper push instructions, at least from the registers that we expect
7848 the value to live in. */
7851 ix86_expand_push (enum machine_mode mode, rtx x)
7855 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
7856 GEN_INT (-GET_MODE_SIZE (mode)),
7857 stack_pointer_rtx, 1, OPTAB_DIRECT);
7858 if (tmp != stack_pointer_rtx)
7859 emit_move_insn (stack_pointer_rtx, tmp);
7861 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
7862 emit_move_insn (tmp, x);
7865 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
7866 destination to use for the operation. If different from the true
7867 destination in operands[0], a copy operation will be required. */
7870 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
7873 int matching_memory;
7874 rtx src1, src2, dst;
7880 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7881 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7882 && (rtx_equal_p (dst, src2)
7883 || immediate_operand (src1, mode)))
7890 /* If the destination is memory, and we do not have matching source
7891 operands, do things in registers. */
7892 matching_memory = 0;
7893 if (GET_CODE (dst) == MEM)
7895 if (rtx_equal_p (dst, src1))
7896 matching_memory = 1;
7897 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7898 && rtx_equal_p (dst, src2))
7899 matching_memory = 2;
7901 dst = gen_reg_rtx (mode);
7904 /* Both source operands cannot be in memory. */
7905 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7907 if (matching_memory != 2)
7908 src2 = force_reg (mode, src2);
7910 src1 = force_reg (mode, src1);
7913 /* If the operation is not commutable, source 1 cannot be a constant
7914 or non-matching memory. */
7915 if ((CONSTANT_P (src1)
7916 || (!matching_memory && GET_CODE (src1) == MEM))
7917 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7918 src1 = force_reg (mode, src1);
7920 /* If optimizing, copy to regs to improve CSE */
7921 if (optimize && ! no_new_pseudos)
7923 if (GET_CODE (dst) == MEM)
7924 dst = gen_reg_rtx (mode);
7925 if (GET_CODE (src1) == MEM)
7926 src1 = force_reg (mode, src1);
7927 if (GET_CODE (src2) == MEM)
7928 src2 = force_reg (mode, src2);
7931 src1 = operands[1] = src1;
7932 src2 = operands[2] = src2;
7936 /* Similarly, but assume that the destination has already been
7940 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
7941 enum machine_mode mode, rtx operands[])
7943 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
7944 gcc_assert (dst == operands[0]);
7947 /* Attempt to expand a binary operator. Make the expansion closer to the
7948 actual machine, then just general_operand, which will allow 3 separate
7949 memory references (one output, two input) in a single insn. */
7952 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
7955 rtx src1, src2, dst, op, clob;
7957 dst = ix86_fixup_binary_operands (code, mode, operands);
7961 /* Emit the instruction. */
7963 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7964 if (reload_in_progress)
7966 /* Reload doesn't know about the flags register, and doesn't know that
7967 it doesn't want to clobber it. We can only do this with PLUS. */
7974 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7975 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7978 /* Fix up the destination if needed. */
7979 if (dst != operands[0])
7980 emit_move_insn (operands[0], dst);
7983 /* Return TRUE or FALSE depending on whether the binary operator meets the
7984 appropriate constraints. */
7987 ix86_binary_operator_ok (enum rtx_code code,
7988 enum machine_mode mode ATTRIBUTE_UNUSED,
7991 /* Both source operands cannot be in memory. */
7992 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7994 /* If the operation is not commutable, source 1 cannot be a constant. */
7995 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7997 /* If the destination is memory, we must have a matching source operand. */
7998 if (GET_CODE (operands[0]) == MEM
7999 && ! (rtx_equal_p (operands[0], operands[1])
8000 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8001 && rtx_equal_p (operands[0], operands[2]))))
8003 /* If the operation is not commutable and the source 1 is memory, we must
8004 have a matching destination. */
8005 if (GET_CODE (operands[1]) == MEM
8006 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
8007 && ! rtx_equal_p (operands[0], operands[1]))
8012 /* Attempt to expand a unary operator. Make the expansion closer to the
8013 actual machine, then just general_operand, which will allow 2 separate
8014 memory references (one output, one input) in a single insn. */
8017 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8020 int matching_memory;
8021 rtx src, dst, op, clob;
8026 /* If the destination is memory, and we do not have matching source
8027 operands, do things in registers. */
8028 matching_memory = 0;
8031 if (rtx_equal_p (dst, src))
8032 matching_memory = 1;
8034 dst = gen_reg_rtx (mode);
8037 /* When source operand is memory, destination must match. */
8038 if (MEM_P (src) && !matching_memory)
8039 src = force_reg (mode, src);
8041 /* If optimizing, copy to regs to improve CSE. */
8042 if (optimize && ! no_new_pseudos)
8044 if (GET_CODE (dst) == MEM)
8045 dst = gen_reg_rtx (mode);
8046 if (GET_CODE (src) == MEM)
8047 src = force_reg (mode, src);
8050 /* Emit the instruction. */
8052 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8053 if (reload_in_progress || code == NOT)
8055 /* Reload doesn't know about the flags register, and doesn't know that
8056 it doesn't want to clobber it. */
8063 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8064 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8067 /* Fix up the destination if needed. */
8068 if (dst != operands[0])
8069 emit_move_insn (operands[0], dst);
8072 /* Return TRUE or FALSE depending on whether the unary operator meets the
8073 appropriate constraints. */
8076 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8077 enum machine_mode mode ATTRIBUTE_UNUSED,
8078 rtx operands[2] ATTRIBUTE_UNUSED)
8080 /* If one of operands is memory, source and destination must match. */
8081 if ((GET_CODE (operands[0]) == MEM
8082 || GET_CODE (operands[1]) == MEM)
8083 && ! rtx_equal_p (operands[0], operands[1]))
8088 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
8089 Create a mask for the sign bit in MODE for an SSE register. If VECT is
8090 true, then replicate the mask for all elements of the vector register.
8091 If INVERT is true, then create a mask excluding the sign bit. */
8094 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
8096 enum machine_mode vec_mode;
8097 HOST_WIDE_INT hi, lo;
8102 /* Find the sign bit, sign extended to 2*HWI. */
8104 lo = 0x80000000, hi = lo < 0;
8105 else if (HOST_BITS_PER_WIDE_INT >= 64)
8106 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
8108 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
8113 /* Force this value into the low part of a fp vector constant. */
8114 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
8115 mask = gen_lowpart (mode, mask);
8120 v = gen_rtvec (4, mask, mask, mask, mask);
8122 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
8123 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
8124 vec_mode = V4SFmode;
8129 v = gen_rtvec (2, mask, mask);
8131 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
8132 vec_mode = V2DFmode;
8135 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
8138 /* Generate code for floating point ABS or NEG. */
8141 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
8144 rtx mask, set, use, clob, dst, src;
8145 bool matching_memory;
8146 bool use_sse = false;
8147 bool vector_mode = VECTOR_MODE_P (mode);
8148 enum machine_mode elt_mode = mode;
8152 elt_mode = GET_MODE_INNER (mode);
8155 else if (TARGET_SSE_MATH)
8156 use_sse = SSE_FLOAT_MODE_P (mode);
8158 /* NEG and ABS performed with SSE use bitwise mask operations.
8159 Create the appropriate mask now. */
8161 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
8164 /* When not using SSE, we don't use the mask, but prefer to keep the
8165 same general form of the insn pattern to reduce duplication when
8166 it comes time to split. */
8173 /* If the destination is memory, and we don't have matching source
8174 operands, do things in registers. */
8175 matching_memory = false;
8178 if (rtx_equal_p (dst, src) && (!optimize || no_new_pseudos))
8179 matching_memory = true;
8181 dst = gen_reg_rtx (mode);
8183 if (MEM_P (src) && !matching_memory)
8184 src = force_reg (mode, src);
8188 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
8189 set = gen_rtx_SET (VOIDmode, dst, set);
8194 set = gen_rtx_fmt_e (code, mode, src);
8195 set = gen_rtx_SET (VOIDmode, dst, set);
8196 use = gen_rtx_USE (VOIDmode, mask);
8197 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8198 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
8201 if (dst != operands[0])
8202 emit_move_insn (operands[0], dst);
8205 /* Expand a copysign operation. Special case operand 0 being a constant. */
8208 ix86_expand_copysign (rtx operands[])
8210 enum machine_mode mode, vmode;
8211 rtx dest, op0, op1, mask, nmask;
8217 mode = GET_MODE (dest);
8218 vmode = mode == SFmode ? V4SFmode : V2DFmode;
8220 if (GET_CODE (op0) == CONST_DOUBLE)
8224 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
8225 op0 = simplify_unary_operation (ABS, mode, op0, mode);
8227 if (op0 == CONST0_RTX (mode))
8228 op0 = CONST0_RTX (vmode);
8232 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
8233 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
8235 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
8236 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
8239 mask = ix86_build_signbit_mask (mode, 0, 0);
8242 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
8244 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
8248 nmask = ix86_build_signbit_mask (mode, 0, 1);
8249 mask = ix86_build_signbit_mask (mode, 0, 0);
8252 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
8254 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
8258 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
8259 be a constant, and so has already been expanded into a vector constant. */
8262 ix86_split_copysign_const (rtx operands[])
8264 enum machine_mode mode, vmode;
8265 rtx dest, op0, op1, mask, x;
8272 mode = GET_MODE (dest);
8273 vmode = GET_MODE (mask);
8275 dest = simplify_gen_subreg (vmode, dest, mode, 0);
8276 x = gen_rtx_AND (vmode, dest, mask);
8277 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8279 if (op0 != CONST0_RTX (vmode))
8281 x = gen_rtx_IOR (vmode, dest, op0);
8282 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8286 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
8287 so we have to do two masks. */
8290 ix86_split_copysign_var (rtx operands[])
8292 enum machine_mode mode, vmode;
8293 rtx dest, scratch, op0, op1, mask, nmask, x;
8296 scratch = operands[1];
8299 nmask = operands[4];
8302 mode = GET_MODE (dest);
8303 vmode = GET_MODE (mask);
8305 if (rtx_equal_p (op0, op1))
8307 /* Shouldn't happen often (it's useless, obviously), but when it does
8308 we'd generate incorrect code if we continue below. */
8309 emit_move_insn (dest, op0);
8313 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
8315 gcc_assert (REGNO (op1) == REGNO (scratch));
8317 x = gen_rtx_AND (vmode, scratch, mask);
8318 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
8321 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
8322 x = gen_rtx_NOT (vmode, dest);
8323 x = gen_rtx_AND (vmode, x, op0);
8324 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8328 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
8330 x = gen_rtx_AND (vmode, scratch, mask);
8332 else /* alternative 2,4 */
8334 gcc_assert (REGNO (mask) == REGNO (scratch));
8335 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
8336 x = gen_rtx_AND (vmode, scratch, op1);
8338 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
8340 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
8342 dest = simplify_gen_subreg (vmode, op0, mode, 0);
8343 x = gen_rtx_AND (vmode, dest, nmask);
8345 else /* alternative 3,4 */
8347 gcc_assert (REGNO (nmask) == REGNO (dest));
8349 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
8350 x = gen_rtx_AND (vmode, dest, op0);
8352 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8355 x = gen_rtx_IOR (vmode, dest, scratch);
8356 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8359 /* Return TRUE or FALSE depending on whether the first SET in INSN
8360 has source and destination with matching CC modes, and that the
8361 CC mode is at least as constrained as REQ_MODE. */
8364 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8367 enum machine_mode set_mode;
8369 set = PATTERN (insn);
8370 if (GET_CODE (set) == PARALLEL)
8371 set = XVECEXP (set, 0, 0);
8372 if (GET_CODE (set) != SET)
8374 if (GET_CODE (SET_SRC (set)) != COMPARE)
8377 set_mode = GET_MODE (SET_DEST (set));
8381 if (req_mode != CCNOmode
8382 && (req_mode != CCmode
8383 || XEXP (SET_SRC (set), 1) != const0_rtx))
8387 if (req_mode == CCGCmode)
8391 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8395 if (req_mode == CCZmode)
8405 return (GET_MODE (SET_SRC (set)) == set_mode);
8408 /* Generate insn patterns to do an integer compare of OPERANDS. */
8411 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8413 enum machine_mode cmpmode;
8416 cmpmode = SELECT_CC_MODE (code, op0, op1);
8417 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8419 /* This is very simple, but making the interface the same as in the
8420 FP case makes the rest of the code easier. */
8421 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8422 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8424 /* Return the test that should be put into the flags user, i.e.
8425 the bcc, scc, or cmov instruction. */
8426 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8429 /* Figure out whether to use ordered or unordered fp comparisons.
8430 Return the appropriate mode to use. */
8433 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8435 /* ??? In order to make all comparisons reversible, we do all comparisons
8436 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8437 all forms trapping and nontrapping comparisons, we can make inequality
8438 comparisons trapping again, since it results in better code when using
8439 FCOM based compares. */
8440 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8444 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8446 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8447 return ix86_fp_compare_mode (code);
8450 /* Only zero flag is needed. */
8452 case NE: /* ZF!=0 */
8454 /* Codes needing carry flag. */
8455 case GEU: /* CF=0 */
8456 case GTU: /* CF=0 & ZF=0 */
8457 case LTU: /* CF=1 */
8458 case LEU: /* CF=1 | ZF=1 */
8460 /* Codes possibly doable only with sign flag when
8461 comparing against zero. */
8462 case GE: /* SF=OF or SF=0 */
8463 case LT: /* SF<>OF or SF=1 */
8464 if (op1 == const0_rtx)
8467 /* For other cases Carry flag is not required. */
8469 /* Codes doable only with sign flag when comparing
8470 against zero, but we miss jump instruction for it
8471 so we need to use relational tests against overflow
8472 that thus needs to be zero. */
8473 case GT: /* ZF=0 & SF=OF */
8474 case LE: /* ZF=1 | SF<>OF */
8475 if (op1 == const0_rtx)
8479 /* strcmp pattern do (use flags) and combine may ask us for proper
8488 /* Return the fixed registers used for condition codes. */
8491 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8498 /* If two condition code modes are compatible, return a condition code
8499 mode which is compatible with both. Otherwise, return
8502 static enum machine_mode
8503 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8508 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8511 if ((m1 == CCGCmode && m2 == CCGOCmode)
8512 || (m1 == CCGOCmode && m2 == CCGCmode))
8540 /* These are only compatible with themselves, which we already
8546 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8549 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8551 enum rtx_code swapped_code = swap_condition (code);
8552 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8553 || (ix86_fp_comparison_cost (swapped_code)
8554 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8557 /* Swap, force into registers, or otherwise massage the two operands
8558 to a fp comparison. The operands are updated in place; the new
8559 comparison code is returned. */
8561 static enum rtx_code
8562 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8564 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8565 rtx op0 = *pop0, op1 = *pop1;
8566 enum machine_mode op_mode = GET_MODE (op0);
8567 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
8569 /* All of the unordered compare instructions only work on registers.
8570 The same is true of the fcomi compare instructions. The same is
8571 true of the XFmode compare instructions if not comparing with
8572 zero (ftst insn is used in this case). */
8575 && (fpcmp_mode == CCFPUmode
8576 || (op_mode == XFmode
8577 && ! (standard_80387_constant_p (op0) == 1
8578 || standard_80387_constant_p (op1) == 1))
8579 || ix86_use_fcomi_compare (code)))
8581 op0 = force_reg (op_mode, op0);
8582 op1 = force_reg (op_mode, op1);
8586 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8587 things around if they appear profitable, otherwise force op0
8590 if (standard_80387_constant_p (op0) == 0
8591 || (GET_CODE (op0) == MEM
8592 && ! (standard_80387_constant_p (op1) == 0
8593 || GET_CODE (op1) == MEM)))
8596 tmp = op0, op0 = op1, op1 = tmp;
8597 code = swap_condition (code);
8600 if (GET_CODE (op0) != REG)
8601 op0 = force_reg (op_mode, op0);
8603 if (CONSTANT_P (op1))
8605 int tmp = standard_80387_constant_p (op1);
8607 op1 = validize_mem (force_const_mem (op_mode, op1));
8611 op1 = force_reg (op_mode, op1);
8614 op1 = force_reg (op_mode, op1);
8618 /* Try to rearrange the comparison to make it cheaper. */
8619 if (ix86_fp_comparison_cost (code)
8620 > ix86_fp_comparison_cost (swap_condition (code))
8621 && (GET_CODE (op1) == REG || !no_new_pseudos))
8624 tmp = op0, op0 = op1, op1 = tmp;
8625 code = swap_condition (code);
8626 if (GET_CODE (op0) != REG)
8627 op0 = force_reg (op_mode, op0);
8635 /* Convert comparison codes we use to represent FP comparison to integer
8636 code that will result in proper branch. Return UNKNOWN if no such code
8640 ix86_fp_compare_code_to_integer (enum rtx_code code)
8669 /* Split comparison code CODE into comparisons we can do using branch
8670 instructions. BYPASS_CODE is comparison code for branch that will
8671 branch around FIRST_CODE and SECOND_CODE. If some of branches
8672 is not required, set value to UNKNOWN.
8673 We never require more than two branches. */
8676 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8677 enum rtx_code *first_code,
8678 enum rtx_code *second_code)
8681 *bypass_code = UNKNOWN;
8682 *second_code = UNKNOWN;
8684 /* The fcomi comparison sets flags as follows:
8694 case GT: /* GTU - CF=0 & ZF=0 */
8695 case GE: /* GEU - CF=0 */
8696 case ORDERED: /* PF=0 */
8697 case UNORDERED: /* PF=1 */
8698 case UNEQ: /* EQ - ZF=1 */
8699 case UNLT: /* LTU - CF=1 */
8700 case UNLE: /* LEU - CF=1 | ZF=1 */
8701 case LTGT: /* EQ - ZF=0 */
8703 case LT: /* LTU - CF=1 - fails on unordered */
8705 *bypass_code = UNORDERED;
8707 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8709 *bypass_code = UNORDERED;
8711 case EQ: /* EQ - ZF=1 - fails on unordered */
8713 *bypass_code = UNORDERED;
8715 case NE: /* NE - ZF=0 - fails on unordered */
8717 *second_code = UNORDERED;
8719 case UNGE: /* GEU - CF=0 - fails on unordered */
8721 *second_code = UNORDERED;
8723 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8725 *second_code = UNORDERED;
8730 if (!TARGET_IEEE_FP)
8732 *second_code = UNKNOWN;
8733 *bypass_code = UNKNOWN;
8737 /* Return cost of comparison done fcom + arithmetics operations on AX.
8738 All following functions do use number of instructions as a cost metrics.
8739 In future this should be tweaked to compute bytes for optimize_size and
8740 take into account performance of various instructions on various CPUs. */
8742 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8744 if (!TARGET_IEEE_FP)
8746 /* The cost of code output by ix86_expand_fp_compare. */
8774 /* Return cost of comparison done using fcomi operation.
8775 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8777 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
8779 enum rtx_code bypass_code, first_code, second_code;
8780 /* Return arbitrarily high cost when instruction is not supported - this
8781 prevents gcc from using it. */
8784 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8785 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
8788 /* Return cost of comparison done using sahf operation.
8789 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8791 ix86_fp_comparison_sahf_cost (enum rtx_code code)
8793 enum rtx_code bypass_code, first_code, second_code;
8794 /* Return arbitrarily high cost when instruction is not preferred - this
8795 avoids gcc from using it. */
8796 if (!TARGET_USE_SAHF && !optimize_size)
8798 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8799 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
8802 /* Compute cost of the comparison done using any method.
8803 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8805 ix86_fp_comparison_cost (enum rtx_code code)
8807 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8810 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8811 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8813 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8814 if (min > sahf_cost)
8816 if (min > fcomi_cost)
8821 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8824 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8825 rtx *second_test, rtx *bypass_test)
8827 enum machine_mode fpcmp_mode, intcmp_mode;
8829 int cost = ix86_fp_comparison_cost (code);
8830 enum rtx_code bypass_code, first_code, second_code;
8832 fpcmp_mode = ix86_fp_compare_mode (code);
8833 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8836 *second_test = NULL_RTX;
8838 *bypass_test = NULL_RTX;
8840 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8842 /* Do fcomi/sahf based test when profitable. */
8843 if ((bypass_code == UNKNOWN || bypass_test)
8844 && (second_code == UNKNOWN || second_test)
8845 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8849 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8850 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8856 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8857 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8859 scratch = gen_reg_rtx (HImode);
8860 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8861 emit_insn (gen_x86_sahf_1 (scratch));
8864 /* The FP codes work out to act like unsigned. */
8865 intcmp_mode = fpcmp_mode;
8867 if (bypass_code != UNKNOWN)
8868 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8869 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8871 if (second_code != UNKNOWN)
8872 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8873 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8878 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8879 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8880 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8882 scratch = gen_reg_rtx (HImode);
8883 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8885 /* In the unordered case, we have to check C2 for NaN's, which
8886 doesn't happen to work out to anything nice combination-wise.
8887 So do some bit twiddling on the value we've got in AH to come
8888 up with an appropriate set of condition codes. */
8890 intcmp_mode = CCNOmode;
8895 if (code == GT || !TARGET_IEEE_FP)
8897 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8902 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8903 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8904 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8905 intcmp_mode = CCmode;
8911 if (code == LT && TARGET_IEEE_FP)
8913 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8914 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8915 intcmp_mode = CCmode;
8920 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8926 if (code == GE || !TARGET_IEEE_FP)
8928 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8933 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8934 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8941 if (code == LE && TARGET_IEEE_FP)
8943 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8944 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8945 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8946 intcmp_mode = CCmode;
8951 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8957 if (code == EQ && TARGET_IEEE_FP)
8959 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8960 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8961 intcmp_mode = CCmode;
8966 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8973 if (code == NE && TARGET_IEEE_FP)
8975 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8976 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8982 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8988 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8992 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9001 /* Return the test that should be put into the flags user, i.e.
9002 the bcc, scc, or cmov instruction. */
9003 return gen_rtx_fmt_ee (code, VOIDmode,
9004 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9009 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9012 op0 = ix86_compare_op0;
9013 op1 = ix86_compare_op1;
9016 *second_test = NULL_RTX;
9018 *bypass_test = NULL_RTX;
9020 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9021 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9022 second_test, bypass_test);
9024 ret = ix86_expand_int_compare (code, op0, op1);
9029 /* Return true if the CODE will result in nontrivial jump sequence. */
9031 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9033 enum rtx_code bypass_code, first_code, second_code;
9036 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9037 return bypass_code != UNKNOWN || second_code != UNKNOWN;
9041 ix86_expand_branch (enum rtx_code code, rtx label)
9045 switch (GET_MODE (ix86_compare_op0))
9051 tmp = ix86_expand_compare (code, NULL, NULL);
9052 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9053 gen_rtx_LABEL_REF (VOIDmode, label),
9055 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9064 enum rtx_code bypass_code, first_code, second_code;
9066 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9069 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9071 /* Check whether we will use the natural sequence with one jump. If
9072 so, we can expand jump early. Otherwise delay expansion by
9073 creating compound insn to not confuse optimizers. */
9074 if (bypass_code == UNKNOWN && second_code == UNKNOWN
9077 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9078 gen_rtx_LABEL_REF (VOIDmode, label),
9079 pc_rtx, NULL_RTX, NULL_RTX);
9083 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9084 ix86_compare_op0, ix86_compare_op1);
9085 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9086 gen_rtx_LABEL_REF (VOIDmode, label),
9088 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9090 use_fcomi = ix86_use_fcomi_compare (code);
9091 vec = rtvec_alloc (3 + !use_fcomi);
9092 RTVEC_ELT (vec, 0) = tmp;
9094 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9096 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9099 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9101 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9109 /* Expand DImode branch into multiple compare+branch. */
9111 rtx lo[2], hi[2], label2;
9112 enum rtx_code code1, code2, code3;
9114 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9116 tmp = ix86_compare_op0;
9117 ix86_compare_op0 = ix86_compare_op1;
9118 ix86_compare_op1 = tmp;
9119 code = swap_condition (code);
9121 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9122 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9124 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9125 avoid two branches. This costs one extra insn, so disable when
9126 optimizing for size. */
9128 if ((code == EQ || code == NE)
9130 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9135 if (hi[1] != const0_rtx)
9136 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9137 NULL_RTX, 0, OPTAB_WIDEN);
9140 if (lo[1] != const0_rtx)
9141 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9142 NULL_RTX, 0, OPTAB_WIDEN);
9144 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9145 NULL_RTX, 0, OPTAB_WIDEN);
9147 ix86_compare_op0 = tmp;
9148 ix86_compare_op1 = const0_rtx;
9149 ix86_expand_branch (code, label);
9153 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9154 op1 is a constant and the low word is zero, then we can just
9155 examine the high word. */
9157 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9160 case LT: case LTU: case GE: case GEU:
9161 ix86_compare_op0 = hi[0];
9162 ix86_compare_op1 = hi[1];
9163 ix86_expand_branch (code, label);
9169 /* Otherwise, we need two or three jumps. */
9171 label2 = gen_label_rtx ();
9174 code2 = swap_condition (code);
9175 code3 = unsigned_condition (code);
9179 case LT: case GT: case LTU: case GTU:
9182 case LE: code1 = LT; code2 = GT; break;
9183 case GE: code1 = GT; code2 = LT; break;
9184 case LEU: code1 = LTU; code2 = GTU; break;
9185 case GEU: code1 = GTU; code2 = LTU; break;
9187 case EQ: code1 = UNKNOWN; code2 = NE; break;
9188 case NE: code2 = UNKNOWN; break;
9196 * if (hi(a) < hi(b)) goto true;
9197 * if (hi(a) > hi(b)) goto false;
9198 * if (lo(a) < lo(b)) goto true;
9202 ix86_compare_op0 = hi[0];
9203 ix86_compare_op1 = hi[1];
9205 if (code1 != UNKNOWN)
9206 ix86_expand_branch (code1, label);
9207 if (code2 != UNKNOWN)
9208 ix86_expand_branch (code2, label2);
9210 ix86_compare_op0 = lo[0];
9211 ix86_compare_op1 = lo[1];
9212 ix86_expand_branch (code3, label);
9214 if (code2 != UNKNOWN)
9215 emit_label (label2);
9224 /* Split branch based on floating point condition. */
9226 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9227 rtx target1, rtx target2, rtx tmp, rtx pushed)
9230 rtx label = NULL_RTX;
9232 int bypass_probability = -1, second_probability = -1, probability = -1;
9235 if (target2 != pc_rtx)
9238 code = reverse_condition_maybe_unordered (code);
9243 condition = ix86_expand_fp_compare (code, op1, op2,
9244 tmp, &second, &bypass);
9246 /* Remove pushed operand from stack. */
9248 ix86_free_from_memory (GET_MODE (pushed));
9250 if (split_branch_probability >= 0)
9252 /* Distribute the probabilities across the jumps.
9253 Assume the BYPASS and SECOND to be always test
9255 probability = split_branch_probability;
9257 /* Value of 1 is low enough to make no need for probability
9258 to be updated. Later we may run some experiments and see
9259 if unordered values are more frequent in practice. */
9261 bypass_probability = 1;
9263 second_probability = 1;
9265 if (bypass != NULL_RTX)
9267 label = gen_label_rtx ();
9268 i = emit_jump_insn (gen_rtx_SET
9270 gen_rtx_IF_THEN_ELSE (VOIDmode,
9272 gen_rtx_LABEL_REF (VOIDmode,
9275 if (bypass_probability >= 0)
9277 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9278 GEN_INT (bypass_probability),
9281 i = emit_jump_insn (gen_rtx_SET
9283 gen_rtx_IF_THEN_ELSE (VOIDmode,
9284 condition, target1, target2)));
9285 if (probability >= 0)
9287 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9288 GEN_INT (probability),
9290 if (second != NULL_RTX)
9292 i = emit_jump_insn (gen_rtx_SET
9294 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9296 if (second_probability >= 0)
9298 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9299 GEN_INT (second_probability),
9302 if (label != NULL_RTX)
9307 ix86_expand_setcc (enum rtx_code code, rtx dest)
9309 rtx ret, tmp, tmpreg, equiv;
9310 rtx second_test, bypass_test;
9312 if (GET_MODE (ix86_compare_op0) == DImode
9314 return 0; /* FAIL */
9316 if (GET_MODE (dest) != QImode)
9319 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9320 PUT_MODE (ret, QImode);
9325 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9326 if (bypass_test || second_test)
9328 rtx test = second_test;
9330 rtx tmp2 = gen_reg_rtx (QImode);
9337 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9339 PUT_MODE (test, QImode);
9340 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9343 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9345 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9348 /* Attach a REG_EQUAL note describing the comparison result. */
9349 equiv = simplify_gen_relational (code, QImode,
9350 GET_MODE (ix86_compare_op0),
9351 ix86_compare_op0, ix86_compare_op1);
9352 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9354 return 1; /* DONE */
9357 /* Expand comparison setting or clearing carry flag. Return true when
9358 successful and set pop for the operation. */
9360 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9362 enum machine_mode mode =
9363 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9365 /* Do not handle DImode compares that go trought special path. Also we can't
9366 deal with FP compares yet. This is possible to add. */
9367 if ((mode == DImode && !TARGET_64BIT))
9369 if (FLOAT_MODE_P (mode))
9371 rtx second_test = NULL, bypass_test = NULL;
9372 rtx compare_op, compare_seq;
9374 /* Shortcut: following common codes never translate into carry flag compares. */
9375 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9376 || code == ORDERED || code == UNORDERED)
9379 /* These comparisons require zero flag; swap operands so they won't. */
9380 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9386 code = swap_condition (code);
9389 /* Try to expand the comparison and verify that we end up with carry flag
9390 based comparison. This is fails to be true only when we decide to expand
9391 comparison using arithmetic that is not too common scenario. */
9393 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9394 &second_test, &bypass_test);
9395 compare_seq = get_insns ();
9398 if (second_test || bypass_test)
9400 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9401 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9402 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9404 code = GET_CODE (compare_op);
9405 if (code != LTU && code != GEU)
9407 emit_insn (compare_seq);
9411 if (!INTEGRAL_MODE_P (mode))
9419 /* Convert a==0 into (unsigned)a<1. */
9422 if (op1 != const0_rtx)
9425 code = (code == EQ ? LTU : GEU);
9428 /* Convert a>b into b<a or a>=b-1. */
9431 if (GET_CODE (op1) == CONST_INT)
9433 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9434 /* Bail out on overflow. We still can swap operands but that
9435 would force loading of the constant into register. */
9436 if (op1 == const0_rtx
9437 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9439 code = (code == GTU ? GEU : LTU);
9446 code = (code == GTU ? LTU : GEU);
9450 /* Convert a>=0 into (unsigned)a<0x80000000. */
9453 if (mode == DImode || op1 != const0_rtx)
9455 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9456 code = (code == LT ? GEU : LTU);
9460 if (mode == DImode || op1 != constm1_rtx)
9462 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9463 code = (code == LE ? GEU : LTU);
9469 /* Swapping operands may cause constant to appear as first operand. */
9470 if (!nonimmediate_operand (op0, VOIDmode))
9474 op0 = force_reg (mode, op0);
9476 ix86_compare_op0 = op0;
9477 ix86_compare_op1 = op1;
9478 *pop = ix86_expand_compare (code, NULL, NULL);
9479 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9485 ix86_expand_int_movcc (rtx operands[])
9487 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9488 rtx compare_seq, compare_op;
9489 rtx second_test, bypass_test;
9490 enum machine_mode mode = GET_MODE (operands[0]);
9491 bool sign_bit_compare_p = false;;
9494 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9495 compare_seq = get_insns ();
9498 compare_code = GET_CODE (compare_op);
9500 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9501 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9502 sign_bit_compare_p = true;
9504 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9505 HImode insns, we'd be swallowed in word prefix ops. */
9507 if ((mode != HImode || TARGET_FAST_PREFIX)
9508 && (mode != DImode || TARGET_64BIT)
9509 && GET_CODE (operands[2]) == CONST_INT
9510 && GET_CODE (operands[3]) == CONST_INT)
9512 rtx out = operands[0];
9513 HOST_WIDE_INT ct = INTVAL (operands[2]);
9514 HOST_WIDE_INT cf = INTVAL (operands[3]);
9518 /* Sign bit compares are better done using shifts than we do by using
9520 if (sign_bit_compare_p
9521 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9522 ix86_compare_op1, &compare_op))
9524 /* Detect overlap between destination and compare sources. */
9527 if (!sign_bit_compare_p)
9531 compare_code = GET_CODE (compare_op);
9533 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9534 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9537 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9540 /* To simplify rest of code, restrict to the GEU case. */
9541 if (compare_code == LTU)
9543 HOST_WIDE_INT tmp = ct;
9546 compare_code = reverse_condition (compare_code);
9547 code = reverse_condition (code);
9552 PUT_CODE (compare_op,
9553 reverse_condition_maybe_unordered
9554 (GET_CODE (compare_op)));
9556 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9560 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9561 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9562 tmp = gen_reg_rtx (mode);
9565 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9567 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9571 if (code == GT || code == GE)
9572 code = reverse_condition (code);
9575 HOST_WIDE_INT tmp = ct;
9580 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9581 ix86_compare_op1, VOIDmode, 0, -1);
9594 tmp = expand_simple_binop (mode, PLUS,
9596 copy_rtx (tmp), 1, OPTAB_DIRECT);
9607 tmp = expand_simple_binop (mode, IOR,
9609 copy_rtx (tmp), 1, OPTAB_DIRECT);
9611 else if (diff == -1 && ct)
9621 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9623 tmp = expand_simple_binop (mode, PLUS,
9624 copy_rtx (tmp), GEN_INT (cf),
9625 copy_rtx (tmp), 1, OPTAB_DIRECT);
9633 * andl cf - ct, dest
9643 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9646 tmp = expand_simple_binop (mode, AND,
9648 gen_int_mode (cf - ct, mode),
9649 copy_rtx (tmp), 1, OPTAB_DIRECT);
9651 tmp = expand_simple_binop (mode, PLUS,
9652 copy_rtx (tmp), GEN_INT (ct),
9653 copy_rtx (tmp), 1, OPTAB_DIRECT);
9656 if (!rtx_equal_p (tmp, out))
9657 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9659 return 1; /* DONE */
9665 tmp = ct, ct = cf, cf = tmp;
9667 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9669 /* We may be reversing unordered compare to normal compare, that
9670 is not valid in general (we may convert non-trapping condition
9671 to trapping one), however on i386 we currently emit all
9672 comparisons unordered. */
9673 compare_code = reverse_condition_maybe_unordered (compare_code);
9674 code = reverse_condition_maybe_unordered (code);
9678 compare_code = reverse_condition (compare_code);
9679 code = reverse_condition (code);
9683 compare_code = UNKNOWN;
9684 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9685 && GET_CODE (ix86_compare_op1) == CONST_INT)
9687 if (ix86_compare_op1 == const0_rtx
9688 && (code == LT || code == GE))
9689 compare_code = code;
9690 else if (ix86_compare_op1 == constm1_rtx)
9694 else if (code == GT)
9699 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9700 if (compare_code != UNKNOWN
9701 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9702 && (cf == -1 || ct == -1))
9704 /* If lea code below could be used, only optimize
9705 if it results in a 2 insn sequence. */
9707 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9708 || diff == 3 || diff == 5 || diff == 9)
9709 || (compare_code == LT && ct == -1)
9710 || (compare_code == GE && cf == -1))
9713 * notl op1 (if necessary)
9721 code = reverse_condition (code);
9724 out = emit_store_flag (out, code, ix86_compare_op0,
9725 ix86_compare_op1, VOIDmode, 0, -1);
9727 out = expand_simple_binop (mode, IOR,
9729 out, 1, OPTAB_DIRECT);
9730 if (out != operands[0])
9731 emit_move_insn (operands[0], out);
9733 return 1; /* DONE */
9738 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9739 || diff == 3 || diff == 5 || diff == 9)
9740 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9742 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
9748 * lea cf(dest*(ct-cf)),dest
9752 * This also catches the degenerate setcc-only case.
9758 out = emit_store_flag (out, code, ix86_compare_op0,
9759 ix86_compare_op1, VOIDmode, 0, 1);
9762 /* On x86_64 the lea instruction operates on Pmode, so we need
9763 to get arithmetics done in proper mode to match. */
9765 tmp = copy_rtx (out);
9769 out1 = copy_rtx (out);
9770 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9774 tmp = gen_rtx_PLUS (mode, tmp, out1);
9780 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9783 if (!rtx_equal_p (tmp, out))
9786 out = force_operand (tmp, copy_rtx (out));
9788 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9790 if (!rtx_equal_p (out, operands[0]))
9791 emit_move_insn (operands[0], copy_rtx (out));
9793 return 1; /* DONE */
9797 * General case: Jumpful:
9798 * xorl dest,dest cmpl op1, op2
9799 * cmpl op1, op2 movl ct, dest
9801 * decl dest movl cf, dest
9802 * andl (cf-ct),dest 1:
9807 * This is reasonably steep, but branch mispredict costs are
9808 * high on modern cpus, so consider failing only if optimizing
9812 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9813 && BRANCH_COST >= 2)
9819 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9820 /* We may be reversing unordered compare to normal compare,
9821 that is not valid in general (we may convert non-trapping
9822 condition to trapping one), however on i386 we currently
9823 emit all comparisons unordered. */
9824 code = reverse_condition_maybe_unordered (code);
9827 code = reverse_condition (code);
9828 if (compare_code != UNKNOWN)
9829 compare_code = reverse_condition (compare_code);
9833 if (compare_code != UNKNOWN)
9835 /* notl op1 (if needed)
9840 For x < 0 (resp. x <= -1) there will be no notl,
9841 so if possible swap the constants to get rid of the
9843 True/false will be -1/0 while code below (store flag
9844 followed by decrement) is 0/-1, so the constants need
9845 to be exchanged once more. */
9847 if (compare_code == GE || !cf)
9849 code = reverse_condition (code);
9854 HOST_WIDE_INT tmp = cf;
9859 out = emit_store_flag (out, code, ix86_compare_op0,
9860 ix86_compare_op1, VOIDmode, 0, -1);
9864 out = emit_store_flag (out, code, ix86_compare_op0,
9865 ix86_compare_op1, VOIDmode, 0, 1);
9867 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9868 copy_rtx (out), 1, OPTAB_DIRECT);
9871 out = expand_simple_binop (mode, AND, copy_rtx (out),
9872 gen_int_mode (cf - ct, mode),
9873 copy_rtx (out), 1, OPTAB_DIRECT);
9875 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9876 copy_rtx (out), 1, OPTAB_DIRECT);
9877 if (!rtx_equal_p (out, operands[0]))
9878 emit_move_insn (operands[0], copy_rtx (out));
9880 return 1; /* DONE */
9884 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9886 /* Try a few things more with specific constants and a variable. */
9889 rtx var, orig_out, out, tmp;
9891 if (BRANCH_COST <= 2)
9892 return 0; /* FAIL */
9894 /* If one of the two operands is an interesting constant, load a
9895 constant with the above and mask it in with a logical operation. */
9897 if (GET_CODE (operands[2]) == CONST_INT)
9900 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9901 operands[3] = constm1_rtx, op = and_optab;
9902 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9903 operands[3] = const0_rtx, op = ior_optab;
9905 return 0; /* FAIL */
9907 else if (GET_CODE (operands[3]) == CONST_INT)
9910 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9911 operands[2] = constm1_rtx, op = and_optab;
9912 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
9913 operands[2] = const0_rtx, op = ior_optab;
9915 return 0; /* FAIL */
9918 return 0; /* FAIL */
9920 orig_out = operands[0];
9921 tmp = gen_reg_rtx (mode);
9924 /* Recurse to get the constant loaded. */
9925 if (ix86_expand_int_movcc (operands) == 0)
9926 return 0; /* FAIL */
9928 /* Mask in the interesting variable. */
9929 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9931 if (!rtx_equal_p (out, orig_out))
9932 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9934 return 1; /* DONE */
9938 * For comparison with above,
9948 if (! nonimmediate_operand (operands[2], mode))
9949 operands[2] = force_reg (mode, operands[2]);
9950 if (! nonimmediate_operand (operands[3], mode))
9951 operands[3] = force_reg (mode, operands[3]);
9953 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9955 rtx tmp = gen_reg_rtx (mode);
9956 emit_move_insn (tmp, operands[3]);
9959 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9961 rtx tmp = gen_reg_rtx (mode);
9962 emit_move_insn (tmp, operands[2]);
9966 if (! register_operand (operands[2], VOIDmode)
9968 || ! register_operand (operands[3], VOIDmode)))
9969 operands[2] = force_reg (mode, operands[2]);
9972 && ! register_operand (operands[3], VOIDmode))
9973 operands[3] = force_reg (mode, operands[3]);
9975 emit_insn (compare_seq);
9976 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9977 gen_rtx_IF_THEN_ELSE (mode,
9978 compare_op, operands[2],
9981 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9982 gen_rtx_IF_THEN_ELSE (mode,
9984 copy_rtx (operands[3]),
9985 copy_rtx (operands[0]))));
9987 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9988 gen_rtx_IF_THEN_ELSE (mode,
9990 copy_rtx (operands[2]),
9991 copy_rtx (operands[0]))));
9993 return 1; /* DONE */
9997 ix86_expand_fp_movcc (rtx operands[])
9999 enum machine_mode mode = GET_MODE (operands[0]);
10000 enum rtx_code code = GET_CODE (operands[1]);
10001 rtx tmp, compare_op, second_test, bypass_test;
10003 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
10005 rtx cmp_op0, cmp_op1, if_true, if_false;
10007 enum machine_mode vmode, cmode;
10008 bool is_minmax = false;
10010 cmp_op0 = ix86_compare_op0;
10011 cmp_op1 = ix86_compare_op1;
10012 if_true = operands[2];
10013 if_false = operands[3];
10015 /* Since we've no cmove for sse registers, don't force bad register
10016 allocation just to gain access to it. Deny movcc when the
10017 comparison mode doesn't match the move mode. */
10018 cmode = GET_MODE (cmp_op0);
10019 if (cmode == VOIDmode)
10020 cmode = GET_MODE (cmp_op1);
10024 /* We have no LTGT as an operator. We could implement it with
10025 NE & ORDERED, but this requires an extra temporary. It's
10026 not clear that it's worth it. */
10027 if (code == LTGT || code == UNEQ)
10030 /* Massage condition to satisfy sse_comparison_operator. Try
10031 to canonicalize the destination operand to be first in the
10032 comparison - this helps reload to avoid extra moves. */
10033 if (!sse_comparison_operator (operands[1], VOIDmode)
10034 || (COMMUTATIVE_P (operands[1])
10035 && rtx_equal_p (operands[0], cmp_op1)))
10040 code = swap_condition (code);
10043 /* Detect conditional moves that exactly match min/max operational
10044 semantics. Note that this is IEEE safe, as long as we don't
10045 interchange the operands. Which is why we keep this in the form
10046 if an IF_THEN_ELSE instead of reducing to SMIN/SMAX. */
10047 if ((code == LT || code == UNGE) && REG_P (cmp_op0) && REG_P (cmp_op1))
10049 if (((cmp_op0 == if_true && cmp_op1 == if_false)
10050 || (cmp_op0 == if_false && cmp_op1 == if_true)))
10057 if_true = if_false;
10063 if (mode == SFmode)
10065 else if (mode == DFmode)
10068 gcc_unreachable ();
10070 cmp_op0 = force_reg (mode, cmp_op0);
10071 if (!nonimmediate_operand (cmp_op1, mode))
10072 cmp_op1 = force_reg (mode, cmp_op1);
10074 tmp = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
10075 gcc_assert (sse_comparison_operator (tmp, VOIDmode));
10077 tmp = gen_rtx_IF_THEN_ELSE (mode, tmp, if_true, if_false);
10078 tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
10082 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (vmode));
10083 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
10090 /* The floating point conditional move instructions don't directly
10091 support conditions resulting from a signed integer comparison. */
10093 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10095 /* The floating point conditional move instructions don't directly
10096 support signed integer comparisons. */
10098 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10100 if (second_test != NULL || bypass_test != NULL)
10102 tmp = gen_reg_rtx (QImode);
10103 ix86_expand_setcc (code, tmp);
10105 ix86_compare_op0 = tmp;
10106 ix86_compare_op1 = const0_rtx;
10107 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10109 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10111 tmp = gen_reg_rtx (mode);
10112 emit_move_insn (tmp, operands[3]);
10115 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10117 tmp = gen_reg_rtx (mode);
10118 emit_move_insn (tmp, operands[2]);
10122 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10123 gen_rtx_IF_THEN_ELSE (mode, compare_op,
10124 operands[2], operands[3])));
10126 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10127 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
10128 operands[3], operands[0])));
10130 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10131 gen_rtx_IF_THEN_ELSE (mode, second_test,
10132 operands[2], operands[0])));
10138 ix86_split_sse_movcc (rtx operands[])
10140 rtx dest, scratch, cmp, op_true, op_false, x;
10141 enum machine_mode mode, vmode;
10143 /* Note that the operator CMP has been set up with matching constraints
10144 such that dest is valid for the comparison. Unless one of the true
10145 or false operands are zero, the true operand has already been placed
10147 dest = operands[0];
10148 scratch = operands[1];
10149 op_true = operands[2];
10150 op_false = operands[3];
10153 mode = GET_MODE (dest);
10154 vmode = GET_MODE (scratch);
10156 /* We need to make sure that the TRUE and FALSE operands are out of the
10157 way of the destination. Marking the destination earlyclobber doesn't
10158 work, since we want matching constraints for the actual comparison, so
10159 at some point we always wind up having to do a copy ourselves here.
10160 We very much prefer the TRUE value to be in SCRATCH. If it turns out
10161 that FALSE overlaps DEST, then we invert the comparison so that we
10162 still only have to do one move. */
10163 if (rtx_equal_p (op_false, dest))
10165 enum rtx_code code;
10167 if (rtx_equal_p (op_true, dest))
10169 /* ??? Really ought not happen. It means some optimizer managed
10170 to prove the operands were identical, but failed to fold the
10171 conditional move to a straight move. Do so here, because
10172 otherwise we'll generate incorrect code. And since they're
10173 both already in the destination register, nothing to do. */
10177 x = gen_rtx_REG (mode, REGNO (scratch));
10178 emit_move_insn (x, op_false);
10179 op_false = op_true;
10182 code = GET_CODE (cmp);
10183 code = reverse_condition_maybe_unordered (code);
10184 cmp = gen_rtx_fmt_ee (code, mode, XEXP (cmp, 0), XEXP (cmp, 1));
10186 else if (op_true == CONST0_RTX (mode))
10188 else if (op_false == CONST0_RTX (mode) && !rtx_equal_p (op_true, dest))
10192 x = gen_rtx_REG (mode, REGNO (scratch));
10193 emit_move_insn (x, op_true);
10197 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
10198 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10200 if (op_false == CONST0_RTX (mode))
10202 op_true = simplify_gen_subreg (vmode, op_true, mode, 0);
10203 x = gen_rtx_AND (vmode, dest, op_true);
10204 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10208 op_false = simplify_gen_subreg (vmode, op_false, mode, 0);
10210 if (op_true == CONST0_RTX (mode))
10212 x = gen_rtx_NOT (vmode, dest);
10213 x = gen_rtx_AND (vmode, x, op_false);
10214 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10218 x = gen_rtx_AND (vmode, scratch, dest);
10219 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10221 x = gen_rtx_NOT (vmode, dest);
10222 x = gen_rtx_AND (vmode, x, op_false);
10223 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10225 x = gen_rtx_IOR (vmode, dest, scratch);
10226 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10231 /* Expand conditional increment or decrement using adb/sbb instructions.
10232 The default case using setcc followed by the conditional move can be
10233 done by generic code. */
10235 ix86_expand_int_addcc (rtx operands[])
10237 enum rtx_code code = GET_CODE (operands[1]);
10239 rtx val = const0_rtx;
10240 bool fpcmp = false;
10241 enum machine_mode mode = GET_MODE (operands[0]);
10243 if (operands[3] != const1_rtx
10244 && operands[3] != constm1_rtx)
10246 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10247 ix86_compare_op1, &compare_op))
10249 code = GET_CODE (compare_op);
10251 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10252 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10255 code = ix86_fp_compare_code_to_integer (code);
10262 PUT_CODE (compare_op,
10263 reverse_condition_maybe_unordered
10264 (GET_CODE (compare_op)));
10266 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10268 PUT_MODE (compare_op, mode);
10270 /* Construct either adc or sbb insn. */
10271 if ((code == LTU) == (operands[3] == constm1_rtx))
10273 switch (GET_MODE (operands[0]))
10276 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10279 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10282 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10285 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10293 switch (GET_MODE (operands[0]))
10296 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10299 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10302 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10305 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10311 return 1; /* DONE */
10315 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10316 works for floating pointer parameters and nonoffsetable memories.
10317 For pushes, it returns just stack offsets; the values will be saved
10318 in the right order. Maximally three parts are generated. */
10321 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10326 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10328 size = (GET_MODE_SIZE (mode) + 4) / 8;
10330 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10332 if (size < 2 || size > 3)
10335 /* Optimize constant pool reference to immediates. This is used by fp
10336 moves, that force all constants to memory to allow combining. */
10337 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
10339 rtx tmp = maybe_get_pool_constant (operand);
10344 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10346 /* The only non-offsetable memories we handle are pushes. */
10347 if (! push_operand (operand, VOIDmode))
10350 operand = copy_rtx (operand);
10351 PUT_MODE (operand, Pmode);
10352 parts[0] = parts[1] = parts[2] = operand;
10354 else if (!TARGET_64BIT)
10356 if (mode == DImode)
10357 split_di (&operand, 1, &parts[0], &parts[1]);
10360 if (REG_P (operand))
10362 if (!reload_completed)
10364 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10365 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10367 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10369 else if (offsettable_memref_p (operand))
10371 operand = adjust_address (operand, SImode, 0);
10372 parts[0] = operand;
10373 parts[1] = adjust_address (operand, SImode, 4);
10375 parts[2] = adjust_address (operand, SImode, 8);
10377 else if (GET_CODE (operand) == CONST_DOUBLE)
10382 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10386 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10387 parts[2] = gen_int_mode (l[2], SImode);
10390 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10395 parts[1] = gen_int_mode (l[1], SImode);
10396 parts[0] = gen_int_mode (l[0], SImode);
10404 if (mode == TImode)
10405 split_ti (&operand, 1, &parts[0], &parts[1]);
10406 if (mode == XFmode || mode == TFmode)
10408 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10409 if (REG_P (operand))
10411 if (!reload_completed)
10413 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10414 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10416 else if (offsettable_memref_p (operand))
10418 operand = adjust_address (operand, DImode, 0);
10419 parts[0] = operand;
10420 parts[1] = adjust_address (operand, upper_mode, 8);
10422 else if (GET_CODE (operand) == CONST_DOUBLE)
10427 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10428 real_to_target (l, &r, mode);
10430 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10431 if (HOST_BITS_PER_WIDE_INT >= 64)
10434 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10435 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10438 parts[0] = immed_double_const (l[0], l[1], DImode);
10440 if (upper_mode == SImode)
10441 parts[1] = gen_int_mode (l[2], SImode);
10442 else if (HOST_BITS_PER_WIDE_INT >= 64)
10445 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10446 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10449 parts[1] = immed_double_const (l[2], l[3], DImode);
10459 /* Emit insns to perform a move or push of DI, DF, and XF values.
10460 Return false when normal moves are needed; true when all required
10461 insns have been emitted. Operands 2-4 contain the input values
10462 int the correct order; operands 5-7 contain the output values. */
10465 ix86_split_long_move (rtx operands[])
10470 int collisions = 0;
10471 enum machine_mode mode = GET_MODE (operands[0]);
10473 /* The DFmode expanders may ask us to move double.
10474 For 64bit target this is single move. By hiding the fact
10475 here we simplify i386.md splitters. */
10476 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10478 /* Optimize constant pool reference to immediates. This is used by
10479 fp moves, that force all constants to memory to allow combining. */
10481 if (GET_CODE (operands[1]) == MEM
10482 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10483 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10484 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10485 if (push_operand (operands[0], VOIDmode))
10487 operands[0] = copy_rtx (operands[0]);
10488 PUT_MODE (operands[0], Pmode);
10491 operands[0] = gen_lowpart (DImode, operands[0]);
10492 operands[1] = gen_lowpart (DImode, operands[1]);
10493 emit_move_insn (operands[0], operands[1]);
10497 /* The only non-offsettable memory we handle is push. */
10498 if (push_operand (operands[0], VOIDmode))
10500 else if (GET_CODE (operands[0]) == MEM
10501 && ! offsettable_memref_p (operands[0]))
10504 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10505 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10507 /* When emitting push, take care for source operands on the stack. */
10508 if (push && GET_CODE (operands[1]) == MEM
10509 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10512 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10513 XEXP (part[1][2], 0));
10514 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10515 XEXP (part[1][1], 0));
10518 /* We need to do copy in the right order in case an address register
10519 of the source overlaps the destination. */
10520 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10522 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10524 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10527 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10530 /* Collision in the middle part can be handled by reordering. */
10531 if (collisions == 1 && nparts == 3
10532 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10535 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10536 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10539 /* If there are more collisions, we can't handle it by reordering.
10540 Do an lea to the last part and use only one colliding move. */
10541 else if (collisions > 1)
10547 base = part[0][nparts - 1];
10549 /* Handle the case when the last part isn't valid for lea.
10550 Happens in 64-bit mode storing the 12-byte XFmode. */
10551 if (GET_MODE (base) != Pmode)
10552 base = gen_rtx_REG (Pmode, REGNO (base));
10554 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10555 part[1][0] = replace_equiv_address (part[1][0], base);
10556 part[1][1] = replace_equiv_address (part[1][1],
10557 plus_constant (base, UNITS_PER_WORD));
10559 part[1][2] = replace_equiv_address (part[1][2],
10560 plus_constant (base, 8));
10570 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10571 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10572 emit_move_insn (part[0][2], part[1][2]);
10577 /* In 64bit mode we don't have 32bit push available. In case this is
10578 register, it is OK - we will just use larger counterpart. We also
10579 retype memory - these comes from attempt to avoid REX prefix on
10580 moving of second half of TFmode value. */
10581 if (GET_MODE (part[1][1]) == SImode)
10583 if (GET_CODE (part[1][1]) == MEM)
10584 part[1][1] = adjust_address (part[1][1], DImode, 0);
10585 else if (REG_P (part[1][1]))
10586 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10589 if (GET_MODE (part[1][0]) == SImode)
10590 part[1][0] = part[1][1];
10593 emit_move_insn (part[0][1], part[1][1]);
10594 emit_move_insn (part[0][0], part[1][0]);
10598 /* Choose correct order to not overwrite the source before it is copied. */
10599 if ((REG_P (part[0][0])
10600 && REG_P (part[1][1])
10601 && (REGNO (part[0][0]) == REGNO (part[1][1])
10603 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10605 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10609 operands[2] = part[0][2];
10610 operands[3] = part[0][1];
10611 operands[4] = part[0][0];
10612 operands[5] = part[1][2];
10613 operands[6] = part[1][1];
10614 operands[7] = part[1][0];
10618 operands[2] = part[0][1];
10619 operands[3] = part[0][0];
10620 operands[5] = part[1][1];
10621 operands[6] = part[1][0];
10628 operands[2] = part[0][0];
10629 operands[3] = part[0][1];
10630 operands[4] = part[0][2];
10631 operands[5] = part[1][0];
10632 operands[6] = part[1][1];
10633 operands[7] = part[1][2];
10637 operands[2] = part[0][0];
10638 operands[3] = part[0][1];
10639 operands[5] = part[1][0];
10640 operands[6] = part[1][1];
10644 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
10647 if (GET_CODE (operands[5]) == CONST_INT
10648 && operands[5] != const0_rtx
10649 && REG_P (operands[2]))
10651 if (GET_CODE (operands[6]) == CONST_INT
10652 && INTVAL (operands[6]) == INTVAL (operands[5]))
10653 operands[6] = operands[2];
10656 && GET_CODE (operands[7]) == CONST_INT
10657 && INTVAL (operands[7]) == INTVAL (operands[5]))
10658 operands[7] = operands[2];
10662 && GET_CODE (operands[6]) == CONST_INT
10663 && operands[6] != const0_rtx
10664 && REG_P (operands[3])
10665 && GET_CODE (operands[7]) == CONST_INT
10666 && INTVAL (operands[7]) == INTVAL (operands[6]))
10667 operands[7] = operands[3];
10670 emit_move_insn (operands[2], operands[5]);
10671 emit_move_insn (operands[3], operands[6]);
10673 emit_move_insn (operands[4], operands[7]);
10678 /* Helper function of ix86_split_ashldi used to generate an SImode
10679 left shift by a constant, either using a single shift or
10680 a sequence of add instructions. */
10683 ix86_expand_ashlsi3_const (rtx operand, int count)
10686 emit_insn (gen_addsi3 (operand, operand, operand));
10687 else if (!optimize_size
10688 && count * ix86_cost->add <= ix86_cost->shift_const)
10691 for (i=0; i<count; i++)
10692 emit_insn (gen_addsi3 (operand, operand, operand));
10695 emit_insn (gen_ashlsi3 (operand, operand, GEN_INT (count)));
10699 ix86_split_ashldi (rtx *operands, rtx scratch)
10701 rtx low[2], high[2];
10704 if (GET_CODE (operands[2]) == CONST_INT)
10706 split_di (operands, 2, low, high);
10707 count = INTVAL (operands[2]) & 63;
10711 emit_move_insn (high[0], low[1]);
10712 emit_move_insn (low[0], const0_rtx);
10715 ix86_expand_ashlsi3_const (high[0], count - 32);
10719 if (!rtx_equal_p (operands[0], operands[1]))
10720 emit_move_insn (operands[0], operands[1]);
10721 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10722 ix86_expand_ashlsi3_const (low[0], count);
10727 split_di (operands, 1, low, high);
10729 if (operands[1] == const1_rtx)
10731 /* Assuming we've chosen a QImode capable registers, then 1LL << N
10732 can be done with two 32-bit shifts, no branches, no cmoves. */
10733 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
10735 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
10737 ix86_expand_clear (low[0]);
10738 ix86_expand_clear (high[0]);
10739 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
10741 d = gen_lowpart (QImode, low[0]);
10742 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10743 s = gen_rtx_EQ (QImode, flags, const0_rtx);
10744 emit_insn (gen_rtx_SET (VOIDmode, d, s));
10746 d = gen_lowpart (QImode, high[0]);
10747 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10748 s = gen_rtx_NE (QImode, flags, const0_rtx);
10749 emit_insn (gen_rtx_SET (VOIDmode, d, s));
10752 /* Otherwise, we can get the same results by manually performing
10753 a bit extract operation on bit 5, and then performing the two
10754 shifts. The two methods of getting 0/1 into low/high are exactly
10755 the same size. Avoiding the shift in the bit extract case helps
10756 pentium4 a bit; no one else seems to care much either way. */
10761 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
10762 x = gen_rtx_ZERO_EXTEND (SImode, operands[2]);
10764 x = gen_lowpart (SImode, operands[2]);
10765 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
10767 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (5)));
10768 emit_insn (gen_andsi3 (high[0], high[0], GEN_INT (1)));
10769 emit_move_insn (low[0], high[0]);
10770 emit_insn (gen_xorsi3 (low[0], low[0], GEN_INT (1)));
10773 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10774 emit_insn (gen_ashlsi3 (high[0], high[0], operands[2]));
10778 if (operands[1] == constm1_rtx)
10780 /* For -1LL << N, we can avoid the shld instruction, because we
10781 know that we're shifting 0...31 ones into a -1. */
10782 emit_move_insn (low[0], constm1_rtx);
10784 emit_move_insn (high[0], low[0]);
10786 emit_move_insn (high[0], constm1_rtx);
10790 if (!rtx_equal_p (operands[0], operands[1]))
10791 emit_move_insn (operands[0], operands[1]);
10793 split_di (operands, 1, low, high);
10794 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10797 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10799 if (TARGET_CMOVE && scratch)
10801 ix86_expand_clear (scratch);
10802 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
10805 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10809 ix86_split_ashrdi (rtx *operands, rtx scratch)
10811 rtx low[2], high[2];
10814 if (GET_CODE (operands[2]) == CONST_INT)
10816 split_di (operands, 2, low, high);
10817 count = INTVAL (operands[2]) & 63;
10821 emit_move_insn (high[0], high[1]);
10822 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10823 emit_move_insn (low[0], high[0]);
10826 else if (count >= 32)
10828 emit_move_insn (low[0], high[1]);
10829 emit_move_insn (high[0], low[0]);
10830 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10832 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10836 if (!rtx_equal_p (operands[0], operands[1]))
10837 emit_move_insn (operands[0], operands[1]);
10838 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10839 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10844 if (!rtx_equal_p (operands[0], operands[1]))
10845 emit_move_insn (operands[0], operands[1]);
10847 split_di (operands, 1, low, high);
10849 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10850 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10852 if (TARGET_CMOVE && scratch)
10854 emit_move_insn (scratch, high[0]);
10855 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10856 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10860 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10865 ix86_split_lshrdi (rtx *operands, rtx scratch)
10867 rtx low[2], high[2];
10870 if (GET_CODE (operands[2]) == CONST_INT)
10872 split_di (operands, 2, low, high);
10873 count = INTVAL (operands[2]) & 63;
10877 emit_move_insn (low[0], high[1]);
10878 ix86_expand_clear (high[0]);
10881 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10885 if (!rtx_equal_p (operands[0], operands[1]))
10886 emit_move_insn (operands[0], operands[1]);
10887 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10888 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10893 if (!rtx_equal_p (operands[0], operands[1]))
10894 emit_move_insn (operands[0], operands[1]);
10896 split_di (operands, 1, low, high);
10898 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10899 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10901 /* Heh. By reversing the arguments, we can reuse this pattern. */
10902 if (TARGET_CMOVE && scratch)
10904 ix86_expand_clear (scratch);
10905 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10909 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10913 /* Helper function for the string operations below. Dest VARIABLE whether
10914 it is aligned to VALUE bytes. If true, jump to the label. */
10916 ix86_expand_aligntest (rtx variable, int value)
10918 rtx label = gen_label_rtx ();
10919 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10920 if (GET_MODE (variable) == DImode)
10921 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10923 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10924 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10929 /* Adjust COUNTER by the VALUE. */
10931 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10933 if (GET_MODE (countreg) == DImode)
10934 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10936 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10939 /* Zero extend possibly SImode EXP to Pmode register. */
10941 ix86_zero_extend_to_Pmode (rtx exp)
10944 if (GET_MODE (exp) == VOIDmode)
10945 return force_reg (Pmode, exp);
10946 if (GET_MODE (exp) == Pmode)
10947 return copy_to_mode_reg (Pmode, exp);
10948 r = gen_reg_rtx (Pmode);
10949 emit_insn (gen_zero_extendsidi2 (r, exp));
10953 /* Expand string move (memcpy) operation. Use i386 string operations when
10954 profitable. expand_clrmem contains similar code. */
10956 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
10958 rtx srcreg, destreg, countreg, srcexp, destexp;
10959 enum machine_mode counter_mode;
10960 HOST_WIDE_INT align = 0;
10961 unsigned HOST_WIDE_INT count = 0;
10963 if (GET_CODE (align_exp) == CONST_INT)
10964 align = INTVAL (align_exp);
10966 /* Can't use any of this if the user has appropriated esi or edi. */
10967 if (global_regs[4] || global_regs[5])
10970 /* This simple hack avoids all inlining code and simplifies code below. */
10971 if (!TARGET_ALIGN_STRINGOPS)
10974 if (GET_CODE (count_exp) == CONST_INT)
10976 count = INTVAL (count_exp);
10977 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10981 /* Figure out proper mode for counter. For 32bits it is always SImode,
10982 for 64bits use SImode when possible, otherwise DImode.
10983 Set count to number of bytes copied when known at compile time. */
10985 || GET_MODE (count_exp) == SImode
10986 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
10987 counter_mode = SImode;
10989 counter_mode = DImode;
10991 if (counter_mode != SImode && counter_mode != DImode)
10994 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10995 if (destreg != XEXP (dst, 0))
10996 dst = replace_equiv_address_nv (dst, destreg);
10997 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10998 if (srcreg != XEXP (src, 0))
10999 src = replace_equiv_address_nv (src, srcreg);
11001 /* When optimizing for size emit simple rep ; movsb instruction for
11002 counts not divisible by 4. */
11004 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11006 emit_insn (gen_cld ());
11007 countreg = ix86_zero_extend_to_Pmode (count_exp);
11008 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11009 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11010 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11014 /* For constant aligned (or small unaligned) copies use rep movsl
11015 followed by code copying the rest. For PentiumPro ensure 8 byte
11016 alignment to allow rep movsl acceleration. */
11018 else if (count != 0
11020 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11021 || optimize_size || count < (unsigned int) 64))
11023 unsigned HOST_WIDE_INT offset = 0;
11024 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11025 rtx srcmem, dstmem;
11027 emit_insn (gen_cld ());
11028 if (count & ~(size - 1))
11030 countreg = copy_to_mode_reg (counter_mode,
11031 GEN_INT ((count >> (size == 4 ? 2 : 3))
11032 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11033 countreg = ix86_zero_extend_to_Pmode (countreg);
11035 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11036 GEN_INT (size == 4 ? 2 : 3));
11037 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11038 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11040 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11041 countreg, destexp, srcexp));
11042 offset = count & ~(size - 1);
11044 if (size == 8 && (count & 0x04))
11046 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11048 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11050 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11055 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11057 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11059 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11064 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11066 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11068 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11071 /* The generic code based on the glibc implementation:
11072 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11073 allowing accelerated copying there)
11074 - copy the data using rep movsl
11075 - copy the rest. */
11080 rtx srcmem, dstmem;
11081 int desired_alignment = (TARGET_PENTIUMPRO
11082 && (count == 0 || count >= (unsigned int) 260)
11083 ? 8 : UNITS_PER_WORD);
11084 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11085 dst = change_address (dst, BLKmode, destreg);
11086 src = change_address (src, BLKmode, srcreg);
11088 /* In case we don't know anything about the alignment, default to
11089 library version, since it is usually equally fast and result in
11092 Also emit call when we know that the count is large and call overhead
11093 will not be important. */
11094 if (!TARGET_INLINE_ALL_STRINGOPS
11095 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11098 if (TARGET_SINGLE_STRINGOP)
11099 emit_insn (gen_cld ());
11101 countreg2 = gen_reg_rtx (Pmode);
11102 countreg = copy_to_mode_reg (counter_mode, count_exp);
11104 /* We don't use loops to align destination and to copy parts smaller
11105 than 4 bytes, because gcc is able to optimize such code better (in
11106 the case the destination or the count really is aligned, gcc is often
11107 able to predict the branches) and also it is friendlier to the
11108 hardware branch prediction.
11110 Using loops is beneficial for generic case, because we can
11111 handle small counts using the loops. Many CPUs (such as Athlon)
11112 have large REP prefix setup costs.
11114 This is quite costly. Maybe we can revisit this decision later or
11115 add some customizability to this code. */
11117 if (count == 0 && align < desired_alignment)
11119 label = gen_label_rtx ();
11120 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11121 LEU, 0, counter_mode, 1, label);
11125 rtx label = ix86_expand_aligntest (destreg, 1);
11126 srcmem = change_address (src, QImode, srcreg);
11127 dstmem = change_address (dst, QImode, destreg);
11128 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11129 ix86_adjust_counter (countreg, 1);
11130 emit_label (label);
11131 LABEL_NUSES (label) = 1;
11135 rtx label = ix86_expand_aligntest (destreg, 2);
11136 srcmem = change_address (src, HImode, srcreg);
11137 dstmem = change_address (dst, HImode, destreg);
11138 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11139 ix86_adjust_counter (countreg, 2);
11140 emit_label (label);
11141 LABEL_NUSES (label) = 1;
11143 if (align <= 4 && desired_alignment > 4)
11145 rtx label = ix86_expand_aligntest (destreg, 4);
11146 srcmem = change_address (src, SImode, srcreg);
11147 dstmem = change_address (dst, SImode, destreg);
11148 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11149 ix86_adjust_counter (countreg, 4);
11150 emit_label (label);
11151 LABEL_NUSES (label) = 1;
11154 if (label && desired_alignment > 4 && !TARGET_64BIT)
11156 emit_label (label);
11157 LABEL_NUSES (label) = 1;
11160 if (!TARGET_SINGLE_STRINGOP)
11161 emit_insn (gen_cld ());
11164 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11166 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11170 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11171 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11173 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11174 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11175 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11176 countreg2, destexp, srcexp));
11180 emit_label (label);
11181 LABEL_NUSES (label) = 1;
11183 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11185 srcmem = change_address (src, SImode, srcreg);
11186 dstmem = change_address (dst, SImode, destreg);
11187 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11189 if ((align <= 4 || count == 0) && TARGET_64BIT)
11191 rtx label = ix86_expand_aligntest (countreg, 4);
11192 srcmem = change_address (src, SImode, srcreg);
11193 dstmem = change_address (dst, SImode, destreg);
11194 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11195 emit_label (label);
11196 LABEL_NUSES (label) = 1;
11198 if (align > 2 && count != 0 && (count & 2))
11200 srcmem = change_address (src, HImode, srcreg);
11201 dstmem = change_address (dst, HImode, destreg);
11202 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11204 if (align <= 2 || count == 0)
11206 rtx label = ix86_expand_aligntest (countreg, 2);
11207 srcmem = change_address (src, HImode, srcreg);
11208 dstmem = change_address (dst, HImode, destreg);
11209 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11210 emit_label (label);
11211 LABEL_NUSES (label) = 1;
11213 if (align > 1 && count != 0 && (count & 1))
11215 srcmem = change_address (src, QImode, srcreg);
11216 dstmem = change_address (dst, QImode, destreg);
11217 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11219 if (align <= 1 || count == 0)
11221 rtx label = ix86_expand_aligntest (countreg, 1);
11222 srcmem = change_address (src, QImode, srcreg);
11223 dstmem = change_address (dst, QImode, destreg);
11224 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11225 emit_label (label);
11226 LABEL_NUSES (label) = 1;
11233 /* Expand string clear operation (bzero). Use i386 string operations when
11234 profitable. expand_movmem contains similar code. */
11236 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
11238 rtx destreg, zeroreg, countreg, destexp;
11239 enum machine_mode counter_mode;
11240 HOST_WIDE_INT align = 0;
11241 unsigned HOST_WIDE_INT count = 0;
11243 if (GET_CODE (align_exp) == CONST_INT)
11244 align = INTVAL (align_exp);
11246 /* Can't use any of this if the user has appropriated esi. */
11247 if (global_regs[4])
11250 /* This simple hack avoids all inlining code and simplifies code below. */
11251 if (!TARGET_ALIGN_STRINGOPS)
11254 if (GET_CODE (count_exp) == CONST_INT)
11256 count = INTVAL (count_exp);
11257 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11260 /* Figure out proper mode for counter. For 32bits it is always SImode,
11261 for 64bits use SImode when possible, otherwise DImode.
11262 Set count to number of bytes copied when known at compile time. */
11264 || GET_MODE (count_exp) == SImode
11265 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
11266 counter_mode = SImode;
11268 counter_mode = DImode;
11270 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11271 if (destreg != XEXP (dst, 0))
11272 dst = replace_equiv_address_nv (dst, destreg);
11275 /* When optimizing for size emit simple rep ; movsb instruction for
11276 counts not divisible by 4. The movl $N, %ecx; rep; stosb
11277 sequence is 7 bytes long, so if optimizing for size and count is
11278 small enough that some stosl, stosw and stosb instructions without
11279 rep are shorter, fall back into the next if. */
11281 if ((!optimize || optimize_size)
11284 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
11286 emit_insn (gen_cld ());
11288 countreg = ix86_zero_extend_to_Pmode (count_exp);
11289 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11290 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11291 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11293 else if (count != 0
11295 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11296 || optimize_size || count < (unsigned int) 64))
11298 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11299 unsigned HOST_WIDE_INT offset = 0;
11301 emit_insn (gen_cld ());
11303 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11304 if (count & ~(size - 1))
11306 unsigned HOST_WIDE_INT repcount;
11307 unsigned int max_nonrep;
11309 repcount = count >> (size == 4 ? 2 : 3);
11311 repcount &= 0x3fffffff;
11313 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
11314 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
11315 bytes. In both cases the latter seems to be faster for small
11317 max_nonrep = size == 4 ? 7 : 4;
11318 if (!optimize_size)
11321 case PROCESSOR_PENTIUM4:
11322 case PROCESSOR_NOCONA:
11329 if (repcount <= max_nonrep)
11330 while (repcount-- > 0)
11332 rtx mem = adjust_automodify_address_nv (dst,
11333 GET_MODE (zeroreg),
11335 emit_insn (gen_strset (destreg, mem, zeroreg));
11340 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
11341 countreg = ix86_zero_extend_to_Pmode (countreg);
11342 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11343 GEN_INT (size == 4 ? 2 : 3));
11344 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11345 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
11347 offset = count & ~(size - 1);
11350 if (size == 8 && (count & 0x04))
11352 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11354 emit_insn (gen_strset (destreg, mem,
11355 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11360 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11362 emit_insn (gen_strset (destreg, mem,
11363 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11368 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11370 emit_insn (gen_strset (destreg, mem,
11371 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11378 /* Compute desired alignment of the string operation. */
11379 int desired_alignment = (TARGET_PENTIUMPRO
11380 && (count == 0 || count >= (unsigned int) 260)
11381 ? 8 : UNITS_PER_WORD);
11383 /* In case we don't know anything about the alignment, default to
11384 library version, since it is usually equally fast and result in
11387 Also emit call when we know that the count is large and call overhead
11388 will not be important. */
11389 if (!TARGET_INLINE_ALL_STRINGOPS
11390 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11393 if (TARGET_SINGLE_STRINGOP)
11394 emit_insn (gen_cld ());
11396 countreg2 = gen_reg_rtx (Pmode);
11397 countreg = copy_to_mode_reg (counter_mode, count_exp);
11398 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11399 /* Get rid of MEM_OFFSET, it won't be accurate. */
11400 dst = change_address (dst, BLKmode, destreg);
11402 if (count == 0 && align < desired_alignment)
11404 label = gen_label_rtx ();
11405 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11406 LEU, 0, counter_mode, 1, label);
11410 rtx label = ix86_expand_aligntest (destreg, 1);
11411 emit_insn (gen_strset (destreg, dst,
11412 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11413 ix86_adjust_counter (countreg, 1);
11414 emit_label (label);
11415 LABEL_NUSES (label) = 1;
11419 rtx label = ix86_expand_aligntest (destreg, 2);
11420 emit_insn (gen_strset (destreg, dst,
11421 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11422 ix86_adjust_counter (countreg, 2);
11423 emit_label (label);
11424 LABEL_NUSES (label) = 1;
11426 if (align <= 4 && desired_alignment > 4)
11428 rtx label = ix86_expand_aligntest (destreg, 4);
11429 emit_insn (gen_strset (destreg, dst,
11431 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11433 ix86_adjust_counter (countreg, 4);
11434 emit_label (label);
11435 LABEL_NUSES (label) = 1;
11438 if (label && desired_alignment > 4 && !TARGET_64BIT)
11440 emit_label (label);
11441 LABEL_NUSES (label) = 1;
11445 if (!TARGET_SINGLE_STRINGOP)
11446 emit_insn (gen_cld ());
11449 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11451 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11455 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11456 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11458 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11459 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11463 emit_label (label);
11464 LABEL_NUSES (label) = 1;
11467 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11468 emit_insn (gen_strset (destreg, dst,
11469 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11470 if (TARGET_64BIT && (align <= 4 || count == 0))
11472 rtx label = ix86_expand_aligntest (countreg, 4);
11473 emit_insn (gen_strset (destreg, dst,
11474 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11475 emit_label (label);
11476 LABEL_NUSES (label) = 1;
11478 if (align > 2 && count != 0 && (count & 2))
11479 emit_insn (gen_strset (destreg, dst,
11480 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11481 if (align <= 2 || count == 0)
11483 rtx label = ix86_expand_aligntest (countreg, 2);
11484 emit_insn (gen_strset (destreg, dst,
11485 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11486 emit_label (label);
11487 LABEL_NUSES (label) = 1;
11489 if (align > 1 && count != 0 && (count & 1))
11490 emit_insn (gen_strset (destreg, dst,
11491 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11492 if (align <= 1 || count == 0)
11494 rtx label = ix86_expand_aligntest (countreg, 1);
11495 emit_insn (gen_strset (destreg, dst,
11496 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11497 emit_label (label);
11498 LABEL_NUSES (label) = 1;
11504 /* Expand strlen. */
11506 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11508 rtx addr, scratch1, scratch2, scratch3, scratch4;
11510 /* The generic case of strlen expander is long. Avoid it's
11511 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11513 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11514 && !TARGET_INLINE_ALL_STRINGOPS
11516 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11519 addr = force_reg (Pmode, XEXP (src, 0));
11520 scratch1 = gen_reg_rtx (Pmode);
11522 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11525 /* Well it seems that some optimizer does not combine a call like
11526 foo(strlen(bar), strlen(bar));
11527 when the move and the subtraction is done here. It does calculate
11528 the length just once when these instructions are done inside of
11529 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11530 often used and I use one fewer register for the lifetime of
11531 output_strlen_unroll() this is better. */
11533 emit_move_insn (out, addr);
11535 ix86_expand_strlensi_unroll_1 (out, src, align);
11537 /* strlensi_unroll_1 returns the address of the zero at the end of
11538 the string, like memchr(), so compute the length by subtracting
11539 the start address. */
11541 emit_insn (gen_subdi3 (out, out, addr));
11543 emit_insn (gen_subsi3 (out, out, addr));
11548 scratch2 = gen_reg_rtx (Pmode);
11549 scratch3 = gen_reg_rtx (Pmode);
11550 scratch4 = force_reg (Pmode, constm1_rtx);
11552 emit_move_insn (scratch3, addr);
11553 eoschar = force_reg (QImode, eoschar);
11555 emit_insn (gen_cld ());
11556 src = replace_equiv_address_nv (src, scratch3);
11558 /* If .md starts supporting :P, this can be done in .md. */
11559 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11560 scratch4), UNSPEC_SCAS);
11561 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
11564 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11565 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11569 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11570 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11576 /* Expand the appropriate insns for doing strlen if not just doing
11579 out = result, initialized with the start address
11580 align_rtx = alignment of the address.
11581 scratch = scratch register, initialized with the startaddress when
11582 not aligned, otherwise undefined
11584 This is just the body. It needs the initializations mentioned above and
11585 some address computing at the end. These things are done in i386.md. */
11588 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11592 rtx align_2_label = NULL_RTX;
11593 rtx align_3_label = NULL_RTX;
11594 rtx align_4_label = gen_label_rtx ();
11595 rtx end_0_label = gen_label_rtx ();
11597 rtx tmpreg = gen_reg_rtx (SImode);
11598 rtx scratch = gen_reg_rtx (SImode);
11602 if (GET_CODE (align_rtx) == CONST_INT)
11603 align = INTVAL (align_rtx);
11605 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11607 /* Is there a known alignment and is it less than 4? */
11610 rtx scratch1 = gen_reg_rtx (Pmode);
11611 emit_move_insn (scratch1, out);
11612 /* Is there a known alignment and is it not 2? */
11615 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11616 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11618 /* Leave just the 3 lower bits. */
11619 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11620 NULL_RTX, 0, OPTAB_WIDEN);
11622 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11623 Pmode, 1, align_4_label);
11624 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
11625 Pmode, 1, align_2_label);
11626 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
11627 Pmode, 1, align_3_label);
11631 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11632 check if is aligned to 4 - byte. */
11634 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
11635 NULL_RTX, 0, OPTAB_WIDEN);
11637 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11638 Pmode, 1, align_4_label);
11641 mem = change_address (src, QImode, out);
11643 /* Now compare the bytes. */
11645 /* Compare the first n unaligned byte on a byte per byte basis. */
11646 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11647 QImode, 1, end_0_label);
11649 /* Increment the address. */
11651 emit_insn (gen_adddi3 (out, out, const1_rtx));
11653 emit_insn (gen_addsi3 (out, out, const1_rtx));
11655 /* Not needed with an alignment of 2 */
11658 emit_label (align_2_label);
11660 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11664 emit_insn (gen_adddi3 (out, out, const1_rtx));
11666 emit_insn (gen_addsi3 (out, out, const1_rtx));
11668 emit_label (align_3_label);
11671 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11675 emit_insn (gen_adddi3 (out, out, const1_rtx));
11677 emit_insn (gen_addsi3 (out, out, const1_rtx));
11680 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11681 align this loop. It gives only huge programs, but does not help to
11683 emit_label (align_4_label);
11685 mem = change_address (src, SImode, out);
11686 emit_move_insn (scratch, mem);
11688 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11690 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11692 /* This formula yields a nonzero result iff one of the bytes is zero.
11693 This saves three branches inside loop and many cycles. */
11695 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11696 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11697 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11698 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11699 gen_int_mode (0x80808080, SImode)));
11700 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11705 rtx reg = gen_reg_rtx (SImode);
11706 rtx reg2 = gen_reg_rtx (Pmode);
11707 emit_move_insn (reg, tmpreg);
11708 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11710 /* If zero is not in the first two bytes, move two bytes forward. */
11711 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11712 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11713 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11714 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11715 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11718 /* Emit lea manually to avoid clobbering of flags. */
11719 emit_insn (gen_rtx_SET (SImode, reg2,
11720 gen_rtx_PLUS (Pmode, out, const2_rtx)));
11722 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11723 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11724 emit_insn (gen_rtx_SET (VOIDmode, out,
11725 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11732 rtx end_2_label = gen_label_rtx ();
11733 /* Is zero in the first two bytes? */
11735 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11736 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11737 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11738 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11739 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11741 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11742 JUMP_LABEL (tmp) = end_2_label;
11744 /* Not in the first two. Move two bytes forward. */
11745 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11747 emit_insn (gen_adddi3 (out, out, const2_rtx));
11749 emit_insn (gen_addsi3 (out, out, const2_rtx));
11751 emit_label (end_2_label);
11755 /* Avoid branch in fixing the byte. */
11756 tmpreg = gen_lowpart (QImode, tmpreg);
11757 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11758 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11760 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11762 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11764 emit_label (end_0_label);
11768 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11769 rtx callarg2 ATTRIBUTE_UNUSED,
11770 rtx pop, int sibcall)
11772 rtx use = NULL, call;
11774 if (pop == const0_rtx)
11776 if (TARGET_64BIT && pop)
11780 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11781 fnaddr = machopic_indirect_call_target (fnaddr);
11783 /* Static functions and indirect calls don't need the pic register. */
11784 if (! TARGET_64BIT && flag_pic
11785 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11786 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11787 use_reg (&use, pic_offset_table_rtx);
11789 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11791 rtx al = gen_rtx_REG (QImode, 0);
11792 emit_move_insn (al, callarg2);
11793 use_reg (&use, al);
11795 #endif /* TARGET_MACHO */
11797 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11799 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11800 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11802 if (sibcall && TARGET_64BIT
11803 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11806 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11807 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11808 emit_move_insn (fnaddr, addr);
11809 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11812 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11814 call = gen_rtx_SET (VOIDmode, retval, call);
11817 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11818 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11819 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11822 call = emit_call_insn (call);
11824 CALL_INSN_FUNCTION_USAGE (call) = use;
11828 /* Clear stack slot assignments remembered from previous functions.
11829 This is called from INIT_EXPANDERS once before RTL is emitted for each
11832 static struct machine_function *
11833 ix86_init_machine_status (void)
11835 struct machine_function *f;
11837 f = ggc_alloc_cleared (sizeof (struct machine_function));
11838 f->use_fast_prologue_epilogue_nregs = -1;
11843 /* Return a MEM corresponding to a stack slot with mode MODE.
11844 Allocate a new slot if necessary.
11846 The RTL for a function can have several slots available: N is
11847 which slot to use. */
11850 assign_386_stack_local (enum machine_mode mode, int n)
11852 struct stack_local_entry *s;
11854 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11857 for (s = ix86_stack_locals; s; s = s->next)
11858 if (s->mode == mode && s->n == n)
11861 s = (struct stack_local_entry *)
11862 ggc_alloc (sizeof (struct stack_local_entry));
11865 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11867 s->next = ix86_stack_locals;
11868 ix86_stack_locals = s;
11872 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11874 static GTY(()) rtx ix86_tls_symbol;
11876 ix86_tls_get_addr (void)
11879 if (!ix86_tls_symbol)
11881 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11882 (TARGET_GNU_TLS && !TARGET_64BIT)
11883 ? "___tls_get_addr"
11884 : "__tls_get_addr");
11887 return ix86_tls_symbol;
11890 /* Calculate the length of the memory address in the instruction
11891 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11894 memory_address_length (rtx addr)
11896 struct ix86_address parts;
11897 rtx base, index, disp;
11900 if (GET_CODE (addr) == PRE_DEC
11901 || GET_CODE (addr) == POST_INC
11902 || GET_CODE (addr) == PRE_MODIFY
11903 || GET_CODE (addr) == POST_MODIFY)
11906 if (! ix86_decompose_address (addr, &parts))
11909 if (parts.base && GET_CODE (parts.base) == SUBREG)
11910 parts.base = SUBREG_REG (parts.base);
11911 if (parts.index && GET_CODE (parts.index) == SUBREG)
11912 parts.index = SUBREG_REG (parts.index);
11915 index = parts.index;
11920 - esp as the base always wants an index,
11921 - ebp as the base always wants a displacement. */
11923 /* Register Indirect. */
11924 if (base && !index && !disp)
11926 /* esp (for its index) and ebp (for its displacement) need
11927 the two-byte modrm form. */
11928 if (addr == stack_pointer_rtx
11929 || addr == arg_pointer_rtx
11930 || addr == frame_pointer_rtx
11931 || addr == hard_frame_pointer_rtx)
11935 /* Direct Addressing. */
11936 else if (disp && !base && !index)
11941 /* Find the length of the displacement constant. */
11944 if (GET_CODE (disp) == CONST_INT
11945 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11951 /* ebp always wants a displacement. */
11952 else if (base == hard_frame_pointer_rtx)
11955 /* An index requires the two-byte modrm form.... */
11957 /* ...like esp, which always wants an index. */
11958 || base == stack_pointer_rtx
11959 || base == arg_pointer_rtx
11960 || base == frame_pointer_rtx)
11967 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11968 is set, expect that insn have 8bit immediate alternative. */
11970 ix86_attr_length_immediate_default (rtx insn, int shortform)
11974 extract_insn_cached (insn);
11975 for (i = recog_data.n_operands - 1; i >= 0; --i)
11976 if (CONSTANT_P (recog_data.operand[i]))
11981 && GET_CODE (recog_data.operand[i]) == CONST_INT
11982 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11986 switch (get_attr_mode (insn))
11997 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
12002 fatal_insn ("unknown insn mode", insn);
12008 /* Compute default value for "length_address" attribute. */
12010 ix86_attr_length_address_default (rtx insn)
12014 if (get_attr_type (insn) == TYPE_LEA)
12016 rtx set = PATTERN (insn);
12017 if (GET_CODE (set) == SET)
12019 else if (GET_CODE (set) == PARALLEL
12020 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12021 set = XVECEXP (set, 0, 0);
12024 #ifdef ENABLE_CHECKING
12030 return memory_address_length (SET_SRC (set));
12033 extract_insn_cached (insn);
12034 for (i = recog_data.n_operands - 1; i >= 0; --i)
12035 if (GET_CODE (recog_data.operand[i]) == MEM)
12037 return memory_address_length (XEXP (recog_data.operand[i], 0));
12043 /* Return the maximum number of instructions a cpu can issue. */
12046 ix86_issue_rate (void)
12050 case PROCESSOR_PENTIUM:
12054 case PROCESSOR_PENTIUMPRO:
12055 case PROCESSOR_PENTIUM4:
12056 case PROCESSOR_ATHLON:
12058 case PROCESSOR_NOCONA:
12066 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12067 by DEP_INSN and nothing set by DEP_INSN. */
12070 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12074 /* Simplify the test for uninteresting insns. */
12075 if (insn_type != TYPE_SETCC
12076 && insn_type != TYPE_ICMOV
12077 && insn_type != TYPE_FCMOV
12078 && insn_type != TYPE_IBR)
12081 if ((set = single_set (dep_insn)) != 0)
12083 set = SET_DEST (set);
12086 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12087 && XVECLEN (PATTERN (dep_insn), 0) == 2
12088 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12089 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12091 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12092 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12097 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12100 /* This test is true if the dependent insn reads the flags but
12101 not any other potentially set register. */
12102 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12105 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12111 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12112 address with operands set by DEP_INSN. */
12115 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12119 if (insn_type == TYPE_LEA
12122 addr = PATTERN (insn);
12123 if (GET_CODE (addr) == SET)
12125 else if (GET_CODE (addr) == PARALLEL
12126 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12127 addr = XVECEXP (addr, 0, 0);
12130 addr = SET_SRC (addr);
12135 extract_insn_cached (insn);
12136 for (i = recog_data.n_operands - 1; i >= 0; --i)
12137 if (GET_CODE (recog_data.operand[i]) == MEM)
12139 addr = XEXP (recog_data.operand[i], 0);
12146 return modified_in_p (addr, dep_insn);
12150 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12152 enum attr_type insn_type, dep_insn_type;
12153 enum attr_memory memory;
12155 int dep_insn_code_number;
12157 /* Anti and output dependencies have zero cost on all CPUs. */
12158 if (REG_NOTE_KIND (link) != 0)
12161 dep_insn_code_number = recog_memoized (dep_insn);
12163 /* If we can't recognize the insns, we can't really do anything. */
12164 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12167 insn_type = get_attr_type (insn);
12168 dep_insn_type = get_attr_type (dep_insn);
12172 case PROCESSOR_PENTIUM:
12173 /* Address Generation Interlock adds a cycle of latency. */
12174 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12177 /* ??? Compares pair with jump/setcc. */
12178 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12181 /* Floating point stores require value to be ready one cycle earlier. */
12182 if (insn_type == TYPE_FMOV
12183 && get_attr_memory (insn) == MEMORY_STORE
12184 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12188 case PROCESSOR_PENTIUMPRO:
12189 memory = get_attr_memory (insn);
12191 /* INT->FP conversion is expensive. */
12192 if (get_attr_fp_int_src (dep_insn))
12195 /* There is one cycle extra latency between an FP op and a store. */
12196 if (insn_type == TYPE_FMOV
12197 && (set = single_set (dep_insn)) != NULL_RTX
12198 && (set2 = single_set (insn)) != NULL_RTX
12199 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12200 && GET_CODE (SET_DEST (set2)) == MEM)
12203 /* Show ability of reorder buffer to hide latency of load by executing
12204 in parallel with previous instruction in case
12205 previous instruction is not needed to compute the address. */
12206 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12207 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12209 /* Claim moves to take one cycle, as core can issue one load
12210 at time and the next load can start cycle later. */
12211 if (dep_insn_type == TYPE_IMOV
12212 || dep_insn_type == TYPE_FMOV)
12220 memory = get_attr_memory (insn);
12222 /* The esp dependency is resolved before the instruction is really
12224 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12225 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12228 /* INT->FP conversion is expensive. */
12229 if (get_attr_fp_int_src (dep_insn))
12232 /* Show ability of reorder buffer to hide latency of load by executing
12233 in parallel with previous instruction in case
12234 previous instruction is not needed to compute the address. */
12235 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12236 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12238 /* Claim moves to take one cycle, as core can issue one load
12239 at time and the next load can start cycle later. */
12240 if (dep_insn_type == TYPE_IMOV
12241 || dep_insn_type == TYPE_FMOV)
12250 case PROCESSOR_ATHLON:
12252 memory = get_attr_memory (insn);
12254 /* Show ability of reorder buffer to hide latency of load by executing
12255 in parallel with previous instruction in case
12256 previous instruction is not needed to compute the address. */
12257 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12258 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12260 enum attr_unit unit = get_attr_unit (insn);
12263 /* Because of the difference between the length of integer and
12264 floating unit pipeline preparation stages, the memory operands
12265 for floating point are cheaper.
12267 ??? For Athlon it the difference is most probably 2. */
12268 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12271 loadcost = TARGET_ATHLON ? 2 : 0;
12273 if (cost >= loadcost)
12286 /* How many alternative schedules to try. This should be as wide as the
12287 scheduling freedom in the DFA, but no wider. Making this value too
12288 large results extra work for the scheduler. */
12291 ia32_multipass_dfa_lookahead (void)
12293 if (ix86_tune == PROCESSOR_PENTIUM)
12296 if (ix86_tune == PROCESSOR_PENTIUMPRO
12297 || ix86_tune == PROCESSOR_K6)
12305 /* Compute the alignment given to a constant that is being placed in memory.
12306 EXP is the constant and ALIGN is the alignment that the object would
12308 The value of this function is used instead of that alignment to align
12312 ix86_constant_alignment (tree exp, int align)
12314 if (TREE_CODE (exp) == REAL_CST)
12316 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12318 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12321 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12322 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12323 return BITS_PER_WORD;
12328 /* Compute the alignment for a static variable.
12329 TYPE is the data type, and ALIGN is the alignment that
12330 the object would ordinarily have. The value of this function is used
12331 instead of that alignment to align the object. */
12334 ix86_data_alignment (tree type, int align)
12336 if (AGGREGATE_TYPE_P (type)
12337 && TYPE_SIZE (type)
12338 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12339 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12340 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12343 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12344 to 16byte boundary. */
12347 if (AGGREGATE_TYPE_P (type)
12348 && TYPE_SIZE (type)
12349 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12350 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12351 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12355 if (TREE_CODE (type) == ARRAY_TYPE)
12357 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12359 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12362 else if (TREE_CODE (type) == COMPLEX_TYPE)
12365 if (TYPE_MODE (type) == DCmode && align < 64)
12367 if (TYPE_MODE (type) == XCmode && align < 128)
12370 else if ((TREE_CODE (type) == RECORD_TYPE
12371 || TREE_CODE (type) == UNION_TYPE
12372 || TREE_CODE (type) == QUAL_UNION_TYPE)
12373 && TYPE_FIELDS (type))
12375 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12377 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12380 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12381 || TREE_CODE (type) == INTEGER_TYPE)
12383 if (TYPE_MODE (type) == DFmode && align < 64)
12385 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12392 /* Compute the alignment for a local variable.
12393 TYPE is the data type, and ALIGN is the alignment that
12394 the object would ordinarily have. The value of this macro is used
12395 instead of that alignment to align the object. */
12398 ix86_local_alignment (tree type, int align)
12400 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12401 to 16byte boundary. */
12404 if (AGGREGATE_TYPE_P (type)
12405 && TYPE_SIZE (type)
12406 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12407 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12408 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12411 if (TREE_CODE (type) == ARRAY_TYPE)
12413 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12415 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12418 else if (TREE_CODE (type) == COMPLEX_TYPE)
12420 if (TYPE_MODE (type) == DCmode && align < 64)
12422 if (TYPE_MODE (type) == XCmode && align < 128)
12425 else if ((TREE_CODE (type) == RECORD_TYPE
12426 || TREE_CODE (type) == UNION_TYPE
12427 || TREE_CODE (type) == QUAL_UNION_TYPE)
12428 && TYPE_FIELDS (type))
12430 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12432 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12435 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12436 || TREE_CODE (type) == INTEGER_TYPE)
12439 if (TYPE_MODE (type) == DFmode && align < 64)
12441 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12447 /* Emit RTL insns to initialize the variable parts of a trampoline.
12448 FNADDR is an RTX for the address of the function's pure code.
12449 CXT is an RTX for the static chain value for the function. */
12451 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12455 /* Compute offset from the end of the jmp to the target function. */
12456 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12457 plus_constant (tramp, 10),
12458 NULL_RTX, 1, OPTAB_DIRECT);
12459 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12460 gen_int_mode (0xb9, QImode));
12461 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12462 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12463 gen_int_mode (0xe9, QImode));
12464 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12469 /* Try to load address using shorter movl instead of movabs.
12470 We may want to support movq for kernel mode, but kernel does not use
12471 trampolines at the moment. */
12472 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
12474 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12475 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12476 gen_int_mode (0xbb41, HImode));
12477 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12478 gen_lowpart (SImode, fnaddr));
12483 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12484 gen_int_mode (0xbb49, HImode));
12485 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12489 /* Load static chain using movabs to r10. */
12490 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12491 gen_int_mode (0xba49, HImode));
12492 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12495 /* Jump to the r11 */
12496 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12497 gen_int_mode (0xff49, HImode));
12498 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12499 gen_int_mode (0xe3, QImode));
12501 if (offset > TRAMPOLINE_SIZE)
12505 #ifdef ENABLE_EXECUTE_STACK
12506 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
12507 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12511 /* Codes for all the SSE/MMX builtins. */
12514 IX86_BUILTIN_ADDPS,
12515 IX86_BUILTIN_ADDSS,
12516 IX86_BUILTIN_DIVPS,
12517 IX86_BUILTIN_DIVSS,
12518 IX86_BUILTIN_MULPS,
12519 IX86_BUILTIN_MULSS,
12520 IX86_BUILTIN_SUBPS,
12521 IX86_BUILTIN_SUBSS,
12523 IX86_BUILTIN_CMPEQPS,
12524 IX86_BUILTIN_CMPLTPS,
12525 IX86_BUILTIN_CMPLEPS,
12526 IX86_BUILTIN_CMPGTPS,
12527 IX86_BUILTIN_CMPGEPS,
12528 IX86_BUILTIN_CMPNEQPS,
12529 IX86_BUILTIN_CMPNLTPS,
12530 IX86_BUILTIN_CMPNLEPS,
12531 IX86_BUILTIN_CMPNGTPS,
12532 IX86_BUILTIN_CMPNGEPS,
12533 IX86_BUILTIN_CMPORDPS,
12534 IX86_BUILTIN_CMPUNORDPS,
12535 IX86_BUILTIN_CMPNEPS,
12536 IX86_BUILTIN_CMPEQSS,
12537 IX86_BUILTIN_CMPLTSS,
12538 IX86_BUILTIN_CMPLESS,
12539 IX86_BUILTIN_CMPNEQSS,
12540 IX86_BUILTIN_CMPNLTSS,
12541 IX86_BUILTIN_CMPNLESS,
12542 IX86_BUILTIN_CMPNGTSS,
12543 IX86_BUILTIN_CMPNGESS,
12544 IX86_BUILTIN_CMPORDSS,
12545 IX86_BUILTIN_CMPUNORDSS,
12546 IX86_BUILTIN_CMPNESS,
12548 IX86_BUILTIN_COMIEQSS,
12549 IX86_BUILTIN_COMILTSS,
12550 IX86_BUILTIN_COMILESS,
12551 IX86_BUILTIN_COMIGTSS,
12552 IX86_BUILTIN_COMIGESS,
12553 IX86_BUILTIN_COMINEQSS,
12554 IX86_BUILTIN_UCOMIEQSS,
12555 IX86_BUILTIN_UCOMILTSS,
12556 IX86_BUILTIN_UCOMILESS,
12557 IX86_BUILTIN_UCOMIGTSS,
12558 IX86_BUILTIN_UCOMIGESS,
12559 IX86_BUILTIN_UCOMINEQSS,
12561 IX86_BUILTIN_CVTPI2PS,
12562 IX86_BUILTIN_CVTPS2PI,
12563 IX86_BUILTIN_CVTSI2SS,
12564 IX86_BUILTIN_CVTSI642SS,
12565 IX86_BUILTIN_CVTSS2SI,
12566 IX86_BUILTIN_CVTSS2SI64,
12567 IX86_BUILTIN_CVTTPS2PI,
12568 IX86_BUILTIN_CVTTSS2SI,
12569 IX86_BUILTIN_CVTTSS2SI64,
12571 IX86_BUILTIN_MAXPS,
12572 IX86_BUILTIN_MAXSS,
12573 IX86_BUILTIN_MINPS,
12574 IX86_BUILTIN_MINSS,
12576 IX86_BUILTIN_LOADUPS,
12577 IX86_BUILTIN_STOREUPS,
12578 IX86_BUILTIN_MOVSS,
12580 IX86_BUILTIN_MOVHLPS,
12581 IX86_BUILTIN_MOVLHPS,
12582 IX86_BUILTIN_LOADHPS,
12583 IX86_BUILTIN_LOADLPS,
12584 IX86_BUILTIN_STOREHPS,
12585 IX86_BUILTIN_STORELPS,
12587 IX86_BUILTIN_MASKMOVQ,
12588 IX86_BUILTIN_MOVMSKPS,
12589 IX86_BUILTIN_PMOVMSKB,
12591 IX86_BUILTIN_MOVNTPS,
12592 IX86_BUILTIN_MOVNTQ,
12594 IX86_BUILTIN_LOADDQU,
12595 IX86_BUILTIN_STOREDQU,
12597 IX86_BUILTIN_PACKSSWB,
12598 IX86_BUILTIN_PACKSSDW,
12599 IX86_BUILTIN_PACKUSWB,
12601 IX86_BUILTIN_PADDB,
12602 IX86_BUILTIN_PADDW,
12603 IX86_BUILTIN_PADDD,
12604 IX86_BUILTIN_PADDQ,
12605 IX86_BUILTIN_PADDSB,
12606 IX86_BUILTIN_PADDSW,
12607 IX86_BUILTIN_PADDUSB,
12608 IX86_BUILTIN_PADDUSW,
12609 IX86_BUILTIN_PSUBB,
12610 IX86_BUILTIN_PSUBW,
12611 IX86_BUILTIN_PSUBD,
12612 IX86_BUILTIN_PSUBQ,
12613 IX86_BUILTIN_PSUBSB,
12614 IX86_BUILTIN_PSUBSW,
12615 IX86_BUILTIN_PSUBUSB,
12616 IX86_BUILTIN_PSUBUSW,
12619 IX86_BUILTIN_PANDN,
12623 IX86_BUILTIN_PAVGB,
12624 IX86_BUILTIN_PAVGW,
12626 IX86_BUILTIN_PCMPEQB,
12627 IX86_BUILTIN_PCMPEQW,
12628 IX86_BUILTIN_PCMPEQD,
12629 IX86_BUILTIN_PCMPGTB,
12630 IX86_BUILTIN_PCMPGTW,
12631 IX86_BUILTIN_PCMPGTD,
12633 IX86_BUILTIN_PMADDWD,
12635 IX86_BUILTIN_PMAXSW,
12636 IX86_BUILTIN_PMAXUB,
12637 IX86_BUILTIN_PMINSW,
12638 IX86_BUILTIN_PMINUB,
12640 IX86_BUILTIN_PMULHUW,
12641 IX86_BUILTIN_PMULHW,
12642 IX86_BUILTIN_PMULLW,
12644 IX86_BUILTIN_PSADBW,
12645 IX86_BUILTIN_PSHUFW,
12647 IX86_BUILTIN_PSLLW,
12648 IX86_BUILTIN_PSLLD,
12649 IX86_BUILTIN_PSLLQ,
12650 IX86_BUILTIN_PSRAW,
12651 IX86_BUILTIN_PSRAD,
12652 IX86_BUILTIN_PSRLW,
12653 IX86_BUILTIN_PSRLD,
12654 IX86_BUILTIN_PSRLQ,
12655 IX86_BUILTIN_PSLLWI,
12656 IX86_BUILTIN_PSLLDI,
12657 IX86_BUILTIN_PSLLQI,
12658 IX86_BUILTIN_PSRAWI,
12659 IX86_BUILTIN_PSRADI,
12660 IX86_BUILTIN_PSRLWI,
12661 IX86_BUILTIN_PSRLDI,
12662 IX86_BUILTIN_PSRLQI,
12664 IX86_BUILTIN_PUNPCKHBW,
12665 IX86_BUILTIN_PUNPCKHWD,
12666 IX86_BUILTIN_PUNPCKHDQ,
12667 IX86_BUILTIN_PUNPCKLBW,
12668 IX86_BUILTIN_PUNPCKLWD,
12669 IX86_BUILTIN_PUNPCKLDQ,
12671 IX86_BUILTIN_SHUFPS,
12673 IX86_BUILTIN_RCPPS,
12674 IX86_BUILTIN_RCPSS,
12675 IX86_BUILTIN_RSQRTPS,
12676 IX86_BUILTIN_RSQRTSS,
12677 IX86_BUILTIN_SQRTPS,
12678 IX86_BUILTIN_SQRTSS,
12680 IX86_BUILTIN_UNPCKHPS,
12681 IX86_BUILTIN_UNPCKLPS,
12683 IX86_BUILTIN_ANDPS,
12684 IX86_BUILTIN_ANDNPS,
12686 IX86_BUILTIN_XORPS,
12689 IX86_BUILTIN_LDMXCSR,
12690 IX86_BUILTIN_STMXCSR,
12691 IX86_BUILTIN_SFENCE,
12693 /* 3DNow! Original */
12694 IX86_BUILTIN_FEMMS,
12695 IX86_BUILTIN_PAVGUSB,
12696 IX86_BUILTIN_PF2ID,
12697 IX86_BUILTIN_PFACC,
12698 IX86_BUILTIN_PFADD,
12699 IX86_BUILTIN_PFCMPEQ,
12700 IX86_BUILTIN_PFCMPGE,
12701 IX86_BUILTIN_PFCMPGT,
12702 IX86_BUILTIN_PFMAX,
12703 IX86_BUILTIN_PFMIN,
12704 IX86_BUILTIN_PFMUL,
12705 IX86_BUILTIN_PFRCP,
12706 IX86_BUILTIN_PFRCPIT1,
12707 IX86_BUILTIN_PFRCPIT2,
12708 IX86_BUILTIN_PFRSQIT1,
12709 IX86_BUILTIN_PFRSQRT,
12710 IX86_BUILTIN_PFSUB,
12711 IX86_BUILTIN_PFSUBR,
12712 IX86_BUILTIN_PI2FD,
12713 IX86_BUILTIN_PMULHRW,
12715 /* 3DNow! Athlon Extensions */
12716 IX86_BUILTIN_PF2IW,
12717 IX86_BUILTIN_PFNACC,
12718 IX86_BUILTIN_PFPNACC,
12719 IX86_BUILTIN_PI2FW,
12720 IX86_BUILTIN_PSWAPDSI,
12721 IX86_BUILTIN_PSWAPDSF,
12724 IX86_BUILTIN_ADDPD,
12725 IX86_BUILTIN_ADDSD,
12726 IX86_BUILTIN_DIVPD,
12727 IX86_BUILTIN_DIVSD,
12728 IX86_BUILTIN_MULPD,
12729 IX86_BUILTIN_MULSD,
12730 IX86_BUILTIN_SUBPD,
12731 IX86_BUILTIN_SUBSD,
12733 IX86_BUILTIN_CMPEQPD,
12734 IX86_BUILTIN_CMPLTPD,
12735 IX86_BUILTIN_CMPLEPD,
12736 IX86_BUILTIN_CMPGTPD,
12737 IX86_BUILTIN_CMPGEPD,
12738 IX86_BUILTIN_CMPNEQPD,
12739 IX86_BUILTIN_CMPNLTPD,
12740 IX86_BUILTIN_CMPNLEPD,
12741 IX86_BUILTIN_CMPNGTPD,
12742 IX86_BUILTIN_CMPNGEPD,
12743 IX86_BUILTIN_CMPORDPD,
12744 IX86_BUILTIN_CMPUNORDPD,
12745 IX86_BUILTIN_CMPNEPD,
12746 IX86_BUILTIN_CMPEQSD,
12747 IX86_BUILTIN_CMPLTSD,
12748 IX86_BUILTIN_CMPLESD,
12749 IX86_BUILTIN_CMPNEQSD,
12750 IX86_BUILTIN_CMPNLTSD,
12751 IX86_BUILTIN_CMPNLESD,
12752 IX86_BUILTIN_CMPORDSD,
12753 IX86_BUILTIN_CMPUNORDSD,
12754 IX86_BUILTIN_CMPNESD,
12756 IX86_BUILTIN_COMIEQSD,
12757 IX86_BUILTIN_COMILTSD,
12758 IX86_BUILTIN_COMILESD,
12759 IX86_BUILTIN_COMIGTSD,
12760 IX86_BUILTIN_COMIGESD,
12761 IX86_BUILTIN_COMINEQSD,
12762 IX86_BUILTIN_UCOMIEQSD,
12763 IX86_BUILTIN_UCOMILTSD,
12764 IX86_BUILTIN_UCOMILESD,
12765 IX86_BUILTIN_UCOMIGTSD,
12766 IX86_BUILTIN_UCOMIGESD,
12767 IX86_BUILTIN_UCOMINEQSD,
12769 IX86_BUILTIN_MAXPD,
12770 IX86_BUILTIN_MAXSD,
12771 IX86_BUILTIN_MINPD,
12772 IX86_BUILTIN_MINSD,
12774 IX86_BUILTIN_ANDPD,
12775 IX86_BUILTIN_ANDNPD,
12777 IX86_BUILTIN_XORPD,
12779 IX86_BUILTIN_SQRTPD,
12780 IX86_BUILTIN_SQRTSD,
12782 IX86_BUILTIN_UNPCKHPD,
12783 IX86_BUILTIN_UNPCKLPD,
12785 IX86_BUILTIN_SHUFPD,
12787 IX86_BUILTIN_LOADUPD,
12788 IX86_BUILTIN_STOREUPD,
12789 IX86_BUILTIN_MOVSD,
12791 IX86_BUILTIN_LOADHPD,
12792 IX86_BUILTIN_LOADLPD,
12794 IX86_BUILTIN_CVTDQ2PD,
12795 IX86_BUILTIN_CVTDQ2PS,
12797 IX86_BUILTIN_CVTPD2DQ,
12798 IX86_BUILTIN_CVTPD2PI,
12799 IX86_BUILTIN_CVTPD2PS,
12800 IX86_BUILTIN_CVTTPD2DQ,
12801 IX86_BUILTIN_CVTTPD2PI,
12803 IX86_BUILTIN_CVTPI2PD,
12804 IX86_BUILTIN_CVTSI2SD,
12805 IX86_BUILTIN_CVTSI642SD,
12807 IX86_BUILTIN_CVTSD2SI,
12808 IX86_BUILTIN_CVTSD2SI64,
12809 IX86_BUILTIN_CVTSD2SS,
12810 IX86_BUILTIN_CVTSS2SD,
12811 IX86_BUILTIN_CVTTSD2SI,
12812 IX86_BUILTIN_CVTTSD2SI64,
12814 IX86_BUILTIN_CVTPS2DQ,
12815 IX86_BUILTIN_CVTPS2PD,
12816 IX86_BUILTIN_CVTTPS2DQ,
12818 IX86_BUILTIN_MOVNTI,
12819 IX86_BUILTIN_MOVNTPD,
12820 IX86_BUILTIN_MOVNTDQ,
12823 IX86_BUILTIN_MASKMOVDQU,
12824 IX86_BUILTIN_MOVMSKPD,
12825 IX86_BUILTIN_PMOVMSKB128,
12827 IX86_BUILTIN_PACKSSWB128,
12828 IX86_BUILTIN_PACKSSDW128,
12829 IX86_BUILTIN_PACKUSWB128,
12831 IX86_BUILTIN_PADDB128,
12832 IX86_BUILTIN_PADDW128,
12833 IX86_BUILTIN_PADDD128,
12834 IX86_BUILTIN_PADDQ128,
12835 IX86_BUILTIN_PADDSB128,
12836 IX86_BUILTIN_PADDSW128,
12837 IX86_BUILTIN_PADDUSB128,
12838 IX86_BUILTIN_PADDUSW128,
12839 IX86_BUILTIN_PSUBB128,
12840 IX86_BUILTIN_PSUBW128,
12841 IX86_BUILTIN_PSUBD128,
12842 IX86_BUILTIN_PSUBQ128,
12843 IX86_BUILTIN_PSUBSB128,
12844 IX86_BUILTIN_PSUBSW128,
12845 IX86_BUILTIN_PSUBUSB128,
12846 IX86_BUILTIN_PSUBUSW128,
12848 IX86_BUILTIN_PAND128,
12849 IX86_BUILTIN_PANDN128,
12850 IX86_BUILTIN_POR128,
12851 IX86_BUILTIN_PXOR128,
12853 IX86_BUILTIN_PAVGB128,
12854 IX86_BUILTIN_PAVGW128,
12856 IX86_BUILTIN_PCMPEQB128,
12857 IX86_BUILTIN_PCMPEQW128,
12858 IX86_BUILTIN_PCMPEQD128,
12859 IX86_BUILTIN_PCMPGTB128,
12860 IX86_BUILTIN_PCMPGTW128,
12861 IX86_BUILTIN_PCMPGTD128,
12863 IX86_BUILTIN_PMADDWD128,
12865 IX86_BUILTIN_PMAXSW128,
12866 IX86_BUILTIN_PMAXUB128,
12867 IX86_BUILTIN_PMINSW128,
12868 IX86_BUILTIN_PMINUB128,
12870 IX86_BUILTIN_PMULUDQ,
12871 IX86_BUILTIN_PMULUDQ128,
12872 IX86_BUILTIN_PMULHUW128,
12873 IX86_BUILTIN_PMULHW128,
12874 IX86_BUILTIN_PMULLW128,
12876 IX86_BUILTIN_PSADBW128,
12877 IX86_BUILTIN_PSHUFHW,
12878 IX86_BUILTIN_PSHUFLW,
12879 IX86_BUILTIN_PSHUFD,
12881 IX86_BUILTIN_PSLLW128,
12882 IX86_BUILTIN_PSLLD128,
12883 IX86_BUILTIN_PSLLQ128,
12884 IX86_BUILTIN_PSRAW128,
12885 IX86_BUILTIN_PSRAD128,
12886 IX86_BUILTIN_PSRLW128,
12887 IX86_BUILTIN_PSRLD128,
12888 IX86_BUILTIN_PSRLQ128,
12889 IX86_BUILTIN_PSLLDQI128,
12890 IX86_BUILTIN_PSLLWI128,
12891 IX86_BUILTIN_PSLLDI128,
12892 IX86_BUILTIN_PSLLQI128,
12893 IX86_BUILTIN_PSRAWI128,
12894 IX86_BUILTIN_PSRADI128,
12895 IX86_BUILTIN_PSRLDQI128,
12896 IX86_BUILTIN_PSRLWI128,
12897 IX86_BUILTIN_PSRLDI128,
12898 IX86_BUILTIN_PSRLQI128,
12900 IX86_BUILTIN_PUNPCKHBW128,
12901 IX86_BUILTIN_PUNPCKHWD128,
12902 IX86_BUILTIN_PUNPCKHDQ128,
12903 IX86_BUILTIN_PUNPCKHQDQ128,
12904 IX86_BUILTIN_PUNPCKLBW128,
12905 IX86_BUILTIN_PUNPCKLWD128,
12906 IX86_BUILTIN_PUNPCKLDQ128,
12907 IX86_BUILTIN_PUNPCKLQDQ128,
12909 IX86_BUILTIN_CLFLUSH,
12910 IX86_BUILTIN_MFENCE,
12911 IX86_BUILTIN_LFENCE,
12913 /* Prescott New Instructions. */
12914 IX86_BUILTIN_ADDSUBPS,
12915 IX86_BUILTIN_HADDPS,
12916 IX86_BUILTIN_HSUBPS,
12917 IX86_BUILTIN_MOVSHDUP,
12918 IX86_BUILTIN_MOVSLDUP,
12919 IX86_BUILTIN_ADDSUBPD,
12920 IX86_BUILTIN_HADDPD,
12921 IX86_BUILTIN_HSUBPD,
12922 IX86_BUILTIN_LDDQU,
12924 IX86_BUILTIN_MONITOR,
12925 IX86_BUILTIN_MWAIT,
12927 IX86_BUILTIN_VEC_INIT_V2SI,
12928 IX86_BUILTIN_VEC_INIT_V4HI,
12929 IX86_BUILTIN_VEC_INIT_V8QI,
12930 IX86_BUILTIN_VEC_EXT_V2DF,
12931 IX86_BUILTIN_VEC_EXT_V2DI,
12932 IX86_BUILTIN_VEC_EXT_V4SF,
12933 IX86_BUILTIN_VEC_EXT_V4SI,
12934 IX86_BUILTIN_VEC_EXT_V8HI,
12935 IX86_BUILTIN_VEC_EXT_V2SI,
12936 IX86_BUILTIN_VEC_EXT_V4HI,
12937 IX86_BUILTIN_VEC_SET_V8HI,
12938 IX86_BUILTIN_VEC_SET_V4HI,
12943 #define def_builtin(MASK, NAME, TYPE, CODE) \
12945 if ((MASK) & target_flags \
12946 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12947 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12948 NULL, NULL_TREE); \
12951 /* Bits for builtin_description.flag. */
12953 /* Set when we don't support the comparison natively, and should
12954 swap_comparison in order to support it. */
12955 #define BUILTIN_DESC_SWAP_OPERANDS 1
12957 struct builtin_description
12959 const unsigned int mask;
12960 const enum insn_code icode;
12961 const char *const name;
12962 const enum ix86_builtins code;
12963 const enum rtx_code comparison;
12964 const unsigned int flag;
12967 static const struct builtin_description bdesc_comi[] =
12969 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12970 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12971 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12972 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12973 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12974 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12975 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12976 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12977 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12978 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12979 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12980 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12981 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12982 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12983 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12984 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12985 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12986 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12987 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12988 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12989 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12990 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12991 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12992 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12995 static const struct builtin_description bdesc_2arg[] =
12998 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12999 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
13000 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
13001 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
13002 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
13003 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
13004 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
13005 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
13007 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
13008 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
13009 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
13010 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
13011 BUILTIN_DESC_SWAP_OPERANDS },
13012 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
13013 BUILTIN_DESC_SWAP_OPERANDS },
13014 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
13015 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
13016 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
13017 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
13018 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
13019 BUILTIN_DESC_SWAP_OPERANDS },
13020 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
13021 BUILTIN_DESC_SWAP_OPERANDS },
13022 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
13023 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
13024 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
13025 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
13026 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
13027 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
13028 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
13029 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
13030 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
13031 BUILTIN_DESC_SWAP_OPERANDS },
13032 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
13033 BUILTIN_DESC_SWAP_OPERANDS },
13034 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
13036 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
13037 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
13038 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
13039 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
13041 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
13042 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
13043 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
13044 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
13046 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
13047 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
13048 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
13049 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
13050 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
13053 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
13054 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
13055 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
13056 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
13057 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
13058 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
13059 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
13060 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
13062 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
13063 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
13064 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
13065 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
13066 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
13067 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
13068 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
13069 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
13071 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
13072 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
13073 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
13075 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
13076 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
13077 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
13078 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
13080 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
13081 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
13083 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
13084 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
13085 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
13086 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
13087 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
13088 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
13090 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
13091 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
13092 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
13093 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
13095 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
13096 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
13097 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
13098 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
13099 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
13100 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
13103 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
13104 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
13105 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
13107 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
13108 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
13109 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
13111 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
13112 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
13113 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
13114 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
13115 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
13116 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
13118 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
13119 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
13120 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
13121 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
13122 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
13123 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
13125 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
13126 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
13127 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
13128 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
13130 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
13131 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
13134 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
13135 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
13136 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
13137 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
13138 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
13139 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
13140 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
13141 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
13143 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
13144 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
13145 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
13146 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
13147 BUILTIN_DESC_SWAP_OPERANDS },
13148 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
13149 BUILTIN_DESC_SWAP_OPERANDS },
13150 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
13151 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
13152 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
13153 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
13154 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
13155 BUILTIN_DESC_SWAP_OPERANDS },
13156 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
13157 BUILTIN_DESC_SWAP_OPERANDS },
13158 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
13159 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
13160 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
13161 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
13162 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
13163 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
13164 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
13165 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
13166 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
13168 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
13169 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
13170 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
13171 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
13173 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
13174 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
13175 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
13176 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
13178 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
13179 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
13180 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
13183 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
13184 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
13185 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
13186 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
13187 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
13188 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
13189 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
13190 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
13192 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
13193 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
13194 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
13195 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
13196 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
13197 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
13198 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
13199 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
13201 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
13202 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
13204 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
13205 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
13206 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
13207 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
13209 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
13210 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
13212 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
13213 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
13214 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
13215 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
13216 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
13217 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
13219 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
13220 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
13221 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
13222 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
13224 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
13225 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
13226 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
13227 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
13228 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
13229 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
13230 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
13231 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
13233 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
13234 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
13235 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
13237 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
13238 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
13240 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
13241 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
13243 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13244 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13245 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13247 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13248 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13249 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13251 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13252 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13254 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13256 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
13257 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
13258 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13259 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
13262 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
13263 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
13264 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
13265 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
13266 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
13267 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
13270 static const struct builtin_description bdesc_1arg[] =
13272 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13273 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13275 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13276 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13277 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13279 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13280 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13281 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13282 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13283 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13284 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13286 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13287 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13289 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13291 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13292 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13294 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13295 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13296 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13297 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13298 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13300 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13302 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13303 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13304 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13305 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13307 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13308 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13309 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13312 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13313 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
13317 ix86_init_builtins (void)
13320 ix86_init_mmx_sse_builtins ();
13323 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13324 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13327 ix86_init_mmx_sse_builtins (void)
13329 const struct builtin_description * d;
13332 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
13333 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
13334 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
13335 tree V2DI_type_node
13336 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
13337 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
13338 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
13339 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
13340 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
13341 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
13342 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
13344 tree pchar_type_node = build_pointer_type (char_type_node);
13345 tree pcchar_type_node = build_pointer_type (
13346 build_type_variant (char_type_node, 1, 0));
13347 tree pfloat_type_node = build_pointer_type (float_type_node);
13348 tree pcfloat_type_node = build_pointer_type (
13349 build_type_variant (float_type_node, 1, 0));
13350 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13351 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13352 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13355 tree int_ftype_v4sf_v4sf
13356 = build_function_type_list (integer_type_node,
13357 V4SF_type_node, V4SF_type_node, NULL_TREE);
13358 tree v4si_ftype_v4sf_v4sf
13359 = build_function_type_list (V4SI_type_node,
13360 V4SF_type_node, V4SF_type_node, NULL_TREE);
13361 /* MMX/SSE/integer conversions. */
13362 tree int_ftype_v4sf
13363 = build_function_type_list (integer_type_node,
13364 V4SF_type_node, NULL_TREE);
13365 tree int64_ftype_v4sf
13366 = build_function_type_list (long_long_integer_type_node,
13367 V4SF_type_node, NULL_TREE);
13368 tree int_ftype_v8qi
13369 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13370 tree v4sf_ftype_v4sf_int
13371 = build_function_type_list (V4SF_type_node,
13372 V4SF_type_node, integer_type_node, NULL_TREE);
13373 tree v4sf_ftype_v4sf_int64
13374 = build_function_type_list (V4SF_type_node,
13375 V4SF_type_node, long_long_integer_type_node,
13377 tree v4sf_ftype_v4sf_v2si
13378 = build_function_type_list (V4SF_type_node,
13379 V4SF_type_node, V2SI_type_node, NULL_TREE);
13381 /* Miscellaneous. */
13382 tree v8qi_ftype_v4hi_v4hi
13383 = build_function_type_list (V8QI_type_node,
13384 V4HI_type_node, V4HI_type_node, NULL_TREE);
13385 tree v4hi_ftype_v2si_v2si
13386 = build_function_type_list (V4HI_type_node,
13387 V2SI_type_node, V2SI_type_node, NULL_TREE);
13388 tree v4sf_ftype_v4sf_v4sf_int
13389 = build_function_type_list (V4SF_type_node,
13390 V4SF_type_node, V4SF_type_node,
13391 integer_type_node, NULL_TREE);
13392 tree v2si_ftype_v4hi_v4hi
13393 = build_function_type_list (V2SI_type_node,
13394 V4HI_type_node, V4HI_type_node, NULL_TREE);
13395 tree v4hi_ftype_v4hi_int
13396 = build_function_type_list (V4HI_type_node,
13397 V4HI_type_node, integer_type_node, NULL_TREE);
13398 tree v4hi_ftype_v4hi_di
13399 = build_function_type_list (V4HI_type_node,
13400 V4HI_type_node, long_long_unsigned_type_node,
13402 tree v2si_ftype_v2si_di
13403 = build_function_type_list (V2SI_type_node,
13404 V2SI_type_node, long_long_unsigned_type_node,
13406 tree void_ftype_void
13407 = build_function_type (void_type_node, void_list_node);
13408 tree void_ftype_unsigned
13409 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13410 tree void_ftype_unsigned_unsigned
13411 = build_function_type_list (void_type_node, unsigned_type_node,
13412 unsigned_type_node, NULL_TREE);
13413 tree void_ftype_pcvoid_unsigned_unsigned
13414 = build_function_type_list (void_type_node, const_ptr_type_node,
13415 unsigned_type_node, unsigned_type_node,
13417 tree unsigned_ftype_void
13418 = build_function_type (unsigned_type_node, void_list_node);
13419 tree v2si_ftype_v4sf
13420 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13421 /* Loads/stores. */
13422 tree void_ftype_v8qi_v8qi_pchar
13423 = build_function_type_list (void_type_node,
13424 V8QI_type_node, V8QI_type_node,
13425 pchar_type_node, NULL_TREE);
13426 tree v4sf_ftype_pcfloat
13427 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13428 /* @@@ the type is bogus */
13429 tree v4sf_ftype_v4sf_pv2si
13430 = build_function_type_list (V4SF_type_node,
13431 V4SF_type_node, pv2si_type_node, NULL_TREE);
13432 tree void_ftype_pv2si_v4sf
13433 = build_function_type_list (void_type_node,
13434 pv2si_type_node, V4SF_type_node, NULL_TREE);
13435 tree void_ftype_pfloat_v4sf
13436 = build_function_type_list (void_type_node,
13437 pfloat_type_node, V4SF_type_node, NULL_TREE);
13438 tree void_ftype_pdi_di
13439 = build_function_type_list (void_type_node,
13440 pdi_type_node, long_long_unsigned_type_node,
13442 tree void_ftype_pv2di_v2di
13443 = build_function_type_list (void_type_node,
13444 pv2di_type_node, V2DI_type_node, NULL_TREE);
13445 /* Normal vector unops. */
13446 tree v4sf_ftype_v4sf
13447 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13449 /* Normal vector binops. */
13450 tree v4sf_ftype_v4sf_v4sf
13451 = build_function_type_list (V4SF_type_node,
13452 V4SF_type_node, V4SF_type_node, NULL_TREE);
13453 tree v8qi_ftype_v8qi_v8qi
13454 = build_function_type_list (V8QI_type_node,
13455 V8QI_type_node, V8QI_type_node, NULL_TREE);
13456 tree v4hi_ftype_v4hi_v4hi
13457 = build_function_type_list (V4HI_type_node,
13458 V4HI_type_node, V4HI_type_node, NULL_TREE);
13459 tree v2si_ftype_v2si_v2si
13460 = build_function_type_list (V2SI_type_node,
13461 V2SI_type_node, V2SI_type_node, NULL_TREE);
13462 tree di_ftype_di_di
13463 = build_function_type_list (long_long_unsigned_type_node,
13464 long_long_unsigned_type_node,
13465 long_long_unsigned_type_node, NULL_TREE);
13467 tree v2si_ftype_v2sf
13468 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13469 tree v2sf_ftype_v2si
13470 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13471 tree v2si_ftype_v2si
13472 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13473 tree v2sf_ftype_v2sf
13474 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13475 tree v2sf_ftype_v2sf_v2sf
13476 = build_function_type_list (V2SF_type_node,
13477 V2SF_type_node, V2SF_type_node, NULL_TREE);
13478 tree v2si_ftype_v2sf_v2sf
13479 = build_function_type_list (V2SI_type_node,
13480 V2SF_type_node, V2SF_type_node, NULL_TREE);
13481 tree pint_type_node = build_pointer_type (integer_type_node);
13482 tree pdouble_type_node = build_pointer_type (double_type_node);
13483 tree pcdouble_type_node = build_pointer_type (
13484 build_type_variant (double_type_node, 1, 0));
13485 tree int_ftype_v2df_v2df
13486 = build_function_type_list (integer_type_node,
13487 V2DF_type_node, V2DF_type_node, NULL_TREE);
13489 tree ti_ftype_ti_ti
13490 = build_function_type_list (intTI_type_node,
13491 intTI_type_node, intTI_type_node, NULL_TREE);
13492 tree void_ftype_pcvoid
13493 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13494 tree v4sf_ftype_v4si
13495 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13496 tree v4si_ftype_v4sf
13497 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13498 tree v2df_ftype_v4si
13499 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13500 tree v4si_ftype_v2df
13501 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13502 tree v2si_ftype_v2df
13503 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13504 tree v4sf_ftype_v2df
13505 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13506 tree v2df_ftype_v2si
13507 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13508 tree v2df_ftype_v4sf
13509 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13510 tree int_ftype_v2df
13511 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13512 tree int64_ftype_v2df
13513 = build_function_type_list (long_long_integer_type_node,
13514 V2DF_type_node, NULL_TREE);
13515 tree v2df_ftype_v2df_int
13516 = build_function_type_list (V2DF_type_node,
13517 V2DF_type_node, integer_type_node, NULL_TREE);
13518 tree v2df_ftype_v2df_int64
13519 = build_function_type_list (V2DF_type_node,
13520 V2DF_type_node, long_long_integer_type_node,
13522 tree v4sf_ftype_v4sf_v2df
13523 = build_function_type_list (V4SF_type_node,
13524 V4SF_type_node, V2DF_type_node, NULL_TREE);
13525 tree v2df_ftype_v2df_v4sf
13526 = build_function_type_list (V2DF_type_node,
13527 V2DF_type_node, V4SF_type_node, NULL_TREE);
13528 tree v2df_ftype_v2df_v2df_int
13529 = build_function_type_list (V2DF_type_node,
13530 V2DF_type_node, V2DF_type_node,
13533 tree v2df_ftype_v2df_pcdouble
13534 = build_function_type_list (V2DF_type_node,
13535 V2DF_type_node, pcdouble_type_node, NULL_TREE);
13536 tree void_ftype_pdouble_v2df
13537 = build_function_type_list (void_type_node,
13538 pdouble_type_node, V2DF_type_node, NULL_TREE);
13539 tree void_ftype_pint_int
13540 = build_function_type_list (void_type_node,
13541 pint_type_node, integer_type_node, NULL_TREE);
13542 tree void_ftype_v16qi_v16qi_pchar
13543 = build_function_type_list (void_type_node,
13544 V16QI_type_node, V16QI_type_node,
13545 pchar_type_node, NULL_TREE);
13546 tree v2df_ftype_pcdouble
13547 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13548 tree v2df_ftype_v2df_v2df
13549 = build_function_type_list (V2DF_type_node,
13550 V2DF_type_node, V2DF_type_node, NULL_TREE);
13551 tree v16qi_ftype_v16qi_v16qi
13552 = build_function_type_list (V16QI_type_node,
13553 V16QI_type_node, V16QI_type_node, NULL_TREE);
13554 tree v8hi_ftype_v8hi_v8hi
13555 = build_function_type_list (V8HI_type_node,
13556 V8HI_type_node, V8HI_type_node, NULL_TREE);
13557 tree v4si_ftype_v4si_v4si
13558 = build_function_type_list (V4SI_type_node,
13559 V4SI_type_node, V4SI_type_node, NULL_TREE);
13560 tree v2di_ftype_v2di_v2di
13561 = build_function_type_list (V2DI_type_node,
13562 V2DI_type_node, V2DI_type_node, NULL_TREE);
13563 tree v2di_ftype_v2df_v2df
13564 = build_function_type_list (V2DI_type_node,
13565 V2DF_type_node, V2DF_type_node, NULL_TREE);
13566 tree v2df_ftype_v2df
13567 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13568 tree v2di_ftype_v2di_int
13569 = build_function_type_list (V2DI_type_node,
13570 V2DI_type_node, integer_type_node, NULL_TREE);
13571 tree v4si_ftype_v4si_int
13572 = build_function_type_list (V4SI_type_node,
13573 V4SI_type_node, integer_type_node, NULL_TREE);
13574 tree v8hi_ftype_v8hi_int
13575 = build_function_type_list (V8HI_type_node,
13576 V8HI_type_node, integer_type_node, NULL_TREE);
13577 tree v8hi_ftype_v8hi_v2di
13578 = build_function_type_list (V8HI_type_node,
13579 V8HI_type_node, V2DI_type_node, NULL_TREE);
13580 tree v4si_ftype_v4si_v2di
13581 = build_function_type_list (V4SI_type_node,
13582 V4SI_type_node, V2DI_type_node, NULL_TREE);
13583 tree v4si_ftype_v8hi_v8hi
13584 = build_function_type_list (V4SI_type_node,
13585 V8HI_type_node, V8HI_type_node, NULL_TREE);
13586 tree di_ftype_v8qi_v8qi
13587 = build_function_type_list (long_long_unsigned_type_node,
13588 V8QI_type_node, V8QI_type_node, NULL_TREE);
13589 tree di_ftype_v2si_v2si
13590 = build_function_type_list (long_long_unsigned_type_node,
13591 V2SI_type_node, V2SI_type_node, NULL_TREE);
13592 tree v2di_ftype_v16qi_v16qi
13593 = build_function_type_list (V2DI_type_node,
13594 V16QI_type_node, V16QI_type_node, NULL_TREE);
13595 tree v2di_ftype_v4si_v4si
13596 = build_function_type_list (V2DI_type_node,
13597 V4SI_type_node, V4SI_type_node, NULL_TREE);
13598 tree int_ftype_v16qi
13599 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13600 tree v16qi_ftype_pcchar
13601 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13602 tree void_ftype_pchar_v16qi
13603 = build_function_type_list (void_type_node,
13604 pchar_type_node, V16QI_type_node, NULL_TREE);
13607 tree float128_type;
13610 /* The __float80 type. */
13611 if (TYPE_MODE (long_double_type_node) == XFmode)
13612 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13616 /* The __float80 type. */
13617 float80_type = make_node (REAL_TYPE);
13618 TYPE_PRECISION (float80_type) = 80;
13619 layout_type (float80_type);
13620 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13623 float128_type = make_node (REAL_TYPE);
13624 TYPE_PRECISION (float128_type) = 128;
13625 layout_type (float128_type);
13626 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13628 /* Add all builtins that are more or less simple operations on two
13630 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13632 /* Use one of the operands; the target can have a different mode for
13633 mask-generating compares. */
13634 enum machine_mode mode;
13639 mode = insn_data[d->icode].operand[1].mode;
13644 type = v16qi_ftype_v16qi_v16qi;
13647 type = v8hi_ftype_v8hi_v8hi;
13650 type = v4si_ftype_v4si_v4si;
13653 type = v2di_ftype_v2di_v2di;
13656 type = v2df_ftype_v2df_v2df;
13659 type = ti_ftype_ti_ti;
13662 type = v4sf_ftype_v4sf_v4sf;
13665 type = v8qi_ftype_v8qi_v8qi;
13668 type = v4hi_ftype_v4hi_v4hi;
13671 type = v2si_ftype_v2si_v2si;
13674 type = di_ftype_di_di;
13681 /* Override for comparisons. */
13682 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
13683 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
13684 type = v4si_ftype_v4sf_v4sf;
13686 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
13687 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
13688 type = v2di_ftype_v2df_v2df;
13690 def_builtin (d->mask, d->name, type, d->code);
13693 /* Add the remaining MMX insns with somewhat more complicated types. */
13694 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13695 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13696 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13697 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13699 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13700 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13701 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13703 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13704 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13706 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13707 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13709 /* comi/ucomi insns. */
13710 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13711 if (d->mask == MASK_SSE2)
13712 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13714 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13716 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13717 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13718 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13720 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13721 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13722 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13723 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13724 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13725 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13726 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13727 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13728 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13729 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13730 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13732 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13734 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13735 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13737 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13738 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13739 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13740 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13742 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13743 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13744 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13745 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13747 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13749 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13751 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13752 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13753 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13754 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13755 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13756 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13758 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13760 /* Original 3DNow! */
13761 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13762 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13763 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13764 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13765 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13766 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13767 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13768 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13769 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13770 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13771 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13772 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13773 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13774 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13775 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13776 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13777 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13778 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13779 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13780 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13782 /* 3DNow! extension as used in the Athlon CPU. */
13783 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13784 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13785 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13786 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13787 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13788 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13791 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13793 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13794 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13796 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
13797 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
13799 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13800 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13801 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13802 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13803 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13805 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13806 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13807 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13808 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13810 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13811 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13813 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13815 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13816 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13818 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13819 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13820 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13821 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13822 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13824 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13826 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13827 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13828 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13829 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13831 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13832 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13833 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13835 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13836 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13837 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13838 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13840 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13841 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13842 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13844 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13845 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13847 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
13848 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
13850 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13851 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13852 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13854 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13855 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13856 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13858 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13859 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13861 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13862 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13863 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13864 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13866 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13867 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13868 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13869 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13871 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13872 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13874 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13876 /* Prescott New Instructions. */
13877 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
13878 void_ftype_pcvoid_unsigned_unsigned,
13879 IX86_BUILTIN_MONITOR);
13880 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
13881 void_ftype_unsigned_unsigned,
13882 IX86_BUILTIN_MWAIT);
13883 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
13885 IX86_BUILTIN_MOVSHDUP);
13886 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
13888 IX86_BUILTIN_MOVSLDUP);
13889 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
13890 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13892 /* Access to the vec_init patterns. */
13893 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
13894 integer_type_node, NULL_TREE);
13895 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
13896 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
13898 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
13899 short_integer_type_node,
13900 short_integer_type_node,
13901 short_integer_type_node, NULL_TREE);
13902 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
13903 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
13905 ftype = build_function_type_list (V8QI_type_node, char_type_node,
13906 char_type_node, char_type_node,
13907 char_type_node, char_type_node,
13908 char_type_node, char_type_node,
13909 char_type_node, NULL_TREE);
13910 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
13911 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
13913 /* Access to the vec_extract patterns. */
13914 ftype = build_function_type_list (double_type_node, V2DF_type_node,
13915 integer_type_node, NULL_TREE);
13916 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
13917 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
13919 ftype = build_function_type_list (long_long_integer_type_node,
13920 V2DI_type_node, integer_type_node,
13922 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
13923 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
13925 ftype = build_function_type_list (float_type_node, V4SF_type_node,
13926 integer_type_node, NULL_TREE);
13927 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
13928 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
13930 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
13931 integer_type_node, NULL_TREE);
13932 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
13933 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
13935 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
13936 integer_type_node, NULL_TREE);
13937 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
13938 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
13940 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
13941 integer_type_node, NULL_TREE);
13942 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
13943 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
13945 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
13946 integer_type_node, NULL_TREE);
13947 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
13948 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
13950 /* Access to the vec_set patterns. */
13951 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
13953 integer_type_node, NULL_TREE);
13954 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
13955 ftype, IX86_BUILTIN_VEC_SET_V8HI);
13957 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
13959 integer_type_node, NULL_TREE);
13960 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
13961 ftype, IX86_BUILTIN_VEC_SET_V4HI);
13964 /* Errors in the source file can cause expand_expr to return const0_rtx
13965 where we expect a vector. To avoid crashing, use one of the vector
13966 clear instructions. */
13968 safe_vector_operand (rtx x, enum machine_mode mode)
13970 if (x == const0_rtx)
13971 x = CONST0_RTX (mode);
13975 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13978 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13981 tree arg0 = TREE_VALUE (arglist);
13982 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13983 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13984 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13985 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13986 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13987 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13989 if (VECTOR_MODE_P (mode0))
13990 op0 = safe_vector_operand (op0, mode0);
13991 if (VECTOR_MODE_P (mode1))
13992 op1 = safe_vector_operand (op1, mode1);
13994 if (optimize || !target
13995 || GET_MODE (target) != tmode
13996 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13997 target = gen_reg_rtx (tmode);
13999 if (GET_MODE (op1) == SImode && mode1 == TImode)
14001 rtx x = gen_reg_rtx (V4SImode);
14002 emit_insn (gen_sse2_loadd (x, op1));
14003 op1 = gen_lowpart (TImode, x);
14006 /* In case the insn wants input operands in modes different from
14007 the result, abort. */
14008 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
14009 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
14012 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
14013 op0 = copy_to_mode_reg (mode0, op0);
14014 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
14015 op1 = copy_to_mode_reg (mode1, op1);
14017 /* ??? Using ix86_fixup_binary_operands is problematic when
14018 we've got mismatched modes. Fake it. */
14024 if (tmode == mode0 && tmode == mode1)
14026 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
14030 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
14032 op0 = force_reg (mode0, op0);
14033 op1 = force_reg (mode1, op1);
14034 target = gen_reg_rtx (tmode);
14037 pat = GEN_FCN (icode) (target, op0, op1);
14044 /* Subroutine of ix86_expand_builtin to take care of stores. */
14047 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
14050 tree arg0 = TREE_VALUE (arglist);
14051 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14052 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14053 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14054 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
14055 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
14057 if (VECTOR_MODE_P (mode1))
14058 op1 = safe_vector_operand (op1, mode1);
14060 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14061 op1 = copy_to_mode_reg (mode1, op1);
14063 pat = GEN_FCN (icode) (op0, op1);
14069 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
14072 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
14073 rtx target, int do_load)
14076 tree arg0 = TREE_VALUE (arglist);
14077 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14078 enum machine_mode tmode = insn_data[icode].operand[0].mode;
14079 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
14081 if (optimize || !target
14082 || GET_MODE (target) != tmode
14083 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14084 target = gen_reg_rtx (tmode);
14086 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14089 if (VECTOR_MODE_P (mode0))
14090 op0 = safe_vector_operand (op0, mode0);
14092 if ((optimize && !register_operand (op0, mode0))
14093 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14094 op0 = copy_to_mode_reg (mode0, op0);
14097 pat = GEN_FCN (icode) (target, op0);
14104 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
14105 sqrtss, rsqrtss, rcpss. */
14108 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
14111 tree arg0 = TREE_VALUE (arglist);
14112 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14113 enum machine_mode tmode = insn_data[icode].operand[0].mode;
14114 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
14116 if (optimize || !target
14117 || GET_MODE (target) != tmode
14118 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14119 target = gen_reg_rtx (tmode);
14121 if (VECTOR_MODE_P (mode0))
14122 op0 = safe_vector_operand (op0, mode0);
14124 if ((optimize && !register_operand (op0, mode0))
14125 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14126 op0 = copy_to_mode_reg (mode0, op0);
14129 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
14130 op1 = copy_to_mode_reg (mode0, op1);
14132 pat = GEN_FCN (icode) (target, op0, op1);
14139 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
14142 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
14146 tree arg0 = TREE_VALUE (arglist);
14147 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14148 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14149 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14151 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
14152 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
14153 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
14154 enum rtx_code comparison = d->comparison;
14156 if (VECTOR_MODE_P (mode0))
14157 op0 = safe_vector_operand (op0, mode0);
14158 if (VECTOR_MODE_P (mode1))
14159 op1 = safe_vector_operand (op1, mode1);
14161 /* Swap operands if we have a comparison that isn't available in
14163 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
14165 rtx tmp = gen_reg_rtx (mode1);
14166 emit_move_insn (tmp, op1);
14171 if (optimize || !target
14172 || GET_MODE (target) != tmode
14173 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
14174 target = gen_reg_rtx (tmode);
14176 if ((optimize && !register_operand (op0, mode0))
14177 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
14178 op0 = copy_to_mode_reg (mode0, op0);
14179 if ((optimize && !register_operand (op1, mode1))
14180 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
14181 op1 = copy_to_mode_reg (mode1, op1);
14183 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14184 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
14191 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
14194 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
14198 tree arg0 = TREE_VALUE (arglist);
14199 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14200 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14201 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14203 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
14204 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
14205 enum rtx_code comparison = d->comparison;
14207 if (VECTOR_MODE_P (mode0))
14208 op0 = safe_vector_operand (op0, mode0);
14209 if (VECTOR_MODE_P (mode1))
14210 op1 = safe_vector_operand (op1, mode1);
14212 /* Swap operands if we have a comparison that isn't available in
14214 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
14221 target = gen_reg_rtx (SImode);
14222 emit_move_insn (target, const0_rtx);
14223 target = gen_rtx_SUBREG (QImode, target, 0);
14225 if ((optimize && !register_operand (op0, mode0))
14226 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14227 op0 = copy_to_mode_reg (mode0, op0);
14228 if ((optimize && !register_operand (op1, mode1))
14229 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14230 op1 = copy_to_mode_reg (mode1, op1);
14232 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14233 pat = GEN_FCN (d->icode) (op0, op1);
14237 emit_insn (gen_rtx_SET (VOIDmode,
14238 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
14239 gen_rtx_fmt_ee (comparison, QImode,
14243 return SUBREG_REG (target);
14246 /* Return the integer constant in ARG. Constrain it to be in the range
14247 of the subparts of VEC_TYPE; issue an error if not. */
14250 get_element_number (tree vec_type, tree arg)
14252 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
14254 if (!host_integerp (arg, 1)
14255 || (elt = tree_low_cst (arg, 1), elt > max))
14257 error ("selector must be an integer constant in the range 0..%i", max);
14264 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
14265 ix86_expand_vector_init. We DO have language-level syntax for this, in
14266 the form of (type){ init-list }. Except that since we can't place emms
14267 instructions from inside the compiler, we can't allow the use of MMX
14268 registers unless the user explicitly asks for it. So we do *not* define
14269 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
14270 we have builtins invoked by mmintrin.h that gives us license to emit
14271 these sorts of instructions. */
14274 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
14276 enum machine_mode tmode = TYPE_MODE (type);
14277 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
14278 int i, n_elt = GET_MODE_NUNITS (tmode);
14279 rtvec v = rtvec_alloc (n_elt);
14281 gcc_assert (VECTOR_MODE_P (tmode));
14283 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
14285 rtx x = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14286 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
14289 gcc_assert (arglist == NULL);
14291 if (!target || !register_operand (target, tmode))
14292 target = gen_reg_rtx (tmode);
14294 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
14298 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
14299 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
14300 had a language-level syntax for referencing vector elements. */
14303 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
14305 enum machine_mode tmode, mode0;
14310 arg0 = TREE_VALUE (arglist);
14311 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14313 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14314 elt = get_element_number (TREE_TYPE (arg0), arg1);
14316 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14317 mode0 = TYPE_MODE (TREE_TYPE (arg0));
14318 gcc_assert (VECTOR_MODE_P (mode0));
14320 op0 = force_reg (mode0, op0);
14322 if (optimize || !target || !register_operand (target, tmode))
14323 target = gen_reg_rtx (tmode);
14325 ix86_expand_vector_extract (true, target, op0, elt);
14330 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
14331 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
14332 a language-level syntax for referencing vector elements. */
14335 ix86_expand_vec_set_builtin (tree arglist)
14337 enum machine_mode tmode, mode1;
14338 tree arg0, arg1, arg2;
14342 arg0 = TREE_VALUE (arglist);
14343 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14344 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14346 tmode = TYPE_MODE (TREE_TYPE (arg0));
14347 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14348 gcc_assert (VECTOR_MODE_P (tmode));
14350 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
14351 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
14352 elt = get_element_number (TREE_TYPE (arg0), arg2);
14354 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
14355 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
14357 op0 = force_reg (tmode, op0);
14358 op1 = force_reg (mode1, op1);
14360 ix86_expand_vector_set (true, op0, op1, elt);
14365 /* Expand an expression EXP that calls a built-in function,
14366 with result going to TARGET if that's convenient
14367 (and in mode MODE if that's convenient).
14368 SUBTARGET may be used as the target for computing one of EXP's operands.
14369 IGNORE is nonzero if the value is to be ignored. */
14372 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14373 enum machine_mode mode ATTRIBUTE_UNUSED,
14374 int ignore ATTRIBUTE_UNUSED)
14376 const struct builtin_description *d;
14378 enum insn_code icode;
14379 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14380 tree arglist = TREE_OPERAND (exp, 1);
14381 tree arg0, arg1, arg2;
14382 rtx op0, op1, op2, pat;
14383 enum machine_mode tmode, mode0, mode1, mode2;
14384 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14388 case IX86_BUILTIN_EMMS:
14389 emit_insn (gen_mmx_emms ());
14392 case IX86_BUILTIN_SFENCE:
14393 emit_insn (gen_sse_sfence ());
14396 case IX86_BUILTIN_MASKMOVQ:
14397 case IX86_BUILTIN_MASKMOVDQU:
14398 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14399 ? CODE_FOR_mmx_maskmovq
14400 : CODE_FOR_sse2_maskmovdqu);
14401 /* Note the arg order is different from the operand order. */
14402 arg1 = TREE_VALUE (arglist);
14403 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14404 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14405 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14406 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14407 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14408 mode0 = insn_data[icode].operand[0].mode;
14409 mode1 = insn_data[icode].operand[1].mode;
14410 mode2 = insn_data[icode].operand[2].mode;
14412 op0 = force_reg (Pmode, op0);
14413 op0 = gen_rtx_MEM (mode1, op0);
14415 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14416 op0 = copy_to_mode_reg (mode0, op0);
14417 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14418 op1 = copy_to_mode_reg (mode1, op1);
14419 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14420 op2 = copy_to_mode_reg (mode2, op2);
14421 pat = GEN_FCN (icode) (op0, op1, op2);
14427 case IX86_BUILTIN_SQRTSS:
14428 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
14429 case IX86_BUILTIN_RSQRTSS:
14430 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
14431 case IX86_BUILTIN_RCPSS:
14432 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
14434 case IX86_BUILTIN_LOADUPS:
14435 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14437 case IX86_BUILTIN_STOREUPS:
14438 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14440 case IX86_BUILTIN_LOADHPS:
14441 case IX86_BUILTIN_LOADLPS:
14442 case IX86_BUILTIN_LOADHPD:
14443 case IX86_BUILTIN_LOADLPD:
14444 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
14445 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
14446 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
14447 : CODE_FOR_sse2_loadlpd);
14448 arg0 = TREE_VALUE (arglist);
14449 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14450 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14451 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14452 tmode = insn_data[icode].operand[0].mode;
14453 mode0 = insn_data[icode].operand[1].mode;
14454 mode1 = insn_data[icode].operand[2].mode;
14456 op0 = force_reg (mode0, op0);
14457 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14458 if (optimize || target == 0
14459 || GET_MODE (target) != tmode
14460 || !register_operand (target, tmode))
14461 target = gen_reg_rtx (tmode);
14462 pat = GEN_FCN (icode) (target, op0, op1);
14468 case IX86_BUILTIN_STOREHPS:
14469 case IX86_BUILTIN_STORELPS:
14470 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
14471 : CODE_FOR_sse_storelps);
14472 arg0 = TREE_VALUE (arglist);
14473 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14474 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14475 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14476 mode0 = insn_data[icode].operand[0].mode;
14477 mode1 = insn_data[icode].operand[1].mode;
14479 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14480 op1 = force_reg (mode1, op1);
14482 pat = GEN_FCN (icode) (op0, op1);
14488 case IX86_BUILTIN_MOVNTPS:
14489 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14490 case IX86_BUILTIN_MOVNTQ:
14491 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14493 case IX86_BUILTIN_LDMXCSR:
14494 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14495 target = assign_386_stack_local (SImode, 0);
14496 emit_move_insn (target, op0);
14497 emit_insn (gen_sse_ldmxcsr (target));
14500 case IX86_BUILTIN_STMXCSR:
14501 target = assign_386_stack_local (SImode, 0);
14502 emit_insn (gen_sse_stmxcsr (target));
14503 return copy_to_mode_reg (SImode, target);
14505 case IX86_BUILTIN_SHUFPS:
14506 case IX86_BUILTIN_SHUFPD:
14507 icode = (fcode == IX86_BUILTIN_SHUFPS
14508 ? CODE_FOR_sse_shufps
14509 : CODE_FOR_sse2_shufpd);
14510 arg0 = TREE_VALUE (arglist);
14511 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14512 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14513 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14514 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14515 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14516 tmode = insn_data[icode].operand[0].mode;
14517 mode0 = insn_data[icode].operand[1].mode;
14518 mode1 = insn_data[icode].operand[2].mode;
14519 mode2 = insn_data[icode].operand[3].mode;
14521 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14522 op0 = copy_to_mode_reg (mode0, op0);
14523 if ((optimize && !register_operand (op1, mode1))
14524 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
14525 op1 = copy_to_mode_reg (mode1, op1);
14526 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14528 /* @@@ better error message */
14529 error ("mask must be an immediate");
14530 return gen_reg_rtx (tmode);
14532 if (optimize || target == 0
14533 || GET_MODE (target) != tmode
14534 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14535 target = gen_reg_rtx (tmode);
14536 pat = GEN_FCN (icode) (target, op0, op1, op2);
14542 case IX86_BUILTIN_PSHUFW:
14543 case IX86_BUILTIN_PSHUFD:
14544 case IX86_BUILTIN_PSHUFHW:
14545 case IX86_BUILTIN_PSHUFLW:
14546 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14547 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14548 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14549 : CODE_FOR_mmx_pshufw);
14550 arg0 = TREE_VALUE (arglist);
14551 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14552 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14553 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14554 tmode = insn_data[icode].operand[0].mode;
14555 mode1 = insn_data[icode].operand[1].mode;
14556 mode2 = insn_data[icode].operand[2].mode;
14558 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14559 op0 = copy_to_mode_reg (mode1, op0);
14560 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14562 /* @@@ better error message */
14563 error ("mask must be an immediate");
14567 || GET_MODE (target) != tmode
14568 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14569 target = gen_reg_rtx (tmode);
14570 pat = GEN_FCN (icode) (target, op0, op1);
14576 case IX86_BUILTIN_PSLLDQI128:
14577 case IX86_BUILTIN_PSRLDQI128:
14578 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14579 : CODE_FOR_sse2_lshrti3);
14580 arg0 = TREE_VALUE (arglist);
14581 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14582 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14583 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14584 tmode = insn_data[icode].operand[0].mode;
14585 mode1 = insn_data[icode].operand[1].mode;
14586 mode2 = insn_data[icode].operand[2].mode;
14588 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14590 op0 = copy_to_reg (op0);
14591 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14593 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14595 error ("shift must be an immediate");
14598 target = gen_reg_rtx (V2DImode);
14599 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14605 case IX86_BUILTIN_FEMMS:
14606 emit_insn (gen_mmx_femms ());
14609 case IX86_BUILTIN_PAVGUSB:
14610 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
14612 case IX86_BUILTIN_PF2ID:
14613 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
14615 case IX86_BUILTIN_PFACC:
14616 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
14618 case IX86_BUILTIN_PFADD:
14619 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
14621 case IX86_BUILTIN_PFCMPEQ:
14622 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
14624 case IX86_BUILTIN_PFCMPGE:
14625 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
14627 case IX86_BUILTIN_PFCMPGT:
14628 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
14630 case IX86_BUILTIN_PFMAX:
14631 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
14633 case IX86_BUILTIN_PFMIN:
14634 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
14636 case IX86_BUILTIN_PFMUL:
14637 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
14639 case IX86_BUILTIN_PFRCP:
14640 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
14642 case IX86_BUILTIN_PFRCPIT1:
14643 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
14645 case IX86_BUILTIN_PFRCPIT2:
14646 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
14648 case IX86_BUILTIN_PFRSQIT1:
14649 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
14651 case IX86_BUILTIN_PFRSQRT:
14652 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
14654 case IX86_BUILTIN_PFSUB:
14655 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
14657 case IX86_BUILTIN_PFSUBR:
14658 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
14660 case IX86_BUILTIN_PI2FD:
14661 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
14663 case IX86_BUILTIN_PMULHRW:
14664 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
14666 case IX86_BUILTIN_PF2IW:
14667 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
14669 case IX86_BUILTIN_PFNACC:
14670 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
14672 case IX86_BUILTIN_PFPNACC:
14673 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
14675 case IX86_BUILTIN_PI2FW:
14676 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
14678 case IX86_BUILTIN_PSWAPDSI:
14679 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
14681 case IX86_BUILTIN_PSWAPDSF:
14682 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
14684 case IX86_BUILTIN_SQRTSD:
14685 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
14686 case IX86_BUILTIN_LOADUPD:
14687 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14688 case IX86_BUILTIN_STOREUPD:
14689 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14691 case IX86_BUILTIN_MFENCE:
14692 emit_insn (gen_sse2_mfence ());
14694 case IX86_BUILTIN_LFENCE:
14695 emit_insn (gen_sse2_lfence ());
14698 case IX86_BUILTIN_CLFLUSH:
14699 arg0 = TREE_VALUE (arglist);
14700 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14701 icode = CODE_FOR_sse2_clflush;
14702 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14703 op0 = copy_to_mode_reg (Pmode, op0);
14705 emit_insn (gen_sse2_clflush (op0));
14708 case IX86_BUILTIN_MOVNTPD:
14709 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14710 case IX86_BUILTIN_MOVNTDQ:
14711 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14712 case IX86_BUILTIN_MOVNTI:
14713 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14715 case IX86_BUILTIN_LOADDQU:
14716 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14717 case IX86_BUILTIN_STOREDQU:
14718 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14720 case IX86_BUILTIN_MONITOR:
14721 arg0 = TREE_VALUE (arglist);
14722 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14723 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14724 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14725 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14726 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14728 op0 = copy_to_mode_reg (SImode, op0);
14730 op1 = copy_to_mode_reg (SImode, op1);
14732 op2 = copy_to_mode_reg (SImode, op2);
14733 emit_insn (gen_sse3_monitor (op0, op1, op2));
14736 case IX86_BUILTIN_MWAIT:
14737 arg0 = TREE_VALUE (arglist);
14738 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14739 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14740 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14742 op0 = copy_to_mode_reg (SImode, op0);
14744 op1 = copy_to_mode_reg (SImode, op1);
14745 emit_insn (gen_sse3_mwait (op0, op1));
14748 case IX86_BUILTIN_LDDQU:
14749 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
14752 case IX86_BUILTIN_VEC_INIT_V2SI:
14753 case IX86_BUILTIN_VEC_INIT_V4HI:
14754 case IX86_BUILTIN_VEC_INIT_V8QI:
14755 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
14757 case IX86_BUILTIN_VEC_EXT_V2DF:
14758 case IX86_BUILTIN_VEC_EXT_V2DI:
14759 case IX86_BUILTIN_VEC_EXT_V4SF:
14760 case IX86_BUILTIN_VEC_EXT_V4SI:
14761 case IX86_BUILTIN_VEC_EXT_V8HI:
14762 case IX86_BUILTIN_VEC_EXT_V2SI:
14763 case IX86_BUILTIN_VEC_EXT_V4HI:
14764 return ix86_expand_vec_ext_builtin (arglist, target);
14766 case IX86_BUILTIN_VEC_SET_V8HI:
14767 case IX86_BUILTIN_VEC_SET_V4HI:
14768 return ix86_expand_vec_set_builtin (arglist);
14774 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14775 if (d->code == fcode)
14777 /* Compares are treated specially. */
14778 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
14779 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
14780 || d->icode == CODE_FOR_sse2_maskcmpv2df3
14781 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
14782 return ix86_expand_sse_compare (d, arglist, target);
14784 return ix86_expand_binop_builtin (d->icode, arglist, target);
14787 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14788 if (d->code == fcode)
14789 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14791 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14792 if (d->code == fcode)
14793 return ix86_expand_sse_comi (d, arglist, target);
14795 gcc_unreachable ();
14798 /* Store OPERAND to the memory after reload is completed. This means
14799 that we can't easily use assign_stack_local. */
14801 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14804 if (!reload_completed)
14806 if (TARGET_RED_ZONE)
14808 result = gen_rtx_MEM (mode,
14809 gen_rtx_PLUS (Pmode,
14811 GEN_INT (-RED_ZONE_SIZE)));
14812 emit_move_insn (result, operand);
14814 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14820 operand = gen_lowpart (DImode, operand);
14824 gen_rtx_SET (VOIDmode,
14825 gen_rtx_MEM (DImode,
14826 gen_rtx_PRE_DEC (DImode,
14827 stack_pointer_rtx)),
14833 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14842 split_di (&operand, 1, operands, operands + 1);
14844 gen_rtx_SET (VOIDmode,
14845 gen_rtx_MEM (SImode,
14846 gen_rtx_PRE_DEC (Pmode,
14847 stack_pointer_rtx)),
14850 gen_rtx_SET (VOIDmode,
14851 gen_rtx_MEM (SImode,
14852 gen_rtx_PRE_DEC (Pmode,
14853 stack_pointer_rtx)),
14858 /* It is better to store HImodes as SImodes. */
14859 if (!TARGET_PARTIAL_REG_STALL)
14860 operand = gen_lowpart (SImode, operand);
14864 gen_rtx_SET (VOIDmode,
14865 gen_rtx_MEM (GET_MODE (operand),
14866 gen_rtx_PRE_DEC (SImode,
14867 stack_pointer_rtx)),
14873 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14878 /* Free operand from the memory. */
14880 ix86_free_from_memory (enum machine_mode mode)
14882 if (!TARGET_RED_ZONE)
14886 if (mode == DImode || TARGET_64BIT)
14888 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14892 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14893 to pop or add instruction if registers are available. */
14894 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14895 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14900 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14901 QImode must go into class Q_REGS.
14902 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14903 movdf to do mem-to-mem moves through integer regs. */
14905 ix86_preferred_reload_class (rtx x, enum reg_class class)
14907 /* We're only allowed to return a subclass of CLASS. Many of the
14908 following checks fail for NO_REGS, so eliminate that early. */
14909 if (class == NO_REGS)
14912 /* All classes can load zeros. */
14913 if (x == CONST0_RTX (GET_MODE (x)))
14916 /* Floating-point constants need more complex checks. */
14917 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14919 /* General regs can load everything. */
14920 if (reg_class_subset_p (class, GENERAL_REGS))
14923 /* Floats can load 0 and 1 plus some others. Note that we eliminated
14924 zero above. We only want to wind up preferring 80387 registers if
14925 we plan on doing computation with them. */
14927 && (TARGET_MIX_SSE_I387
14928 || !(TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (x))))
14929 && standard_80387_constant_p (x))
14931 /* Limit class to non-sse. */
14932 if (class == FLOAT_SSE_REGS)
14934 if (class == FP_TOP_SSE_REGS)
14936 if (class == FP_SECOND_SSE_REGS)
14937 return FP_SECOND_REG;
14938 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
14944 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14946 if (MAYBE_SSE_CLASS_P (class) && CONSTANT_P (x))
14949 /* Generally when we see PLUS here, it's the function invariant
14950 (plus soft-fp const_int). Which can only be computed into general
14952 if (GET_CODE (x) == PLUS)
14953 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
14955 /* QImode constants are easy to load, but non-constant QImode data
14956 must go into Q_REGS. */
14957 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
14959 if (reg_class_subset_p (class, Q_REGS))
14961 if (reg_class_subset_p (Q_REGS, class))
14969 /* If we are copying between general and FP registers, we need a memory
14970 location. The same is true for SSE and MMX registers.
14972 The macro can't work reliably when one of the CLASSES is class containing
14973 registers from multiple units (SSE, MMX, integer). We avoid this by never
14974 combining those units in single alternative in the machine description.
14975 Ensure that this constraint holds to avoid unexpected surprises.
14977 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14978 enforce these sanity checks. */
14981 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14982 enum machine_mode mode, int strict)
14984 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14985 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14986 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14987 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14988 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14989 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14996 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
14999 /* ??? This is a lie. We do have moves between mmx/general, and for
15000 mmx/sse2. But by saying we need secondary memory we discourage the
15001 register allocator from using the mmx registers unless needed. */
15002 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
15005 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
15007 /* SSE1 doesn't have any direct moves from other classes. */
15011 /* If the target says that inter-unit moves are more expensive
15012 than moving through memory, then don't generate them. */
15013 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
15016 /* Between SSE and general, we have moves no larger than word size. */
15017 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
15020 /* ??? For the cost of one register reformat penalty, we could use
15021 the same instructions to move SFmode and DFmode data, but the
15022 relevant move patterns don't support those alternatives. */
15023 if (mode == SFmode || mode == DFmode)
15030 /* Return the cost of moving data from a register in class CLASS1 to
15031 one in class CLASS2.
15033 It is not required that the cost always equal 2 when FROM is the same as TO;
15034 on some machines it is expensive to move between registers if they are not
15035 general registers. */
15038 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
15039 enum reg_class class2)
15041 /* In case we require secondary memory, compute cost of the store followed
15042 by load. In order to avoid bad register allocation choices, we need
15043 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
15045 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
15049 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
15050 MEMORY_MOVE_COST (mode, class1, 1));
15051 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
15052 MEMORY_MOVE_COST (mode, class2, 1));
15054 /* In case of copying from general_purpose_register we may emit multiple
15055 stores followed by single load causing memory size mismatch stall.
15056 Count this as arbitrarily high cost of 20. */
15057 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
15060 /* In the case of FP/MMX moves, the registers actually overlap, and we
15061 have to switch modes in order to treat them differently. */
15062 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
15063 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
15069 /* Moves between SSE/MMX and integer unit are expensive. */
15070 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
15071 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
15072 return ix86_cost->mmxsse_to_integer;
15073 if (MAYBE_FLOAT_CLASS_P (class1))
15074 return ix86_cost->fp_move;
15075 if (MAYBE_SSE_CLASS_P (class1))
15076 return ix86_cost->sse_move;
15077 if (MAYBE_MMX_CLASS_P (class1))
15078 return ix86_cost->mmx_move;
15082 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
15085 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
15087 /* Flags and only flags can only hold CCmode values. */
15088 if (CC_REGNO_P (regno))
15089 return GET_MODE_CLASS (mode) == MODE_CC;
15090 if (GET_MODE_CLASS (mode) == MODE_CC
15091 || GET_MODE_CLASS (mode) == MODE_RANDOM
15092 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
15094 if (FP_REGNO_P (regno))
15095 return VALID_FP_MODE_P (mode);
15096 if (SSE_REGNO_P (regno))
15098 /* We implement the move patterns for all vector modes into and
15099 out of SSE registers, even when no operation instructions
15101 return (VALID_SSE_REG_MODE (mode)
15102 || VALID_SSE2_REG_MODE (mode)
15103 || VALID_MMX_REG_MODE (mode)
15104 || VALID_MMX_REG_MODE_3DNOW (mode));
15106 if (MMX_REGNO_P (regno))
15108 /* We implement the move patterns for 3DNOW modes even in MMX mode,
15109 so if the register is available at all, then we can move data of
15110 the given mode into or out of it. */
15111 return (VALID_MMX_REG_MODE (mode)
15112 || VALID_MMX_REG_MODE_3DNOW (mode));
15114 /* We handle both integer and floats in the general purpose registers.
15115 In future we should be able to handle vector modes as well. */
15116 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
15118 /* Take care for QImode values - they can be in non-QI regs, but then
15119 they do cause partial register stalls. */
15120 if (regno < 4 || mode != QImode || TARGET_64BIT)
15122 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
15125 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
15126 tieable integer mode. */
15129 ix86_tieable_integer_mode_p (enum machine_mode mode)
15138 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
15141 return TARGET_64BIT;
15148 /* Return true if MODE1 is accessible in a register that can hold MODE2
15149 without copying. That is, all register classes that can hold MODE2
15150 can also hold MODE1. */
15153 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
15155 if (mode1 == mode2)
15158 if (ix86_tieable_integer_mode_p (mode1)
15159 && ix86_tieable_integer_mode_p (mode2))
15162 /* MODE2 being XFmode implies fp stack or general regs, which means we
15163 can tie any smaller floating point modes to it. Note that we do not
15164 tie this with TFmode. */
15165 if (mode2 == XFmode)
15166 return mode1 == SFmode || mode1 == DFmode;
15168 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
15169 that we can tie it with SFmode. */
15170 if (mode2 == DFmode)
15171 return mode1 == SFmode;
15173 /* If MODE2 is only appropriate for an SSE register, then tie with
15174 any other mode acceptable to SSE registers. */
15175 if (SSE_REG_MODE_P (mode2))
15176 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
15178 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
15179 with any other mode acceptable to MMX registers. */
15180 if (MMX_REG_MODE_P (mode2))
15181 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
15186 /* Return the cost of moving data of mode M between a
15187 register and memory. A value of 2 is the default; this cost is
15188 relative to those in `REGISTER_MOVE_COST'.
15190 If moving between registers and memory is more expensive than
15191 between two registers, you should define this macro to express the
15194 Model also increased moving costs of QImode registers in non
15198 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
15200 if (FLOAT_CLASS_P (class))
15217 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
15219 if (SSE_CLASS_P (class))
15222 switch (GET_MODE_SIZE (mode))
15236 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
15238 if (MMX_CLASS_P (class))
15241 switch (GET_MODE_SIZE (mode))
15252 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
15254 switch (GET_MODE_SIZE (mode))
15258 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
15259 : ix86_cost->movzbl_load);
15261 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
15262 : ix86_cost->int_store[0] + 4);
15265 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
15267 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
15268 if (mode == TFmode)
15270 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
15271 * (((int) GET_MODE_SIZE (mode)
15272 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
15276 /* Compute a (partial) cost for rtx X. Return true if the complete
15277 cost has been computed, and false if subexpressions should be
15278 scanned. In either case, *TOTAL contains the cost result. */
15281 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
15283 enum machine_mode mode = GET_MODE (x);
15291 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
15293 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
15295 else if (flag_pic && SYMBOLIC_CONST (x)
15297 || (!GET_CODE (x) != LABEL_REF
15298 && (GET_CODE (x) != SYMBOL_REF
15299 || !SYMBOL_REF_LOCAL_P (x)))))
15306 if (mode == VOIDmode)
15309 switch (standard_80387_constant_p (x))
15314 default: /* Other constants */
15319 /* Start with (MEM (SYMBOL_REF)), since that's where
15320 it'll probably end up. Add a penalty for size. */
15321 *total = (COSTS_N_INSNS (1)
15322 + (flag_pic != 0 && !TARGET_64BIT)
15323 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
15329 /* The zero extensions is often completely free on x86_64, so make
15330 it as cheap as possible. */
15331 if (TARGET_64BIT && mode == DImode
15332 && GET_MODE (XEXP (x, 0)) == SImode)
15334 else if (TARGET_ZERO_EXTEND_WITH_AND)
15335 *total = COSTS_N_INSNS (ix86_cost->add);
15337 *total = COSTS_N_INSNS (ix86_cost->movzx);
15341 *total = COSTS_N_INSNS (ix86_cost->movsx);
15345 if (GET_CODE (XEXP (x, 1)) == CONST_INT
15346 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
15348 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15351 *total = COSTS_N_INSNS (ix86_cost->add);
15354 if ((value == 2 || value == 3)
15355 && ix86_cost->lea <= ix86_cost->shift_const)
15357 *total = COSTS_N_INSNS (ix86_cost->lea);
15367 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15369 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15371 if (INTVAL (XEXP (x, 1)) > 32)
15372 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
15374 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
15378 if (GET_CODE (XEXP (x, 1)) == AND)
15379 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
15381 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
15386 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15387 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15389 *total = COSTS_N_INSNS (ix86_cost->shift_var);
15394 if (FLOAT_MODE_P (mode))
15396 *total = COSTS_N_INSNS (ix86_cost->fmul);
15401 rtx op0 = XEXP (x, 0);
15402 rtx op1 = XEXP (x, 1);
15404 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15406 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15407 for (nbits = 0; value != 0; value &= value - 1)
15411 /* This is arbitrary. */
15414 /* Compute costs correctly for widening multiplication. */
15415 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
15416 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
15417 == GET_MODE_SIZE (mode))
15419 int is_mulwiden = 0;
15420 enum machine_mode inner_mode = GET_MODE (op0);
15422 if (GET_CODE (op0) == GET_CODE (op1))
15423 is_mulwiden = 1, op1 = XEXP (op1, 0);
15424 else if (GET_CODE (op1) == CONST_INT)
15426 if (GET_CODE (op0) == SIGN_EXTEND)
15427 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
15430 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
15434 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
15437 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15438 + nbits * ix86_cost->mult_bit)
15439 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
15448 if (FLOAT_MODE_P (mode))
15449 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15451 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15455 if (FLOAT_MODE_P (mode))
15456 *total = COSTS_N_INSNS (ix86_cost->fadd);
15457 else if (GET_MODE_CLASS (mode) == MODE_INT
15458 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15460 if (GET_CODE (XEXP (x, 0)) == PLUS
15461 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15462 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15463 && CONSTANT_P (XEXP (x, 1)))
15465 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15466 if (val == 2 || val == 4 || val == 8)
15468 *total = COSTS_N_INSNS (ix86_cost->lea);
15469 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15470 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15472 *total += rtx_cost (XEXP (x, 1), outer_code);
15476 else if (GET_CODE (XEXP (x, 0)) == MULT
15477 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15479 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15480 if (val == 2 || val == 4 || val == 8)
15482 *total = COSTS_N_INSNS (ix86_cost->lea);
15483 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15484 *total += rtx_cost (XEXP (x, 1), outer_code);
15488 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15490 *total = COSTS_N_INSNS (ix86_cost->lea);
15491 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15492 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15493 *total += rtx_cost (XEXP (x, 1), outer_code);
15500 if (FLOAT_MODE_P (mode))
15502 *total = COSTS_N_INSNS (ix86_cost->fadd);
15510 if (!TARGET_64BIT && mode == DImode)
15512 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15513 + (rtx_cost (XEXP (x, 0), outer_code)
15514 << (GET_MODE (XEXP (x, 0)) != DImode))
15515 + (rtx_cost (XEXP (x, 1), outer_code)
15516 << (GET_MODE (XEXP (x, 1)) != DImode)));
15522 if (FLOAT_MODE_P (mode))
15524 *total = COSTS_N_INSNS (ix86_cost->fchs);
15530 if (!TARGET_64BIT && mode == DImode)
15531 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15533 *total = COSTS_N_INSNS (ix86_cost->add);
15537 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
15538 && XEXP (XEXP (x, 0), 1) == const1_rtx
15539 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
15540 && XEXP (x, 1) == const0_rtx)
15542 /* This kind of construct is implemented using test[bwl].
15543 Treat it as if we had an AND. */
15544 *total = (COSTS_N_INSNS (ix86_cost->add)
15545 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
15546 + rtx_cost (const1_rtx, outer_code));
15552 if (!TARGET_SSE_MATH
15554 || (mode == DFmode && !TARGET_SSE2))
15559 if (FLOAT_MODE_P (mode))
15560 *total = COSTS_N_INSNS (ix86_cost->fabs);
15564 if (FLOAT_MODE_P (mode))
15565 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15569 if (XINT (x, 1) == UNSPEC_TP)
15580 static int current_machopic_label_num;
15582 /* Given a symbol name and its associated stub, write out the
15583 definition of the stub. */
15586 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15588 unsigned int length;
15589 char *binder_name, *symbol_name, lazy_ptr_name[32];
15590 int label = ++current_machopic_label_num;
15592 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15593 symb = (*targetm.strip_name_encoding) (symb);
15595 length = strlen (stub);
15596 binder_name = alloca (length + 32);
15597 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15599 length = strlen (symb);
15600 symbol_name = alloca (length + 32);
15601 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15603 sprintf (lazy_ptr_name, "L%d$lz", label);
15606 machopic_picsymbol_stub_section ();
15608 machopic_symbol_stub_section ();
15610 fprintf (file, "%s:\n", stub);
15611 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15615 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15616 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15617 fprintf (file, "\tjmp %%edx\n");
15620 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15622 fprintf (file, "%s:\n", binder_name);
15626 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15627 fprintf (file, "\tpushl %%eax\n");
15630 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15632 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15634 machopic_lazy_symbol_ptr_section ();
15635 fprintf (file, "%s:\n", lazy_ptr_name);
15636 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15637 fprintf (file, "\t.long %s\n", binder_name);
15639 #endif /* TARGET_MACHO */
15641 /* Order the registers for register allocator. */
15644 x86_order_regs_for_local_alloc (void)
15649 /* First allocate the local general purpose registers. */
15650 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15651 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15652 reg_alloc_order [pos++] = i;
15654 /* Global general purpose registers. */
15655 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15656 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15657 reg_alloc_order [pos++] = i;
15659 /* x87 registers come first in case we are doing FP math
15661 if (!TARGET_SSE_MATH)
15662 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15663 reg_alloc_order [pos++] = i;
15665 /* SSE registers. */
15666 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15667 reg_alloc_order [pos++] = i;
15668 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15669 reg_alloc_order [pos++] = i;
15671 /* x87 registers. */
15672 if (TARGET_SSE_MATH)
15673 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15674 reg_alloc_order [pos++] = i;
15676 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15677 reg_alloc_order [pos++] = i;
15679 /* Initialize the rest of array as we do not allocate some registers
15681 while (pos < FIRST_PSEUDO_REGISTER)
15682 reg_alloc_order [pos++] = 0;
15685 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15686 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15689 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15690 struct attribute_spec.handler. */
15692 ix86_handle_struct_attribute (tree *node, tree name,
15693 tree args ATTRIBUTE_UNUSED,
15694 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15697 if (DECL_P (*node))
15699 if (TREE_CODE (*node) == TYPE_DECL)
15700 type = &TREE_TYPE (*node);
15705 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15706 || TREE_CODE (*type) == UNION_TYPE)))
15708 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
15709 *no_add_attrs = true;
15712 else if ((is_attribute_p ("ms_struct", name)
15713 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15714 || ((is_attribute_p ("gcc_struct", name)
15715 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15717 warning ("%qs incompatible attribute ignored",
15718 IDENTIFIER_POINTER (name));
15719 *no_add_attrs = true;
15726 ix86_ms_bitfield_layout_p (tree record_type)
15728 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15729 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15730 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15733 /* Returns an expression indicating where the this parameter is
15734 located on entry to the FUNCTION. */
15737 x86_this_parameter (tree function)
15739 tree type = TREE_TYPE (function);
15743 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15744 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15747 if (ix86_function_regparm (type, function) > 0)
15751 parm = TYPE_ARG_TYPES (type);
15752 /* Figure out whether or not the function has a variable number of
15754 for (; parm; parm = TREE_CHAIN (parm))
15755 if (TREE_VALUE (parm) == void_type_node)
15757 /* If not, the this parameter is in the first argument. */
15761 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15763 return gen_rtx_REG (SImode, regno);
15767 if (aggregate_value_p (TREE_TYPE (type), type))
15768 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15770 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15773 /* Determine whether x86_output_mi_thunk can succeed. */
15776 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15777 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15778 HOST_WIDE_INT vcall_offset, tree function)
15780 /* 64-bit can handle anything. */
15784 /* For 32-bit, everything's fine if we have one free register. */
15785 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15788 /* Need a free register for vcall_offset. */
15792 /* Need a free register for GOT references. */
15793 if (flag_pic && !(*targetm.binds_local_p) (function))
15796 /* Otherwise ok. */
15800 /* Output the assembler code for a thunk function. THUNK_DECL is the
15801 declaration for the thunk function itself, FUNCTION is the decl for
15802 the target function. DELTA is an immediate constant offset to be
15803 added to THIS. If VCALL_OFFSET is nonzero, the word at
15804 *(*this + vcall_offset) should be added to THIS. */
15807 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15808 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15809 HOST_WIDE_INT vcall_offset, tree function)
15812 rtx this = x86_this_parameter (function);
15815 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15816 pull it in now and let DELTA benefit. */
15819 else if (vcall_offset)
15821 /* Put the this parameter into %eax. */
15823 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15824 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15827 this_reg = NULL_RTX;
15829 /* Adjust the this parameter by a fixed constant. */
15832 xops[0] = GEN_INT (delta);
15833 xops[1] = this_reg ? this_reg : this;
15836 if (!x86_64_general_operand (xops[0], DImode))
15838 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15840 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15844 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15847 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15850 /* Adjust the this parameter by a value stored in the vtable. */
15854 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15857 int tmp_regno = 2 /* ECX */;
15858 if (lookup_attribute ("fastcall",
15859 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15860 tmp_regno = 0 /* EAX */;
15861 tmp = gen_rtx_REG (SImode, tmp_regno);
15864 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15867 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15869 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15871 /* Adjust the this parameter. */
15872 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15873 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15875 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15876 xops[0] = GEN_INT (vcall_offset);
15878 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15879 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15881 xops[1] = this_reg;
15883 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15885 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15888 /* If necessary, drop THIS back to its stack slot. */
15889 if (this_reg && this_reg != this)
15891 xops[0] = this_reg;
15893 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15896 xops[0] = XEXP (DECL_RTL (function), 0);
15899 if (!flag_pic || (*targetm.binds_local_p) (function))
15900 output_asm_insn ("jmp\t%P0", xops);
15903 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15904 tmp = gen_rtx_CONST (Pmode, tmp);
15905 tmp = gen_rtx_MEM (QImode, tmp);
15907 output_asm_insn ("jmp\t%A0", xops);
15912 if (!flag_pic || (*targetm.binds_local_p) (function))
15913 output_asm_insn ("jmp\t%P0", xops);
15918 rtx sym_ref = XEXP (DECL_RTL (function), 0);
15919 tmp = (gen_rtx_SYMBOL_REF
15921 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
15922 tmp = gen_rtx_MEM (QImode, tmp);
15924 output_asm_insn ("jmp\t%0", xops);
15927 #endif /* TARGET_MACHO */
15929 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15930 output_set_got (tmp);
15933 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15934 output_asm_insn ("jmp\t{*}%1", xops);
15940 x86_file_start (void)
15942 default_file_start ();
15943 if (X86_FILE_START_VERSION_DIRECTIVE)
15944 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15945 if (X86_FILE_START_FLTUSED)
15946 fputs ("\t.global\t__fltused\n", asm_out_file);
15947 if (ix86_asm_dialect == ASM_INTEL)
15948 fputs ("\t.intel_syntax\n", asm_out_file);
15952 x86_field_alignment (tree field, int computed)
15954 enum machine_mode mode;
15955 tree type = TREE_TYPE (field);
15957 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15959 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15960 ? get_inner_array_type (type) : type);
15961 if (mode == DFmode || mode == DCmode
15962 || GET_MODE_CLASS (mode) == MODE_INT
15963 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15964 return MIN (32, computed);
15968 /* Output assembler code to FILE to increment profiler label # LABELNO
15969 for profiling a function entry. */
15971 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15976 #ifndef NO_PROFILE_COUNTERS
15977 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15979 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15983 #ifndef NO_PROFILE_COUNTERS
15984 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15986 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15990 #ifndef NO_PROFILE_COUNTERS
15991 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15992 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15994 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15998 #ifndef NO_PROFILE_COUNTERS
15999 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
16000 PROFILE_COUNT_REGISTER);
16002 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
16006 /* We don't have exact information about the insn sizes, but we may assume
16007 quite safely that we are informed about all 1 byte insns and memory
16008 address sizes. This is enough to eliminate unnecessary padding in
16012 min_insn_size (rtx insn)
16016 if (!INSN_P (insn) || !active_insn_p (insn))
16019 /* Discard alignments we've emit and jump instructions. */
16020 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
16021 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
16023 if (GET_CODE (insn) == JUMP_INSN
16024 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
16025 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
16028 /* Important case - calls are always 5 bytes.
16029 It is common to have many calls in the row. */
16030 if (GET_CODE (insn) == CALL_INSN
16031 && symbolic_reference_mentioned_p (PATTERN (insn))
16032 && !SIBLING_CALL_P (insn))
16034 if (get_attr_length (insn) <= 1)
16037 /* For normal instructions we may rely on the sizes of addresses
16038 and the presence of symbol to require 4 bytes of encoding.
16039 This is not the case for jumps where references are PC relative. */
16040 if (GET_CODE (insn) != JUMP_INSN)
16042 l = get_attr_length_address (insn);
16043 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
16052 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
16056 ix86_avoid_jump_misspredicts (void)
16058 rtx insn, start = get_insns ();
16059 int nbytes = 0, njumps = 0;
16062 /* Look for all minimal intervals of instructions containing 4 jumps.
16063 The intervals are bounded by START and INSN. NBYTES is the total
16064 size of instructions in the interval including INSN and not including
16065 START. When the NBYTES is smaller than 16 bytes, it is possible
16066 that the end of START and INSN ends up in the same 16byte page.
16068 The smallest offset in the page INSN can start is the case where START
16069 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
16070 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
16072 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16075 nbytes += min_insn_size (insn);
16077 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
16078 INSN_UID (insn), min_insn_size (insn));
16079 if ((GET_CODE (insn) == JUMP_INSN
16080 && GET_CODE (PATTERN (insn)) != ADDR_VEC
16081 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
16082 || GET_CODE (insn) == CALL_INSN)
16089 start = NEXT_INSN (start);
16090 if ((GET_CODE (start) == JUMP_INSN
16091 && GET_CODE (PATTERN (start)) != ADDR_VEC
16092 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
16093 || GET_CODE (start) == CALL_INSN)
16094 njumps--, isjump = 1;
16097 nbytes -= min_insn_size (start);
16102 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
16103 INSN_UID (start), INSN_UID (insn), nbytes);
16105 if (njumps == 3 && isjump && nbytes < 16)
16107 int padsize = 15 - nbytes + min_insn_size (insn);
16110 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
16111 INSN_UID (insn), padsize);
16112 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
16117 /* AMD Athlon works faster
16118 when RET is not destination of conditional jump or directly preceded
16119 by other jump instruction. We avoid the penalty by inserting NOP just
16120 before the RET instructions in such cases. */
16122 ix86_pad_returns (void)
16127 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
16129 basic_block bb = e->src;
16130 rtx ret = BB_END (bb);
16132 bool replace = false;
16134 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
16135 || !maybe_hot_bb_p (bb))
16137 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
16138 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
16140 if (prev && GET_CODE (prev) == CODE_LABEL)
16145 FOR_EACH_EDGE (e, ei, bb->preds)
16146 if (EDGE_FREQUENCY (e) && e->src->index >= 0
16147 && !(e->flags & EDGE_FALLTHRU))
16152 prev = prev_active_insn (ret);
16154 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
16155 || GET_CODE (prev) == CALL_INSN))
16157 /* Empty functions get branch mispredict even when the jump destination
16158 is not visible to us. */
16159 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
16164 emit_insn_before (gen_return_internal_long (), ret);
16170 /* Implement machine specific optimizations. We implement padding of returns
16171 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
16175 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
16176 ix86_pad_returns ();
16177 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
16178 ix86_avoid_jump_misspredicts ();
16181 /* Return nonzero when QImode register that must be represented via REX prefix
16184 x86_extended_QIreg_mentioned_p (rtx insn)
16187 extract_insn_cached (insn);
16188 for (i = 0; i < recog_data.n_operands; i++)
16189 if (REG_P (recog_data.operand[i])
16190 && REGNO (recog_data.operand[i]) >= 4)
16195 /* Return nonzero when P points to register encoded via REX prefix.
16196 Called via for_each_rtx. */
16198 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
16200 unsigned int regno;
16203 regno = REGNO (*p);
16204 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
16207 /* Return true when INSN mentions register that must be encoded using REX
16210 x86_extended_reg_mentioned_p (rtx insn)
16212 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
16215 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
16216 optabs would emit if we didn't have TFmode patterns. */
16219 x86_emit_floatuns (rtx operands[2])
16221 rtx neglab, donelab, i0, i1, f0, in, out;
16222 enum machine_mode mode, inmode;
16224 inmode = GET_MODE (operands[1]);
16225 if (inmode != SImode
16226 && inmode != DImode)
16230 in = force_reg (inmode, operands[1]);
16231 mode = GET_MODE (out);
16232 neglab = gen_label_rtx ();
16233 donelab = gen_label_rtx ();
16234 i1 = gen_reg_rtx (Pmode);
16235 f0 = gen_reg_rtx (mode);
16237 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
16239 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
16240 emit_jump_insn (gen_jump (donelab));
16243 emit_label (neglab);
16245 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
16246 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
16247 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
16248 expand_float (f0, i0, 0);
16249 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
16251 emit_label (donelab);
16254 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
16255 with all elements equal to VAR. Return true if successful. */
16258 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
16259 rtx target, rtx val)
16261 enum machine_mode smode, wsmode, wvmode;
16268 if (!mmx_ok && !TARGET_SSE)
16276 val = force_reg (GET_MODE_INNER (mode), val);
16277 x = gen_rtx_VEC_DUPLICATE (mode, val);
16278 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16284 if (TARGET_SSE || TARGET_3DNOW_A)
16286 val = gen_lowpart (SImode, val);
16287 x = gen_rtx_TRUNCATE (HImode, val);
16288 x = gen_rtx_VEC_DUPLICATE (mode, x);
16289 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16318 /* Replicate the value once into the next wider mode and recurse. */
16319 val = convert_modes (wsmode, smode, val, true);
16320 x = expand_simple_binop (wsmode, ASHIFT, val,
16321 GEN_INT (GET_MODE_BITSIZE (smode)),
16322 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16323 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
16325 x = gen_reg_rtx (wvmode);
16326 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
16327 gcc_unreachable ();
16328 emit_move_insn (target, gen_lowpart (mode, x));
16336 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
16337 whose low element is VAR, and other elements are zero. Return true
16341 ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
16342 rtx target, rtx var)
16344 enum machine_mode vsimode;
16351 if (!mmx_ok && !TARGET_SSE)
16357 var = force_reg (GET_MODE_INNER (mode), var);
16358 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
16359 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16364 var = force_reg (GET_MODE_INNER (mode), var);
16365 x = gen_rtx_VEC_DUPLICATE (mode, var);
16366 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
16367 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16372 vsimode = V4SImode;
16378 vsimode = V2SImode;
16381 /* Zero extend the variable element to SImode and recurse. */
16382 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
16384 x = gen_reg_rtx (vsimode);
16385 if (!ix86_expand_vector_init_low_nonzero (mmx_ok, vsimode, x, var))
16386 gcc_unreachable ();
16388 emit_move_insn (target, gen_lowpart (mode, x));
16396 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
16397 consisting of the values in VALS. It is known that all elements
16398 except ONE_VAR are constants. Return true if successful. */
16401 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
16402 rtx target, rtx vals, int one_var)
16404 rtx var = XVECEXP (vals, 0, one_var);
16405 enum machine_mode wmode;
16408 XVECEXP (vals, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
16409 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
16417 /* For the two element vectors, it's just as easy to use
16418 the general case. */
16434 /* There's no way to set one QImode entry easily. Combine
16435 the variable value with its adjacent constant value, and
16436 promote to an HImode set. */
16437 x = XVECEXP (vals, 0, one_var ^ 1);
16440 var = convert_modes (HImode, QImode, var, true);
16441 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
16442 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16443 x = GEN_INT (INTVAL (x) & 0xff);
16447 var = convert_modes (HImode, QImode, var, true);
16448 x = gen_int_mode (INTVAL (x) << 8, HImode);
16450 if (x != const0_rtx)
16451 var = expand_simple_binop (HImode, IOR, var, x, var,
16452 1, OPTAB_LIB_WIDEN);
16454 x = gen_reg_rtx (wmode);
16455 emit_move_insn (x, gen_lowpart (wmode, const_vec));
16456 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
16458 emit_move_insn (target, gen_lowpart (mode, x));
16465 emit_move_insn (target, const_vec);
16466 ix86_expand_vector_set (mmx_ok, target, var, one_var);
16470 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
16471 all values variable, and none identical. */
16474 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
16475 rtx target, rtx vals)
16477 enum machine_mode half_mode = GET_MODE_INNER (mode);
16478 rtx op0 = NULL, op1 = NULL;
16479 bool use_vec_concat = false;
16485 if (!mmx_ok && !TARGET_SSE)
16491 /* For the two element vectors, we always implement VEC_CONCAT. */
16492 op0 = XVECEXP (vals, 0, 0);
16493 op1 = XVECEXP (vals, 0, 1);
16494 use_vec_concat = true;
16498 half_mode = V2SFmode;
16501 half_mode = V2SImode;
16507 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
16508 Recurse to load the two halves. */
16510 op0 = gen_reg_rtx (half_mode);
16511 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
16512 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
16514 op1 = gen_reg_rtx (half_mode);
16515 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
16516 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
16518 use_vec_concat = true;
16529 gcc_unreachable ();
16532 if (use_vec_concat)
16534 if (!register_operand (op0, half_mode))
16535 op0 = force_reg (half_mode, op0);
16536 if (!register_operand (op1, half_mode))
16537 op1 = force_reg (half_mode, op1);
16539 emit_insn (gen_rtx_SET (VOIDmode, target,
16540 gen_rtx_VEC_CONCAT (mode, op0, op1)));
16544 int i, j, n_elts, n_words, n_elt_per_word;
16545 enum machine_mode inner_mode;
16546 rtx words[4], shift;
16548 inner_mode = GET_MODE_INNER (mode);
16549 n_elts = GET_MODE_NUNITS (mode);
16550 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
16551 n_elt_per_word = n_elts / n_words;
16552 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
16554 for (i = 0; i < n_words; ++i)
16556 rtx word = NULL_RTX;
16558 for (j = 0; j < n_elt_per_word; ++j)
16560 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
16561 elt = convert_modes (word_mode, inner_mode, elt, true);
16567 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
16568 word, 1, OPTAB_LIB_WIDEN);
16569 word = expand_simple_binop (word_mode, IOR, word, elt,
16570 word, 1, OPTAB_LIB_WIDEN);
16578 emit_move_insn (target, gen_lowpart (mode, words[0]));
16579 else if (n_words == 2)
16581 rtx tmp = gen_reg_rtx (mode);
16582 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
16583 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
16584 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
16585 emit_move_insn (target, tmp);
16587 else if (n_words == 4)
16589 rtx tmp = gen_reg_rtx (V4SImode);
16590 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
16591 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
16592 emit_move_insn (target, gen_lowpart (mode, tmp));
16595 gcc_unreachable ();
16599 /* Initialize vector TARGET via VALS. Suppress the use of MMX
16600 instructions unless MMX_OK is true. */
16603 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
16605 enum machine_mode mode = GET_MODE (target);
16606 enum machine_mode inner_mode = GET_MODE_INNER (mode);
16607 int n_elts = GET_MODE_NUNITS (mode);
16608 int n_var = 0, one_var = -1;
16609 bool all_same = true, all_const_zero = true;
16613 for (i = 0; i < n_elts; ++i)
16615 x = XVECEXP (vals, 0, i);
16616 if (!CONSTANT_P (x))
16617 n_var++, one_var = i;
16618 else if (x != CONST0_RTX (inner_mode))
16619 all_const_zero = false;
16620 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
16624 /* Constants are best loaded from the constant pool. */
16627 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
16631 /* If all values are identical, broadcast the value. */
16633 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
16634 XVECEXP (vals, 0, 0)))
16637 /* Values where only one field is non-constant are best loaded from
16638 the pool and overwritten via move later. */
16641 if (all_const_zero && one_var == 0
16642 && ix86_expand_vector_init_low_nonzero (mmx_ok, mode, target,
16643 XVECEXP (vals, 0, 0)))
16646 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
16650 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
16654 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
16656 enum machine_mode mode = GET_MODE (target);
16657 enum machine_mode inner_mode = GET_MODE_INNER (mode);
16658 bool use_vec_merge = false;
16667 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
16668 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
16670 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
16672 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
16673 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
16683 /* For the two element vectors, we implement a VEC_CONCAT with
16684 the extraction of the other element. */
16686 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
16687 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
16690 op0 = val, op1 = tmp;
16692 op0 = tmp, op1 = val;
16694 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
16695 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
16703 use_vec_merge = true;
16707 /* tmp = op0 = A B C D */
16708 tmp = copy_to_reg (target);
16710 /* op0 = C C D D */
16711 emit_insn (gen_sse_unpcklps (target, target, target));
16713 /* op0 = C C D X */
16714 ix86_expand_vector_set (false, target, val, 0);
16716 /* op0 = A B X D */
16717 emit_insn (gen_sse_shufps_1 (target, target, tmp,
16718 GEN_INT (1), GEN_INT (0),
16719 GEN_INT (2+4), GEN_INT (3+4)));
16723 tmp = copy_to_reg (target);
16724 ix86_expand_vector_set (false, target, val, 0);
16725 emit_insn (gen_sse_shufps_1 (target, target, tmp,
16726 GEN_INT (0), GEN_INT (1),
16727 GEN_INT (0+4), GEN_INT (3+4)));
16731 tmp = copy_to_reg (target);
16732 ix86_expand_vector_set (false, target, val, 0);
16733 emit_insn (gen_sse_shufps_1 (target, target, tmp,
16734 GEN_INT (0), GEN_INT (1),
16735 GEN_INT (2+4), GEN_INT (0+4)));
16739 gcc_unreachable ();
16744 /* Element 0 handled by vec_merge below. */
16747 use_vec_merge = true;
16753 /* With SSE2, use integer shuffles to swap element 0 and ELT,
16754 store into element 0, then shuffle them back. */
16758 order[0] = GEN_INT (elt);
16759 order[1] = const1_rtx;
16760 order[2] = const2_rtx;
16761 order[3] = GEN_INT (3);
16762 order[elt] = const0_rtx;
16764 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
16765 order[1], order[2], order[3]));
16767 ix86_expand_vector_set (false, target, val, 0);
16769 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
16770 order[1], order[2], order[3]));
16774 /* For SSE1, we have to reuse the V4SF code. */
16775 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
16776 gen_lowpart (SFmode, val), elt);
16781 use_vec_merge = TARGET_SSE2;
16784 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
16795 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
16796 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
16797 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
16801 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
16803 emit_move_insn (mem, target);
16805 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
16806 emit_move_insn (tmp, val);
16808 emit_move_insn (target, mem);
16813 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
16815 enum machine_mode mode = GET_MODE (vec);
16816 enum machine_mode inner_mode = GET_MODE_INNER (mode);
16817 bool use_vec_extr = false;
16830 use_vec_extr = true;
16842 tmp = gen_reg_rtx (mode);
16843 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
16844 GEN_INT (elt), GEN_INT (elt),
16845 GEN_INT (elt+4), GEN_INT (elt+4)));
16849 tmp = gen_reg_rtx (mode);
16850 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
16854 gcc_unreachable ();
16857 use_vec_extr = true;
16872 tmp = gen_reg_rtx (mode);
16873 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
16874 GEN_INT (elt), GEN_INT (elt),
16875 GEN_INT (elt), GEN_INT (elt)));
16879 tmp = gen_reg_rtx (mode);
16880 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
16884 gcc_unreachable ();
16887 use_vec_extr = true;
16892 /* For SSE1, we have to reuse the V4SF code. */
16893 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
16894 gen_lowpart (V4SFmode, vec), elt);
16900 use_vec_extr = TARGET_SSE2;
16903 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
16908 /* ??? Could extract the appropriate HImode element and shift. */
16915 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
16916 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
16918 /* Let the rtl optimizers know about the zero extension performed. */
16919 if (inner_mode == HImode)
16921 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
16922 target = gen_lowpart (SImode, target);
16925 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
16929 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
16931 emit_move_insn (mem, vec);
16933 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
16934 emit_move_insn (target, tmp);
16938 /* Implements target hook vector_mode_supported_p. */
16940 ix86_vector_mode_supported_p (enum machine_mode mode)
16942 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
16944 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
16946 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
16948 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
16953 /* Worker function for TARGET_MD_ASM_CLOBBERS.
16955 We do this in the new i386 backend to maintain source compatibility
16956 with the old cc0-based compiler. */
16959 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
16960 tree inputs ATTRIBUTE_UNUSED,
16963 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
16965 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
16967 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
16972 /* Worker function for REVERSE_CONDITION. */
16975 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
16977 return (mode != CCFPmode && mode != CCFPUmode
16978 ? reverse_condition (code)
16979 : reverse_condition_maybe_unordered (code));
16982 /* Output code to perform an x87 FP register move, from OPERANDS[1]
16986 output_387_reg_move (rtx insn, rtx *operands)
16988 if (REG_P (operands[1])
16989 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16991 if (REGNO (operands[0]) == FIRST_STACK_REG
16992 && TARGET_USE_FFREEP)
16993 return "ffreep\t%y0";
16994 return "fstp\t%y0";
16996 if (STACK_TOP_P (operands[0]))
16997 return "fld%z1\t%y1";
17001 /* Output code to perform a conditional jump to LABEL, if C2 flag in
17002 FP status register is set. */
17005 ix86_emit_fp_unordered_jump (rtx label)
17007 rtx reg = gen_reg_rtx (HImode);
17010 emit_insn (gen_x86_fnstsw_1 (reg));
17012 if (TARGET_USE_SAHF)
17014 emit_insn (gen_x86_sahf_1 (reg));
17016 temp = gen_rtx_REG (CCmode, FLAGS_REG);
17017 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
17021 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
17023 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
17024 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
17027 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
17028 gen_rtx_LABEL_REF (VOIDmode, label),
17030 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
17031 emit_jump_insn (temp);
17034 /* Output code to perform a log1p XFmode calculation. */
17036 void ix86_emit_i387_log1p (rtx op0, rtx op1)
17038 rtx label1 = gen_label_rtx ();
17039 rtx label2 = gen_label_rtx ();
17041 rtx tmp = gen_reg_rtx (XFmode);
17042 rtx tmp2 = gen_reg_rtx (XFmode);
17044 emit_insn (gen_absxf2 (tmp, op1));
17045 emit_insn (gen_cmpxf (tmp,
17046 CONST_DOUBLE_FROM_REAL_VALUE (
17047 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
17049 emit_jump_insn (gen_bge (label1));
17051 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
17052 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
17053 emit_jump (label2);
17055 emit_label (label1);
17056 emit_move_insn (tmp, CONST1_RTX (XFmode));
17057 emit_insn (gen_addxf3 (tmp, op1, tmp));
17058 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
17059 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
17061 emit_label (label2);
17064 /* Solaris named-section hook. Parameters are as for
17065 named_section_real. */
17068 i386_solaris_elf_named_section (const char *name, unsigned int flags,
17071 /* With Binutils 2.15, the "@unwind" marker must be specified on
17072 every occurrence of the ".eh_frame" section, not just the first
17075 && strcmp (name, ".eh_frame") == 0)
17077 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
17078 flags & SECTION_WRITE ? "aw" : "a");
17081 default_elf_asm_named_section (name, flags, decl);
17084 #include "gt-i386.h"