/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "basic-block.h"
#include "target-def.h"
#include "langhooks.h"
#include "tree-gimple.h"
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)				\
  ((mode) == QImode ? 0					\
   : (mode) == HImode ? 1				\
   : (mode) == SImode ? 2				\
   : (mode) == DImode ? 3				\
   : 4)
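/* Illustrative use (a sketch, not code copied from elsewhere in this
   file): the 5-entry cost arrays in the tables below are indexed per
   mode, e.g.

     ix86_cost->mult_init[MODE_INDEX (SImode)]   -- selects index 2

   where mult_init is assumed to be the multiply-start cost field as
   declared in i386.h; any mode other than QI/HI/SI/DImode falls
   through to index 4.  */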
/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = { /* costs for tuning for size */
  2, /* cost of an add instruction */
  3, /* cost of a lea instruction */
  2, /* variable shift costs */
  3, /* constant shift costs */
  {3, 3, 3, 3, 5}, /* cost of starting a multiply */
  0, /* cost of multiply per each bit set */
  {3, 3, 3, 3, 5}, /* cost of a divide/mod */
  3, /* cost of movsx */
  3, /* cost of movzx */
  2, /* cost for loading QImode using movzbl */
  {2, 2, 2}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {2, 2, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {2, 2, 2}, /* cost of loading fp registers
		in SFmode, DFmode and XFmode */
  {2, 2, 2}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  3, /* cost of moving MMX register */
  {3, 3}, /* cost of loading MMX registers
	     in SImode and DImode */
  {3, 3}, /* cost of storing MMX registers
	     in SImode and DImode */
  3, /* cost of moving SSE register */
  {3, 3, 3}, /* cost of loading SSE registers
		in SImode, DImode and TImode */
  {3, 3, 3}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  0, /* size of prefetch block */
  0, /* number of parallel prefetches */
  2, /* cost of FADD and FSUB insns.  */
  2, /* cost of FMUL instruction.  */
  2, /* cost of FDIV instruction.  */
  2, /* cost of FABS instruction.  */
  2, /* cost of FCHS instruction.  */
  2, /* cost of FSQRT instruction.  */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = { /* 386 specific costs */
  1, /* cost of an add instruction */
  1, /* cost of a lea instruction */
  3, /* variable shift costs */
  2, /* constant shift costs */
  {6, 6, 6, 6, 6}, /* cost of starting a multiply */
  1, /* cost of multiply per each bit set */
  {23, 23, 23, 23, 23}, /* cost of a divide/mod */
  3, /* cost of movsx */
  2, /* cost of movzx */
  15, /* "large" insn */
  4, /* cost for loading QImode using movzbl */
  {2, 4, 2}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {2, 4, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {8, 8, 8}, /* cost of loading fp registers
		in SFmode, DFmode and XFmode */
  {8, 8, 8}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {4, 8}, /* cost of loading MMX registers
	     in SImode and DImode */
  {4, 8}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 8, 16}, /* cost of loading SSE registers
		 in SImode, DImode and TImode */
  {4, 8, 16}, /* cost of storing SSE registers
		 in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  0, /* size of prefetch block */
  0, /* number of parallel prefetches */
  23, /* cost of FADD and FSUB insns.  */
  27, /* cost of FMUL instruction.  */
  88, /* cost of FDIV instruction.  */
  22, /* cost of FABS instruction.  */
  24, /* cost of FCHS instruction.  */
  122, /* cost of FSQRT instruction.  */
};
static const
struct processor_costs i486_cost = { /* 486 specific costs */
  1, /* cost of an add instruction */
  1, /* cost of a lea instruction */
  3, /* variable shift costs */
  2, /* constant shift costs */
  {12, 12, 12, 12, 12}, /* cost of starting a multiply */
  1, /* cost of multiply per each bit set */
  {40, 40, 40, 40, 40}, /* cost of a divide/mod */
  3, /* cost of movsx */
  2, /* cost of movzx */
  15, /* "large" insn */
  4, /* cost for loading QImode using movzbl */
  {2, 4, 2}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {2, 4, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {8, 8, 8}, /* cost of loading fp registers
		in SFmode, DFmode and XFmode */
  {8, 8, 8}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {4, 8}, /* cost of loading MMX registers
	     in SImode and DImode */
  {4, 8}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 8, 16}, /* cost of loading SSE registers
		 in SImode, DImode and TImode */
  {4, 8, 16}, /* cost of storing SSE registers
		 in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  0, /* size of prefetch block */
  0, /* number of parallel prefetches */
  8, /* cost of FADD and FSUB insns.  */
  16, /* cost of FMUL instruction.  */
  73, /* cost of FDIV instruction.  */
  3, /* cost of FABS instruction.  */
  3, /* cost of FCHS instruction.  */
  83, /* cost of FSQRT instruction.  */
};
static const
struct processor_costs pentium_cost = {
  1, /* cost of an add instruction */
  1, /* cost of a lea instruction */
  4, /* variable shift costs */
  1, /* constant shift costs */
  {11, 11, 11, 11, 11}, /* cost of starting a multiply */
  0, /* cost of multiply per each bit set */
  {25, 25, 25, 25, 25}, /* cost of a divide/mod */
  3, /* cost of movsx */
  2, /* cost of movzx */
  8, /* "large" insn */
  6, /* cost for loading QImode using movzbl */
  {2, 4, 2}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {2, 4, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {2, 2, 6}, /* cost of loading fp registers
		in SFmode, DFmode and XFmode */
  {4, 4, 6}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  8, /* cost of moving MMX register */
  {8, 8}, /* cost of loading MMX registers
	     in SImode and DImode */
  {8, 8}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 8, 16}, /* cost of loading SSE registers
		 in SImode, DImode and TImode */
  {4, 8, 16}, /* cost of storing SSE registers
		 in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  0, /* size of prefetch block */
  0, /* number of parallel prefetches */
  3, /* cost of FADD and FSUB insns.  */
  3, /* cost of FMUL instruction.  */
  39, /* cost of FDIV instruction.  */
  1, /* cost of FABS instruction.  */
  1, /* cost of FCHS instruction.  */
  70, /* cost of FSQRT instruction.  */
};
static const
struct processor_costs pentiumpro_cost = {
  1, /* cost of an add instruction */
  1, /* cost of a lea instruction */
  1, /* variable shift costs */
  1, /* constant shift costs */
  {4, 4, 4, 4, 4}, /* cost of starting a multiply */
  0, /* cost of multiply per each bit set */
  {17, 17, 17, 17, 17}, /* cost of a divide/mod */
  1, /* cost of movsx */
  1, /* cost of movzx */
  8, /* "large" insn */
  2, /* cost for loading QImode using movzbl */
  {4, 4, 4}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {2, 2, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {2, 2, 6}, /* cost of loading fp registers
		in SFmode, DFmode and XFmode */
  {4, 4, 6}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {2, 2}, /* cost of loading MMX registers
	     in SImode and DImode */
  {2, 2}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {2, 2, 8}, /* cost of loading SSE registers
		in SImode, DImode and TImode */
  {2, 2, 8}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  32, /* size of prefetch block */
  6, /* number of parallel prefetches */
  3, /* cost of FADD and FSUB insns.  */
  5, /* cost of FMUL instruction.  */
  56, /* cost of FDIV instruction.  */
  2, /* cost of FABS instruction.  */
  2, /* cost of FCHS instruction.  */
  56, /* cost of FSQRT instruction.  */
};
static const
struct processor_costs k6_cost = {
  1, /* cost of an add instruction */
  2, /* cost of a lea instruction */
  1, /* variable shift costs */
  1, /* constant shift costs */
  {3, 3, 3, 3, 3}, /* cost of starting a multiply */
  0, /* cost of multiply per each bit set */
  {18, 18, 18, 18, 18}, /* cost of a divide/mod */
  2, /* cost of movsx */
  2, /* cost of movzx */
  8, /* "large" insn */
  3, /* cost for loading QImode using movzbl */
  {4, 5, 4}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {2, 3, 2}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {6, 6, 6}, /* cost of loading fp registers
		in SFmode, DFmode and XFmode */
  {4, 4, 4}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {2, 2}, /* cost of loading MMX registers
	     in SImode and DImode */
  {2, 2}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {2, 2, 8}, /* cost of loading SSE registers
		in SImode, DImode and TImode */
  {2, 2, 8}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  6, /* MMX or SSE register to integer */
  32, /* size of prefetch block */
  1, /* number of parallel prefetches */
  2, /* cost of FADD and FSUB insns.  */
  2, /* cost of FMUL instruction.  */
  56, /* cost of FDIV instruction.  */
  2, /* cost of FABS instruction.  */
  2, /* cost of FCHS instruction.  */
  56, /* cost of FSQRT instruction.  */
};
static const
struct processor_costs athlon_cost = {
  1, /* cost of an add instruction */
  2, /* cost of a lea instruction */
  1, /* variable shift costs */
  1, /* constant shift costs */
  {5, 5, 5, 5, 5}, /* cost of starting a multiply */
  0, /* cost of multiply per each bit set */
  {18, 26, 42, 74, 74}, /* cost of a divide/mod */
  1, /* cost of movsx */
  1, /* cost of movzx */
  8, /* "large" insn */
  4, /* cost for loading QImode using movzbl */
  {3, 4, 3}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {3, 4, 3}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {4, 4, 12}, /* cost of loading fp registers
		 in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {4, 4}, /* cost of loading MMX registers
	     in SImode and DImode */
  {4, 4}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 4, 6}, /* cost of loading SSE registers
		in SImode, DImode and TImode */
  {4, 4, 5}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  5, /* MMX or SSE register to integer */
  64, /* size of prefetch block */
  6, /* number of parallel prefetches */
  4, /* cost of FADD and FSUB insns.  */
  4, /* cost of FMUL instruction.  */
  24, /* cost of FDIV instruction.  */
  2, /* cost of FABS instruction.  */
  2, /* cost of FCHS instruction.  */
  35, /* cost of FSQRT instruction.  */
};
static const
struct processor_costs k8_cost = {
  1, /* cost of an add instruction */
  2, /* cost of a lea instruction */
  1, /* variable shift costs */
  1, /* constant shift costs */
  {3, 4, 3, 4, 5}, /* cost of starting a multiply */
  0, /* cost of multiply per each bit set */
  {18, 26, 42, 74, 74}, /* cost of a divide/mod */
  1, /* cost of movsx */
  1, /* cost of movzx */
  8, /* "large" insn */
  4, /* cost for loading QImode using movzbl */
  {3, 4, 3}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {3, 4, 3}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {4, 4, 12}, /* cost of loading fp registers
		 in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {3, 3}, /* cost of loading MMX registers
	     in SImode and DImode */
  {4, 4}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 3, 6}, /* cost of loading SSE registers
		in SImode, DImode and TImode */
  {4, 4, 5}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  5, /* MMX or SSE register to integer */
  64, /* size of prefetch block */
  6, /* number of parallel prefetches */
  4, /* cost of FADD and FSUB insns.  */
  4, /* cost of FMUL instruction.  */
  19, /* cost of FDIV instruction.  */
  2, /* cost of FABS instruction.  */
  2, /* cost of FCHS instruction.  */
  35, /* cost of FSQRT instruction.  */
};
static const
struct processor_costs pentium4_cost = {
  1, /* cost of an add instruction */
  3, /* cost of a lea instruction */
  4, /* variable shift costs */
  4, /* constant shift costs */
  {15, 15, 15, 15, 15}, /* cost of starting a multiply */
  0, /* cost of multiply per each bit set */
  {56, 56, 56, 56, 56}, /* cost of a divide/mod */
  1, /* cost of movsx */
  1, /* cost of movzx */
  16, /* "large" insn */
  2, /* cost for loading QImode using movzbl */
  {4, 5, 4}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {2, 3, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {2, 2, 6}, /* cost of loading fp registers
		in SFmode, DFmode and XFmode */
  {4, 4, 6}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {2, 2}, /* cost of loading MMX registers
	     in SImode and DImode */
  {2, 2}, /* cost of storing MMX registers
	     in SImode and DImode */
  12, /* cost of moving SSE register */
  {12, 12, 12}, /* cost of loading SSE registers
		   in SImode, DImode and TImode */
  {2, 2, 8}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  10, /* MMX or SSE register to integer */
  64, /* size of prefetch block */
  6, /* number of parallel prefetches */
  5, /* cost of FADD and FSUB insns.  */
  7, /* cost of FMUL instruction.  */
  43, /* cost of FDIV instruction.  */
  2, /* cost of FABS instruction.  */
  2, /* cost of FCHS instruction.  */
  43, /* cost of FSQRT instruction.  */
};
static const
struct processor_costs nocona_cost = {
  1, /* cost of an add instruction */
  1, /* cost of a lea instruction */
  1, /* variable shift costs */
  1, /* constant shift costs */
  {10, 10, 10, 10, 10}, /* cost of starting a multiply */
  0, /* cost of multiply per each bit set */
  {66, 66, 66, 66, 66}, /* cost of a divide/mod */
  1, /* cost of movsx */
  1, /* cost of movzx */
  16, /* "large" insn */
  4, /* cost for loading QImode using movzbl */
  {4, 4, 4}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {4, 4, 4}, /* cost of storing integer registers */
  3, /* cost of reg,reg fld/fst */
  {12, 12, 12}, /* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {4, 4, 4}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  6, /* cost of moving MMX register */
  {12, 12}, /* cost of loading MMX registers
	       in SImode and DImode */
  {12, 12}, /* cost of storing MMX registers
	       in SImode and DImode */
  6, /* cost of moving SSE register */
  {12, 12, 12}, /* cost of loading SSE registers
		   in SImode, DImode and TImode */
  {12, 12, 12}, /* cost of storing SSE registers
		   in SImode, DImode and TImode */
  8, /* MMX or SSE register to integer */
  128, /* size of prefetch block */
  8, /* number of parallel prefetches */
  6, /* cost of FADD and FSUB insns.  */
  8, /* cost of FMUL instruction.  */
  40, /* cost of FDIV instruction.  */
  3, /* cost of FABS instruction.  */
  3, /* cost of FCHS instruction.  */
  44, /* cost of FSQRT instruction.  */
};
const struct processor_costs *ix86_cost = &pentium_cost;
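/* A sketch of how the active table is consulted (the real uses live in
   the cost hooks later in this file; field names are assumed from the
   processor_costs declaration in i386.h):

     total = ix86_cost->add;
     total = ix86_cost->divide[MODE_INDEX (mode)];

   Retuning therefore only requires pointing ix86_cost at a different
   table, which override_options does below.  */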
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
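/* Each x86_<feature> word below is a bitmask over the processors above.
   A feature is enabled for the CPU being tuned for when its bit,
   (1 << ix86_tune), is set; override_options tests this via TUNEMASK,
   e.g.

     if (x86_accumulate_outgoing_args & TUNEMASK)
       ...

   (see the use near the end of override_options below).  */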
const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_fisttp = m_NOCONA;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
/* Branch hints were put in P4 based on simulation results, but after
   P4 shipped, no performance benefit was observed from branch hints;
   they also increase code size.  As a result, icc never generates
   branch hints.  */
const int x86_branch_hints = 0;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
const int x86_decompose_lea = m_PENT4 | m_NOCONA;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just
   lower part of scalar values in proper format leaving the upper part
   undefined.  */
const int x86_sse_split_regs = m_ATHLON_K8;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;

/* ??? Allowing interunit moves makes it all too easy for the compiler
   to put integer data in xmm registers, which results in pretty
   abysmal code.  */
const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;

const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
/* Some CPU cores are not able to predict more than 4 branch instructions
   in the 16-byte window.  */
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
const int x86_use_bt = m_ATHLON_K8;
/* Compare and exchange was added for 80486.  */
const int x86_cmpxchg = ~m_386;
/* Exchange and add was added for 80486.  */
const int x86_xadd = ~m_386;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
/* Names for 8-bit (low), 8-bit (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */
enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  AREG, DREG, CREG, BREG,
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
};
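/* So, e.g., REGNO_REG_CLASS (0) is AREG (%eax) and REGNO_REG_CLASS (1)
   is DREG (%edx); the REGNO_REG_CLASS macro in i386.h simply indexes
   this table.  */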
/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};

/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,	/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
rtx ix86_compare_emitted = NULL_RTX;

/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
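/* Assuming the usual 64-bit values from i386.h (REGPARM_MAX == 6
   integer registers of UNITS_PER_WORD == 8 bytes each, and
   SSE_REGPARM_MAX == 8 vector registers of 16 bytes each), this
   works out to 6*8 + 8*16 = 176 bytes.  */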
/* Define the structure for the machine field in struct function.  */

struct stack_local_entry GTY(())
{
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};

/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					<- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					<- HARD_FRAME_POINTER
   [saved regs]

	> to_allocate			<- FRAME_POINTER
*/
struct ix86_frame
{
  int outgoing_arguments_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
/* Code model option.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect.  */
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which CPU are we scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if the SSE prefetch instruction is not a NOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number.  */
static int ix86_regparm;

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c.  */
int ix86_branch_cost;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
static bool ix86_handle_option (size_t, const char *, int);
static void output_pic_addr_const (FILE *, rtx, int);
static void put_condition_code (enum rtx_code, enum machine_mode,
				int, int, FILE *);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
						   rtx *);
static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
						   enum machine_mode);
static rtx get_thread_pointer (int);
static rtx legitimize_tls_address (rtx, enum tls_model, int);
static void get_pc_thunk_name (char [32], unsigned int);
static rtx gen_push (rtx);
static int ix86_flags_dependant (rtx, rtx, enum attr_type);
static int ix86_agi_dependant (rtx, rtx, enum attr_type);
static struct machine_function * ix86_init_machine_status (void);
static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
static int ix86_nsaved_regs (void);
static void ix86_emit_save_regs (void);
static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
static HOST_WIDE_INT ix86_GOT_alias_set (void);
static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
static rtx ix86_expand_aligntest (rtx, int);
static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
static int ix86_issue_rate (void);
static int ix86_adjust_cost (rtx, rtx, rtx, int);
static int ia32_multipass_dfa_lookahead (void);
static void ix86_init_mmx_sse_builtins (void);
static rtx x86_this_parameter (tree);
static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				 HOST_WIDE_INT, tree);
static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void x86_file_start (void);
static void ix86_reorg (void);
static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
static tree ix86_build_builtin_va_list (void);
static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					 tree, int *, int);
static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
static bool ix86_vector_mode_supported_p (enum machine_mode);

static int ix86_address_cost (rtx);
static bool ix86_cannot_force_const_mem (rtx);
static rtx ix86_delegitimize_address (rtx);

static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;

struct builtin_description;
static rtx ix86_expand_sse_comi (const struct builtin_description *,
				 tree, rtx);
static rtx ix86_expand_sse_compare (const struct builtin_description *,
				    tree, rtx);
static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_store_builtin (enum insn_code, tree);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
static int ix86_fp_comparison_cost (enum rtx_code code);
static unsigned int ix86_select_alt_pic_regnum (void);
static int ix86_save_reg (unsigned int, int);
static void ix86_compute_frame_layout (struct ix86_frame *);
static int ix86_comp_type_attributes (tree, tree);
static int ix86_function_regparm (tree, tree);
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall (tree, tree);
static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
static int ix86_value_regno (enum machine_mode, tree);
static bool contains_128bit_aligned_vector_p (tree);
static rtx ix86_struct_value_rtx (tree, int);
static bool ix86_ms_bitfield_layout_p (tree);
static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
static int extended_reg_mentioned_1 (rtx *, void *);
static bool ix86_rtx_costs (rtx, int, int, int *);
static int min_insn_size (rtx);
static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				    tree, bool);
static void ix86_init_builtins (void);
static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static const char *ix86_mangle_fundamental_type (tree);
static tree ix86_stack_protect_fail (void);

/* This function is only used on Solaris.  */
static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
     ATTRIBUTE_UNUSED;
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the
   exception of the SSESF and SSEDF classes, which are basically SSE
   class; gcc just uses SF or DFmode moves instead of DImode moves to
   avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode
   moves whenever possible (the upper half does contain padding).  */
enum x86_64_reg_class
{
  X86_64_INTEGER_CLASS,
  X86_64_INTEGERSI_CLASS,
  X86_64_COMPLEX_X87_CLASS,
};
static const char * const x86_64_reg_class_name[] = {
  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
  "sseup", "x87", "x87up", "cplx87", "no"
};

#define MAX_CLASSES 4
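/* For example, under this classification scheme a struct { double d;
   long l; } argument occupies two eightbytes classified (by the
   classification code later in this file) as SSEDF and INTEGER
   respectively, so it is passed in one SSE register and one integer
   register.  (Illustrative only.)  */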
/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;
static void init_ext_80387_constants (void);
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS	\
  (TARGET_DEFAULT			\
   | TARGET_64BIT_DEFAULT		\
   | TARGET_SUBTARGET_DEFAULT		\
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)

#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_FUNDAMENTAL_TYPE
#define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

struct gcc_target targetm = TARGET_INITIALIZER;
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif
/* Implement TARGET_HANDLE_OPTION.  */

static bool
ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
{
  switch (code)
    {
    case OPT_m3dnow:
      if (!value)
	{
	  target_flags &= ~MASK_3DNOW_A;
	  target_flags_explicit |= MASK_3DNOW_A;
	}
      return true;

    case OPT_mmmx:
      if (!value)
	{
	  target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
	  target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
	}
      return true;

    case OPT_msse:
      if (!value)
	{
	  target_flags &= ~(MASK_SSE2 | MASK_SSE3);
	  target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
	}
      return true;

    case OPT_msse2:
      if (!value)
	{
	  target_flags &= ~MASK_SSE3;
	  target_flags_explicit |= MASK_SSE3;
	}
      return true;

    default:
      return true;
    }
}
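/* The cases above encode the ISA dependency chains: disabling MMX also
   clears the 3dNOW and 3dNOW_A masks, disabling SSE clears SSE2 and
   SSE3, and disabling SSE2 clears SSE3.  Recording the cleared bits in
   target_flags_explicit keeps override_options below from silently
   re-enabling what the user explicitly disabled.  */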
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */
void
override_options (void)
{
  int i;
  int ix86_tune_defaulted = 0;

  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16
  static struct ptt
    {
      const struct processor_costs *cost;	/* Processor costs */
      const int target_enable;			/* Target flags to enable.  */
      const int target_disable;			/* Target flags to disable.  */
      const int align_loop;			/* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
      {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
      {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
      {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
    };
  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	{
	  PTA_SSE = 1,
	  PTA_SSE2 = 2,
	  PTA_SSE3 = 4,
	  PTA_MMX = 8,
	  PTA_PREFETCH_SSE = 16,
	  PTA_3DNOW = 32,
	  PTA_3DNOW_A = 64,
	  PTA_64BIT = 128
	} flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
				       | PTA_MMX | PTA_PREFETCH_SSE},
      {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
					| PTA_MMX | PTA_PREFETCH_SSE},
      {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
				     | PTA_MMX | PTA_PREFETCH_SSE},
      {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
				   | PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				   | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
					 | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				     | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
			       | PTA_SSE | PTA_SSE2 },
      {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
			   | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
				| PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
				 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
				  | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);
#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif
  /* Set the default values for switches whose default depends on
     TARGET_64BIT in case they weren't overwritten by command line
     options.  */
  if (TARGET_64BIT)
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 1;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 0;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 0;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }
  if (!ix86_tune_string && ix86_arch_string)
    ix86_tune_string = ix86_arch_string;
  if (!ix86_tune_string)
    {
      ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
      ix86_tune_defaulted = 1;
    }
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (!strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model %qs not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model %<large%> not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_tune = ix86_arch;
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(target_flags_explicit & MASK_MMX))
	  target_flags |= MASK_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(target_flags_explicit & MASK_3DNOW))
	  target_flags |= MASK_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(target_flags_explicit & MASK_3DNOW_A))
	  target_flags |= MASK_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(target_flags_explicit & MASK_SSE))
	  target_flags |= MASK_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(target_flags_explicit & MASK_SSE2))
	  target_flags |= MASK_SSE2;
	if (processor_alias_table[i].flags & PTA_SSE3
	    && !(target_flags_explicit & MASK_SSE3))
	  target_flags |= MASK_SSE3;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 "
		 "instruction set");
	break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  {
	    if (ix86_tune_defaulted)
	      {
		ix86_tune_string = "x86-64";
		for (i = 0; i < pta_size; i++)
		  if (! strcmp (ix86_tune_string,
				processor_alias_table[i].name))
		    break;
		ix86_tune = processor_alias_table[i].processor;
	      }
	    else
	      error ("CPU you selected does not support x86-64 "
		     "instruction set");
	  }
	/* Intel CPUs have always interpreted SSE prefetch instructions as
	   NOPs; so, we can enable SSE prefetch instructions even when
	   -mtune (rather than -march) points us to a processor that has them.
	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
	   higher processors.  */
	if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
	  x86_prefetch_sse = true;
	break;
      }
  if (i == pta_size)
    error ("bad value (%s) for -mtune= switch", ix86_tune_string);
  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;
  target_flags |= processor_target_table[ix86_tune].target_enable;
  target_flags &= ~processor_target_table[ix86_tune].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;
  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning (0, "-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning (0, "-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning (0, "-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }

  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_tune].align_func;
    }
  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
				   ? TARGET_64BIT ? 128 : 32
				   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
	       TARGET_64BIT ? 4 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }
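  /* E.g. -mpreferred-stack-boundary=4 yields (1 << 4) * BITS_PER_UNIT
     = 16 bytes = 128 bits, the alignment SSE __m128 spills need.  */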
  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }
  if (ix86_tls_dialect_string)
    {
      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
	ix86_tls_dialect = TLS_DIALECT_GNU;
      else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
	ix86_tls_dialect = TLS_DIALECT_SUN;
      else
	error ("bad value (%s) for -mtls-dialect= switch",
	       ix86_tls_dialect_string);
    }
  /* Keep nonleaf frame pointers.  */
  if (flag_omit_frame_pointer)
    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_unsafe_math_optimizations)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  /* Likewise, if the target doesn't have a 387, or we've specified
     software floating point, don't use 387 inline intrinsics.  */
  if (!TARGET_80387)
    target_flags |= MASK_NO_FANCY_MATH_387;
  /* Turn on SSE2 builtins for -msse3.  */
  if (TARGET_SSE3)
    target_flags |= MASK_SSE2;

  /* Turn on SSE builtins for -msse2.  */
  if (TARGET_SSE2)
    target_flags |= MASK_SSE;

  /* Turn on MMX builtins for -msse.  */
  if (TARGET_SSE)
    {
      target_flags |= MASK_MMX & ~target_flags_explicit;
      x86_prefetch_sse = true;
    }

  /* Turn on MMX builtins for 3Dnow.  */
  if (TARGET_3DNOW)
    target_flags |= MASK_MMX;
  if (TARGET_64BIT)
    {
      if (TARGET_ALIGN_DOUBLE)
	error ("-malign-double makes no sense in the 64bit mode");
      if (TARGET_RTD)
	error ("-mrtd calling convention not supported in the 64bit mode");

      /* Enable by default the SSE and MMX builtins.  Do allow the user to
	 explicitly disable any of these.  In particular, disabling SSE and
	 MMX for kernel code is extremely useful.  */
      target_flags
	|= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
	    & ~target_flags_explicit);
    }
  else
    {
      /* The i386 ABI does not specify a red zone.  It still makes sense
	 to use it when the programmer takes care to keep the stack from
	 being destroyed.  */
      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
	target_flags |= MASK_NO_RED_ZONE;
    }
  ix86_fpmath = TARGET_FPMATH_DEFAULT;

  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
	ix86_fpmath = FPMATH_387;
      else if (! strcmp (ix86_fpmath_string, "sse"))
	{
	  if (!TARGET_SSE)
	    {
	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE;
	}
      else if (! strcmp (ix86_fpmath_string, "387,sse")
	       || ! strcmp (ix86_fpmath_string, "sse,387"))
	{
	  if (!TARGET_SSE)
	    {
	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if (!TARGET_80387)
	    {
	      warning (0, "387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
	}
      else
	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
    }
  /* If the i387 is disabled, then do not return values in it.  */
  if (!TARGET_80387)
    target_flags &= ~MASK_FLOAT_RETURNS;

  if ((x86_accumulate_outgoing_args & TUNEMASK)
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }

  /* When scheduling description is not available, disable scheduler pass
     so it won't slow down the compilation and make x87 code slower.  */
  if (!TARGET_SCHEDULE)
    flag_schedule_insns_after_reload = flag_schedule_insns = 0;
}
1637 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1639 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1640 make the problem with not enough registers even worse. */
1641 #ifdef INSN_SCHEDULING
1643 flag_schedule_insns = 0;
1647 /* The Darwin libraries never set errno, so we might as well
1648 avoid calling them when that's the only reason we would. */
1649 flag_errno_math = 0;
1651 /* The default values of these switches depend on TARGET_64BIT, which is
1652 not known at this moment. Mark these values with 2 and let the user
1653 override them. If there is no command-line option specifying them,
1654 we will set the defaults in override_options. */
1656 flag_omit_frame_pointer = 2;
1657 flag_pcc_struct_return = 2;
1658 flag_asynchronous_unwind_tables = 2;
1659 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
1660 SUBTARGET_OPTIMIZATION_OPTIONS;
1664 /* Table of valid machine attributes. */
1665 const struct attribute_spec ix86_attribute_table[] =
1667 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1668 /* Stdcall attribute says callee is responsible for popping arguments
1669 if they are not variable. */
1670 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
1671 /* Fastcall attribute says callee is responsible for popping arguments
1672 if they are not variable. */
1673 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
1674 /* Cdecl attribute says the callee is a normal C declaration */
1675 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
1676 /* Regparm attribute specifies how many integer arguments are to be
1677 passed in registers. */
1678 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
1679 /* Sseregparm attribute says we are using x86_64 calling conventions
1680 for FP arguments. */
1681 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
1682 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1683 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
1684 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
1685 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1687 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1688 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1689 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1690 SUBTARGET_ATTRIBUTE_TABLE,
1692 { NULL, 0, 0, false, false, false, NULL }
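/* Illustrative use of the table entries above (a sketch, not from the
   original sources): the declarations

     int __attribute__ ((regparm (3))) add3 (int a, int b, int c);
     int __attribute__ ((fastcall)) scale (int a, int b);

   route ADD3's three arguments through %eax, %edx and %ecx, while
   fastcall passes SCALE's first two integer arguments in %ecx and
   %edx.  */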
1695 /* Decide whether we can make a sibling call to a function. DECL is the
1696 declaration of the function being targeted by the call and EXP is the
1697 CALL_EXPR representing the call. */
1700 ix86_function_ok_for_sibcall (tree decl, tree exp)
1704 /* If we are generating position-independent code, we cannot sibcall
1705 optimize any indirect call, or a direct call to a global function,
1706 as the PLT requires %ebx be live. */
1707 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1715 /* If we are returning floats on the 80387 register stack, we cannot
1716 make a sibcall from a function that doesn't return a float to a
1717 function that does or, conversely, from a function that does return
1718 a float to a function that doesn't; the necessary stack adjustment
1719 would not be executed. */
1720 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp), func))
1721 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
1725 /* If this call is indirect, we'll need to be able to use a call-clobbered
1726 register for the address of the target function. Make sure that all
1727 such registers are not used for passing parameters. */
1728 if (!decl && !TARGET_64BIT)
1732 /* We're looking at the CALL_EXPR; we need the type of the function. */
1733 type = TREE_OPERAND (exp, 0); /* pointer expression */
1734 type = TREE_TYPE (type); /* pointer type */
1735 type = TREE_TYPE (type); /* function type */
1737 if (ix86_function_regparm (type, NULL) >= 3)
1739 /* ??? Need to count the actual number of registers to be used,
1740 not the possible number of registers. Fix later. */
1745 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1746 /* Dllimport'd functions are also called indirectly. */
1747 if (decl && lookup_attribute ("dllimport", DECL_ATTRIBUTES (decl))
1748 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
1752 /* Otherwise okay. That also includes certain types of indirect calls. */
1756 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
1757 calling convention attributes;
1758 arguments as in struct attribute_spec.handler. */
1761 ix86_handle_cconv_attribute (tree *node, tree name,
1763 int flags ATTRIBUTE_UNUSED,
1766 if (TREE_CODE (*node) != FUNCTION_TYPE
1767 && TREE_CODE (*node) != METHOD_TYPE
1768 && TREE_CODE (*node) != FIELD_DECL
1769 && TREE_CODE (*node) != TYPE_DECL)
1771 warning (OPT_Wattributes, "%qs attribute only applies to functions",
1772 IDENTIFIER_POINTER (name));
1773 *no_add_attrs = true;
1777 /* Can combine regparm with all attributes but fastcall. */
1778 if (is_attribute_p ("regparm", name))
1782 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1784 error ("fastcall and regparm attributes are not compatible");
1787 cst = TREE_VALUE (args);
1788 if (TREE_CODE (cst) != INTEGER_CST)
1790 warning (OPT_Wattributes,
1791 "%qs attribute requires an integer constant argument",
1792 IDENTIFIER_POINTER (name));
1793 *no_add_attrs = true;
1795 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1797 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
1798 IDENTIFIER_POINTER (name), REGPARM_MAX);
1799 *no_add_attrs = true;
1807 warning (OPT_Wattributes, "%qs attribute ignored",
1808 IDENTIFIER_POINTER (name));
1809 *no_add_attrs = true;
1813 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
1814 if (is_attribute_p ("fastcall", name))
1816 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
1818 error ("fastcall and cdecl attributes are not compatible");
1820 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1822 error ("fastcall and stdcall attributes are not compatible");
1824 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1826 error ("fastcall and regparm attributes are not compatible");
1830 /* Can combine stdcall with fastcall (redundant), regparm and sseregparm. */
1832 else if (is_attribute_p ("stdcall", name))
1834 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
1836 error ("stdcall and cdecl attributes are not compatible");
1838 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1840 error ("stdcall and fastcall attributes are not compatible");
1844 /* Can combine cdecl with regparm and sseregparm. */
1845 else if (is_attribute_p ("cdecl", name))
1847 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1849 error ("stdcall and cdecl attributes are not compatible");
1851 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1853 error ("fastcall and cdecl attributes are not compatible");
1857 /* Can combine sseregparm with all attributes. */
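/* Illustrative combinations exercised by the checks above (a sketch,
   not from the original sources):

     void __attribute__ ((stdcall, sseregparm)) ok (float);   // accepted
     void __attribute__ ((fastcall, regparm (2))) bad (int);  // rejected

   The second declaration triggers the "fastcall and regparm
   attributes are not compatible" error.  */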
1862 /* Return 0 if the attributes for two types are incompatible, 1 if they
1863 are compatible, and 2 if they are nearly compatible (which causes a
1864 warning to be generated). */
1867 ix86_comp_type_attributes (tree type1, tree type2)
1869 /* Check for mismatch of non-default calling convention. */
1870 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1872 if (TREE_CODE (type1) != FUNCTION_TYPE)
1875 /* Check for mismatched fastcall/regparm types. */
1876 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1877 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1878 || (ix86_function_regparm (type1, NULL)
1879 != ix86_function_regparm (type2, NULL)))
1882 /* Check for mismatched sseregparm types. */
1883 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
1884 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
1887 /* Check for mismatched return types (cdecl vs stdcall). */
1888 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1889 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1895 /* Return the regparm value for a function with the indicated TYPE and DECL.
1896 DECL may be NULL when calling the function indirectly
1897 or considering a libcall. */
1900 ix86_function_regparm (tree type, tree decl)
1903 int regparm = ix86_regparm;
1904 bool user_convention = false;
1908 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1911 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1912 user_convention = true;
1915 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1918 user_convention = true;
1921 /* Use register calling convention for local functions when possible. */
1922 if (!TARGET_64BIT && !user_convention && decl
1923 && flag_unit_at_a_time && !profile_flag)
1925 struct cgraph_local_info *i = cgraph_local_info (decl);
1928 /* We can't use regparm(3) for nested functions, as these use the
1929 static chain pointer in the third argument. */
1930 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1940 /* Return 1 or 2 if we can pass up to 8 SFmode (1) and DFmode (2) arguments
1941 in SSE registers for a function with the indicated TYPE and DECL.
1942 DECL may be NULL when calling the function indirectly
1943 or considering a libcall. Otherwise return 0. */
1946 ix86_function_sseregparm (tree type, tree decl)
1948 /* Use SSE registers to pass SFmode and DFmode arguments if requested
1949 by the sseregparm attribute. */
1951 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type)))
1956 error ("calling %qD with attribute sseregparm without "
1957 "SSE/SSE2 enabled", decl);
1959 error ("calling %qT with attribute sseregparm without "
1960 "SSE/SSE2 enabled", type);
1967 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
1968 in SSE registers even in 32-bit mode, and allow not just 3 but up
1969 to 8 SSE arguments in registers. */
1970 if (!TARGET_64BIT && decl
1971 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
1973 struct cgraph_local_info *i = cgraph_local_info (decl);
1975 return TARGET_SSE2 ? 2 : 1;
1981 /* Return true if EAX is live at the start of the function. Used by
1982 ix86_expand_prologue to determine if we need special help before
1983 calling allocate_stack_worker. */
1986 ix86_eax_live_at_start_p (void)
1988 /* Cheat. Don't bother working forward from ix86_function_regparm
1989 to the function type to whether an actual argument is located in
1990 eax. Instead just look at cfg info, which is still close enough
1991 to correct at this point. This gives false positives for broken
1992 functions that might use uninitialized data that happens to be
1993 allocated in eax, but who cares? */
1994 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
1997 /* Value is the number of bytes of arguments automatically
1998 popped when returning from a subroutine call.
1999 FUNDECL is the declaration node of the function (as a tree),
2000 FUNTYPE is the data type of the function (as a tree),
2001 or for a library call it is an identifier node for the subroutine name.
2002 SIZE is the number of bytes of arguments passed on the stack.
2004 On the 80386, the RTD insn may be used to pop them if the number
2005 of args is fixed, but if the number is variable then the caller
2006 must pop them all. RTD can't be used for library calls now
2007 because the library is compiled with the Unix compiler.
2008 Use of RTD is a selectable option, since it is incompatible with
2009 standard Unix calling sequences. If the option is not selected,
2010 the caller must always pop the args.
2012 The attribute stdcall is equivalent to RTD on a per module basis. */
2015 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2017 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2019 /* Cdecl functions override -mrtd, and never pop the stack. */
2020 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2022 /* Stdcall and fastcall functions will pop the stack if not variable args. */
2024 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2025 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2029 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2030 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2031 == void_type_node)))
2035 /* Lose any fake structure return argument if it is passed on the stack. */
2036 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2038 && !KEEP_AGGREGATE_RETURN_POINTER)
2040 int nregs = ix86_function_regparm (funtype, fundecl);
2043 return GET_MODE_SIZE (Pmode);
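/* Worked example (illustrative, not from the original sources):

     void __attribute__ ((stdcall)) f (int a, int b);

   has a fixed argument list, so F returns with "ret $8" and pops its
   own 8 bytes of arguments; the plain cdecl equivalent uses a bare
   "ret" and leaves the cleanup to the caller.  */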
2049 /* Argument support functions. */
2051 /* Return true when a register may be used to pass function parameters. */
2053 ix86_function_arg_regno_p (int regno)
2057 return (regno < REGPARM_MAX
2058 || (TARGET_MMX && MMX_REGNO_P (regno)
2059 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2060 || (TARGET_SSE && SSE_REGNO_P (regno)
2061 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2063 if (TARGET_SSE && SSE_REGNO_P (regno)
2064 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2066 /* RAX is used as a hidden argument to va_arg functions. */
2069 for (i = 0; i < REGPARM_MAX; i++)
2070 if (regno == x86_64_int_parameter_registers[i])
2075 /* Return true if we do not know how to pass TYPE solely in registers. */
2078 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2080 if (must_pass_in_stack_var_size_or_pad (mode, type))
2083 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2084 The layout_type routine is crafty and tries to trick us into passing
2085 currently unsupported vector types on the stack by using TImode. */
2086 return (!TARGET_64BIT && mode == TImode
2087 && type && TREE_CODE (type) != VECTOR_TYPE);
2090 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2091 for a call to a function whose data type is FNTYPE.
2092 For a library call, FNTYPE is 0. */
2095 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2096 tree fntype, /* tree ptr for function decl */
2097 rtx libname, /* SYMBOL_REF of library name or 0 */
2100 static CUMULATIVE_ARGS zero_cum;
2101 tree param, next_param;
2103 if (TARGET_DEBUG_ARG)
2105 fprintf (stderr, "\ninit_cumulative_args (");
2107 fprintf (stderr, "fntype code = %s, ret code = %s",
2108 tree_code_name[(int) TREE_CODE (fntype)],
2109 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2111 fprintf (stderr, "no fntype");
2114 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2119 /* Set up the number of registers to use for passing arguments. */
2120 cum->nregs = ix86_regparm;
2122 cum->sse_nregs = SSE_REGPARM_MAX;
2124 cum->mmx_nregs = MMX_REGPARM_MAX;
2125 cum->warn_sse = true;
2126 cum->warn_mmx = true;
2127 cum->maybe_vaarg = false;
2129 /* Use the ecx and edx registers if the function has the fastcall
2130 attribute, else look for regparm information. */
2131 if (fntype && !TARGET_64BIT)
2133 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2139 cum->nregs = ix86_function_regparm (fntype, fndecl);
2142 /* Set up the number of SSE registers used for passing SFmode
2143 and DFmode arguments. Warn for mismatching ABI. */
2144 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2146 /* Determine if this function has variable arguments. This is
2147 indicated by the last argument being 'void_type_node' if there
2148 are no variable arguments. If there are variable arguments, then
2149 we won't pass anything in registers in 32-bit mode. */
2151 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2153 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2154 param != 0; param = next_param)
2156 next_param = TREE_CHAIN (param);
2157 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2167 cum->float_in_sse = 0;
2169 cum->maybe_vaarg = true;
2173 if ((!fntype && !libname)
2174 || (fntype && !TYPE_ARG_TYPES (fntype)))
2175 cum->maybe_vaarg = true;
2177 if (TARGET_DEBUG_ARG)
2178 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2183 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2184 But in the case of vector types, it is some vector mode.
2186 When we have only some of our vector isa extensions enabled, then there
2187 are some modes for which vector_mode_supported_p is false. For these
2188 modes, the generic vector support in gcc will choose some non-vector mode
2189 in order to implement the type. By computing the natural mode, we'll
2190 select the proper ABI location for the operand and not depend on whatever
2191 the middle-end decides to do with these vector types. */
2193 static enum machine_mode
2194 type_natural_mode (tree type)
2196 enum machine_mode mode = TYPE_MODE (type);
2198 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2200 HOST_WIDE_INT size = int_size_in_bytes (type);
2201 if ((size == 8 || size == 16)
2202 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2203 && TYPE_VECTOR_SUBPARTS (type) > 1)
2205 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2207 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2208 mode = MIN_MODE_VECTOR_FLOAT;
2210 mode = MIN_MODE_VECTOR_INT;
2212 /* Get the mode which has this inner mode and number of units. */
2213 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2214 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2215 && GET_MODE_INNER (mode) == innermode)
2225 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2226 this may not agree with the mode that the type system has chosen for the
2227 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2228 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2231 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2236 if (orig_mode != BLKmode)
2237 tmp = gen_rtx_REG (orig_mode, regno);
2240 tmp = gen_rtx_REG (mode, regno);
2241 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2242 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2248 /* x86-64 register passing implementation. See the x86-64 ABI for details.
2249 The goal of this code is to classify each 8-byte chunk of the incoming
2250 argument by register class and assign registers accordingly. */
2252 /* Return the union class of CLASS1 and CLASS2.
2253 See the x86-64 PS ABI for details. */
2255 static enum x86_64_reg_class
2256 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2258 /* Rule #1: If both classes are equal, this is the resulting class. */
2259 if (class1 == class2)
2262 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is the other class. */
2264 if (class1 == X86_64_NO_CLASS)
2266 if (class2 == X86_64_NO_CLASS)
2269 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2270 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2271 return X86_64_MEMORY_CLASS;
2273 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2274 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2275 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2276 return X86_64_INTEGERSI_CLASS;
2277 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2278 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2279 return X86_64_INTEGER_CLASS;
2281 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, MEMORY is used. */
2283 if (class1 == X86_64_X87_CLASS
2284 || class1 == X86_64_X87UP_CLASS
2285 || class1 == X86_64_COMPLEX_X87_CLASS
2286 || class2 == X86_64_X87_CLASS
2287 || class2 == X86_64_X87UP_CLASS
2288 || class2 == X86_64_COMPLEX_X87_CLASS)
2289 return X86_64_MEMORY_CLASS;
2291 /* Rule #6: Otherwise class SSE is used. */
2292 return X86_64_SSE_CLASS;
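/* Worked example (illustrative, not from the original sources): for

     union { double d; long l; }

   the single 8-byte word is classified SSEDF from D and INTEGER from
   L; rule #4 merges these to INTEGER, so the union is passed in a
   general-purpose register.  */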
2295 /* Classify the argument of type TYPE and mode MODE.
2296 CLASSES will be filled by the register class used to pass each word
2297 of the operand. The number of words is returned. In case the parameter
2298 should be passed in memory, 0 is returned. As a special case for zero
2299 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2301 BIT_OFFSET is used internally for handling records; it specifies the
2302 offset in bits, modulo 256, to avoid overflow cases.
2304 See the x86-64 PS ABI for details.
2308 classify_argument (enum machine_mode mode, tree type,
2309 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2311 HOST_WIDE_INT bytes =
2312 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2313 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2315 /* Variable sized entities are always passed/returned in memory. */
2319 if (mode != VOIDmode
2320 && targetm.calls.must_pass_in_stack (mode, type))
2323 if (type && AGGREGATE_TYPE_P (type))
2327 enum x86_64_reg_class subclasses[MAX_CLASSES];
2329 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2333 for (i = 0; i < words; i++)
2334 classes[i] = X86_64_NO_CLASS;
2336 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
2337 signal the memory class, so handle this as a special case. */
2340 classes[0] = X86_64_NO_CLASS;
2344 /* Classify each field of the record and merge the classes. */
2345 switch (TREE_CODE (type))
2348 /* For classes, first merge in the fields of the base classes. */
2349 if (TYPE_BINFO (type))
2351 tree binfo, base_binfo;
2354 for (binfo = TYPE_BINFO (type), basenum = 0;
2355 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2358 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2359 tree type = BINFO_TYPE (base_binfo);
2361 num = classify_argument (TYPE_MODE (type),
2363 (offset + bit_offset) % 256);
2366 for (i = 0; i < num; i++)
2368 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2370 merge_classes (subclasses[i], classes[i + pos]);
2374 /* And now merge the fields of the structure. */
2375 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2377 if (TREE_CODE (field) == FIELD_DECL)
2381 /* Bitfields are always classified as integer. Handle them
2382 early, since later code would consider them to be
2383 misaligned integers. */
2384 if (DECL_BIT_FIELD (field))
2386 for (i = int_bit_position (field) / 8 / 8;
2387 i < (int_bit_position (field)
2388 + tree_low_cst (DECL_SIZE (field), 0)
2391 merge_classes (X86_64_INTEGER_CLASS,
2396 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2397 TREE_TYPE (field), subclasses,
2398 (int_bit_position (field)
2399 + bit_offset) % 256);
2402 for (i = 0; i < num; i++)
2405 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2407 merge_classes (subclasses[i], classes[i + pos]);
2415 /* Arrays are handled as small records. */
2418 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2419 TREE_TYPE (type), subclasses, bit_offset);
2423 /* The partial classes are now full classes. */
2424 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2425 subclasses[0] = X86_64_SSE_CLASS;
2426 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2427 subclasses[0] = X86_64_INTEGER_CLASS;
2429 for (i = 0; i < words; i++)
2430 classes[i] = subclasses[i % num];
2435 case QUAL_UNION_TYPE:
2436 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
2439 /* Unions are not derived. */
2440 gcc_assert (!TYPE_BINFO (type)
2441 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
2442 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2444 if (TREE_CODE (field) == FIELD_DECL)
2447 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2448 TREE_TYPE (field), subclasses,
2452 for (i = 0; i < num; i++)
2453 classes[i] = merge_classes (subclasses[i], classes[i]);
2462 /* Final merger cleanup. */
2463 for (i = 0; i < words; i++)
2465 /* If one class is MEMORY, everything should be passed in memory. */
2467 if (classes[i] == X86_64_MEMORY_CLASS)
2470 /* The X86_64_SSEUP_CLASS should always be preceded by
2471 X86_64_SSE_CLASS. */
2472 if (classes[i] == X86_64_SSEUP_CLASS
2473 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2474 classes[i] = X86_64_SSE_CLASS;
2476 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2477 if (classes[i] == X86_64_X87UP_CLASS
2478 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2479 classes[i] = X86_64_SSE_CLASS;
2484 /* Compute the alignment needed. We align all types to natural boundaries,
2485 with the exception of XFmode, which is aligned to 64 bits. */
2486 if (mode != VOIDmode && mode != BLKmode)
2488 int mode_alignment = GET_MODE_BITSIZE (mode);
2491 mode_alignment = 128;
2492 else if (mode == XCmode)
2493 mode_alignment = 256;
2494 if (COMPLEX_MODE_P (mode))
2495 mode_alignment /= 2;
2496 /* Misaligned fields are always returned in memory. */
2497 if (bit_offset % mode_alignment)
2501 /* For V1xx modes, just use the base mode. */
2502 if (VECTOR_MODE_P (mode)
2503 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2504 mode = GET_MODE_INNER (mode);
2506 /* Classification of atomic types. */
2516 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2517 classes[0] = X86_64_INTEGERSI_CLASS;
2519 classes[0] = X86_64_INTEGER_CLASS;
2523 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2528 if (!(bit_offset % 64))
2529 classes[0] = X86_64_SSESF_CLASS;
2531 classes[0] = X86_64_SSE_CLASS;
2534 classes[0] = X86_64_SSEDF_CLASS;
2537 classes[0] = X86_64_X87_CLASS;
2538 classes[1] = X86_64_X87UP_CLASS;
2541 classes[0] = X86_64_SSE_CLASS;
2542 classes[1] = X86_64_SSEUP_CLASS;
2545 classes[0] = X86_64_SSE_CLASS;
2548 classes[0] = X86_64_SSEDF_CLASS;
2549 classes[1] = X86_64_SSEDF_CLASS;
2552 classes[0] = X86_64_COMPLEX_X87_CLASS;
2555 /* These modes are larger than 16 bytes. */
2563 classes[0] = X86_64_SSE_CLASS;
2564 classes[1] = X86_64_SSEUP_CLASS;
2570 classes[0] = X86_64_SSE_CLASS;
2576 gcc_assert (VECTOR_MODE_P (mode));
2581 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
2583 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2584 classes[0] = X86_64_INTEGERSI_CLASS;
2586 classes[0] = X86_64_INTEGER_CLASS;
2587 classes[1] = X86_64_INTEGER_CLASS;
2588 return 1 + (bytes > 8);
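/* Worked example (illustrative, not from the original sources):
   classify_argument on

     struct { long a; double b; }

   returns 2 words: word 0 is INTEGER (from A) and word 1 is SSEDF
   (from B), so A travels in a general-purpose register and B in an
   SSE register, matching the x86-64 psABI.  */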
2592 /* Examine the argument and set the number of registers required in each
2593 class. Return 0 iff the parameter should be passed in memory. */
2595 examine_argument (enum machine_mode mode, tree type, int in_return,
2596 int *int_nregs, int *sse_nregs)
2598 enum x86_64_reg_class class[MAX_CLASSES];
2599 int n = classify_argument (mode, type, class, 0);
2605 for (n--; n >= 0; n--)
2608 case X86_64_INTEGER_CLASS:
2609 case X86_64_INTEGERSI_CLASS:
2612 case X86_64_SSE_CLASS:
2613 case X86_64_SSESF_CLASS:
2614 case X86_64_SSEDF_CLASS:
2617 case X86_64_NO_CLASS:
2618 case X86_64_SSEUP_CLASS:
2620 case X86_64_X87_CLASS:
2621 case X86_64_X87UP_CLASS:
2625 case X86_64_COMPLEX_X87_CLASS:
2626 return in_return ? 2 : 0;
2627 case X86_64_MEMORY_CLASS:
2633 /* Construct a container for the argument as used by the GCC interface. See
2634 FUNCTION_ARG for a detailed description. */
2637 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
2638 tree type, int in_return, int nintregs, int nsseregs,
2639 const int *intreg, int sse_regno)
2641 enum machine_mode tmpmode;
2643 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2644 enum x86_64_reg_class class[MAX_CLASSES];
2648 int needed_sseregs, needed_intregs;
2649 rtx exp[MAX_CLASSES];
2652 n = classify_argument (mode, type, class, 0);
2653 if (TARGET_DEBUG_ARG)
2656 fprintf (stderr, "Memory class\n");
2659 fprintf (stderr, "Classes:");
2660 for (i = 0; i < n; i++)
2662 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2664 fprintf (stderr, "\n");
2669 if (!examine_argument (mode, type, in_return, &needed_intregs,
2672 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2675 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2676 some less clueful developer tries to use floating-point anyway. */
2677 if (needed_sseregs && !TARGET_SSE)
2679 static bool issued_error;
2682 issued_error = true;
2684 error ("SSE register return with SSE disabled");
2686 error ("SSE register argument with SSE disabled");
2691 /* First construct simple cases. Avoid SCmode, since we want to use
2692 a single register to pass this type. */
2693 if (n == 1 && mode != SCmode)
2696 case X86_64_INTEGER_CLASS:
2697 case X86_64_INTEGERSI_CLASS:
2698 return gen_rtx_REG (mode, intreg[0]);
2699 case X86_64_SSE_CLASS:
2700 case X86_64_SSESF_CLASS:
2701 case X86_64_SSEDF_CLASS:
2702 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
2703 case X86_64_X87_CLASS:
2704 case X86_64_COMPLEX_X87_CLASS:
2705 return gen_rtx_REG (mode, FIRST_STACK_REG);
2706 case X86_64_NO_CLASS:
2707 /* Zero sized array, struct or class. */
2712 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2714 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2716 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2717 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2718 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2719 && class[1] == X86_64_INTEGER_CLASS
2720 && (mode == CDImode || mode == TImode || mode == TFmode)
2721 && intreg[0] + 1 == intreg[1])
2722 return gen_rtx_REG (mode, intreg[0]);
2724 /* Otherwise figure out the entries of the PARALLEL. */
2725 for (i = 0; i < n; i++)
2729 case X86_64_NO_CLASS:
2731 case X86_64_INTEGER_CLASS:
2732 case X86_64_INTEGERSI_CLASS:
2733 /* Merge TImodes on aligned occasions here too. */
2734 if (i * 8 + 8 > bytes)
2735 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2736 else if (class[i] == X86_64_INTEGERSI_CLASS)
2740 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
2741 if (tmpmode == BLKmode)
2743 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2744 gen_rtx_REG (tmpmode, *intreg),
2748 case X86_64_SSESF_CLASS:
2749 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2750 gen_rtx_REG (SFmode,
2751 SSE_REGNO (sse_regno)),
2755 case X86_64_SSEDF_CLASS:
2756 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2757 gen_rtx_REG (DFmode,
2758 SSE_REGNO (sse_regno)),
2762 case X86_64_SSE_CLASS:
2763 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2767 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2768 gen_rtx_REG (tmpmode,
2769 SSE_REGNO (sse_regno)),
2771 if (tmpmode == TImode)
2780 /* Empty aligned struct, union or class. */
2784 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2785 for (i = 0; i < nexps; i++)
2786 XVECEXP (ret, 0, i) = exp [i];
2790 /* Update the data in CUM to advance over an argument
2791 of mode MODE and data type TYPE.
2792 (TYPE is null for libcalls where that information may not be available.) */
2795 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2796 tree type, int named)
2799 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2800 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2803 mode = type_natural_mode (type);
2805 if (TARGET_DEBUG_ARG)
2806 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
2807 "mode=%s, named=%d)\n\n",
2808 words, cum->words, cum->nregs, cum->sse_nregs,
2809 GET_MODE_NAME (mode), named);
2813 int int_nregs, sse_nregs;
2814 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2815 cum->words += words;
2816 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2818 cum->nregs -= int_nregs;
2819 cum->sse_nregs -= sse_nregs;
2820 cum->regno += int_nregs;
2821 cum->sse_regno += sse_nregs;
2824 cum->words += words;
2842 cum->words += words;
2843 cum->nregs -= words;
2844 cum->regno += words;
2846 if (cum->nregs <= 0)
2854 if (cum->float_in_sse < 2)
2857 if (cum->float_in_sse < 1)
2868 if (!type || !AGGREGATE_TYPE_P (type))
2870 cum->sse_words += words;
2871 cum->sse_nregs -= 1;
2872 cum->sse_regno += 1;
2873 if (cum->sse_nregs <= 0)
2885 if (!type || !AGGREGATE_TYPE_P (type))
2887 cum->mmx_words += words;
2888 cum->mmx_nregs -= 1;
2889 cum->mmx_regno += 1;
2890 if (cum->mmx_nregs <= 0)
2901 /* Define where to put the arguments to a function.
2902 Value is zero to push the argument on the stack,
2903 or a hard register in which to store the argument.
2905 MODE is the argument's machine mode.
2906 TYPE is the data type of the argument (as a tree).
2907 This is null for libcalls where that information may
2909 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2910 the preceding args and about the function being called.
2911 NAMED is nonzero if this argument is a named parameter
2912 (otherwise it is an extra parameter matching an ellipsis). */
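/* For instance (illustrative, not from the original sources), the
   first int argument of a regparm (3) function comes back as
   (reg:SI 0), i.e. %eax, while a stack-passed argument yields zero
   and the caller of FUNCTION_ARG pushes it instead.  */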
2915 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
2916 tree type, int named)
2918 enum machine_mode mode = orig_mode;
2921 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2922 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2923 static bool warnedsse, warnedmmx;
2925 /* To simplify the code below, represent vector types with a vector mode
2926 even if MMX/SSE are not active. */
2927 if (type && TREE_CODE (type) == VECTOR_TYPE)
2928 mode = type_natural_mode (type);
2930 /* Handle a hidden AL argument containing the number of registers for varargs
2931 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid any AL settings. */
2933 if (mode == VOIDmode)
2936 return GEN_INT (cum->maybe_vaarg
2937 ? (cum->sse_nregs < 0
2945 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
2947 &x86_64_int_parameter_registers [cum->regno],
2952 /* For now, pass fp/complex values on the stack. */
2964 if (words <= cum->nregs)
2966 int regno = cum->regno;
2968 /* Fastcall allocates the first two DWORD (SImode) or
2969 smaller arguments to ECX and EDX. */
2972 if (mode == BLKmode || mode == DImode)
2975 /* ECX, not EAX, is the first allocated register. */
2979 ret = gen_rtx_REG (mode, regno);
2983 if (cum->float_in_sse < 2)
2986 if (cum->float_in_sse < 1)
2996 if (!type || !AGGREGATE_TYPE_P (type))
2998 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3001 warning (0, "SSE vector argument without SSE enabled "
3005 ret = gen_reg_or_parallel (mode, orig_mode,
3006 cum->sse_regno + FIRST_SSE_REG);
3013 if (!type || !AGGREGATE_TYPE_P (type))
3015 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3018 warning (0, "MMX vector argument without MMX enabled "
3022 ret = gen_reg_or_parallel (mode, orig_mode,
3023 cum->mmx_regno + FIRST_MMX_REG);
3028 if (TARGET_DEBUG_ARG)
3031 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3032 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3035 print_simple_rtl (stderr, ret);
3037 fprintf (stderr, ", stack");
3039 fprintf (stderr, " )\n");
3045 /* A C expression that indicates when an argument must be passed by
3046 reference. If nonzero for an argument, a copy of that argument is
3047 made in memory and a pointer to the argument is passed instead of
3048 the argument itself. The pointer is passed in whatever way is
3049 appropriate for passing a pointer to that type. */
3052 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3053 enum machine_mode mode ATTRIBUTE_UNUSED,
3054 tree type, bool named ATTRIBUTE_UNUSED)
3059 if (type && int_size_in_bytes (type) == -1)
3061 if (TARGET_DEBUG_ARG)
3062 fprintf (stderr, "function_arg_pass_by_reference\n");
3069 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
3070 passing ABI. Only called if TARGET_SSE. */
3072 contains_128bit_aligned_vector_p (tree type)
3074 enum machine_mode mode = TYPE_MODE (type);
3075 if (SSE_REG_MODE_P (mode)
3076 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3078 if (TYPE_ALIGN (type) < 128)
3081 if (AGGREGATE_TYPE_P (type))
3083 /* Walk the aggregates recursively. */
3084 switch (TREE_CODE (type))
3088 case QUAL_UNION_TYPE:
3092 if (TYPE_BINFO (type))
3094 tree binfo, base_binfo;
3097 for (binfo = TYPE_BINFO (type), i = 0;
3098 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
3099 if (contains_128bit_aligned_vector_p
3100 (BINFO_TYPE (base_binfo)))
3103 /* And now merge the fields of the structure. */
3104 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3106 if (TREE_CODE (field) == FIELD_DECL
3107 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3114 /* Just for use if some languages pass arrays by value. */
3115 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3125 /* Gives the alignment boundary, in bits, of an argument with the
3126 specified mode and type. */
3129 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3133 align = TYPE_ALIGN (type);
3135 align = GET_MODE_ALIGNMENT (mode);
3136 if (align < PARM_BOUNDARY)
3137 align = PARM_BOUNDARY;
3140 /* The i386 ABI defines all arguments to be 4-byte aligned. We have to
3141 make an exception for SSE modes since these require 128-bit
3144 The handling here differs from field_alignment. ICC aligns MMX
3145 arguments to 4-byte boundaries, while structure fields are aligned
3146 to 8-byte boundaries. */
3148 align = PARM_BOUNDARY;
3151 if (!SSE_REG_MODE_P (mode))
3152 align = PARM_BOUNDARY;
3156 if (!contains_128bit_aligned_vector_p (type))
3157 align = PARM_BOUNDARY;
3165 /* Return true if N is a possible register number of function value. */
3167 ix86_function_value_regno_p (int regno)
3170 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3171 || (regno == FIRST_SSE_REG && TARGET_SSE))
3175 && (regno == FIRST_MMX_REG && TARGET_MMX))
3181 /* Define how to find the value returned by a function.
3182 VALTYPE is the data type of the value (as a tree).
3183 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3184 otherwise, FUNC is 0. */
3186 ix86_function_value (tree valtype, tree func)
3188 enum machine_mode natmode = type_natural_mode (valtype);
3192 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3193 1, REGPARM_MAX, SSE_REGPARM_MAX,
3194 x86_64_int_return_registers, 0);
3195 /* For zero-sized structures, construct_container returns NULL, but we
3196 need to keep the rest of the compiler happy by returning a meaningful value. */
3198 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3202 return gen_rtx_REG (TYPE_MODE (valtype), ix86_value_regno (natmode, func));
3205 /* Return nonzero iff TYPE is returned in memory. */
3207 ix86_return_in_memory (tree type)
3209 int needed_intregs, needed_sseregs, size;
3210 enum machine_mode mode = type_natural_mode (type);
3213 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3215 if (mode == BLKmode)
3218 size = int_size_in_bytes (type);
3220 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3223 if (VECTOR_MODE_P (mode) || mode == TImode)
3225 /* User-created vectors small enough to fit in EAX. */
3229 /* MMX/3dNow values are returned in MM0,
3230 except when it doesn't exist. */
3232 return (TARGET_MMX ? 0 : 1);
3234 /* SSE values are returned in XMM0, except when it doesn't exist. */
3236 return (TARGET_SSE ? 0 : 1);
3247 /* When returning SSE vector types, we have a choice of either
3248 (1) being ABI incompatible with a -march switch, or
3249 (2) generating an error.
3250 Given no good solution, I think the safest thing is one warning.
3251 The user won't be able to use -Werror, but....
3253 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3254 called in response to actually generating a caller or callee that
3255 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3256 via aggregate_value_p for general type probing from tree-ssa. */
3259 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3261 static bool warnedsse, warnedmmx;
3265 /* Look at the return type of the function, not the function type. */
3266 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3268 if (!TARGET_SSE && !warnedsse)
3271 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3274 warning (0, "SSE vector return without SSE enabled "
3279 if (!TARGET_MMX && !warnedmmx)
3281 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3284 warning (0, "MMX vector return without MMX enabled "
3293 /* Define how to find the value returned by a library function
3294 assuming the value has mode MODE. */
3296 ix86_libcall_value (enum machine_mode mode)
3307 return gen_rtx_REG (mode, FIRST_SSE_REG);
3310 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3314 return gen_rtx_REG (mode, 0);
3318 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL));
3321 /* Given a mode, return the register to use for a return value. */
3324 ix86_value_regno (enum machine_mode mode, tree func)
3326 gcc_assert (!TARGET_64BIT);
3328 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
3329 we prevent this case when mmx is not available. */
3330 if ((VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8))
3331 return FIRST_MMX_REG;
3333 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3334 we prevent this case when sse is not available. */
3335 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3336 return FIRST_SSE_REG;
3338 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
3339 if (GET_MODE_CLASS (mode) != MODE_FLOAT || !TARGET_FLOAT_RETURNS_IN_80387)
3342 /* Floating point return values in %st(0), except for local functions when
3343 SSE math is enabled or for functions with sseregparm attribute. */
3344 if (func && (mode == SFmode || mode == DFmode))
3346 int sse_level = ix86_function_sseregparm (TREE_TYPE (func), func);
3347 if ((sse_level >= 1 && mode == SFmode)
3348 || (sse_level == 2 && mode == DFmode))
3349 return FIRST_SSE_REG;
3352 return FIRST_FLOAT_REG;
3355 /* Create the va_list data type. */
3358 ix86_build_builtin_va_list (void)
3360 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3362 /* For i386 we use a plain pointer to the argument area. */
3364 return build_pointer_type (char_type_node);
3366 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3367 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3369 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3370 unsigned_type_node);
3371 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3372 unsigned_type_node);
3373 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3375 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3378 va_list_gpr_counter_field = f_gpr;
3379 va_list_fpr_counter_field = f_fpr;
3381 DECL_FIELD_CONTEXT (f_gpr) = record;
3382 DECL_FIELD_CONTEXT (f_fpr) = record;
3383 DECL_FIELD_CONTEXT (f_ovf) = record;
3384 DECL_FIELD_CONTEXT (f_sav) = record;
3386 TREE_CHAIN (record) = type_decl;
3387 TYPE_NAME (record) = type_decl;
3388 TYPE_FIELDS (record) = f_gpr;
3389 TREE_CHAIN (f_gpr) = f_fpr;
3390 TREE_CHAIN (f_fpr) = f_ovf;
3391 TREE_CHAIN (f_ovf) = f_sav;
3393 layout_type (record);
3395 /* The correct type is an array type of one element. */
3396 return build_array_type (record, build_index_type (size_zero_node));
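/* Illustrative C equivalent of the record built above (a sketch that
   mirrors the x86-64 psABI definition of va_list):

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];
*/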
3399 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3402 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3403 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3406 CUMULATIVE_ARGS next_cum;
3407 rtx save_area = NULL_RTX, mem;
3420 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
3423 /* Indicate that we need to allocate space on the stack for the varargs save area. */
3424 ix86_save_varrargs_registers = 1;
3426 cfun->stack_alignment_needed = 128;
3428 fntype = TREE_TYPE (current_function_decl);
3429 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3430 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3431 != void_type_node));
3433 /* For varargs, we do not want to skip the dummy va_dcl argument.
3434 For stdargs, we do want to skip the last named argument. */
3437 function_arg_advance (&next_cum, mode, type, 1);
3440 save_area = frame_pointer_rtx;
3442 set = get_varargs_alias_set ();
3444 for (i = next_cum.regno;
3446 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
3449 mem = gen_rtx_MEM (Pmode,
3450 plus_constant (save_area, i * UNITS_PER_WORD));
3451 set_mem_alias_set (mem, set);
3452 emit_move_insn (mem, gen_rtx_REG (Pmode,
3453 x86_64_int_parameter_registers[i]));
3456 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
3458 /* Now emit code to save SSE registers. The AX parameter contains the
3459 number of SSE parameter registers used to call this function. We use
3460 the sse_prologue_save insn template, which produces a computed jump
3461 across the SSE saves. We need some preparation work to get this working. */
3463 label = gen_label_rtx ();
3464 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3466 /* Compute the address to jump to:
3467 label - 5*eax + nnamed_sse_arguments*5. */
3468 tmp_reg = gen_reg_rtx (Pmode);
3469 nsse_reg = gen_reg_rtx (Pmode);
3470 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3471 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3472 gen_rtx_MULT (Pmode, nsse_reg,
3474 if (next_cum.sse_regno)
3477 gen_rtx_CONST (DImode,
3478 gen_rtx_PLUS (DImode,
3480 GEN_INT (next_cum.sse_regno * 4))));
3482 emit_move_insn (nsse_reg, label_ref);
3483 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3485 /* Compute the address of the memory block we save into. We always use a
3486 pointer pointing 127 bytes after the first byte to store; this is needed
3487 to keep the instruction size limited to 4 bytes. */
3488 tmp_reg = gen_reg_rtx (Pmode);
3489 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3490 plus_constant (save_area,
3491 8 * REGPARM_MAX + 127)));
3492 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3493 set_mem_alias_set (mem, set);
3494 set_mem_align (mem, BITS_PER_WORD);
3496 /* And finally do the dirty job! */
3497 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3498 GEN_INT (next_cum.sse_regno), label));
3503 /* Implement va_start. */
3506 ix86_va_start (tree valist, rtx nextarg)
3508 HOST_WIDE_INT words, n_gpr, n_fpr;
3509 tree f_gpr, f_fpr, f_ovf, f_sav;
3510 tree gpr, fpr, ovf, sav, t;
3512 /* Only the 64-bit target needs something special. */
3515 std_expand_builtin_va_start (valist, nextarg);
3519 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3520 f_fpr = TREE_CHAIN (f_gpr);
3521 f_ovf = TREE_CHAIN (f_fpr);
3522 f_sav = TREE_CHAIN (f_ovf);
3524 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3525 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3526 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3527 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3528 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3530 /* Count the number of GP and FP argument registers used. */
3531 words = current_function_args_info.words;
3532 n_gpr = current_function_args_info.regno;
3533 n_fpr = current_function_args_info.sse_regno;
3535 if (TARGET_DEBUG_ARG)
3536 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3537 (int) words, (int) n_gpr, (int) n_fpr);
3539 if (cfun->va_list_gpr_size)
3541 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3542 build_int_cst (NULL_TREE, n_gpr * 8));
3543 TREE_SIDE_EFFECTS (t) = 1;
3544 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3547 if (cfun->va_list_fpr_size)
3549 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3550 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
3551 TREE_SIDE_EFFECTS (t) = 1;
3552 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3555 /* Find the overflow area. */
3556 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3558 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3559 build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
3560 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3561 TREE_SIDE_EFFECTS (t) = 1;
3562 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3564 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
3566 /* Find the register save area.
3567 The function prologue saves it right above the stack frame. */
3568 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3569 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3570 TREE_SIDE_EFFECTS (t) = 1;
3571 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
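/* Net effect of va_start (an illustrative sketch) for a function with
   n_gpr named integer and n_fpr named SSE arguments:

     ap->gp_offset = n_gpr * 8;
     ap->fp_offset = 8 * REGPARM_MAX + n_fpr * 16;
     ap->overflow_arg_area = incoming argument pointer + words * 8;
     ap->reg_save_area = register save block in the frame;
*/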
3575 /* Implement va_arg. */
3578 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3580 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3581 tree f_gpr, f_fpr, f_ovf, f_sav;
3582 tree gpr, fpr, ovf, sav, t;
3584 tree lab_false, lab_over = NULL_TREE;
3589 enum machine_mode nat_mode;
3591 /* Only the 64-bit target needs something special. */
3593 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3595 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3596 f_fpr = TREE_CHAIN (f_gpr);
3597 f_ovf = TREE_CHAIN (f_fpr);
3598 f_sav = TREE_CHAIN (f_ovf);
3600 valist = build_va_arg_indirect_ref (valist);
3601 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3602 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3603 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3604 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3606 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3608 type = build_pointer_type (type);
3609 size = int_size_in_bytes (type);
3610 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3612 nat_mode = type_natural_mode (type);
3613 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
3614 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3616 /* Pull the value out of the saved registers. */
3618 addr = create_tmp_var (ptr_type_node, "addr");
3619 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3623 int needed_intregs, needed_sseregs;
3625 tree int_addr, sse_addr;
3627 lab_false = create_artificial_label ();
3628 lab_over = create_artificial_label ();
3630 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
3632 need_temp = (!REG_P (container)
3633 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3634 || TYPE_ALIGN (type) > 128));
3636 /* In case we are passing a structure, verify that it is a consecutive block
3637 in the register save area. If not, we need to do moves. */
3638 if (!need_temp && !REG_P (container))
3640 /* Verify that all registers are strictly consecutive. */
3641 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3645 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3647 rtx slot = XVECEXP (container, 0, i);
3648 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3649 || INTVAL (XEXP (slot, 1)) != i * 16)
3657 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3659 rtx slot = XVECEXP (container, 0, i);
3660 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3661 || INTVAL (XEXP (slot, 1)) != i * 8)
3673 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3674 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3675 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3676 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3679 /* First ensure that we fit completely in registers. */
3682 t = build_int_cst (TREE_TYPE (gpr),
3683 (REGPARM_MAX - needed_intregs + 1) * 8);
3684 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3685 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3686 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3687 gimplify_and_add (t, pre_p);
3691 t = build_int_cst (TREE_TYPE (fpr),
3692 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3694 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3695 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3696 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3697 gimplify_and_add (t, pre_p);
3700 /* Compute index to start of area used for integer regs. */
3703 /* int_addr = gpr + sav; */
3704 t = fold_convert (ptr_type_node, gpr);
3705 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3706 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3707 gimplify_and_add (t, pre_p);
3711 /* sse_addr = fpr + sav; */
3712 t = fold_convert (ptr_type_node, fpr);
3713 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3714 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3715 gimplify_and_add (t, pre_p);
3720 tree temp = create_tmp_var (type, "va_arg_tmp");
3723 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3724 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3725 gimplify_and_add (t, pre_p);
3727 for (i = 0; i < XVECLEN (container, 0); i++)
3729 rtx slot = XVECEXP (container, 0, i);
3730 rtx reg = XEXP (slot, 0);
3731 enum machine_mode mode = GET_MODE (reg);
3732 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3733 tree addr_type = build_pointer_type (piece_type);
3736 tree dest_addr, dest;
3738 if (SSE_REGNO_P (REGNO (reg)))
3740 src_addr = sse_addr;
3741 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3745 src_addr = int_addr;
3746 src_offset = REGNO (reg) * 8;
3748 src_addr = fold_convert (addr_type, src_addr);
3749 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3750 size_int (src_offset)));
3751 src = build_va_arg_indirect_ref (src_addr);
3753 dest_addr = fold_convert (addr_type, addr);
3754 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3755 size_int (INTVAL (XEXP (slot, 1)))));
3756 dest = build_va_arg_indirect_ref (dest_addr);
3758 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3759 gimplify_and_add (t, pre_p);
3765 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3766 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
3767 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3768 gimplify_and_add (t, pre_p);
3772 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3773 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
3774 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3775 gimplify_and_add (t, pre_p);
3778 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3779 gimplify_and_add (t, pre_p);
3781 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3782 append_to_statement_list (t, pre_p);
3785 /* ... otherwise out of the overflow area. */
3787 /* Care for on-stack alignment if needed. */
3788 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3792 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3793 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
3794 build_int_cst (TREE_TYPE (ovf), align - 1));
3795 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3796 build_int_cst (TREE_TYPE (t), -align));
3798 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3800 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3801 gimplify_and_add (t2, pre_p);
3803 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3804 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
3805 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3806 gimplify_and_add (t, pre_p);
3810 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3811 append_to_statement_list (t, pre_p);
3814 ptrtype = build_pointer_type (type);
3815 addr = fold_convert (ptrtype, addr);
3818 addr = build_va_arg_indirect_ref (addr);
3819 return build_va_arg_indirect_ref (addr);
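/* Sketch (illustrative, not from the original sources) of the
   sequence gimplified above for an integer va_arg needing one GPR:

     if (ap->gp_offset >= 6 * 8) goto overflow;
     addr = ap->reg_save_area + ap->gp_offset;
     ap->gp_offset += 8;
     goto done;
   overflow:
     addr = ap->overflow_arg_area;
     ap->overflow_arg_area += 8;
   done:
     result = *(type *) addr;
*/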
3822 /* Return nonzero if OPNUM's MEM should be matched
3823 in movabs* patterns. */
3826 ix86_check_movabs (rtx insn, int opnum)
3830 set = PATTERN (insn);
3831 if (GET_CODE (set) == PARALLEL)
3832 set = XVECEXP (set, 0, 0);
3833 gcc_assert (GET_CODE (set) == SET);
3834 mem = XEXP (set, opnum);
3835 while (GET_CODE (mem) == SUBREG)
3836 mem = SUBREG_REG (mem);
3837 gcc_assert (GET_CODE (mem) == MEM);
3838 return (volatile_ok || !MEM_VOLATILE_P (mem));
3841 /* Initialize the table of extra 80387 mathematical constants. */
3844 init_ext_80387_constants (void)
3846 static const char * cst[5] =
3848 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
3849 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
3850 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
3851 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
3852 "3.1415926535897932385128089594061862044", /* 4: fldpi */
3856 for (i = 0; i < 5; i++)
3858 real_from_string (&ext_80387_constants_table[i], cst[i]);
3859 /* Ensure each constant is rounded to XFmode precision. */
3860 real_convert (&ext_80387_constants_table[i],
3861 XFmode, &ext_80387_constants_table[i]);
3864 ext_80387_constants_init = 1;
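/* For reference: the table entries above are log10(2), ln(2),
   log2(e), log2(10) and pi -- the values pushed by the x87 fldlg2,
   fldln2, fldl2e, fldl2t and fldpi instructions.  */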
3867 /* Return true if the constant is something that can be loaded with
3868 a special instruction. */
3871 standard_80387_constant_p (rtx x)
3873 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3876 if (x == CONST0_RTX (GET_MODE (x)))
3878 if (x == CONST1_RTX (GET_MODE (x)))
3881 /* For XFmode constants, try to find a special 80387 instruction when
3882 optimizing for size or on those CPUs that benefit from them. */
3883 if (GET_MODE (x) == XFmode
3884 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
3889 if (! ext_80387_constants_init)
3890 init_ext_80387_constants ();
3892 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3893 for (i = 0; i < 5; i++)
3894 if (real_identical (&r, &ext_80387_constants_table[i]))
3901 /* Return the opcode of the special instruction to be used to load the constant X. */
3905 standard_80387_constant_opcode (rtx x)
3907 switch (standard_80387_constant_p (x))
3928 /* Return the CONST_DOUBLE representing the 80387 constant that is
3929 loaded by the specified special instruction. The argument IDX
3930 matches the return value from standard_80387_constant_p. */
3933 standard_80387_constant_rtx (int idx)
3937 if (! ext_80387_constants_init)
3938 init_ext_80387_constants ();
3954 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
3958 /* Return 1 if X is an FP constant we can load into an SSE register
3959 without using memory. */
3961 standard_sse_constant_p (rtx x)
3963 if (x == const0_rtx)
3965 return (x == CONST0_RTX (GET_MODE (x)));
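/* The only "special" SSE constant recognized here is zero, which never
   needs a memory load; it can be materialized by clearing a register,
   e.g. (illustrative; the actual choice is made by the move patterns):

       xorps %xmm0, %xmm0      ; floating-point zero
       pxor  %xmm0, %xmm0      ; integer zero  */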
3968 /* Returns 1 if OP contains a symbol reference. */
3971 symbolic_reference_mentioned_p (rtx op)
3976 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3979 fmt = GET_RTX_FORMAT (GET_CODE (op));
3980 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3986 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3987 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3991 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3998 /* Return 1 if it is appropriate to emit `ret' instructions in the
3999 body of a function. Do this only if the epilogue is simple, needing a
4000 couple of insns. Prior to reloading, we can't tell how many registers
4001 must be saved, so return 0 then. Return 0 if there is no frame
4002 marker to de-allocate. */
4005 ix86_can_use_return_insn_p (void)
4007 struct ix86_frame frame;
4009 if (! reload_completed || frame_pointer_needed)
4012 /* Don't allow more than 32768 bytes of arguments to be popped, since
4013 that's all we can handle with one instruction. */
4014 if (current_function_pops_args
4015 && current_function_args_size >= 32768)
4018 ix86_compute_frame_layout (&frame);
4019 return frame.to_allocate == 0 && frame.nregs == 0;
4022 /* Value should be nonzero if functions must have frame pointers.
4023 Zero means the frame pointer need not be set up (and parms may
4024 be accessed via the stack pointer) in functions that seem suitable. */
4027 ix86_frame_pointer_required (void)
4029 /* If we accessed previous frames, then the generated code expects
4030 to be able to access the saved ebp value in our frame. */
4031 if (cfun->machine->accesses_prev_frame)
4034 /* Several x86 OSes need a frame pointer for other reasons,
4035 usually pertaining to setjmp. */
4036 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4039 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4040 the frame pointer by default. Turn it back on now if we've not
4041 got a leaf function. */
4042 if (TARGET_OMIT_LEAF_FRAME_POINTER
4043 && (!current_function_is_leaf))
4046 if (current_function_profile)
4052 /* Record that the current function accesses previous call frames. */
4055 ix86_setup_frame_addresses (void)
4057 cfun->machine->accesses_prev_frame = 1;
4060 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4061 # define USE_HIDDEN_LINKONCE 1
4063 # define USE_HIDDEN_LINKONCE 0
4066 static int pic_labels_used;
4068 /* Fills in the label name that should be used for a pc thunk for
4069 the given register. */
4072 get_pc_thunk_name (char name[32], unsigned int regno)
4074 if (USE_HIDDEN_LINKONCE)
4075 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4077 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4081 /* This function generates code for -fpic that loads %ebx with
4082 the return address of the caller and then returns. */
4085 ix86_file_end (void)
4090 for (regno = 0; regno < 8; ++regno)
4094 if (! ((pic_labels_used >> regno) & 1))
4097 get_pc_thunk_name (name, regno);
4099 if (USE_HIDDEN_LINKONCE)
4103 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4105 TREE_PUBLIC (decl) = 1;
4106 TREE_STATIC (decl) = 1;
4107 DECL_ONE_ONLY (decl) = 1;
4109 (*targetm.asm_out.unique_section) (decl, 0);
4110 named_section (decl, NULL, 0);
4112 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4113 fputs ("\t.hidden\t", asm_out_file);
4114 assemble_name (asm_out_file, name);
4115 fputc ('\n', asm_out_file);
4116 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4121 ASM_OUTPUT_LABEL (asm_out_file, name);
4124 xops[0] = gen_rtx_REG (SImode, regno);
4125 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4126 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4127 output_asm_insn ("ret", xops);
4130 if (NEED_INDICATE_EXEC_STACK)
4131 file_end_indicate_exec_stack ();
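/* For reference, the thunk emitted above for, e.g., %ebx looks like
   this (AT&T syntax; a sketch of the expected output):

       __i686.get_pc_thunk.bx:
               movl (%esp), %ebx       ; load our own return address
               ret

   so "call __i686.get_pc_thunk.bx" leaves the address of the insn
   following the call in %ebx.  */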
4134 /* Emit code for the SET_GOT patterns. */
4137 output_set_got (rtx dest)
4142 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4144 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4146 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4149 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4151 output_asm_insn ("call\t%a2", xops);
4154 /* Output the "canonical" label name ("Lxx$pb") here too. This
4155 is what will be referred to by the Mach-O PIC subsystem. */
4156 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4158 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4159 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4162 output_asm_insn ("pop{l}\t%0", xops);
4167 get_pc_thunk_name (name, REGNO (dest));
4168 pic_labels_used |= 1 << REGNO (dest);
4170 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4171 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4172 output_asm_insn ("call\t%X2", xops);
4175 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4176 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4177 else if (!TARGET_MACHO)
4178 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
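/* The PIC sequences produced above look roughly like this (AT&T
   syntax, illustrative):

       ; !TARGET_DEEP_BRANCH_PREDICTION
               call .L1
       .L1:    popl %ebx
               addl $_GLOBAL_OFFSET_TABLE_+[.-.L1], %ebx

       ; TARGET_DEEP_BRANCH_PREDICTION
               call __i686.get_pc_thunk.bx
               addl $_GLOBAL_OFFSET_TABLE_, %ebx  */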
4183 /* Generate a "push" pattern for input ARG. */
4188 return gen_rtx_SET (VOIDmode,
4190 gen_rtx_PRE_DEC (Pmode,
4191 stack_pointer_rtx)),
4195 /* Return >= 0 if there is an unused call-clobbered register available
4196 for the entire function. */
4199 ix86_select_alt_pic_regnum (void)
4201 if (current_function_is_leaf && !current_function_profile)
4204 for (i = 2; i >= 0; --i)
4205 if (!regs_ever_live[i])
4209 return INVALID_REGNUM;
4212 /* Return 1 if we need to save REGNO. */
4214 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4216 if (pic_offset_table_rtx
4217 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4218 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4219 || current_function_profile
4220 || current_function_calls_eh_return
4221 || current_function_uses_const_pool))
4223 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4228 if (current_function_calls_eh_return && maybe_eh_return)
4233 unsigned test = EH_RETURN_DATA_REGNO (i);
4234 if (test == INVALID_REGNUM)
4241 return (regs_ever_live[regno]
4242 && !call_used_regs[regno]
4243 && !fixed_regs[regno]
4244 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4247 /* Return number of registers to be saved on the stack. */
4250 ix86_nsaved_regs (void)
4255 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4256 if (ix86_save_reg (regno, true))
4261 /* Return the offset between two registers, one to be eliminated, and the other
4262 its replacement, at the start of a routine. */
4265 ix86_initial_elimination_offset (int from, int to)
4267 struct ix86_frame frame;
4268 ix86_compute_frame_layout (&frame);
4270 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4271 return frame.hard_frame_pointer_offset;
4272 else if (from == FRAME_POINTER_REGNUM
4273 && to == HARD_FRAME_POINTER_REGNUM)
4274 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4277 gcc_assert (to == STACK_POINTER_REGNUM);
4279 if (from == ARG_POINTER_REGNUM)
4280 return frame.stack_pointer_offset;
4282 gcc_assert (from == FRAME_POINTER_REGNUM);
4283 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4287 /* Fill the ix86_frame structure describing the frame of the currently compiled function. */
4290 ix86_compute_frame_layout (struct ix86_frame *frame)
4292 HOST_WIDE_INT total_size;
4293 unsigned int stack_alignment_needed;
4294 HOST_WIDE_INT offset;
4295 unsigned int preferred_alignment;
4296 HOST_WIDE_INT size = get_frame_size ();
4298 frame->nregs = ix86_nsaved_regs ();
4301 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4302 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4304 /* During reload iteration the number of registers saved can change.
4305 Recompute the value as needed. Do not recompute when the number of
4306 registers didn't change, as reload does multiple calls to this function
4307 and does not expect the decision to change within a single iteration. */
4309 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4311 int count = frame->nregs;
4313 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4314 /* The fast prologue uses move instead of push to save registers. This
4315 is significantly longer, but also executes faster, as modern hardware
4316 can execute the moves in parallel but cannot do so for push/pop.

4318 Be careful about choosing which prologue to emit: when the function
4319 takes many instructions to execute, we may use the slow version, as
4320 well as when the function is known to be outside a hot spot (this is
4321 known with feedback only). Weight the size of the function by the
4322 number of registers to save, as it is cheap to use one or two push
4323 instructions but very slow to use many of them. */
4325 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4326 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4327 || (flag_branch_probabilities
4328 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4329 cfun->machine->use_fast_prologue_epilogue = false;
4331 cfun->machine->use_fast_prologue_epilogue
4332 = !expensive_function_p (count);
4334 if (TARGET_PROLOGUE_USING_MOVE
4335 && cfun->machine->use_fast_prologue_epilogue)
4336 frame->save_regs_using_mov = true;
4338 frame->save_regs_using_mov = false;
4341 /* Skip return address and saved base pointer. */
4342 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4344 frame->hard_frame_pointer_offset = offset;
4346 /* Do some sanity checking of stack_alignment_needed and
4347 preferred_alignment, since the i386 port is the only one using these
4348 features, which may break easily. */
4350 gcc_assert (!size || stack_alignment_needed);
4351 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
4352 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
4353 gcc_assert (stack_alignment_needed
4354 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
4356 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4357 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4359 /* Register save area */
4360 offset += frame->nregs * UNITS_PER_WORD;
4363 if (ix86_save_varrargs_registers)
4365 offset += X86_64_VARARGS_SIZE;
4366 frame->va_arg_size = X86_64_VARARGS_SIZE;
4369 frame->va_arg_size = 0;
4371 /* Align start of frame for local function. */
4372 frame->padding1 = ((offset + stack_alignment_needed - 1)
4373 & -stack_alignment_needed) - offset;
4375 offset += frame->padding1;
4377 /* Frame pointer points here. */
4378 frame->frame_pointer_offset = offset;
4382 /* Add the outgoing arguments area. This can be skipped if we
4383 eliminated all the function calls as dead code.
4384 Skipping is, however, impossible when the function calls alloca, as the
4385 alloca expander assumes that the last current_function_outgoing_args_size
4386 bytes of the stack frame are unused. */
4387 if (ACCUMULATE_OUTGOING_ARGS
4388 && (!current_function_is_leaf || current_function_calls_alloca))
4390 offset += current_function_outgoing_args_size;
4391 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4394 frame->outgoing_arguments_size = 0;
4396 /* Align the stack boundary. Only needed if we're calling another
4397 function or using alloca. */
4398 if (!current_function_is_leaf || current_function_calls_alloca)
4399 frame->padding2 = ((offset + preferred_alignment - 1)
4400 & -preferred_alignment) - offset;
4402 frame->padding2 = 0;
4404 offset += frame->padding2;
4406 /* We've reached end of stack frame. */
4407 frame->stack_pointer_offset = offset;
4409 /* Size prologue needs to allocate. */
4410 frame->to_allocate =
4411 (size + frame->padding1 + frame->padding2
4412 + frame->outgoing_arguments_size + frame->va_arg_size);
4414 if ((!frame->to_allocate && frame->nregs <= 1)
4415 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
4416 frame->save_regs_using_mov = false;
4418 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
4419 && current_function_is_leaf)
4421 frame->red_zone_size = frame->to_allocate;
4422 if (frame->save_regs_using_mov)
4423 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
4424 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4425 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4428 frame->red_zone_size = 0;
4429 frame->to_allocate -= frame->red_zone_size;
4430 frame->stack_pointer_offset -= frame->red_zone_size;
4432 fprintf (stderr, "nregs: %i\n", frame->nregs);
4433 fprintf (stderr, "size: %i\n", size);
4434 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4435 fprintf (stderr, "padding1: %i\n", frame->padding1);
4436 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4437 fprintf (stderr, "padding2: %i\n", frame->padding2);
4438 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4439 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4440 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4441 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4442 frame->hard_frame_pointer_offset);
4443 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
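/* A sketch of the resulting 32-bit frame layout with a frame pointer
   (higher addresses first; illustrative):

       incoming arguments
       return address
       saved %ebp                       <- hard_frame_pointer_offset
       register save area (nregs words)
       va_arg save area (if any)
       padding1
       local variables (size bytes)     <- frame_pointer_offset
       outgoing arguments area
       padding2                         <- stack_pointer_offset

   to_allocate is the portion below the register save area that the
   prologue must subtract from %esp (less any red zone).  */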
4447 /* Emit code to save registers in the prologue. */
4450 ix86_emit_save_regs (void)
4455 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4456 if (ix86_save_reg (regno, true))
4458 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4459 RTX_FRAME_RELATED_P (insn) = 1;
4463 /* Emit code to save registers using MOV insns. The first register
4464 is saved at POINTER + OFFSET. */
4466 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
4471 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4472 if (ix86_save_reg (regno, true))
4474 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4476 gen_rtx_REG (Pmode, regno));
4477 RTX_FRAME_RELATED_P (insn) = 1;
4478 offset += UNITS_PER_WORD;
4482 /* Expand prologue or epilogue stack adjustment.
4483 The pattern exists to put a dependency on all ebp-based memory accesses.
4484 STYLE should be negative if instructions should be marked as frame
4485 related, zero if the %r11 register is live and cannot be freely used,
4486 and positive otherwise. */
4489 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
4494 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
4495 else if (x86_64_immediate_operand (offset, DImode))
4496 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
4500 /* r11 is used by indirect sibcall return as well, set before the
4501 epilogue and used after the epilogue. ATM indirect sibcall
4502 shouldn't be used together with huge frame sizes in one
4503 function because of the frame_size check in sibcall.c. */
4505 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4506 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
4508 RTX_FRAME_RELATED_P (insn) = 1;
4509 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
4513 RTX_FRAME_RELATED_P (insn) = 1;
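/* When a 64-bit adjustment doesn't fit in a sign-extended immediate,
   the %r11 path above produces roughly (illustrative):

       movabsq $offset, %r11
       addq    %r11, %rsp  */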
4516 /* Expand the prologue into a bunch of separate insns. */
4519 ix86_expand_prologue (void)
4523 struct ix86_frame frame;
4524 HOST_WIDE_INT allocate;
4526 ix86_compute_frame_layout (&frame);
4528 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4529 slower on all targets. Also sdb doesn't like it. */
4531 if (frame_pointer_needed)
4533 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4534 RTX_FRAME_RELATED_P (insn) = 1;
4536 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4537 RTX_FRAME_RELATED_P (insn) = 1;
4540 allocate = frame.to_allocate;
4542 if (!frame.save_regs_using_mov)
4543 ix86_emit_save_regs ();
4545 allocate += frame.nregs * UNITS_PER_WORD;
4547 /* When using the red zone we may start saving registers before
4548 allocating the stack frame, saving one cycle of the prologue. */
4549 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
4550 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
4551 : stack_pointer_rtx,
4552 -frame.nregs * UNITS_PER_WORD);
4556 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4557 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4558 GEN_INT (-allocate), -1);
4561 /* Only valid for Win32. */
4562 rtx eax = gen_rtx_REG (SImode, 0);
4563 bool eax_live = ix86_eax_live_at_start_p ();
4566 gcc_assert (!TARGET_64BIT);
4570 emit_insn (gen_push (eax));
4574 emit_move_insn (eax, GEN_INT (allocate));
4576 insn = emit_insn (gen_allocate_stack_worker (eax));
4577 RTX_FRAME_RELATED_P (insn) = 1;
4578 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
4579 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
4580 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
4581 t, REG_NOTES (insn));
4585 if (frame_pointer_needed)
4586 t = plus_constant (hard_frame_pointer_rtx,
4589 - frame.nregs * UNITS_PER_WORD);
4591 t = plus_constant (stack_pointer_rtx, allocate);
4592 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
4596 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
4598 if (!frame_pointer_needed || !frame.to_allocate)
4599 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4601 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4602 -frame.nregs * UNITS_PER_WORD);
4605 pic_reg_used = false;
4606 if (pic_offset_table_rtx
4607 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4608 || current_function_profile))
4610 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4612 if (alt_pic_reg_used != INVALID_REGNUM)
4613 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4615 pic_reg_used = true;
4620 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4622 /* Even with accurate pre-reload life analysis, we can wind up
4623 deleting all references to the pic register after reload.
4624 Consider if cross-jumping unifies two sides of a branch
4625 controlled by a comparison vs the only read from a global.
4626 In which case, allow the set_got to be deleted, though we're
4627 too late to do anything about the ebx save in the prologue. */
4628 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4631 /* Prevent function calls from being scheduled before the call to mcount.
4632 In the pic_reg_used case, make sure that the GOT load isn't deleted. */
4633 if (current_function_profile)
4634 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
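/* Putting it together, a typical 32-bit prologue emitted by this
   function looks like (illustrative):

       pushl %ebp                      ; if frame_pointer_needed
       movl  %esp, %ebp
       pushl %ebx                      ; saved registers (or movs when
       pushl %esi                      ; the fast prologue is used)
       subl  $N, %esp                  ; N == allocate
       call  __i686.get_pc_thunk.bx    ; if the PIC register is needed
       addl  $_GLOBAL_OFFSET_TABLE_, %ebx  */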
4637 /* Emit code to restore saved registers using MOV insns. First register
4638 is restored from POINTER + OFFSET. */
4640 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
4641 int maybe_eh_return)
4644 rtx base_address = gen_rtx_MEM (Pmode, pointer);
4646 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4647 if (ix86_save_reg (regno, maybe_eh_return))
4649 /* Ensure that adjust_address won't be forced to produce a pointer
4650 out of the range allowed by the x86-64 instruction set. */
4651 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
4655 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4656 emit_move_insn (r11, GEN_INT (offset));
4657 emit_insn (gen_adddi3 (r11, r11, pointer));
4658 base_address = gen_rtx_MEM (Pmode, r11);
4661 emit_move_insn (gen_rtx_REG (Pmode, regno),
4662 adjust_address (base_address, Pmode, offset));
4663 offset += UNITS_PER_WORD;
4667 /* Restore function stack, frame, and registers. */
4670 ix86_expand_epilogue (int style)
4673 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4674 struct ix86_frame frame;
4675 HOST_WIDE_INT offset;
4677 ix86_compute_frame_layout (&frame);
4679 /* Calculate start of saved registers relative to ebp. Special care
4680 must be taken for the normal return case of a function using
4681 eh_return: the eax and edx registers are marked as saved, but not
4682 restored along this path. */
4683 offset = frame.nregs;
4684 if (current_function_calls_eh_return && style != 2)
4686 offset *= -UNITS_PER_WORD;
4688 /* If we're only restoring one register and sp is not valid, then
4689 use a move instruction to restore the register, since it's
4690 less work than reloading sp and popping the register.

4692 The default code results in a stack adjustment using an add/lea
4693 instruction, while this code results in a LEAVE instruction (or discrete
4694 equivalent), so it is profitable in some other cases as well, especially
4695 when there are no registers to restore. We also use this code when
4696 TARGET_USE_LEAVE and there is exactly one register to pop. This
4697 heuristic may need some tuning in the future. */
4698 if ((!sp_valid && frame.nregs <= 1)
4699 || (TARGET_EPILOGUE_USING_MOVE
4700 && cfun->machine->use_fast_prologue_epilogue
4701 && (frame.nregs > 1 || frame.to_allocate))
4702 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4703 || (frame_pointer_needed && TARGET_USE_LEAVE
4704 && cfun->machine->use_fast_prologue_epilogue
4705 && frame.nregs == 1)
4706 || current_function_calls_eh_return)
4708 /* Restore registers. We can use ebp or esp to address the memory
4709 locations. If both are available, default to ebp, since offsets
4710 are known to be small. The only exception is esp pointing directly
4711 to the end of the block of saved registers, where we may simplify the
4712 addressing mode. */
4714 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4715 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4716 frame.to_allocate, style == 2);
4718 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4719 offset, style == 2);
4721 /* eh_return epilogues need %ecx added to the stack pointer. */
4724 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4726 if (frame_pointer_needed)
4728 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4729 tmp = plus_constant (tmp, UNITS_PER_WORD);
4730 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4732 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4733 emit_move_insn (hard_frame_pointer_rtx, tmp);
4735 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
4740 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4741 tmp = plus_constant (tmp, (frame.to_allocate
4742 + frame.nregs * UNITS_PER_WORD));
4743 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4746 else if (!frame_pointer_needed)
4747 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4748 GEN_INT (frame.to_allocate
4749 + frame.nregs * UNITS_PER_WORD),
4751 /* If not an i386, mov & pop is faster than "leave". */
4752 else if (TARGET_USE_LEAVE || optimize_size
4753 || !cfun->machine->use_fast_prologue_epilogue)
4754 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4757 pro_epilogue_adjust_stack (stack_pointer_rtx,
4758 hard_frame_pointer_rtx,
4761 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4763 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4768 /* First step is to deallocate the stack frame so that we can
4769 pop the registers. */
4772 gcc_assert (frame_pointer_needed);
4773 pro_epilogue_adjust_stack (stack_pointer_rtx,
4774 hard_frame_pointer_rtx,
4775 GEN_INT (offset), style);
4777 else if (frame.to_allocate)
4778 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4779 GEN_INT (frame.to_allocate), style);
4781 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4782 if (ix86_save_reg (regno, false))
4785 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4787 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4789 if (frame_pointer_needed)
4791 /* Leave results in shorter dependency chains on CPUs that are
4792 able to grok it fast. */
4793 if (TARGET_USE_LEAVE)
4794 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4795 else if (TARGET_64BIT)
4796 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4798 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4802 /* Sibcall epilogues don't want a return instruction. */
4806 if (current_function_pops_args && current_function_args_size)
4808 rtx popc = GEN_INT (current_function_pops_args);
4810 /* i386 can only pop 64K bytes. If asked to pop more, pop the
4811 return address, do an explicit add, and jump indirectly to the
4812 caller. */
4814 if (current_function_pops_args >= 65536)
4816 rtx ecx = gen_rtx_REG (SImode, 2);
4818 /* There is no "pascal" calling convention in 64bit ABI. */
4819 gcc_assert (!TARGET_64BIT);
4821 emit_insn (gen_popsi1 (ecx));
4822 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4823 emit_jump_insn (gen_return_indirect_internal (ecx));
4826 emit_jump_insn (gen_return_pop_internal (popc));
4829 emit_jump_insn (gen_return_internal ());
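/* For a callee-pop function asked to pop 64K bytes or more, the code
   above therefore emits (illustrative):

       popl %ecx               ; pop the return address
       addl $N, %esp           ; pop the arguments explicitly
       jmp  *%ecx              ; return to the caller

   instead of the usual "ret $N", whose immediate is only 16 bits.  */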
4832 /* Reset from the function's potential modifications. */
4835 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4836 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4838 if (pic_offset_table_rtx)
4839 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4842 /* Extract the parts of an RTL expression that is a valid memory address
4843 for an instruction. Return 0 if the structure of the address is
4844 grossly off. Return -1 if the address contains ASHIFT, so it is not
4845 strictly valid, but is still used for computing the length of the lea instruction. */
4848 ix86_decompose_address (rtx addr, struct ix86_address *out)
4850 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
4851 rtx base_reg, index_reg;
4852 HOST_WIDE_INT scale = 1;
4853 rtx scale_rtx = NULL_RTX;
4855 enum ix86_address_seg seg = SEG_DEFAULT;
4857 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4859 else if (GET_CODE (addr) == PLUS)
4869 addends[n++] = XEXP (op, 1);
4872 while (GET_CODE (op) == PLUS);
4877 for (i = n; i >= 0; --i)
4880 switch (GET_CODE (op))
4885 index = XEXP (op, 0);
4886 scale_rtx = XEXP (op, 1);
4890 if (XINT (op, 1) == UNSPEC_TP
4891 && TARGET_TLS_DIRECT_SEG_REFS
4892 && seg == SEG_DEFAULT)
4893 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
4922 else if (GET_CODE (addr) == MULT)
4924 index = XEXP (addr, 0); /* index*scale */
4925 scale_rtx = XEXP (addr, 1);
4927 else if (GET_CODE (addr) == ASHIFT)
4931 /* We're called for lea too, which implements ashift on occasion. */
4932 index = XEXP (addr, 0);
4933 tmp = XEXP (addr, 1);
4934 if (GET_CODE (tmp) != CONST_INT)
4936 scale = INTVAL (tmp);
4937 if ((unsigned HOST_WIDE_INT) scale > 3)
4943 disp = addr; /* displacement */
4945 /* Extract the integral value of scale. */
4948 if (GET_CODE (scale_rtx) != CONST_INT)
4950 scale = INTVAL (scale_rtx);
4953 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
4954 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
4956 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
4957 if (base_reg && index_reg && scale == 1
4958 && (index_reg == arg_pointer_rtx
4959 || index_reg == frame_pointer_rtx
4960 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
4963 tmp = base, base = index, index = tmp;
4964 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
4967 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4968 if ((base_reg == hard_frame_pointer_rtx
4969 || base_reg == frame_pointer_rtx
4970 || base_reg == arg_pointer_rtx) && !disp)
4973 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
4974 Avoid this by transforming to [%esi+0]. */
4975 if (ix86_tune == PROCESSOR_K6 && !optimize_size
4976 && base_reg && !index_reg && !disp
4978 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
4981 /* Special case: encode reg+reg instead of reg*2. */
4982 if (!base && index && scale && scale == 2)
4983 base = index, base_reg = index_reg, scale = 1;
4985 /* Special case: scaling cannot be encoded without base or displacement. */
4986 if (!base && !disp && index && scale != 1)
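/* Example decomposition: the address in "movl 12(%ebx,%ecx,4), %eax"
   splits into

       base  = %ebx
       index = %ecx
       scale = 4
       disp  = 12

   which is what OUT receives on success.  */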
4998 /* Return the cost of the memory address X.
4999 For i386, it is better to use a complex address than let gcc copy
5000 the address into a reg and make a new pseudo. But not if the address
5001 requires two regs - that would mean more pseudos with longer
5002 lifetimes. */
5004 ix86_address_cost (rtx x)
5006 struct ix86_address parts;
5008 int ok = ix86_decompose_address (x, &parts);
5012 if (parts.base && GET_CODE (parts.base) == SUBREG)
5013 parts.base = SUBREG_REG (parts.base);
5014 if (parts.index && GET_CODE (parts.index) == SUBREG)
5015 parts.index = SUBREG_REG (parts.index);
5017 /* More complex memory references are better. */
5018 if (parts.disp && parts.disp != const0_rtx)
5020 if (parts.seg != SEG_DEFAULT)
5023 /* Attempt to minimize number of registers in the address. */
5025 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5027 && (!REG_P (parts.index)
5028 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5032 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5034 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5035 && parts.base != parts.index)
5038 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5039 since its predecode logic can't detect the length of such instructions
5040 and they degenerate to vector decoding. Increase the cost of such
5041 addresses here. The penalty is at least 2 cycles. It may be worthwhile
5042 to split such addresses or even refuse them entirely.

5044 The following addressing modes are affected:

5049 The first and last cases may be avoidable by explicitly coding the zero
5050 into the memory address, but I don't have an AMD K6 machine handy to check this. */
5054 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5055 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5056 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5062 /* If X is a machine specific address (i.e. a symbol or label being
5063 referenced as a displacement from the GOT implemented using an
5064 UNSPEC), then return the base term. Otherwise return X. */
5067 ix86_find_base_term (rtx x)
5073 if (GET_CODE (x) != CONST)
5076 if (GET_CODE (term) == PLUS
5077 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5078 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5079 term = XEXP (term, 0);
5080 if (GET_CODE (term) != UNSPEC
5081 || XINT (term, 1) != UNSPEC_GOTPCREL)
5084 term = XVECEXP (term, 0, 0);
5086 if (GET_CODE (term) != SYMBOL_REF
5087 && GET_CODE (term) != LABEL_REF)
5093 term = ix86_delegitimize_address (x);
5095 if (GET_CODE (term) != SYMBOL_REF
5096 && GET_CODE (term) != LABEL_REF)
5102 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
5103 this is used to form addresses to local data when -fPIC is in
5104 effect. */
5107 darwin_local_data_pic (rtx disp)
5109 if (GET_CODE (disp) == MINUS)
5111 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5112 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5113 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5115 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5116 if (! strcmp (sym_name, "<pic base>"))
5124 /* Determine if a given RTX is a valid constant. We already know this
5125 satisfies CONSTANT_P. */
5128 legitimate_constant_p (rtx x)
5130 switch (GET_CODE (x))
5135 if (GET_CODE (x) == PLUS)
5137 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5142 if (TARGET_MACHO && darwin_local_data_pic (x))
5145 /* Only some unspecs are valid as "constants". */
5146 if (GET_CODE (x) == UNSPEC)
5147 switch (XINT (x, 1))
5151 return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
5153 return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
5158 /* We must have drilled down to a symbol. */
5159 if (!symbolic_operand (x, Pmode))
5164 /* TLS symbols are never valid. */
5165 if (tls_symbolic_operand (x, Pmode))
5173 /* Otherwise we handle everything else in the move patterns. */
5177 /* Determine if it's legal to put X into the constant pool. This
5178 is not possible for the address of thread-local symbols, which
5179 is checked above. */
5182 ix86_cannot_force_const_mem (rtx x)
5184 return !legitimate_constant_p (x);
5187 /* Determine if a given RTX is a valid constant address. */
5190 constant_address_p (rtx x)
5192 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5195 /* Nonzero if the constant value X is a legitimate general operand
5196 when generating PIC code. It is given that flag_pic is on and
5197 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5200 legitimate_pic_operand_p (rtx x)
5204 switch (GET_CODE (x))
5207 inner = XEXP (x, 0);
5209 /* Only some unspecs are valid as "constants". */
5210 if (GET_CODE (inner) == UNSPEC)
5211 switch (XINT (inner, 1))
5214 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5222 return legitimate_pic_address_disp_p (x);
5229 /* Determine if a given CONST RTX is a valid memory displacement
5230 in PIC mode. */
5233 legitimate_pic_address_disp_p (rtx disp)
5237 /* In 64bit mode we can allow direct addresses of symbols and labels
5238 when they are not dynamic symbols. */
5241 /* TLS references should always be enclosed in UNSPEC. */
5242 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5244 if (GET_CODE (disp) == SYMBOL_REF
5245 && ix86_cmodel == CM_SMALL_PIC
5246 && SYMBOL_REF_LOCAL_P (disp))
5248 if (GET_CODE (disp) == LABEL_REF)
5250 if (GET_CODE (disp) == CONST
5251 && GET_CODE (XEXP (disp, 0)) == PLUS)
5253 rtx op0 = XEXP (XEXP (disp, 0), 0);
5254 rtx op1 = XEXP (XEXP (disp, 0), 1);
5256 /* TLS references should always be enclosed in UNSPEC. */
5257 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5259 if (((GET_CODE (op0) == SYMBOL_REF
5260 && ix86_cmodel == CM_SMALL_PIC
5261 && SYMBOL_REF_LOCAL_P (op0))
5262 || GET_CODE (op0) == LABEL_REF)
5263 && GET_CODE (op1) == CONST_INT
5264 && INTVAL (op1) < 16*1024*1024
5265 && INTVAL (op1) >= -16*1024*1024)
5269 if (GET_CODE (disp) != CONST)
5271 disp = XEXP (disp, 0);
5275 /* It is unsafe to allow PLUS expressions here; this would limit the
5276 allowed distance of GOT tables. We should not need these anyway. */
5277 if (GET_CODE (disp) != UNSPEC
5278 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5281 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5282 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5288 if (GET_CODE (disp) == PLUS)
5290 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5292 disp = XEXP (disp, 0);
5296 if (TARGET_MACHO && darwin_local_data_pic (disp))
5299 if (GET_CODE (disp) != UNSPEC)
5302 switch (XINT (disp, 1))
5307 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5309 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5310 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5311 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5313 case UNSPEC_GOTTPOFF:
5314 case UNSPEC_GOTNTPOFF:
5315 case UNSPEC_INDNTPOFF:
5318 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5320 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5322 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5328 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5329 memory address for an instruction. The MODE argument is the machine mode
5330 for the MEM expression that wants to use this address.
5332 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS
5333 should convert common non-canonical forms to canonical form so that
5334 they will be recognized. */
5337 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5339 struct ix86_address parts;
5340 rtx base, index, disp;
5341 HOST_WIDE_INT scale;
5342 const char *reason = NULL;
5343 rtx reason_rtx = NULL_RTX;
5345 if (TARGET_DEBUG_ADDR)
5348 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5349 GET_MODE_NAME (mode), strict);
5353 if (ix86_decompose_address (addr, &parts) <= 0)
5355 reason = "decomposition failed";
5360 index = parts.index;
5362 scale = parts.scale;
5364 /* Validate base register.
5366 Don't allow SUBREGs that span more than a word here. It can lead to spill
5367 failures when the base is one word out of a two word structure, which is
5368 represented internally as a DImode int. */
5377 else if (GET_CODE (base) == SUBREG
5378 && REG_P (SUBREG_REG (base))
5379 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
5381 reg = SUBREG_REG (base);
5384 reason = "base is not a register";
5388 if (GET_MODE (base) != Pmode)
5390 reason = "base is not in Pmode";
5394 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5395 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5397 reason = "base is not valid";
5402 /* Validate index register.
5404 Don't allow SUBREGs that span more than a word here -- same as above. */
5413 else if (GET_CODE (index) == SUBREG
5414 && REG_P (SUBREG_REG (index))
5415 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
5417 reg = SUBREG_REG (index);
5420 reason = "index is not a register";
5424 if (GET_MODE (index) != Pmode)
5426 reason = "index is not in Pmode";
5430 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5431 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5433 reason = "index is not valid";
5438 /* Validate scale factor. */
5441 reason_rtx = GEN_INT (scale);
5444 reason = "scale without index";
5448 if (scale != 2 && scale != 4 && scale != 8)
5450 reason = "scale is not a valid multiplier";
5455 /* Validate displacement. */
5460 if (GET_CODE (disp) == CONST
5461 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5462 switch (XINT (XEXP (disp, 0), 1))
5466 case UNSPEC_GOTPCREL:
5467 gcc_assert (flag_pic);
5468 goto is_legitimate_pic;
5470 case UNSPEC_GOTTPOFF:
5471 case UNSPEC_GOTNTPOFF:
5472 case UNSPEC_INDNTPOFF:
5478 reason = "invalid address unspec";
5482 else if (flag_pic && (SYMBOLIC_CONST (disp)
5484 && !machopic_operand_p (disp)
5489 if (TARGET_64BIT && (index || base))
5491 /* foo@dtpoff(%rX) is ok. */
5492 if (GET_CODE (disp) != CONST
5493 || GET_CODE (XEXP (disp, 0)) != PLUS
5494 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5495 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5496 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5497 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5499 reason = "non-constant pic memory reference";
5503 else if (! legitimate_pic_address_disp_p (disp))
5505 reason = "displacement is an invalid pic construct";
5509 /* This code used to verify that a symbolic pic displacement
5510 includes the pic_offset_table_rtx register.
5512 While this is a good idea, unfortunately these constructs may
5513 be created by the "adds using lea" optimization for incorrect code.

5522 This code is nonsensical, but results in addressing the
5523 GOT table with a pic_offset_table_rtx base. We can't
5524 just refuse it easily, since it gets matched by the
5525 "addsi3" pattern, which later gets split to lea in the
5526 case the output register differs from the input. While this
5527 could be handled by a separate addsi pattern for this case
5528 that never results in lea, disabling this test seems to be the
5529 easier and correct fix for the crash. */
5531 else if (GET_CODE (disp) != LABEL_REF
5532 && GET_CODE (disp) != CONST_INT
5533 && (GET_CODE (disp) != CONST
5534 || !legitimate_constant_p (disp))
5535 && (GET_CODE (disp) != SYMBOL_REF
5536 || !legitimate_constant_p (disp)))
5538 reason = "displacement is not constant";
5541 else if (TARGET_64BIT
5542 && !x86_64_immediate_operand (disp, VOIDmode))
5544 reason = "displacement is out of range";
5549 /* Everything looks valid. */
5550 if (TARGET_DEBUG_ADDR)
5551 fprintf (stderr, "Success.\n");
5555 if (TARGET_DEBUG_ADDR)
5557 fprintf (stderr, "Error: %s\n", reason);
5558 debug_rtx (reason_rtx);
5563 /* Return a unique alias set for the GOT. */
5565 static HOST_WIDE_INT
5566 ix86_GOT_alias_set (void)
5568 static HOST_WIDE_INT set = -1;
5570 set = new_alias_set ();
5574 /* Return a legitimate reference for ORIG (an address) using the
5575 register REG. If REG is 0, a new pseudo is generated.
5577 There are two types of references that must be handled:
5579 1. Global data references must load the address from the GOT, via
5580 the PIC reg. An insn is emitted to do this load, and the reg is
5583 2. Static data references, constant pool addresses, and code labels
5584 compute the address as an offset from the GOT, whose base is in
5585 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5586 differentiate them from global data objects. The returned
5587 address is the PIC reg + an unspec constant.
5589 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5590 reg also appears in the address. */
5593 legitimize_pic_address (rtx orig, rtx reg)
5601 reg = gen_reg_rtx (Pmode);
5602 /* Use the generic Mach-O PIC machinery. */
5603 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5606 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5608 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5610 /* This symbol may be referenced via a displacement from the PIC
5611 base address (@GOTOFF). */
5613 if (reload_in_progress)
5614 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5615 if (GET_CODE (addr) == CONST)
5616 addr = XEXP (addr, 0);
5617 if (GET_CODE (addr) == PLUS)
5619 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
5620 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
5623 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5624 new = gen_rtx_CONST (Pmode, new);
5625 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5629 emit_move_insn (reg, new);
5633 else if (GET_CODE (addr) == SYMBOL_REF)
5637 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5638 new = gen_rtx_CONST (Pmode, new);
5639 new = gen_const_mem (Pmode, new);
5640 set_mem_alias_set (new, ix86_GOT_alias_set ());
5643 reg = gen_reg_rtx (Pmode);
5644 /* Use gen_movsi directly; otherwise the address is loaded
5645 into a register for CSE. We don't want to CSE these addresses;
5646 instead we CSE addresses loaded from the GOT table, so skip this. */
5647 emit_insn (gen_movsi (reg, new));
5652 /* This symbol must be referenced via a load from the
5653 Global Offset Table (@GOT). */
5655 if (reload_in_progress)
5656 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5657 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5658 new = gen_rtx_CONST (Pmode, new);
5659 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5660 new = gen_const_mem (Pmode, new);
5661 set_mem_alias_set (new, ix86_GOT_alias_set ());
5664 reg = gen_reg_rtx (Pmode);
5665 emit_move_insn (reg, new);
5671 if (GET_CODE (addr) == CONST)
5673 addr = XEXP (addr, 0);
5675 /* We must match stuff we generate before. Assume the only
5676 unspecs that can get here are ours. Not that we could do
5677 anything with them anyway.... */
5678 if (GET_CODE (addr) == UNSPEC
5679 || (GET_CODE (addr) == PLUS
5680 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5682 gcc_assert (GET_CODE (addr) == PLUS);
5684 if (GET_CODE (addr) == PLUS)
5686 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5688 /* Check first to see if this is a constant offset from a @GOTOFF
5689 symbol reference. */
5690 if (local_symbolic_operand (op0, Pmode)
5691 && GET_CODE (op1) == CONST_INT)
5695 if (reload_in_progress)
5696 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5697 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5699 new = gen_rtx_PLUS (Pmode, new, op1);
5700 new = gen_rtx_CONST (Pmode, new);
5701 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5705 emit_move_insn (reg, new);
5711 if (INTVAL (op1) < -16*1024*1024
5712 || INTVAL (op1) >= 16*1024*1024)
5713 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
5718 base = legitimize_pic_address (XEXP (addr, 0), reg);
5719 new = legitimize_pic_address (XEXP (addr, 1),
5720 base == reg ? NULL_RTX : reg);
5722 if (GET_CODE (new) == CONST_INT)
5723 new = plus_constant (base, INTVAL (new));
5726 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5728 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5729 new = XEXP (new, 1);
5731 new = gen_rtx_PLUS (Pmode, base, new);
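/* Summary of the common 32-bit forms built above (illustrative):

       local symbol:   %ebx + sym@GOTOFF         ; address computed directly
       global symbol:  movl sym@GOT(%ebx), %reg  ; address loaded from the GOT

   where %ebx stands for pic_offset_table_rtx.  */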
5739 /* Load the thread pointer. If TO_REG is true, force it into a register. */
5742 get_thread_pointer (int to_reg)
5746 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5750 reg = gen_reg_rtx (Pmode);
5751 insn = gen_rtx_SET (VOIDmode, reg, tp);
5752 insn = emit_insn (insn);
5757 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
5758 false if we expect this to be used for a memory address and true if
5759 we expect to load the address into a register. */
5762 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
5764 rtx dest, base, off, pic;
5769 case TLS_MODEL_GLOBAL_DYNAMIC:
5770 dest = gen_reg_rtx (Pmode);
5773 rtx rax = gen_rtx_REG (Pmode, 0), insns;
5776 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5777 insns = get_insns ();
5780 emit_libcall_block (insns, dest, rax, x);
5783 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5786 case TLS_MODEL_LOCAL_DYNAMIC:
5787 base = gen_reg_rtx (Pmode);
5790 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5793 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5794 insns = get_insns ();
5797 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5798 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5799 emit_libcall_block (insns, base, rax, note);
5802 emit_insn (gen_tls_local_dynamic_base_32 (base));
5804 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5805 off = gen_rtx_CONST (Pmode, off);
5807 return gen_rtx_PLUS (Pmode, base, off);
5809 case TLS_MODEL_INITIAL_EXEC:
5813 type = UNSPEC_GOTNTPOFF;
5817 if (reload_in_progress)
5818 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5819 pic = pic_offset_table_rtx;
5820 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
5822 else if (!TARGET_GNU_TLS)
5824 pic = gen_reg_rtx (Pmode);
5825 emit_insn (gen_set_got (pic));
5826 type = UNSPEC_GOTTPOFF;
5831 type = UNSPEC_INDNTPOFF;
5834 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
5835 off = gen_rtx_CONST (Pmode, off);
5837 off = gen_rtx_PLUS (Pmode, pic, off);
5838 off = gen_const_mem (Pmode, off);
5839 set_mem_alias_set (off, ix86_GOT_alias_set ());
5841 if (TARGET_64BIT || TARGET_GNU_TLS)
5843 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5844 off = force_reg (Pmode, off);
5845 return gen_rtx_PLUS (Pmode, base, off);
5849 base = get_thread_pointer (true);
5850 dest = gen_reg_rtx (Pmode);
5851 emit_insn (gen_subsi3 (dest, base, off));
5855 case TLS_MODEL_LOCAL_EXEC:
5856 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5857 (TARGET_64BIT || TARGET_GNU_TLS)
5858 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5859 off = gen_rtx_CONST (Pmode, off);
5861 if (TARGET_64BIT || TARGET_GNU_TLS)
5863 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5864 return gen_rtx_PLUS (Pmode, base, off);
5868 base = get_thread_pointer (true);
5869 dest = gen_reg_rtx (Pmode);
5870 emit_insn (gen_subsi3 (dest, base, off));
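/* As an example, under the GNU TLS ABI the 32-bit local-exec address
   built in the last case corresponds to (illustrative):

       movl %gs:0, %reg                ; thread pointer (UNSPEC_TP)
       leal x@ntpoff(%reg), %reg       ; offset resolved at link time  */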
5881 /* Try machine-dependent ways of modifying an illegitimate address
5882 to be legitimate. If we find one, return the new, valid address.
5883 This macro is used in only one place: `memory_address' in explow.c.
5885 OLDX is the address as it was before break_out_memory_refs was called.
5886 In some cases it is useful to look at this to decide what needs to be done.
5888 MODE and WIN are passed so that this macro can use
5889 GO_IF_LEGITIMATE_ADDRESS.
5891 It is always safe for this macro to do nothing. It exists to recognize
5892 opportunities to optimize the output.
5894 For the 80386, we handle X+REG by loading X into a register R and
5895 using R+REG. R will go in a general reg and indexing will be used.
5896 However, if REG is a broken-out memory address or multiplication,
5897 nothing needs to be done because REG can certainly go in a general reg.
5899 When -fpic is used, special handling is needed for symbolic references.
5900 See comments by legitimize_pic_address in i386.c for details. */
5903 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
5908 if (TARGET_DEBUG_ADDR)
5910 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5911 GET_MODE_NAME (mode));
5915 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
5917 return legitimize_tls_address (x, log, false);
5918 if (GET_CODE (x) == CONST
5919 && GET_CODE (XEXP (x, 0)) == PLUS
5920 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
5921 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
5923 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
5924 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
5927 if (flag_pic && SYMBOLIC_CONST (x))
5928 return legitimize_pic_address (x, 0);
5930 /* Canonicalize shifts by 0, 1, 2, 3 into a multiply. */
5931 if (GET_CODE (x) == ASHIFT
5932 && GET_CODE (XEXP (x, 1)) == CONST_INT
5933 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
5936 log = INTVAL (XEXP (x, 1));
5937 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5938 GEN_INT (1 << log));
5941 if (GET_CODE (x) == PLUS)
5943 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5945 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5946 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5947 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
5950 log = INTVAL (XEXP (XEXP (x, 0), 1));
5951 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5952 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5953 GEN_INT (1 << log));
5956 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5957 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5958 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
5961 log = INTVAL (XEXP (XEXP (x, 1), 1));
5962 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5963 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5964 GEN_INT (1 << log));
5967 /* Put multiply first if it isn't already. */
5968 if (GET_CODE (XEXP (x, 1)) == MULT)
5970 rtx tmp = XEXP (x, 0);
5971 XEXP (x, 0) = XEXP (x, 1);
5976 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5977 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5978 created by virtual register instantiation, register elimination, and
5979 similar optimizations. */
5980 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5983 x = gen_rtx_PLUS (Pmode,
5984 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5985 XEXP (XEXP (x, 1), 0)),
5986 XEXP (XEXP (x, 1), 1));
5990 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5991 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5992 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5993 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5994 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5995 && CONSTANT_P (XEXP (x, 1)))
5998 rtx other = NULL_RTX;
6000 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6002 constant = XEXP (x, 1);
6003 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6005 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6007 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6008 other = XEXP (x, 1);
6016 x = gen_rtx_PLUS (Pmode,
6017 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6018 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6019 plus_constant (other, INTVAL (constant)));
6023 if (changed && legitimate_address_p (mode, x, FALSE))
6026 if (GET_CODE (XEXP (x, 0)) == MULT)
6029 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6032 if (GET_CODE (XEXP (x, 1)) == MULT)
6035 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6039 && GET_CODE (XEXP (x, 1)) == REG
6040 && GET_CODE (XEXP (x, 0)) == REG)
6043 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6046 x = legitimize_pic_address (x, 0);
6049 if (changed && legitimate_address_p (mode, x, FALSE))
6052 if (GET_CODE (XEXP (x, 0)) == REG)
6054 rtx temp = gen_reg_rtx (Pmode);
6055 rtx val = force_operand (XEXP (x, 1), temp);
6057 emit_move_insn (temp, val);
6063 else if (GET_CODE (XEXP (x, 1)) == REG)
6065 rtx temp = gen_reg_rtx (Pmode);
6066 rtx val = force_operand (XEXP (x, 0), temp);
6068 emit_move_insn (temp, val);
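/* Example of the canonicalizations above: the address
   (plus (ashift (reg A) (const_int 2)) (reg B)) is first rewritten as
   (plus (mult (reg A) (const_int 4)) (reg B)), which
   ix86_decompose_address then accepts as base B, index A, scale 4.  */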
6078 /* Print an integer constant expression in assembler syntax. Addition
6079 and subtraction are the only arithmetic that may appear in these
6080 expressions. FILE is the stdio stream to write to, X is the rtx, and
6081 CODE is the operand print code from the output string. */
6084 output_pic_addr_const (FILE *file, rtx x, int code)
6088 switch (GET_CODE (x))
6091 gcc_assert (flag_pic);
6096 assemble_name (file, XSTR (x, 0));
6097 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6098 fputs ("@PLT", file);
6105 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6106 assemble_name (asm_out_file, buf);
6110 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6114 /* This used to output parentheses around the expression,
6115 but that does not work on the 386 (either ATT or BSD assembler). */
6116 output_pic_addr_const (file, XEXP (x, 0), code);
6120 if (GET_MODE (x) == VOIDmode)
6122 /* We can use %d if the number is <32 bits and positive. */
6123 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6124 fprintf (file, "0x%lx%08lx",
6125 (unsigned long) CONST_DOUBLE_HIGH (x),
6126 (unsigned long) CONST_DOUBLE_LOW (x));
6128 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6131 /* We can't handle floating point constants;
6132 PRINT_OPERAND must handle them. */
6133 output_operand_lossage ("floating constant misused");
6137 /* Some assemblers need integer constants to appear first. */
6138 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6140 output_pic_addr_const (file, XEXP (x, 0), code);
6142 output_pic_addr_const (file, XEXP (x, 1), code);
6146 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
6147 output_pic_addr_const (file, XEXP (x, 1), code);
6149 output_pic_addr_const (file, XEXP (x, 0), code);
6155 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6156 output_pic_addr_const (file, XEXP (x, 0), code);
6158 output_pic_addr_const (file, XEXP (x, 1), code);
6160 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6164 gcc_assert (XVECLEN (x, 0) == 1);
6165 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6166 switch (XINT (x, 1))
6169 fputs ("@GOT", file);
6172 fputs ("@GOTOFF", file);
6174 case UNSPEC_GOTPCREL:
6175 fputs ("@GOTPCREL(%rip)", file);
6177 case UNSPEC_GOTTPOFF:
6178 /* FIXME: This might be @TPOFF in Sun ld too. */
6179 fputs ("@GOTTPOFF", file);
6182 fputs ("@TPOFF", file);
6186 fputs ("@TPOFF", file);
6188 fputs ("@NTPOFF", file);
6191 fputs ("@DTPOFF", file);
6193 case UNSPEC_GOTNTPOFF:
6195 fputs ("@GOTTPOFF(%rip)", file);
6197 fputs ("@GOTNTPOFF", file);
6199 case UNSPEC_INDNTPOFF:
6200 fputs ("@INDNTPOFF", file);
6203 output_operand_lossage ("invalid UNSPEC as operand");
6209 output_operand_lossage ("invalid expression as operand");
6213 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
6214 We need to emit DTP-relative relocations. */
6217 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6219 fputs (ASM_LONG, file);
6220 output_addr_const (file, x);
6221 fputs ("@DTPOFF", file);
6227 fputs (", 0", file);
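/* So for a thread-local variable "x" this emits something like
   (illustrative):

       .long x@DTPOFF          ; size 4
       .long x@DTPOFF, 0       ; size 8, padded with a zero word  */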
6234 /* In the name of slightly smaller debug output, and to cater to
6235 general assembler lossage, recognize PIC+GOTOFF and turn it back
6236 into a direct symbol reference. */
6239 ix86_delegitimize_address (rtx orig_x)
6243 if (GET_CODE (x) == MEM)
6248 if (GET_CODE (x) != CONST
6249 || GET_CODE (XEXP (x, 0)) != UNSPEC
6250 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6251 || GET_CODE (orig_x) != MEM)
6253 return XVECEXP (XEXP (x, 0), 0, 0);
6256 if (GET_CODE (x) != PLUS
6257 || GET_CODE (XEXP (x, 1)) != CONST)
6260 if (GET_CODE (XEXP (x, 0)) == REG
6261 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6262 /* %ebx + GOT/GOTOFF */
6264 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6266 /* %ebx + %reg * scale + GOT/GOTOFF */
6268 if (GET_CODE (XEXP (y, 0)) == REG
6269 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6271 else if (GET_CODE (XEXP (y, 1)) == REG
6272 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6276 if (GET_CODE (y) != REG
6277 && GET_CODE (y) != MULT
6278 && GET_CODE (y) != ASHIFT)
6284 x = XEXP (XEXP (x, 1), 0);
6285 if (GET_CODE (x) == UNSPEC
6286 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6287 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6290 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6291 return XVECEXP (x, 0, 0);
6294 if (GET_CODE (x) == PLUS
6295 && GET_CODE (XEXP (x, 0)) == UNSPEC
6296 && GET_CODE (XEXP (x, 1)) == CONST_INT
6297 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6298 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6299 && GET_CODE (orig_x) != MEM)))
6301 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6303 return gen_rtx_PLUS (Pmode, y, x);
6311 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6316 if (mode == CCFPmode || mode == CCFPUmode)
6318 enum rtx_code second_code, bypass_code;
6319 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6320 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
6321 code = ix86_fp_compare_code_to_integer (code);
6325 code = reverse_condition (code);
6336 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
6340 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6341 Those same assemblers have the same but opposite lossage on cmov. */
6342 gcc_assert (mode == CCmode);
6343 suffix = fp ? "nbe" : "a";
6363 gcc_assert (mode == CCmode);
6385 gcc_assert (mode == CCmode);
6386 suffix = fp ? "nb" : "ae";
6389 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
6393 gcc_assert (mode == CCmode);
6397 suffix = fp ? "u" : "p";
6400 suffix = fp ? "nu" : "np";
6405 fputs (suffix, file);
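/* A few sample mappings (a sketch, following the AT&T suffix
   conventions used above): EQ -> "e", NE -> "ne", GTU -> "a" (or "nbe"
   for the fcmov workaround above), GEU -> "ae" (fp "nb"),
   UNORDERED -> "p" (fp "u"), ORDERED -> "np" (fp "nu").  */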
6408 /* Print the name of register X to FILE based on its machine mode and number.
6409 If CODE is 'w', pretend the mode is HImode.
6410 If CODE is 'b', pretend the mode is QImode.
6411 If CODE is 'k', pretend the mode is SImode.
6412 If CODE is 'q', pretend the mode is DImode.
6413 If CODE is 'h', pretend the reg is the 'high' byte register.
6414 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
6417 print_reg (rtx x, int code, FILE *file)
6419 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
6420 && REGNO (x) != FRAME_POINTER_REGNUM
6421 && REGNO (x) != FLAGS_REG
6422 && REGNO (x) != FPSR_REG);
6424 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6427 if (code == 'w' || MMX_REG_P (x))
6429 else if (code == 'b')
6431 else if (code == 'k')
6433 else if (code == 'q')
6435 else if (code == 'y')
6437 else if (code == 'h')
6440 code = GET_MODE_SIZE (GET_MODE (x));
6442 /* Irritatingly, AMD extended registers use a different naming convention
6443 from the normal registers.  */
6444 if (REX_INT_REG_P (x))
6446 gcc_assert (TARGET_64BIT);
6450 error ("extended registers have no high halves");
6453 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6456 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6459 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6462 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6465 error ("unsupported operand size for extended register");
6473 if (STACK_TOP_P (x))
6475 fputs ("st(0)", file);
6482 if (! ANY_FP_REG_P (x))
6483 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6488 fputs (hi_reg_name[REGNO (x)], file);
6491 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
6493 fputs (qi_reg_name[REGNO (x)], file);
6496 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
6498 fputs (qi_high_reg_name[REGNO (x)], file);
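/* Examples (illustrative): with operands[0] being hard reg 0, "%b0"
   prints "%al", "%w0" prints "%ax", "%k0" prints "%eax" and "%h0"
   prints "%ah"; an AMD extended register such as r8 prints as
   "r8b"/"r8w"/"r8d"/"r8" depending on the requested width.  */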
6505 /* Locate some local-dynamic symbol still in use by this function
6506 so that we can print its name in some tls_local_dynamic_base pattern.  */
6510 get_some_local_dynamic_name (void)
6514 if (cfun->machine->some_ld_name)
6515 return cfun->machine->some_ld_name;
6517 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6519 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6520 return cfun->machine->some_ld_name;
6526 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
6530 if (GET_CODE (x) == SYMBOL_REF
6531 && local_dynamic_symbolic_operand (x, Pmode))
6533 cfun->machine->some_ld_name = XSTR (x, 0);
6541 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6542 C -- print opcode suffix for set/cmov insn.
6543 c -- like C, but print reversed condition
6544 F,f -- likewise, but for floating-point.
6545 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", otherwise nothing
6547 R -- print the prefix for register names.
6548 z -- print the opcode suffix for the size of the current operand.
6549 * -- print a star (in certain assembler syntax)
6550 A -- print an absolute memory reference.
6551 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6552 s -- print a shift double count, followed by the assembler's argument delimiter.
6554 b -- print the QImode name of the register for the indicated operand.
6555 %b0 would print %al if operands[0] is reg 0.
6556 w -- likewise, print the HImode name of the register.
6557 k -- likewise, print the SImode name of the register.
6558 q -- likewise, print the DImode name of the register.
6559 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6560 y -- print "st(0)" instead of "st" as a register.
6561 D -- print condition for SSE cmp instruction.
6562 P -- if PIC, print an @PLT suffix.
6563 X -- don't print any sort of PIC '@' suffix for a symbol.
6564 & -- print some in-use local-dynamic symbol name.
6565 H -- print a memory address offset by 8; used for sse high-parts
6569 print_operand (FILE *file, rtx x, int code)
6576 if (ASSEMBLER_DIALECT == ASM_ATT)
6581 assemble_name (file, get_some_local_dynamic_name ());
6585 switch (ASSEMBLER_DIALECT)
6592 /* Intel syntax. For absolute addresses, registers should not
6593 be surrounded by braces. */
6594 if (GET_CODE (x) != REG)
6597 PRINT_OPERAND (file, x, 0);
6607 PRINT_OPERAND (file, x, 0);
6612 if (ASSEMBLER_DIALECT == ASM_ATT)
6617 if (ASSEMBLER_DIALECT == ASM_ATT)
6622 if (ASSEMBLER_DIALECT == ASM_ATT)
6627 if (ASSEMBLER_DIALECT == ASM_ATT)
6632 if (ASSEMBLER_DIALECT == ASM_ATT)
6637 if (ASSEMBLER_DIALECT == ASM_ATT)
6642 /* 387 opcodes don't get size suffixes if the operands are registers.  */
6644 if (STACK_REG_P (x))
6647 /* Likewise if using Intel opcodes. */
6648 if (ASSEMBLER_DIALECT == ASM_INTEL)
6651 /* Derive the opcode suffix from the size of the operand.  */
6652 switch (GET_MODE_SIZE (GET_MODE (x)))
6655 #ifdef HAVE_GAS_FILDS_FISTS
6661 if (GET_MODE (x) == SFmode)
6676 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6678 #ifdef GAS_MNEMONICS
6704 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6706 PRINT_OPERAND (file, x, 0);
6712 /* A little bit of braindamage here.  The SSE compare instructions
6713 use completely different names for the comparisons than the
6714 fp conditional moves do.  */
6715 switch (GET_CODE (x))
6730 fputs ("unord", file);
6734 fputs ("neq", file);
6738 fputs ("nlt", file);
6742 fputs ("nle", file);
6745 fputs ("ord", file);
6752 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6753 if (ASSEMBLER_DIALECT == ASM_ATT)
6755 switch (GET_MODE (x))
6757 case HImode: putc ('w', file); break;
6759 case SFmode: putc ('l', file); break;
6761 case DFmode: putc ('q', file); break;
6762 default: gcc_unreachable ();
6769 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6772 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6773 if (ASSEMBLER_DIALECT == ASM_ATT)
6776 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6779 /* Like above, but reverse condition */
6781 /* Check to see if argument to %c is really a constant
6782 and not a condition code which needs to be reversed. */
6783 if (!COMPARISON_P (x))
6785 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6788 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6791 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6792 if (ASSEMBLER_DIALECT == ASM_ATT)
6795 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6799 /* It doesn't actually matter what mode we use here, as we're
6800 only going to use this for printing. */
6801 x = adjust_address_nv (x, DImode, 8);
6808 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6811 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6814 int pred_val = INTVAL (XEXP (x, 0));
6816 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6817 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6819 int taken = pred_val > REG_BR_PROB_BASE / 2;
6820 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6822 /* Emit hints only in the cases where the default branch prediction
6823 heuristics would fail.  */
6824 if (taken != cputaken)
6826 /* We use 3e (DS) prefix for taken branches and
6827 2e (CS) prefix for not taken branches. */
6829 fputs ("ds ; ", file);
6831 fputs ("cs ; ", file);
6838 output_operand_lossage ("invalid operand code '%c'", code);
6842 if (GET_CODE (x) == REG)
6843 print_reg (x, code, file);
6845 else if (GET_CODE (x) == MEM)
6847 /* No `byte ptr' prefix for call instructions. */
6848 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6851 switch (GET_MODE_SIZE (GET_MODE (x)))
6853 case 1: size = "BYTE"; break;
6854 case 2: size = "WORD"; break;
6855 case 4: size = "DWORD"; break;
6856 case 8: size = "QWORD"; break;
6857 case 12: size = "XWORD"; break;
6858 case 16: size = "XMMWORD"; break;
6863 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6866 else if (code == 'w')
6868 else if (code == 'k')
6872 fputs (" PTR ", file);
6876 /* Avoid (%rip) for call operands. */
6877 if (CONSTANT_ADDRESS_P (x) && code == 'P'
6878 && GET_CODE (x) != CONST_INT)
6879 output_addr_const (file, x);
6880 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6881 output_operand_lossage ("invalid constraints for operand");
6886 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6891 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6892 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6894 if (ASSEMBLER_DIALECT == ASM_ATT)
6896 fprintf (file, "0x%08lx", l);
6899 /* These float cases don't actually occur as immediate operands. */
6900 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6904 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6905 fprintf (file, "%s", dstr);
6908 else if (GET_CODE (x) == CONST_DOUBLE
6909 && GET_MODE (x) == XFmode)
6913 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6914 fprintf (file, "%s", dstr);
6919 /* We have patterns that allow zero sets of memory, for instance.
6920 In 64-bit mode, we should probably support all 8-byte vectors,
6921 since we can in fact encode that into an immediate. */
6922 if (GET_CODE (x) == CONST_VECTOR)
6924 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
6930 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6932 if (ASSEMBLER_DIALECT == ASM_ATT)
6935 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6936 || GET_CODE (x) == LABEL_REF)
6938 if (ASSEMBLER_DIALECT == ASM_ATT)
6941 fputs ("OFFSET FLAT:", file);
6944 if (GET_CODE (x) == CONST_INT)
6945 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6947 output_pic_addr_const (file, x, code);
6949 output_addr_const (file, x);
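/* Example usage from insn templates (illustrative only, not a template
   taken verbatim from the machine description): something like
   "cmov%C2\t{%1, %0|%0, %1}" lets print_operand emit the condition
   suffix for operand 2 while the {att|intel} braces select the operand
   order for the current assembler dialect.  */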
6953 /* Print a memory operand whose address is ADDR. */
6956 print_operand_address (FILE *file, rtx addr)
6958 struct ix86_address parts;
6959 rtx base, index, disp;
6961 int ok = ix86_decompose_address (addr, &parts);
6966 index = parts.index;
6968 scale = parts.scale;
6976 if (USER_LABEL_PREFIX[0] == 0)
6978 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
6984 if (!base && !index)
6986 /* A displacement-only address requires special attention.  */
6988 if (GET_CODE (disp) == CONST_INT)
6990 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
6992 if (USER_LABEL_PREFIX[0] == 0)
6994 fputs ("ds:", file);
6996 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
6999 output_pic_addr_const (file, disp, 0);
7001 output_addr_const (file, disp);
7003 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode.  */
7005 && ((GET_CODE (disp) == SYMBOL_REF
7006 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7007 || GET_CODE (disp) == LABEL_REF
7008 || (GET_CODE (disp) == CONST
7009 && GET_CODE (XEXP (disp, 0)) == PLUS
7010 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7011 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7012 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7013 fputs ("(%rip)", file);
7017 if (ASSEMBLER_DIALECT == ASM_ATT)
7022 output_pic_addr_const (file, disp, 0);
7023 else if (GET_CODE (disp) == LABEL_REF)
7024 output_asm_label (disp);
7026 output_addr_const (file, disp);
7031 print_reg (base, 0, file);
7035 print_reg (index, 0, file);
7037 fprintf (file, ",%d", scale);
7043 rtx offset = NULL_RTX;
7047 /* Pull out the offset of a symbol; print any symbol itself. */
7048 if (GET_CODE (disp) == CONST
7049 && GET_CODE (XEXP (disp, 0)) == PLUS
7050 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7052 offset = XEXP (XEXP (disp, 0), 1);
7053 disp = gen_rtx_CONST (VOIDmode,
7054 XEXP (XEXP (disp, 0), 0));
7058 output_pic_addr_const (file, disp, 0);
7059 else if (GET_CODE (disp) == LABEL_REF)
7060 output_asm_label (disp);
7061 else if (GET_CODE (disp) == CONST_INT)
7064 output_addr_const (file, disp);
7070 print_reg (base, 0, file);
7073 if (INTVAL (offset) >= 0)
7075 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7079 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7086 print_reg (index, 0, file);
7088 fprintf (file, "*%d", scale);
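/* Example of the two dialects for the same address (a sketch): AT&T
   prints "disp(%base,%index,scale)", e.g. "8(%eax,%ebx,4)", while
   Intel prints "[base+index*scale+disp]", e.g. "[eax+ebx*4+8]".  */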
7096 output_addr_const_extra (FILE *file, rtx x)
7100 if (GET_CODE (x) != UNSPEC)
7103 op = XVECEXP (x, 0, 0);
7104 switch (XINT (x, 1))
7106 case UNSPEC_GOTTPOFF:
7107 output_addr_const (file, op);
7108 /* FIXME: This might be @TPOFF in Sun ld. */
7109 fputs ("@GOTTPOFF", file);
7112 output_addr_const (file, op);
7113 fputs ("@TPOFF", file);
7116 output_addr_const (file, op);
7118 fputs ("@TPOFF", file);
7120 fputs ("@NTPOFF", file);
7123 output_addr_const (file, op);
7124 fputs ("@DTPOFF", file);
7126 case UNSPEC_GOTNTPOFF:
7127 output_addr_const (file, op);
7129 fputs ("@GOTTPOFF(%rip)", file);
7131 fputs ("@GOTNTPOFF", file);
7133 case UNSPEC_INDNTPOFF:
7134 output_addr_const (file, op);
7135 fputs ("@INDNTPOFF", file);
7145 /* Split one or more DImode RTL references into pairs of SImode
7146 references. The RTL can be REG, offsettable MEM, integer constant, or
7147 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7148 split and "num" is its length. lo_half and hi_half are output arrays
7149 that parallel "operands". */
7152 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7156 rtx op = operands[num];
7158 /* simplify_subreg refuses to split volatile memory addresses,
7159 but we still have to handle them.  */
7160 if (GET_CODE (op) == MEM)
7162 lo_half[num] = adjust_address (op, SImode, 0);
7163 hi_half[num] = adjust_address (op, SImode, 4);
7167 lo_half[num] = simplify_gen_subreg (SImode, op,
7168 GET_MODE (op) == VOIDmode
7169 ? DImode : GET_MODE (op), 0);
7170 hi_half[num] = simplify_gen_subreg (SImode, op,
7171 GET_MODE (op) == VOIDmode
7172 ? DImode : GET_MODE (op), 4);
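/* A minimal standalone sketch (not GCC code, not compiled into this
   file) of the value-level split that split_di performs on constants:
   a 64-bit quantity becomes two 32-bit halves, low half first, matching
   the little-endian layout used by adjust_address above.  */
#if 0
#include <stdint.h>

static void
split64 (uint64_t v, uint32_t *lo, uint32_t *hi)
{
  *lo = (uint32_t) v;          /* bits 0..31, like adjust_address (..., 0) */
  *hi = (uint32_t) (v >> 32);  /* bits 32..63, like adjust_address (..., 4) */
}
#endif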
7176 /* Split one or more TImode RTL references into pairs of SImode
7177 references. The RTL can be REG, offsettable MEM, integer constant, or
7178 CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
7179 split and "num" is its length. lo_half and hi_half are output arrays
7180 that parallel "operands". */
7183 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7187 rtx op = operands[num];
7189 /* simplify_subreg refuses to split volatile memory addresses, but we
7190 still have to handle them.  */
7191 if (GET_CODE (op) == MEM)
7193 lo_half[num] = adjust_address (op, DImode, 0);
7194 hi_half[num] = adjust_address (op, DImode, 8);
7198 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7199 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7204 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7205 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7206 is the expression of the binary operation. The output may either be
7207 emitted here, or returned to the caller, like all output_* functions.
7209 There is no guarantee that the operands are the same mode, as they
7210 might be within FLOAT or FLOAT_EXTEND expressions. */
7212 #ifndef SYSV386_COMPAT
7213 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7214 wants to fix the assemblers because that causes incompatibility
7215 with gcc. No-one wants to fix gcc because that causes
7216 incompatibility with assemblers... You can use the option of
7217 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7218 #define SYSV386_COMPAT 1
7222 output_387_binary_op (rtx insn, rtx *operands)
7224 static char buf[30];
7227 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
7229 #ifdef ENABLE_CHECKING
7230 /* Even if we do not want to check the inputs, this documents the
7231 input constraints, which helps in understanding the following code.  */
7232 if (STACK_REG_P (operands[0])
7233 && ((REG_P (operands[1])
7234 && REGNO (operands[0]) == REGNO (operands[1])
7235 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7236 || (REG_P (operands[2])
7237 && REGNO (operands[0]) == REGNO (operands[2])
7238 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7239 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7242 gcc_assert (is_sse);
7245 switch (GET_CODE (operands[3]))
7248 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7249 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7257 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7258 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7266 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7267 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7275 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7276 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7290 if (GET_MODE (operands[0]) == SFmode)
7291 strcat (buf, "ss\t{%2, %0|%0, %2}");
7293 strcat (buf, "sd\t{%2, %0|%0, %2}");
7298 switch (GET_CODE (operands[3]))
7302 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7304 rtx temp = operands[2];
7305 operands[2] = operands[1];
7309 /* We now know operands[0] == operands[1].  */
7311 if (GET_CODE (operands[2]) == MEM)
7317 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7319 if (STACK_TOP_P (operands[0]))
7320 /* How is it that we are storing to a dead operand[2]?
7321 Well, presumably operands[1] is dead too. We can't
7322 store the result to st(0) as st(0) gets popped on this
7323 instruction. Instead store to operands[2] (which I
7324 think has to be st(1)). st(1) will be popped later.
7325 gcc <= 2.8.1 didn't have this check and generated
7326 assembly code that the Unixware assembler rejected. */
7327 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7329 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7333 if (STACK_TOP_P (operands[0]))
7334 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7336 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7341 if (GET_CODE (operands[1]) == MEM)
7347 if (GET_CODE (operands[2]) == MEM)
7353 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7356 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7357 derived assemblers, confusingly reverse the direction of
7358 the operation for fsub{r} and fdiv{r} when the
7359 destination register is not st(0). The Intel assembler
7360 doesn't have this brain damage. Read !SYSV386_COMPAT to
7361 figure out what the hardware really does. */
7362 if (STACK_TOP_P (operands[0]))
7363 p = "{p\t%0, %2|rp\t%2, %0}";
7365 p = "{rp\t%2, %0|p\t%0, %2}";
7367 if (STACK_TOP_P (operands[0]))
7368 /* As above for fmul/fadd, we can't store to st(0). */
7369 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7371 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7376 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7379 if (STACK_TOP_P (operands[0]))
7380 p = "{rp\t%0, %1|p\t%1, %0}";
7382 p = "{p\t%1, %0|rp\t%0, %1}";
7384 if (STACK_TOP_P (operands[0]))
7385 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7387 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7392 if (STACK_TOP_P (operands[0]))
7394 if (STACK_TOP_P (operands[1]))
7395 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7397 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7400 else if (STACK_TOP_P (operands[1]))
7403 p = "{\t%1, %0|r\t%0, %1}";
7405 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7411 p = "{r\t%2, %0|\t%0, %2}";
7413 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7426 /* Return the mode needed for an entity in the optimize_mode_switching pass.  */
7429 ix86_mode_needed (int entity, rtx insn)
7431 enum attr_i387_cw mode;
7433 /* The mode UNINITIALIZED is used to store the control word after a
7434 function call or ASM pattern.  The mode ANY specifies that the function
7435 has no requirements on the control word and makes no changes to the
7436 bits we are interested in.  */
7439 || (NONJUMP_INSN_P (insn)
7440 && (asm_noperands (PATTERN (insn)) >= 0
7441 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
7442 return I387_CW_UNINITIALIZED;
7444 if (recog_memoized (insn) < 0)
7447 mode = get_attr_i387_cw (insn);
7452 if (mode == I387_CW_TRUNC)
7457 if (mode == I387_CW_FLOOR)
7462 if (mode == I387_CW_CEIL)
7467 if (mode == I387_CW_MASK_PM)
7478 /* Output code to initialize the control word copies used by the trunc?f?i
7479 and rounding patterns.  MODE selects which variant of the control word
7480 (truncation, floor, ceiling, or precision masking) is set up.  */
7483 emit_i387_cw_initialization (int mode)
7485 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
7490 rtx reg = gen_reg_rtx (HImode);
7492 emit_insn (gen_x86_fnstcw_1 (stored_mode));
7493 emit_move_insn (reg, stored_mode);
7495 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
7500 /* round toward zero (truncate) */
7501 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
7502 slot = SLOT_CW_TRUNC;
7506 /* round down toward -oo */
7507 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7508 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
7509 slot = SLOT_CW_FLOOR;
7513 /* round up toward +oo */
7514 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7515 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
7516 slot = SLOT_CW_CEIL;
7519 case I387_CW_MASK_PM:
7520 /* mask precision exception for nearbyint() */
7521 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7522 slot = SLOT_CW_MASK_PM;
7534 /* round toward zero (truncate) */
7535 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7536 slot = SLOT_CW_TRUNC;
7540 /* round down toward -oo */
7541 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
7542 slot = SLOT_CW_FLOOR;
7546 /* round up toward +oo */
7547 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
7548 slot = SLOT_CW_CEIL;
7551 case I387_CW_MASK_PM:
7552 /* mask precision exception for nearbyint() */
7553 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7554 slot = SLOT_CW_MASK_PM;
7562 gcc_assert (slot < MAX_386_STACK_LOCALS);
7564 new_mode = assign_386_stack_local (HImode, slot);
7565 emit_move_insn (new_mode, reg);
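/* A standalone illustration (not used by the compiler, guarded out) of
   the control word rewriting done above: bits 10-11 of the 387 control
   word select the rounding mode (0x0400 = down, 0x0800 = up,
   0x0c00 = toward zero) and bit 5 (0x0020) masks the precision
   exception.  */
#if 0
#include <stdint.h>

enum cw_variant { ROUND_TRUNC, ROUND_FLOOR, ROUND_CEIL, MASK_PM };

static uint16_t
rewrite_cw (uint16_t cw, enum cw_variant v)
{
  switch (v)
    {
    case ROUND_TRUNC: return cw | 0x0c00;             /* both RC bits set */
    case ROUND_FLOOR: return (cw & ~0x0c00) | 0x0400; /* round toward -oo */
    case ROUND_CEIL:  return (cw & ~0x0c00) | 0x0800; /* round toward +oo */
    case MASK_PM:     return cw | 0x0020;             /* mask precision exc. */
    }
  return cw;
}
#endif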
7568 /* Output code for INSN to convert a float to a signed int. OPERANDS
7569 are the insn operands. The output may be [HSD]Imode and the input
7570 operand may be [SDX]Fmode. */
7573 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
7575 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7576 int dimode_p = GET_MODE (operands[0]) == DImode;
7577 int round_mode = get_attr_i387_cw (insn);
7579 /* Jump through a hoop or two for DImode, since the hardware has no
7580 non-popping instruction. We used to do this a different way, but
7581 that was somewhat fragile and broke with post-reload splitters. */
7582 if ((dimode_p || fisttp) && !stack_top_dies)
7583 output_asm_insn ("fld\t%y1", operands);
7585 gcc_assert (STACK_TOP_P (operands[1]));
7586 gcc_assert (GET_CODE (operands[0]) == MEM);
7589 output_asm_insn ("fisttp%z0\t%0", operands);
7592 if (round_mode != I387_CW_ANY)
7593 output_asm_insn ("fldcw\t%3", operands);
7594 if (stack_top_dies || dimode_p)
7595 output_asm_insn ("fistp%z0\t%0", operands);
7597 output_asm_insn ("fist%z0\t%0", operands);
7598 if (round_mode != I387_CW_ANY)
7599 output_asm_insn ("fldcw\t%2", operands);
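/* The non-fisttp path above amounts to roughly this sequence
   (illustrative; %z0 expands to the size suffix, e.g. "fistpll" for a
   64-bit store):

       fldcw  %3      ; switch to the round-toward-zero control word
       fistp%z0 %0    ; store the integer result and pop
       fldcw  %2      ; restore the original control word
*/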
7605 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7606 should be used. UNORDERED_P is true when fucom should be used. */
7609 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
7612 rtx cmp_op0, cmp_op1;
7613 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
7617 cmp_op0 = operands[0];
7618 cmp_op1 = operands[1];
7622 cmp_op0 = operands[1];
7623 cmp_op1 = operands[2];
7628 if (GET_MODE (operands[0]) == SFmode)
7630 return "ucomiss\t{%1, %0|%0, %1}";
7632 return "comiss\t{%1, %0|%0, %1}";
7635 return "ucomisd\t{%1, %0|%0, %1}";
7637 return "comisd\t{%1, %0|%0, %1}";
7640 gcc_assert (STACK_TOP_P (cmp_op0));
7642 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7644 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
7648 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
7649 return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
7652 return "ftst\n\tfnstsw\t%0";
7655 if (STACK_REG_P (cmp_op1)
7657 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7658 && REGNO (cmp_op1) != FIRST_STACK_REG)
7660 /* If the top of the 387 stack dies, and the other operand
7661 is also a stack register that dies, then this must be a
7662 `fcompp' float compare.  */
7666 /* There is no double popping fcomi variant. Fortunately,
7667 eflags is immune from the fstp's cc clobbering. */
7669 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7671 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7672 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
7677 return "fucompp\n\tfnstsw\t%0";
7679 return "fcompp\n\tfnstsw\t%0";
7684 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7686 static const char * const alt[16] =
7688 "fcom%z2\t%y2\n\tfnstsw\t%0",
7689 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7690 "fucom%z2\t%y2\n\tfnstsw\t%0",
7691 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7693 "ficom%z2\t%y2\n\tfnstsw\t%0",
7694 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7698 "fcomi\t{%y1, %0|%0, %y1}",
7699 "fcomip\t{%y1, %0|%0, %y1}",
7700 "fucomi\t{%y1, %0|%0, %y1}",
7701 "fucomip\t{%y1, %0|%0, %y1}",
7712 mask = eflags_p << 3;
7713 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
7714 mask |= unordered_p << 1;
7715 mask |= stack_top_dies;
7717 gcc_assert (mask < 16);
7726 ix86_output_addr_vec_elt (FILE *file, int value)
7728 const char *directive = ASM_LONG;
7732 directive = ASM_QUAD;
7734 gcc_assert (!TARGET_64BIT);
7737 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7741 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
7744 fprintf (file, "%s%s%d-%s%d\n",
7745 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7746 else if (HAVE_AS_GOTOFF_IN_DATA)
7747 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7749 else if (TARGET_MACHO)
7751 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
7752 machopic_output_function_base_name (file);
7753 fprintf(file, "\n");
7757 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7758 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
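/* Sample output (a sketch, assuming the usual ".L" local label prefix):
   the 64-bit case emits ".long .L4-.L2"; 32-bit with GOTOFF-in-data
   support emits ".long .L4@GOTOFF"; otherwise the entry is written
   relative to the GOT symbol (or the Mach-O function base on Darwin).  */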
7761 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7765 ix86_expand_clear (rtx dest)
7769 /* We play register width games, which are only valid after reload. */
7770 gcc_assert (reload_completed);
7772 /* Avoid HImode and its attendant prefix byte. */
7773 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7774 dest = gen_rtx_REG (SImode, REGNO (dest));
7776 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7778 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7779 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7781 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7782 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
/* i.e. prefer "xorl %eax, %eax" (2 bytes, clobbers flags) over
   "movl $0, %eax" (5 bytes) whenever the flags clobber is allowed.  */
7788 /* X is an unchanging MEM. If it is a constant pool reference, return
7789 the constant pool rtx, else NULL. */
7792 maybe_get_pool_constant (rtx x)
7794 x = ix86_delegitimize_address (XEXP (x, 0));
7796 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7797 return get_pool_constant (x);
7803 ix86_expand_move (enum machine_mode mode, rtx operands[])
7805 int strict = (reload_in_progress || reload_completed);
7807 enum tls_model model;
7812 if (GET_CODE (op1) == SYMBOL_REF)
7814 model = SYMBOL_REF_TLS_MODEL (op1);
7817 op1 = legitimize_tls_address (op1, model, true);
7818 op1 = force_operand (op1, op0);
7823 else if (GET_CODE (op1) == CONST
7824 && GET_CODE (XEXP (op1, 0)) == PLUS
7825 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
7827 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
7830 rtx addend = XEXP (XEXP (op1, 0), 1);
7831 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
7832 op1 = force_operand (op1, NULL);
7833 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
7834 op0, 1, OPTAB_DIRECT);
7840 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7845 rtx temp = ((reload_in_progress
7846 || ((op0 && GET_CODE (op0) == REG)
7848 ? op0 : gen_reg_rtx (Pmode));
7849 op1 = machopic_indirect_data_reference (op1, temp);
7850 op1 = machopic_legitimize_pic_address (op1, mode,
7851 temp == op1 ? 0 : temp);
7853 else if (MACHOPIC_INDIRECT)
7854 op1 = machopic_indirect_data_reference (op1, 0);
7858 if (GET_CODE (op0) == MEM)
7859 op1 = force_reg (Pmode, op1);
7861 op1 = legitimize_address (op1, op1, Pmode);
7862 #endif /* TARGET_MACHO */
7866 if (GET_CODE (op0) == MEM
7867 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7868 || !push_operand (op0, mode))
7869 && GET_CODE (op1) == MEM)
7870 op1 = force_reg (mode, op1);
7872 if (push_operand (op0, mode)
7873 && ! general_no_elim_operand (op1, mode))
7874 op1 = copy_to_mode_reg (mode, op1);
7876 /* Force large constants in 64-bit compilation into a register
7877 to get them CSEed.  */
7878 if (TARGET_64BIT && mode == DImode
7879 && immediate_operand (op1, mode)
7880 && !x86_64_zext_immediate_operand (op1, VOIDmode)
7881 && !register_operand (op0, mode)
7882 && optimize && !reload_completed && !reload_in_progress)
7883 op1 = copy_to_mode_reg (mode, op1);
7885 if (FLOAT_MODE_P (mode))
7887 /* If we are loading a floating point constant to a register,
7888 force the value to memory now, since we'll get better code
7889 out of the back end.  */
7893 else if (GET_CODE (op1) == CONST_DOUBLE)
7895 op1 = validize_mem (force_const_mem (mode, op1));
7896 if (!register_operand (op0, mode))
7898 rtx temp = gen_reg_rtx (mode);
7899 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
7900 emit_move_insn (op0, temp);
7907 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7911 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
7913 rtx op0 = operands[0], op1 = operands[1];
7915 /* Force constants other than zero into memory.  We do not know how
7916 the instructions used to build constants modify the upper 64 bits
7917 of the register; once we have that information we may be able
7918 to handle some of them more efficiently.  */
7919 if ((reload_in_progress | reload_completed) == 0
7920 && register_operand (op0, mode)
7921 && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
7922 op1 = validize_mem (force_const_mem (mode, op1));
7924 /* Make operand1 a register if it isn't already. */
7926 && !register_operand (op0, mode)
7927 && !register_operand (op1, mode))
7929 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
7933 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7936 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
7937 straight to ix86_expand_vector_move. */
7940 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
7949 /* If we're optimizing for size, movups is the smallest. */
7952 op0 = gen_lowpart (V4SFmode, op0);
7953 op1 = gen_lowpart (V4SFmode, op1);
7954 emit_insn (gen_sse_movups (op0, op1));
7958 /* ??? If we have typed data, then it would appear that using
7959 movdqu is the only way to get unaligned data loaded with integrity.  */
7961 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
7963 op0 = gen_lowpart (V16QImode, op0);
7964 op1 = gen_lowpart (V16QImode, op1);
7965 emit_insn (gen_sse2_movdqu (op0, op1));
7969 if (TARGET_SSE2 && mode == V2DFmode)
7973 /* When SSE registers are split into halves, we can avoid
7974 writing to the top half twice. */
7975 if (TARGET_SSE_SPLIT_REGS)
7977 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
7982 /* ??? Not sure about the best option for the Intel chips.
7983 The following would seem to satisfy; the register is
7984 entirely cleared, breaking the dependency chain. We
7985 then store to the upper half, with a dependency depth
7986 of one. A rumor has it that Intel recommends two movsd
7987 followed by an unpacklpd, but this is unconfirmed. And
7988 given that the dependency depth of the unpacklpd would
7989 still be one, I'm not sure why this would be better. */
7990 zero = CONST0_RTX (V2DFmode);
7993 m = adjust_address (op1, DFmode, 0);
7994 emit_insn (gen_sse2_loadlpd (op0, zero, m));
7995 m = adjust_address (op1, DFmode, 8);
7996 emit_insn (gen_sse2_loadhpd (op0, op0, m));
8000 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
8001 emit_move_insn (op0, CONST0_RTX (mode));
8003 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
8005 if (mode != V4SFmode)
8006 op0 = gen_lowpart (V4SFmode, op0);
8007 m = adjust_address (op1, V2SFmode, 0);
8008 emit_insn (gen_sse_loadlps (op0, op0, m));
8009 m = adjust_address (op1, V2SFmode, 8);
8010 emit_insn (gen_sse_loadhps (op0, op0, m));
8013 else if (MEM_P (op0))
8015 /* If we're optimizing for size, movups is the smallest. */
8018 op0 = gen_lowpart (V4SFmode, op0);
8019 op1 = gen_lowpart (V4SFmode, op1);
8020 emit_insn (gen_sse_movups (op0, op1));
8024 /* ??? Similar to above, only less clear because of quote
8025 typeless stores unquote. */
8026 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
8027 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
8029 op0 = gen_lowpart (V16QImode, op0);
8030 op1 = gen_lowpart (V16QImode, op1);
8031 emit_insn (gen_sse2_movdqu (op0, op1));
8035 if (TARGET_SSE2 && mode == V2DFmode)
8037 m = adjust_address (op0, DFmode, 0);
8038 emit_insn (gen_sse2_storelpd (m, op1));
8039 m = adjust_address (op0, DFmode, 8);
8040 emit_insn (gen_sse2_storehpd (m, op1));
8044 if (mode != V4SFmode)
8045 op1 = gen_lowpart (V4SFmode, op1);
8046 m = adjust_address (op0, V2SFmode, 0);
8047 emit_insn (gen_sse_storelps (m, op1));
8048 m = adjust_address (op0, V2SFmode, 8);
8049 emit_insn (gen_sse_storehps (m, op1));
8056 /* Expand a push in MODE. This is some mode for which we do not support
8057 proper push instructions, at least from the registers that we expect
8058 the value to live in. */
8061 ix86_expand_push (enum machine_mode mode, rtx x)
8065 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
8066 GEN_INT (-GET_MODE_SIZE (mode)),
8067 stack_pointer_rtx, 1, OPTAB_DIRECT);
8068 if (tmp != stack_pointer_rtx)
8069 emit_move_insn (stack_pointer_rtx, tmp);
8071 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
8072 emit_move_insn (tmp, x);
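/* The expansion above amounts to (illustrative, 32-bit AT&T syntax,
   SImode value):

       subl  $SIZE, %esp
       movl  x, (%esp)

   i.e. an explicit stack-pointer adjustment followed by a plain store,
   for modes that lack a real push instruction.  */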
8075 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
8076 destination to use for the operation. If different from the true
8077 destination in operands[0], a copy operation will be required. */
8080 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
8083 int matching_memory;
8084 rtx src1, src2, dst;
8090 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8091 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8092 && (rtx_equal_p (dst, src2)
8093 || immediate_operand (src1, mode)))
8100 /* If the destination is memory, and we do not have matching source
8101 operands, do things in registers. */
8102 matching_memory = 0;
8103 if (GET_CODE (dst) == MEM)
8105 if (rtx_equal_p (dst, src1))
8106 matching_memory = 1;
8107 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8108 && rtx_equal_p (dst, src2))
8109 matching_memory = 2;
8111 dst = gen_reg_rtx (mode);
8114 /* Both source operands cannot be in memory. */
8115 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8117 if (matching_memory != 2)
8118 src2 = force_reg (mode, src2);
8120 src1 = force_reg (mode, src1);
8123 /* If the operation is not commutative, source 1 cannot be a constant
8124 or non-matching memory.  */
8125 if ((CONSTANT_P (src1)
8126 || (!matching_memory && GET_CODE (src1) == MEM))
8127 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8128 src1 = force_reg (mode, src1);
8130 /* If optimizing, copy to regs to improve CSE */
8131 if (optimize && ! no_new_pseudos)
8133 if (GET_CODE (dst) == MEM)
8134 dst = gen_reg_rtx (mode);
8135 if (GET_CODE (src1) == MEM)
8136 src1 = force_reg (mode, src1);
8137 if (GET_CODE (src2) == MEM)
8138 src2 = force_reg (mode, src2);
8141 src1 = operands[1] = src1;
8142 src2 = operands[2] = src2;
8146 /* Similarly, but assume that the destination has already been
8150 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
8151 enum machine_mode mode, rtx operands[])
8153 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
8154 gcc_assert (dst == operands[0]);
8157 /* Attempt to expand a binary operator.  Make the expansion closer to the
8158 actual machine than just general_operand, which would allow 3 separate
8159 memory references (one output, two input) in a single insn.  */
8162 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8165 rtx src1, src2, dst, op, clob;
8167 dst = ix86_fixup_binary_operands (code, mode, operands);
8171 /* Emit the instruction. */
8173 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8174 if (reload_in_progress)
8176 /* Reload doesn't know about the flags register, and doesn't know that
8177 it doesn't want to clobber it. We can only do this with PLUS. */
8178 gcc_assert (code == PLUS);
8183 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8184 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8187 /* Fix up the destination if needed. */
8188 if (dst != operands[0])
8189 emit_move_insn (operands[0], dst);
8192 /* Return TRUE or FALSE depending on whether the binary operator meets the
8193 appropriate constraints. */
8196 ix86_binary_operator_ok (enum rtx_code code,
8197 enum machine_mode mode ATTRIBUTE_UNUSED,
8200 /* Both source operands cannot be in memory. */
8201 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8203 /* If the operation is not commutative, source 1 cannot be a constant.  */
8204 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8206 /* If the destination is memory, we must have a matching source operand. */
8207 if (GET_CODE (operands[0]) == MEM
8208 && ! (rtx_equal_p (operands[0], operands[1])
8209 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8210 && rtx_equal_p (operands[0], operands[2]))))
8212 /* If the operation is not commutative and source 1 is memory, we must
8213 have a matching destination.  */
8214 if (GET_CODE (operands[1]) == MEM
8215 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
8216 && ! rtx_equal_p (operands[0], operands[1]))
8221 /* Attempt to expand a unary operator.  Make the expansion closer to the
8222 actual machine than just general_operand, which would allow 2 separate
8223 memory references (one output, one input) in a single insn.  */
8226 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8229 int matching_memory;
8230 rtx src, dst, op, clob;
8235 /* If the destination is memory, and we do not have matching source
8236 operands, do things in registers. */
8237 matching_memory = 0;
8240 if (rtx_equal_p (dst, src))
8241 matching_memory = 1;
8243 dst = gen_reg_rtx (mode);
8246 /* When source operand is memory, destination must match. */
8247 if (MEM_P (src) && !matching_memory)
8248 src = force_reg (mode, src);
8250 /* If optimizing, copy to regs to improve CSE. */
8251 if (optimize && ! no_new_pseudos)
8253 if (GET_CODE (dst) == MEM)
8254 dst = gen_reg_rtx (mode);
8255 if (GET_CODE (src) == MEM)
8256 src = force_reg (mode, src);
8259 /* Emit the instruction. */
8261 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8262 if (reload_in_progress || code == NOT)
8264 /* Reload doesn't know about the flags register, and doesn't know that
8265 it doesn't want to clobber it. */
8266 gcc_assert (code == NOT);
8271 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8272 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8275 /* Fix up the destination if needed. */
8276 if (dst != operands[0])
8277 emit_move_insn (operands[0], dst);
8280 /* Return TRUE or FALSE depending on whether the unary operator meets the
8281 appropriate constraints. */
8284 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8285 enum machine_mode mode ATTRIBUTE_UNUSED,
8286 rtx operands[2] ATTRIBUTE_UNUSED)
8288 /* If one of the operands is memory, source and destination must match.  */
8289 if ((GET_CODE (operands[0]) == MEM
8290 || GET_CODE (operands[1]) == MEM)
8291 && ! rtx_equal_p (operands[0], operands[1]))
8296 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
8297 Create a mask for the sign bit in MODE for an SSE register. If VECT is
8298 true, then replicate the mask for all elements of the vector register.
8299 If INVERT is true, then create a mask excluding the sign bit. */
8302 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
8304 enum machine_mode vec_mode;
8305 HOST_WIDE_INT hi, lo;
8310 /* Find the sign bit, sign extended to 2*HWI. */
8312 lo = 0x80000000, hi = lo < 0;
8313 else if (HOST_BITS_PER_WIDE_INT >= 64)
8314 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
8316 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
8321 /* Force this value into the low part of a fp vector constant. */
8322 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
8323 mask = gen_lowpart (mode, mask);
8328 v = gen_rtvec (4, mask, mask, mask, mask);
8330 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
8331 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
8332 vec_mode = V4SFmode;
8337 v = gen_rtvec (2, mask, mask);
8339 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
8340 vec_mode = V2DFmode;
8343 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
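/* A standalone sketch (not GCC code, guarded out) of the scalar mask
   values built above: the IEEE sign bit is bit 31 for SFmode and bit 63
   for DFmode; INVERT yields the complement used by ABS.  */
#if 0
#include <stdint.h>

static uint64_t
signbit_mask (int is_double, int invert)
{
  uint64_t m = is_double ? 1ull << 63 : 1ull << 31;
  return invert ? ~m : m;   /* for float only the low 32 bits matter */
}
#endif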
8346 /* Generate code for floating point ABS or NEG. */
8349 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
8352 rtx mask, set, use, clob, dst, src;
8353 bool matching_memory;
8354 bool use_sse = false;
8355 bool vector_mode = VECTOR_MODE_P (mode);
8356 enum machine_mode elt_mode = mode;
8360 elt_mode = GET_MODE_INNER (mode);
8363 else if (TARGET_SSE_MATH)
8364 use_sse = SSE_FLOAT_MODE_P (mode);
8366 /* NEG and ABS performed with SSE use bitwise mask operations.
8367 Create the appropriate mask now. */
8369 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
8372 /* When not using SSE, we don't use the mask, but prefer to keep the
8373 same general form of the insn pattern to reduce duplication when
8374 it comes time to split. */
8381 /* If the destination is memory, and we don't have matching source
8382 operands, do things in registers. */
8383 matching_memory = false;
8386 if (rtx_equal_p (dst, src) && (!optimize || no_new_pseudos))
8387 matching_memory = true;
8389 dst = gen_reg_rtx (mode);
8391 if (MEM_P (src) && !matching_memory)
8392 src = force_reg (mode, src);
8396 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
8397 set = gen_rtx_SET (VOIDmode, dst, set);
8402 set = gen_rtx_fmt_e (code, mode, src);
8403 set = gen_rtx_SET (VOIDmode, dst, set);
8404 use = gen_rtx_USE (VOIDmode, mask);
8405 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8406 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
8409 if (dst != operands[0])
8410 emit_move_insn (operands[0], dst);
8413 /* Expand a copysign operation. Special case operand 0 being a constant. */
8416 ix86_expand_copysign (rtx operands[])
8418 enum machine_mode mode, vmode;
8419 rtx dest, op0, op1, mask, nmask;
8425 mode = GET_MODE (dest);
8426 vmode = mode == SFmode ? V4SFmode : V2DFmode;
8428 if (GET_CODE (op0) == CONST_DOUBLE)
8432 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
8433 op0 = simplify_unary_operation (ABS, mode, op0, mode);
8435 if (op0 == CONST0_RTX (mode))
8436 op0 = CONST0_RTX (vmode);
8440 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
8441 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
8443 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
8444 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
8447 mask = ix86_build_signbit_mask (mode, 0, 0);
8450 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
8452 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
8456 nmask = ix86_build_signbit_mask (mode, 0, 1);
8457 mask = ix86_build_signbit_mask (mode, 0, 0);
8460 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
8462 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
8466 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
8467 be a constant, and so has already been expanded into a vector constant. */
8470 ix86_split_copysign_const (rtx operands[])
8472 enum machine_mode mode, vmode;
8473 rtx dest, op0, op1, mask, x;
8480 mode = GET_MODE (dest);
8481 vmode = GET_MODE (mask);
8483 dest = simplify_gen_subreg (vmode, dest, mode, 0);
8484 x = gen_rtx_AND (vmode, dest, mask);
8485 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8487 if (op0 != CONST0_RTX (vmode))
8489 x = gen_rtx_IOR (vmode, dest, op0);
8490 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8494 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
8495 so we have to do two masks. */
8498 ix86_split_copysign_var (rtx operands[])
8500 enum machine_mode mode, vmode;
8501 rtx dest, scratch, op0, op1, mask, nmask, x;
8504 scratch = operands[1];
8507 nmask = operands[4];
8510 mode = GET_MODE (dest);
8511 vmode = GET_MODE (mask);
8513 if (rtx_equal_p (op0, op1))
8515 /* Shouldn't happen often (it's useless, obviously), but when it does
8516 we'd generate incorrect code if we continue below. */
8517 emit_move_insn (dest, op0);
8521 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
8523 gcc_assert (REGNO (op1) == REGNO (scratch));
8525 x = gen_rtx_AND (vmode, scratch, mask);
8526 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
8529 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
8530 x = gen_rtx_NOT (vmode, dest);
8531 x = gen_rtx_AND (vmode, x, op0);
8532 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8536 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
8538 x = gen_rtx_AND (vmode, scratch, mask);
8540 else /* alternative 2,4 */
8542 gcc_assert (REGNO (mask) == REGNO (scratch));
8543 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
8544 x = gen_rtx_AND (vmode, scratch, op1);
8546 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
8548 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
8550 dest = simplify_gen_subreg (vmode, op0, mode, 0);
8551 x = gen_rtx_AND (vmode, dest, nmask);
8553 else /* alternative 3,4 */
8555 gcc_assert (REGNO (nmask) == REGNO (dest));
8557 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
8558 x = gen_rtx_AND (vmode, dest, op0);
8560 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8563 x = gen_rtx_IOR (vmode, dest, scratch);
8564 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
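/* A bit-level sketch (standalone, not GCC code, guarded out) of what
   both copysign splitters above compute:
   result = (magnitude bits of op0) | (sign bit of op1).  */
#if 0
#include <stdint.h>

static uint32_t
copysign_bits32 (uint32_t op0, uint32_t op1)
{
  const uint32_t sign = UINT32_C (1) << 31;   /* the nmask/mask pair above */
  return (op0 & ~sign) | (op1 & sign);
}
#endif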
8567 /* Return TRUE or FALSE depending on whether the first SET in INSN
8568 has source and destination with matching CC modes, and that the
8569 CC mode is at least as constrained as REQ_MODE. */
8572 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8575 enum machine_mode set_mode;
8577 set = PATTERN (insn);
8578 if (GET_CODE (set) == PARALLEL)
8579 set = XVECEXP (set, 0, 0);
8580 gcc_assert (GET_CODE (set) == SET);
8581 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
8583 set_mode = GET_MODE (SET_DEST (set));
8587 if (req_mode != CCNOmode
8588 && (req_mode != CCmode
8589 || XEXP (SET_SRC (set), 1) != const0_rtx))
8593 if (req_mode == CCGCmode)
8597 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8601 if (req_mode == CCZmode)
8611 return (GET_MODE (SET_SRC (set)) == set_mode);
8614 /* Generate insn patterns to do an integer compare of OPERANDS. */
8617 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8619 enum machine_mode cmpmode;
8622 cmpmode = SELECT_CC_MODE (code, op0, op1);
8623 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8625 /* This is very simple, but making the interface the same as in the
8626 FP case makes the rest of the code easier. */
8627 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8628 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8630 /* Return the test that should be put into the flags user, i.e.
8631 the bcc, scc, or cmov instruction. */
8632 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8635 /* Figure out whether to use ordered or unordered fp comparisons.
8636 Return the appropriate mode to use. */
8639 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8641 /* ??? In order to make all comparisons reversible, we do all comparisons
8642 non-trapping when compiling for IEEE.  Once gcc is able to distinguish
8643 between all forms of trapping and nontrapping comparisons, we can make
8644 inequality comparisons trapping again, since it results in better code
8645 when using FCOM based compares.  */
8646 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8650 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8652 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8653 return ix86_fp_compare_mode (code);
8656 /* Only zero flag is needed. */
8658 case NE: /* ZF!=0 */
8660 /* Codes needing carry flag. */
8661 case GEU: /* CF=0 */
8662 case GTU: /* CF=0 & ZF=0 */
8663 case LTU: /* CF=1 */
8664 case LEU: /* CF=1 | ZF=1 */
8666 /* Codes possibly doable only with sign flag when
8667 comparing against zero. */
8668 case GE: /* SF=OF or SF=0 */
8669 case LT: /* SF<>OF or SF=1 */
8670 if (op1 == const0_rtx)
8673 /* For other cases the carry flag is not required.  */
8675 /* Codes doable only with the sign flag when comparing
8676 against zero, but we miss the jump instruction for it,
8677 so we need to use relational tests against the overflow
8678 flag, which thus needs to be zero.  */
8679 case GT: /* ZF=0 & SF=OF */
8680 case LE: /* ZF=1 | SF<>OF */
8681 if (op1 == const0_rtx)
8685 /* The strcmp pattern does (use flags), and combine may ask us for a proper mode.  */
8694 /* Return the fixed registers used for condition codes. */
8697 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8704 /* If two condition code modes are compatible, return a condition code
8705 mode which is compatible with both. Otherwise, return
8708 static enum machine_mode
8709 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8714 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8717 if ((m1 == CCGCmode && m2 == CCGOCmode)
8718 || (m1 == CCGOCmode && m2 == CCGCmode))
8746 /* These are only compatible with themselves, which we already
8752 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8755 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8757 enum rtx_code swapped_code = swap_condition (code);
8758 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8759 || (ix86_fp_comparison_cost (swapped_code)
8760 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8763 /* Swap, force into registers, or otherwise massage the two operands
8764 to a fp comparison. The operands are updated in place; the new
8765 comparison code is returned. */
8767 static enum rtx_code
8768 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8770 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8771 rtx op0 = *pop0, op1 = *pop1;
8772 enum machine_mode op_mode = GET_MODE (op0);
8773 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
8775 /* All of the unordered compare instructions only work on registers.
8776 The same is true of the fcomi compare instructions. The same is
8777 true of the XFmode compare instructions if not comparing with
8778 zero (ftst insn is used in this case). */
8781 && (fpcmp_mode == CCFPUmode
8782 || (op_mode == XFmode
8783 && ! (standard_80387_constant_p (op0) == 1
8784 || standard_80387_constant_p (op1) == 1))
8785 || ix86_use_fcomi_compare (code)))
8787 op0 = force_reg (op_mode, op0);
8788 op1 = force_reg (op_mode, op1);
8792 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8793 things around if they appear profitable, otherwise force op0 into a register.  */
8796 if (standard_80387_constant_p (op0) == 0
8797 || (GET_CODE (op0) == MEM
8798 && ! (standard_80387_constant_p (op1) == 0
8799 || GET_CODE (op1) == MEM)))
8802 tmp = op0, op0 = op1, op1 = tmp;
8803 code = swap_condition (code);
8806 if (GET_CODE (op0) != REG)
8807 op0 = force_reg (op_mode, op0);
8809 if (CONSTANT_P (op1))
8811 int tmp = standard_80387_constant_p (op1);
8813 op1 = validize_mem (force_const_mem (op_mode, op1));
8817 op1 = force_reg (op_mode, op1);
8820 op1 = force_reg (op_mode, op1);
8824 /* Try to rearrange the comparison to make it cheaper. */
8825 if (ix86_fp_comparison_cost (code)
8826 > ix86_fp_comparison_cost (swap_condition (code))
8827 && (GET_CODE (op1) == REG || !no_new_pseudos))
8830 tmp = op0, op0 = op1, op1 = tmp;
8831 code = swap_condition (code);
8832 if (GET_CODE (op0) != REG)
8833 op0 = force_reg (op_mode, op0);
8841 /* Convert the comparison codes we use to represent FP comparisons into
8842 integer codes that will result in a proper branch.  Return UNKNOWN if no such code is available.  */
8846 ix86_fp_compare_code_to_integer (enum rtx_code code)
8875 /* Split comparison code CODE into comparisons we can do using branch
8876 instructions.  BYPASS_CODE is the comparison code for the branch that will
8877 branch around FIRST_CODE and SECOND_CODE.  If one of the branches
8878 is not required, its value is set to UNKNOWN.
8879 We never require more than two branches.  */
8882 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8883 enum rtx_code *first_code,
8884 enum rtx_code *second_code)
8887 *bypass_code = UNKNOWN;
8888 *second_code = UNKNOWN;
8890 /* The fcomi comparison sets flags as follows:
8900 case GT: /* GTU - CF=0 & ZF=0 */
8901 case GE: /* GEU - CF=0 */
8902 case ORDERED: /* PF=0 */
8903 case UNORDERED: /* PF=1 */
8904 case UNEQ: /* EQ - ZF=1 */
8905 case UNLT: /* LTU - CF=1 */
8906 case UNLE: /* LEU - CF=1 | ZF=1 */
8907 case LTGT: /* EQ - ZF=0 */
8909 case LT: /* LTU - CF=1 - fails on unordered */
8911 *bypass_code = UNORDERED;
8913 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8915 *bypass_code = UNORDERED;
8917 case EQ: /* EQ - ZF=1 - fails on unordered */
8919 *bypass_code = UNORDERED;
8921 case NE: /* NE - ZF=0 - fails on unordered */
8923 *second_code = UNORDERED;
8925 case UNGE: /* GEU - CF=0 - fails on unordered */
8927 *second_code = UNORDERED;
8929 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8931 *second_code = UNORDERED;
8936 if (!TARGET_IEEE_FP)
8938 *second_code = UNKNOWN;
8939 *bypass_code = UNKNOWN;
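/* Example (a sketch of the resulting branch shape): for IEEE-safe EQ
   the code above sets first_code = EQ with bypass_code = UNORDERED,
   i.e. roughly

       jp   .Lbypass    ; unordered operands branch around the test
       je   .Ltarget
   .Lbypass:

   while NE instead needs a second branch (second_code = UNORDERED).  */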
8943 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8944 All the following functions use the number of instructions as a cost metric.
8945 In the future this should be tweaked to compute bytes for optimize_size and
8946 take into account the performance of various instructions on various CPUs.  */
8948 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8950 if (!TARGET_IEEE_FP)
8952 /* The cost of code output by ix86_expand_fp_compare. */
8980 /* Return cost of comparison done using fcomi operation.
8981 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8983 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
8985 enum rtx_code bypass_code, first_code, second_code;
8986 /* Return an arbitrarily high cost when the instruction is not supported;
8987 this prevents gcc from using it.  */
8990 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8991 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
8994 /* Return cost of comparison done using sahf operation.
8995 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8997 ix86_fp_comparison_sahf_cost (enum rtx_code code)
8999 enum rtx_code bypass_code, first_code, second_code;
9000 /* Return an arbitrarily high cost when the instruction is not preferred;
9001 this keeps gcc from using it.  */
9002 if (!TARGET_USE_SAHF && !optimize_size)
9004 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9005 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
9008 /* Compute cost of the comparison done using any method.
9009 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9011 ix86_fp_comparison_cost (enum rtx_code code)
9013 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9016 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9017 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9019 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9020 if (min > sahf_cost)
9022 if (min > fcomi_cost)
9027 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9030 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9031 rtx *second_test, rtx *bypass_test)
9033 enum machine_mode fpcmp_mode, intcmp_mode;
9035 int cost = ix86_fp_comparison_cost (code);
9036 enum rtx_code bypass_code, first_code, second_code;
9038 fpcmp_mode = ix86_fp_compare_mode (code);
9039 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9042 *second_test = NULL_RTX;
9044 *bypass_test = NULL_RTX;
9046 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9048 /* Do fcomi/sahf based test when profitable. */
9049 if ((bypass_code == UNKNOWN || bypass_test)
9050 && (second_code == UNKNOWN || second_test)
9051 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9055 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9056 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9062 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9063 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9065 scratch = gen_reg_rtx (HImode);
9066 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9067 emit_insn (gen_x86_sahf_1 (scratch));
9070 /* The FP codes work out to act like unsigned. */
9071 intcmp_mode = fpcmp_mode;
9073 if (bypass_code != UNKNOWN)
9074 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9075 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9077 if (second_code != UNKNOWN)
9078 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9079 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9084 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9085 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9086 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9088 scratch = gen_reg_rtx (HImode);
9089 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
/* In the unordered case, we have to check C2 for NaNs, which
   doesn't happen to work out to anything nice combination-wise.
   So do some bit twiddling on the value we've got in AH to come
   up with an appropriate set of condition codes.  */
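/* After fnstsw, AH holds C0 in bit 0 (0x01), C2 in bit 2 (0x04) and
   C3 in bit 6 (0x40).  fcom sets C0 for "less", C3 for "equal" and
   C0, C2 and C3 together for "unordered", which is where the 0x01,
   0x04, 0x40 and 0x45 masks below come from.  */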
9096 intcmp_mode = CCNOmode;
9101 if (code == GT || !TARGET_IEEE_FP)
9103 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9108 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9109 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9110 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9111 intcmp_mode = CCmode;
9117 if (code == LT && TARGET_IEEE_FP)
9119 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9120 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9121 intcmp_mode = CCmode;
9126 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9132 if (code == GE || !TARGET_IEEE_FP)
9134 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9139 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9140 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9147 if (code == LE && TARGET_IEEE_FP)
9149 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9150 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9151 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9152 intcmp_mode = CCmode;
9157 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9163 if (code == EQ && TARGET_IEEE_FP)
9165 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9166 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9167 intcmp_mode = CCmode;
9172 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9179 if (code == NE && TARGET_IEEE_FP)
9181 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9182 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9188 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9194 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9198 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9207 /* Return the test that should be put into the flags user, i.e.
9208 the bcc, scc, or cmov instruction. */
9209 return gen_rtx_fmt_ee (code, VOIDmode,
9210 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9215 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9218 op0 = ix86_compare_op0;
9219 op1 = ix86_compare_op1;
9222 *second_test = NULL_RTX;
9224 *bypass_test = NULL_RTX;
9226 if (ix86_compare_emitted)
9228 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
9229 ix86_compare_emitted = NULL_RTX;
9231 else if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9232 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9233 second_test, bypass_test);
9235 ret = ix86_expand_int_compare (code, op0, op1);
/* Return true if the CODE will result in a nontrivial jump sequence.  */
9242 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9244 enum rtx_code bypass_code, first_code, second_code;
9247 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9248 return bypass_code != UNKNOWN || second_code != UNKNOWN;
9252 ix86_expand_branch (enum rtx_code code, rtx label)
9256 switch (GET_MODE (ix86_compare_op0))
9262 tmp = ix86_expand_compare (code, NULL, NULL);
9263 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9264 gen_rtx_LABEL_REF (VOIDmode, label),
9266 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9275 enum rtx_code bypass_code, first_code, second_code;
9277 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9280 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Check whether we will use the natural sequence with one jump.  If
   so, we can expand the jump early.  Otherwise delay expansion by
   creating a compound insn so as not to confuse the optimizers.  */
9285 if (bypass_code == UNKNOWN && second_code == UNKNOWN
9288 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9289 gen_rtx_LABEL_REF (VOIDmode, label),
9290 pc_rtx, NULL_RTX, NULL_RTX);
9294 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9295 ix86_compare_op0, ix86_compare_op1);
9296 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9297 gen_rtx_LABEL_REF (VOIDmode, label),
9299 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9301 use_fcomi = ix86_use_fcomi_compare (code);
9302 vec = rtvec_alloc (3 + !use_fcomi);
9303 RTVEC_ELT (vec, 0) = tmp;
9305 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9307 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9310 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9312 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9320 /* Expand DImode branch into multiple compare+branch. */
9322 rtx lo[2], hi[2], label2;
9323 enum rtx_code code1, code2, code3;
9325 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9327 tmp = ix86_compare_op0;
9328 ix86_compare_op0 = ix86_compare_op1;
9329 ix86_compare_op1 = tmp;
9330 code = swap_condition (code);
9332 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9333 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9335 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9336 avoid two branches. This costs one extra insn, so disable when
9337 optimizing for size. */
9339 if ((code == EQ || code == NE)
9341 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9346 if (hi[1] != const0_rtx)
9347 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9348 NULL_RTX, 0, OPTAB_WIDEN);
9351 if (lo[1] != const0_rtx)
9352 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9353 NULL_RTX, 0, OPTAB_WIDEN);
9355 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9356 NULL_RTX, 0, OPTAB_WIDEN);
9358 ix86_compare_op0 = tmp;
9359 ix86_compare_op1 = const0_rtx;
9360 ix86_expand_branch (code, label);
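/* The sequence emitted for "if (a == b) goto label" is then roughly:
	xorl	hi1, hi0
	xorl	lo1, lo0
	orl	lo0, hi0
	jz	label
   with either xor dropped when the corresponding half of b is zero
   (registers illustrative).  */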
9364 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9365 op1 is a constant and the low word is zero, then we can just
9366 examine the high word. */
9368 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9371 case LT: case LTU: case GE: case GEU:
9372 ix86_compare_op0 = hi[0];
9373 ix86_compare_op1 = hi[1];
9374 ix86_expand_branch (code, label);
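/* For example, "a < 0x100000000LL" has lo[1] == 0 and hi[1] == 1,
   so it reduces to the single SImode branch "hi(a) < 1".  */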
9380 /* Otherwise, we need two or three jumps. */
9382 label2 = gen_label_rtx ();
9385 code2 = swap_condition (code);
9386 code3 = unsigned_condition (code);
9390 case LT: case GT: case LTU: case GTU:
9393 case LE: code1 = LT; code2 = GT; break;
9394 case GE: code1 = GT; code2 = LT; break;
9395 case LEU: code1 = LTU; code2 = GTU; break;
9396 case GEU: code1 = GTU; code2 = LTU; break;
9398 case EQ: code1 = UNKNOWN; code2 = NE; break;
9399 case NE: code2 = UNKNOWN; break;
9407 * if (hi(a) < hi(b)) goto true;
9408 * if (hi(a) > hi(b)) goto false;
9409 * if (lo(a) < lo(b)) goto true;
9413 ix86_compare_op0 = hi[0];
9414 ix86_compare_op1 = hi[1];
9416 if (code1 != UNKNOWN)
9417 ix86_expand_branch (code1, label);
9418 if (code2 != UNKNOWN)
9419 ix86_expand_branch (code2, label2);
9421 ix86_compare_op0 = lo[0];
9422 ix86_compare_op1 = lo[1];
9423 ix86_expand_branch (code3, label);
9425 if (code2 != UNKNOWN)
9426 emit_label (label2);
9435 /* Split branch based on floating point condition. */
9437 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9438 rtx target1, rtx target2, rtx tmp, rtx pushed)
9441 rtx label = NULL_RTX;
9443 int bypass_probability = -1, second_probability = -1, probability = -1;
9446 if (target2 != pc_rtx)
9449 code = reverse_condition_maybe_unordered (code);
9454 condition = ix86_expand_fp_compare (code, op1, op2,
9455 tmp, &second, &bypass);
9457 /* Remove pushed operand from stack. */
9459 ix86_free_from_memory (GET_MODE (pushed));
9461 if (split_branch_probability >= 0)
9463 /* Distribute the probabilities across the jumps.
Assume the BYPASS and SECOND to always be a test for UNORDERED.  */
9466 probability = split_branch_probability;
/* A value of 1 is low enough to make no need for the probability
   to be updated.  Later we may run some experiments and see
   if unordered values are more frequent in practice.  */
9472 bypass_probability = 1;
9474 second_probability = 1;
9476 if (bypass != NULL_RTX)
9478 label = gen_label_rtx ();
9479 i = emit_jump_insn (gen_rtx_SET
9481 gen_rtx_IF_THEN_ELSE (VOIDmode,
9483 gen_rtx_LABEL_REF (VOIDmode,
9486 if (bypass_probability >= 0)
9488 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9489 GEN_INT (bypass_probability),
9492 i = emit_jump_insn (gen_rtx_SET
9494 gen_rtx_IF_THEN_ELSE (VOIDmode,
9495 condition, target1, target2)));
9496 if (probability >= 0)
9498 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9499 GEN_INT (probability),
9501 if (second != NULL_RTX)
9503 i = emit_jump_insn (gen_rtx_SET
9505 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9507 if (second_probability >= 0)
9509 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9510 GEN_INT (second_probability),
9513 if (label != NULL_RTX)
9518 ix86_expand_setcc (enum rtx_code code, rtx dest)
9520 rtx ret, tmp, tmpreg, equiv;
9521 rtx second_test, bypass_test;
9523 if (GET_MODE (ix86_compare_op0) == DImode
9525 return 0; /* FAIL */
9527 gcc_assert (GET_MODE (dest) == QImode);
9529 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9530 PUT_MODE (ret, QImode);
9535 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9536 if (bypass_test || second_test)
9538 rtx test = second_test;
9540 rtx tmp2 = gen_reg_rtx (QImode);
9543 gcc_assert (!second_test);
9546 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9548 PUT_MODE (test, QImode);
9549 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9552 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9554 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9557 /* Attach a REG_EQUAL note describing the comparison result. */
9558 if (ix86_compare_op0 && ix86_compare_op1)
9560 equiv = simplify_gen_relational (code, QImode,
9561 GET_MODE (ix86_compare_op0),
9562 ix86_compare_op0, ix86_compare_op1);
9563 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9566 return 1; /* DONE */
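/* For example, the IEEE comparison "x != y" needs two flag tests
   (NE plus UNORDERED), so the expansion is roughly
	ucomisd/fucomip ...
	setne	%al
	setp	%cl
	orb	%cl, %al
   (registers illustrative).  */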
/* Expand a comparison setting or clearing the carry flag.  Return true
   when successful and set POP to the comparison for the operation.  */
9572 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9574 enum machine_mode mode =
9575 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
/* Do not handle DImode compares that go through the special path.  Also we
   can't deal with FP compares yet.  This would be possible to add.  */
9579 if ((mode == DImode && !TARGET_64BIT))
9581 if (FLOAT_MODE_P (mode))
9583 rtx second_test = NULL, bypass_test = NULL;
9584 rtx compare_op, compare_seq;
/* Shortcut: the following common codes never translate into carry flag compares.  */
9587 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9588 || code == ORDERED || code == UNORDERED)
/* These comparisons require the zero flag; swap operands so they won't.  */
9592 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9598 code = swap_condition (code);
/* Try to expand the comparison and verify that we end up with a carry flag
   based comparison.  This fails to be true only when we decide to expand the
   comparison using arithmetic, which is not a common scenario.  */
9605 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9606 &second_test, &bypass_test);
9607 compare_seq = get_insns ();
9610 if (second_test || bypass_test)
9612 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9613 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9614 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9616 code = GET_CODE (compare_op);
9617 if (code != LTU && code != GEU)
9619 emit_insn (compare_seq);
9623 if (!INTEGRAL_MODE_P (mode))
9631 /* Convert a==0 into (unsigned)a<1. */
9634 if (op1 != const0_rtx)
9637 code = (code == EQ ? LTU : GEU);
/* Convert a>b into b<a or a>=b+1.  */
9643 if (GET_CODE (op1) == CONST_INT)
9645 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
/* Bail out on overflow.  We still can swap operands but that
   would force loading of the constant into a register.  */
9648 if (op1 == const0_rtx
9649 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9651 code = (code == GTU ? GEU : LTU);
9658 code = (code == GTU ? LTU : GEU);
9662 /* Convert a>=0 into (unsigned)a<0x80000000. */
9665 if (mode == DImode || op1 != const0_rtx)
9667 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9668 code = (code == LT ? GEU : LTU);
9672 if (mode == DImode || op1 != constm1_rtx)
9674 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9675 code = (code == LE ? GEU : LTU);
/* Swapping operands may cause a constant to appear as the first operand.  */
9682 if (!nonimmediate_operand (op0, VOIDmode))
9686 op0 = force_reg (mode, op0);
9688 ix86_compare_op0 = op0;
9689 ix86_compare_op1 = op1;
9690 *pop = ix86_expand_compare (code, NULL, NULL);
9691 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
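/* For example, the GTU compare "x > 4" is rewritten above as the GEU
   compare "x >= 5"; after "cmpl $5, x" that is just CF == 0, while an
   LTU result is CF == 1.  */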
9696 ix86_expand_int_movcc (rtx operands[])
9698 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9699 rtx compare_seq, compare_op;
9700 rtx second_test, bypass_test;
9701 enum machine_mode mode = GET_MODE (operands[0]);
bool sign_bit_compare_p = false;
9705 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9706 compare_seq = get_insns ();
9709 compare_code = GET_CODE (compare_op);
9711 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9712 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9713 sign_bit_compare_p = true;
9715 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9716 HImode insns, we'd be swallowed in word prefix ops. */
9718 if ((mode != HImode || TARGET_FAST_PREFIX)
9719 && (mode != DImode || TARGET_64BIT)
9720 && GET_CODE (operands[2]) == CONST_INT
9721 && GET_CODE (operands[3]) == CONST_INT)
9723 rtx out = operands[0];
9724 HOST_WIDE_INT ct = INTVAL (operands[2]);
9725 HOST_WIDE_INT cf = INTVAL (operands[3]);
/* Sign bit compares are better done using shifts than we do by using
   sbb.  */
9731 if (sign_bit_compare_p
9732 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9733 ix86_compare_op1, &compare_op))
9735 /* Detect overlap between destination and compare sources. */
9738 if (!sign_bit_compare_p)
9742 compare_code = GET_CODE (compare_op);
9744 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9745 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9748 compare_code = ix86_fp_compare_code_to_integer (compare_code);
/* To simplify the rest of the code, restrict to the GEU case.  */
9752 if (compare_code == LTU)
9754 HOST_WIDE_INT tmp = ct;
9757 compare_code = reverse_condition (compare_code);
9758 code = reverse_condition (code);
9763 PUT_CODE (compare_op,
9764 reverse_condition_maybe_unordered
9765 (GET_CODE (compare_op)));
9767 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9771 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9772 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9773 tmp = gen_reg_rtx (mode);
9776 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9778 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9782 if (code == GT || code == GE)
9783 code = reverse_condition (code);
9786 HOST_WIDE_INT tmp = ct;
9791 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9792 ix86_compare_op1, VOIDmode, 0, -1);
9805 tmp = expand_simple_binop (mode, PLUS,
9807 copy_rtx (tmp), 1, OPTAB_DIRECT);
9818 tmp = expand_simple_binop (mode, IOR,
9820 copy_rtx (tmp), 1, OPTAB_DIRECT);
9822 else if (diff == -1 && ct)
9832 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9834 tmp = expand_simple_binop (mode, PLUS,
9835 copy_rtx (tmp), GEN_INT (cf),
9836 copy_rtx (tmp), 1, OPTAB_DIRECT);
9844 * andl cf - ct, dest
9854 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9857 tmp = expand_simple_binop (mode, AND,
9859 gen_int_mode (cf - ct, mode),
9860 copy_rtx (tmp), 1, OPTAB_DIRECT);
9862 tmp = expand_simple_binop (mode, PLUS,
9863 copy_rtx (tmp), GEN_INT (ct),
9864 copy_rtx (tmp), 1, OPTAB_DIRECT);
9867 if (!rtx_equal_p (tmp, out))
9868 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9870 return 1; /* DONE */
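/* A sketch of the branchless pattern built above (registers
   illustrative):
	cmpl	op1, op0
	sbbl	%eax, %eax	; %eax = carry ? -1 : 0
	andl	$(cf - ct), %eax
	addl	$ct, %eax	; %eax = carry ? cf : ct
   with simpler add, or and not forms used for the special ct/cf
   pairs handled first.  */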
9876 tmp = ct, ct = cf, cf = tmp;
9878 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
/* We may be reversing an unordered compare to a normal compare, which
   is not valid in general (we may convert a non-trapping condition
   to a trapping one); however, on i386 we currently emit all
   comparisons unordered.  */
9884 compare_code = reverse_condition_maybe_unordered (compare_code);
9885 code = reverse_condition_maybe_unordered (code);
9889 compare_code = reverse_condition (compare_code);
9890 code = reverse_condition (code);
9894 compare_code = UNKNOWN;
9895 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9896 && GET_CODE (ix86_compare_op1) == CONST_INT)
9898 if (ix86_compare_op1 == const0_rtx
9899 && (code == LT || code == GE))
9900 compare_code = code;
9901 else if (ix86_compare_op1 == constm1_rtx)
9905 else if (code == GT)
9910 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9911 if (compare_code != UNKNOWN
9912 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9913 && (cf == -1 || ct == -1))
9915 /* If lea code below could be used, only optimize
9916 if it results in a 2 insn sequence. */
9918 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9919 || diff == 3 || diff == 5 || diff == 9)
9920 || (compare_code == LT && ct == -1)
9921 || (compare_code == GE && cf == -1))
9924 * notl op1 (if necessary)
9932 code = reverse_condition (code);
9935 out = emit_store_flag (out, code, ix86_compare_op0,
9936 ix86_compare_op1, VOIDmode, 0, -1);
9938 out = expand_simple_binop (mode, IOR,
9940 out, 1, OPTAB_DIRECT);
9941 if (out != operands[0])
9942 emit_move_insn (operands[0], out);
9944 return 1; /* DONE */
9949 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9950 || diff == 3 || diff == 5 || diff == 9)
9951 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9953 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
9959 * lea cf(dest*(ct-cf)),dest
9963 * This also catches the degenerate setcc-only case.
9969 out = emit_store_flag (out, code, ix86_compare_op0,
9970 ix86_compare_op1, VOIDmode, 0, 1);
/* On x86_64 the lea instruction operates on Pmode, so we need
   to get the arithmetic done in the proper mode to match.  */
9976 tmp = copy_rtx (out);
9980 out1 = copy_rtx (out);
9981 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9985 tmp = gen_rtx_PLUS (mode, tmp, out1);
9991 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9994 if (!rtx_equal_p (tmp, out))
9997 out = force_operand (tmp, copy_rtx (out));
9999 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10001 if (!rtx_equal_p (out, operands[0]))
10002 emit_move_insn (operands[0], copy_rtx (out));
10004 return 1; /* DONE */
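/* For example, with ct - cf == 5 the sequence is roughly
	setcc	%al
	movzbl	%al, %eax
	leal	cf(%eax,%eax,4), %eax
   i.e. dest = cf + (condition ? 5 : 0), with register names
   illustrative.  */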
10008 * General case: Jumpful:
10009 * xorl dest,dest cmpl op1, op2
10010 * cmpl op1, op2 movl ct, dest
10011 * setcc dest jcc 1f
10012 * decl dest movl cf, dest
10013 * andl (cf-ct),dest 1:
10016 * Size 20. Size 14.
* This is reasonably steep, but branch mispredict costs are
* high on modern cpus, so consider failing only if optimizing
* for space.  */
10023 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10024 && BRANCH_COST >= 2)
10030 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
/* We may be reversing an unordered compare to a normal compare,
   which is not valid in general (we may convert a non-trapping
   condition to a trapping one); however, on i386 we currently
   emit all comparisons unordered.  */
10035 code = reverse_condition_maybe_unordered (code);
10038 code = reverse_condition (code);
10039 if (compare_code != UNKNOWN)
10040 compare_code = reverse_condition (compare_code);
10044 if (compare_code != UNKNOWN)
10046 /* notl op1 (if needed)
10051 For x < 0 (resp. x <= -1) there will be no notl,
so if possible swap the constants to get rid of the complement.
10054 True/false will be -1/0 while code below (store flag
10055 followed by decrement) is 0/-1, so the constants need
10056 to be exchanged once more. */
10058 if (compare_code == GE || !cf)
10060 code = reverse_condition (code);
10065 HOST_WIDE_INT tmp = cf;
10070 out = emit_store_flag (out, code, ix86_compare_op0,
10071 ix86_compare_op1, VOIDmode, 0, -1);
10075 out = emit_store_flag (out, code, ix86_compare_op0,
10076 ix86_compare_op1, VOIDmode, 0, 1);
10078 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10079 copy_rtx (out), 1, OPTAB_DIRECT);
10082 out = expand_simple_binop (mode, AND, copy_rtx (out),
10083 gen_int_mode (cf - ct, mode),
10084 copy_rtx (out), 1, OPTAB_DIRECT);
10086 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10087 copy_rtx (out), 1, OPTAB_DIRECT);
10088 if (!rtx_equal_p (out, operands[0]))
10089 emit_move_insn (operands[0], copy_rtx (out));
10091 return 1; /* DONE */
10095 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
/* Try a few more things with specific constants and a variable.  */
10100 rtx var, orig_out, out, tmp;
10102 if (BRANCH_COST <= 2)
10103 return 0; /* FAIL */
10105 /* If one of the two operands is an interesting constant, load a
10106 constant with the above and mask it in with a logical operation. */
10108 if (GET_CODE (operands[2]) == CONST_INT)
10111 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10112 operands[3] = constm1_rtx, op = and_optab;
10113 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10114 operands[3] = const0_rtx, op = ior_optab;
10116 return 0; /* FAIL */
10118 else if (GET_CODE (operands[3]) == CONST_INT)
10121 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10122 operands[2] = constm1_rtx, op = and_optab;
else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
10124 operands[2] = const0_rtx, op = ior_optab;
10126 return 0; /* FAIL */
10129 return 0; /* FAIL */
10131 orig_out = operands[0];
10132 tmp = gen_reg_rtx (mode);
10135 /* Recurse to get the constant loaded. */
10136 if (ix86_expand_int_movcc (operands) == 0)
10137 return 0; /* FAIL */
10139 /* Mask in the interesting variable. */
10140 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10142 if (!rtx_equal_p (out, orig_out))
10143 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10145 return 1; /* DONE */
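/* For example, "dest = cond ? var : 0" recurses to materialize
   tmp = cond ? -1 : 0 and then masks VAR in with a single AND.  */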
10149 * For comparison with above,
10159 if (! nonimmediate_operand (operands[2], mode))
10160 operands[2] = force_reg (mode, operands[2]);
10161 if (! nonimmediate_operand (operands[3], mode))
10162 operands[3] = force_reg (mode, operands[3]);
10164 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10166 rtx tmp = gen_reg_rtx (mode);
10167 emit_move_insn (tmp, operands[3]);
10170 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10172 rtx tmp = gen_reg_rtx (mode);
10173 emit_move_insn (tmp, operands[2]);
10177 if (! register_operand (operands[2], VOIDmode)
10179 || ! register_operand (operands[3], VOIDmode)))
10180 operands[2] = force_reg (mode, operands[2]);
10183 && ! register_operand (operands[3], VOIDmode))
10184 operands[3] = force_reg (mode, operands[3]);
10186 emit_insn (compare_seq);
10187 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10188 gen_rtx_IF_THEN_ELSE (mode,
10189 compare_op, operands[2],
10192 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10193 gen_rtx_IF_THEN_ELSE (mode,
10195 copy_rtx (operands[3]),
10196 copy_rtx (operands[0]))));
10198 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10199 gen_rtx_IF_THEN_ELSE (mode,
10201 copy_rtx (operands[2]),
10202 copy_rtx (operands[0]))));
10204 return 1; /* DONE */
10207 /* Swap, force into registers, or otherwise massage the two operands
10208 to an sse comparison with a mask result. Thus we differ a bit from
10209 ix86_prepare_fp_compare_args which expects to produce a flags result.
10211 The DEST operand exists to help determine whether to commute commutative
10212 operators. The POP0/POP1 operands are updated in place. The new
10213 comparison code is returned, or UNKNOWN if not implementable. */
10215 static enum rtx_code
10216 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
10217 rtx *pop0, rtx *pop1)
10225 /* We have no LTGT as an operator. We could implement it with
10226 NE & ORDERED, but this requires an extra temporary. It's
10227 not clear that it's worth it. */
10234 /* These are supported directly. */
10241 /* For commutative operators, try to canonicalize the destination
10242 operand to be first in the comparison - this helps reload to
10243 avoid extra moves. */
10244 if (!dest || !rtx_equal_p (dest, *pop1))
10252 /* These are not supported directly. Swap the comparison operands
10253 to transform into something that is supported. */
10257 code = swap_condition (code);
10261 gcc_unreachable ();
10267 /* Detect conditional moves that exactly match min/max operational
10268 semantics. Note that this is IEEE safe, as long as we don't
10269 interchange the operands.
10271 Returns FALSE if this conditional move doesn't match a MIN/MAX,
10272 and TRUE if the operation is successful and instructions are emitted. */
10275 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
10276 rtx cmp_op1, rtx if_true, rtx if_false)
10278 enum machine_mode mode;
10284 else if (code == UNGE)
10287 if_true = if_false;
10293 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
10295 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
10300 mode = GET_MODE (dest);
10302 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
10303 but MODE may be a vector mode and thus not appropriate. */
10304 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
10306 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
10309 if_true = force_reg (mode, if_true);
10310 v = gen_rtvec (2, if_true, if_false);
10311 tmp = gen_rtx_UNSPEC (mode, v, u);
10315 code = is_min ? SMIN : SMAX;
10316 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
10319 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
10323 /* Expand an sse vector comparison. Return the register with the result. */
10326 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
10327 rtx op_true, rtx op_false)
10329 enum machine_mode mode = GET_MODE (dest);
10332 cmp_op0 = force_reg (mode, cmp_op0);
10333 if (!nonimmediate_operand (cmp_op1, mode))
10334 cmp_op1 = force_reg (mode, cmp_op1);
10337 || reg_overlap_mentioned_p (dest, op_true)
10338 || reg_overlap_mentioned_p (dest, op_false))
10339 dest = gen_reg_rtx (mode);
10341 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
10342 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10347 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
10348 operations. This is used for both scalar and vector conditional moves. */
10351 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
10353 enum machine_mode mode = GET_MODE (dest);
10356 if (op_false == CONST0_RTX (mode))
10358 op_true = force_reg (mode, op_true);
10359 x = gen_rtx_AND (mode, cmp, op_true);
10360 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10362 else if (op_true == CONST0_RTX (mode))
10364 op_false = force_reg (mode, op_false);
10365 x = gen_rtx_NOT (mode, cmp);
10366 x = gen_rtx_AND (mode, x, op_false);
10367 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10371 op_true = force_reg (mode, op_true);
10372 op_false = force_reg (mode, op_false);
10374 t2 = gen_reg_rtx (mode);
10376 t3 = gen_reg_rtx (mode);
10380 x = gen_rtx_AND (mode, op_true, cmp);
10381 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
10383 x = gen_rtx_NOT (mode, cmp);
10384 x = gen_rtx_AND (mode, x, op_false);
10385 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
10387 x = gen_rtx_IOR (mode, t3, t2);
10388 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
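/* For the vector modes this is the classic mask blend; e.g. for
   V4SFmode the three sets above correspond roughly to
	andps	cmp, op_true
	andnps	op_false, cmp
	orps	...
   (a sketch; the actual insns come from the AND/IOR patterns).  */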
10392 /* Expand a floating-point conditional move. Return true if successful. */
10395 ix86_expand_fp_movcc (rtx operands[])
10397 enum machine_mode mode = GET_MODE (operands[0]);
10398 enum rtx_code code = GET_CODE (operands[1]);
10399 rtx tmp, compare_op, second_test, bypass_test;
10401 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
10403 enum machine_mode cmode;
10405 /* Since we've no cmove for sse registers, don't force bad register
10406 allocation just to gain access to it. Deny movcc when the
10407 comparison mode doesn't match the move mode. */
10408 cmode = GET_MODE (ix86_compare_op0);
10409 if (cmode == VOIDmode)
10410 cmode = GET_MODE (ix86_compare_op1);
10414 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
10416 &ix86_compare_op1);
10417 if (code == UNKNOWN)
10420 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
10421 ix86_compare_op1, operands[2],
10425 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
10426 ix86_compare_op1, operands[2], operands[3]);
10427 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
10434 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10436 /* The floating point conditional move instructions don't directly
10437 support signed integer comparisons. */
10439 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10441 gcc_assert (!second_test && !bypass_test);
10442 tmp = gen_reg_rtx (QImode);
10443 ix86_expand_setcc (code, tmp);
10445 ix86_compare_op0 = tmp;
10446 ix86_compare_op1 = const0_rtx;
10447 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10449 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10451 tmp = gen_reg_rtx (mode);
10452 emit_move_insn (tmp, operands[3]);
10455 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10457 tmp = gen_reg_rtx (mode);
10458 emit_move_insn (tmp, operands[2]);
10462 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10463 gen_rtx_IF_THEN_ELSE (mode, compare_op,
10464 operands[2], operands[3])));
10466 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10467 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
10468 operands[3], operands[0])));
10470 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10471 gen_rtx_IF_THEN_ELSE (mode, second_test,
10472 operands[2], operands[0])));
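/* For example, "d = (a < b) ? c : d" on the x87 becomes an fcomi
   compare followed by fcmovb, since fcmov can only test the carry,
   zero and parity flags (illustrative; the exact insn depends on
   the condition).  */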
10477 /* Expand a floating-point vector conditional move; a vcond operation
10478 rather than a movcc operation. */
10481 ix86_expand_fp_vcond (rtx operands[])
10483 enum rtx_code code = GET_CODE (operands[3]);
10486 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
10487 &operands[4], &operands[5]);
10488 if (code == UNKNOWN)
10491 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
10492 operands[5], operands[1], operands[2]))
10495 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
10496 operands[1], operands[2]);
10497 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
10501 /* Expand a signed integral vector conditional move. */
10504 ix86_expand_int_vcond (rtx operands[])
10506 enum machine_mode mode = GET_MODE (operands[0]);
10507 enum rtx_code code = GET_CODE (operands[3]);
10508 bool negate = false;
10511 cop0 = operands[4];
10512 cop1 = operands[5];
10514 /* Canonicalize the comparison to EQ, GT, GTU. */
10525 code = reverse_condition (code);
10531 code = reverse_condition (code);
10537 code = swap_condition (code);
10538 x = cop0, cop0 = cop1, cop1 = x;
10542 gcc_unreachable ();
10545 /* Unsigned parallel compare is not supported by the hardware. Play some
10546 tricks to turn this into a signed comparison against 0. */
10555 /* Perform a parallel modulo subtraction. */
10556 t1 = gen_reg_rtx (mode);
10557 emit_insn (gen_subv4si3 (t1, cop0, cop1));
10559 /* Extract the original sign bit of op0. */
10560 mask = GEN_INT (-0x80000000);
10561 mask = gen_rtx_CONST_VECTOR (mode,
10562 gen_rtvec (4, mask, mask, mask, mask));
10563 mask = force_reg (mode, mask);
10564 t2 = gen_reg_rtx (mode);
10565 emit_insn (gen_andv4si3 (t2, cop0, mask));
10567 /* XOR it back into the result of the subtraction. This results
10568 in the sign bit set iff we saw unsigned underflow. */
10569 x = gen_reg_rtx (mode);
10570 emit_insn (gen_xorv4si3 (x, t1, t2));
10578 /* Perform a parallel unsigned saturating subtraction. */
10579 x = gen_reg_rtx (mode);
10580 emit_insn (gen_rtx_SET (VOIDmode, x,
10581 gen_rtx_US_MINUS (mode, cop0, cop1)));
10588 gcc_unreachable ();
10592 cop1 = CONST0_RTX (mode);
10595 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
10596 operands[1+negate], operands[2-negate]);
10598 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
10599 operands[2-negate]);
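/* Illustrative net effect for "r = (a >u b) ? x : y" on V8HImode:
   psubusw leaves a nonzero element exactly where a > b, the compare
   then collapses to an equality test against zero, and the NEGATE
   flag swaps x and y in the final blend.  */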
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
10607 ix86_expand_int_addcc (rtx operands[])
10609 enum rtx_code code = GET_CODE (operands[1]);
10611 rtx val = const0_rtx;
10612 bool fpcmp = false;
10613 enum machine_mode mode = GET_MODE (operands[0]);
10615 if (operands[3] != const1_rtx
10616 && operands[3] != constm1_rtx)
10618 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10619 ix86_compare_op1, &compare_op))
10621 code = GET_CODE (compare_op);
10623 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10624 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10627 code = ix86_fp_compare_code_to_integer (code);
10634 PUT_CODE (compare_op,
10635 reverse_condition_maybe_unordered
10636 (GET_CODE (compare_op)));
10638 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10640 PUT_MODE (compare_op, mode);
10642 /* Construct either adc or sbb insn. */
10643 if ((code == LTU) == (operands[3] == constm1_rtx))
10645 switch (GET_MODE (operands[0]))
10648 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10651 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10654 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10657 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10660 gcc_unreachable ();
10665 switch (GET_MODE (operands[0]))
10668 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10671 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10674 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10677 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10680 gcc_unreachable ();
10683 return 1; /* DONE */
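/* For example, "x = a < b ? x + 1 : x" (unsigned) has operands[3] ==
   const1_rtx and an LTU carry compare, so it comes out as roughly
	cmpl	b, a
	adcl	$0, x
   i.e. the carry produced by the compare is added in directly.  */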
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  At most three parts are generated.  */
10693 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10698 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10700 size = (GET_MODE_SIZE (mode) + 4) / 8;
10702 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
10703 gcc_assert (size >= 2 && size <= 3);
/* Optimize constant pool references to immediates.  This is used by fp
   moves, which force all constants to memory to allow combining.  */
10707 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
10709 rtx tmp = maybe_get_pool_constant (operand);
10714 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
/* The only non-offsettable memories we handle are pushes.  */
10717 int ok = push_operand (operand, VOIDmode);
10721 operand = copy_rtx (operand);
10722 PUT_MODE (operand, Pmode);
10723 parts[0] = parts[1] = parts[2] = operand;
10727 if (GET_CODE (operand) == CONST_VECTOR)
10729 enum machine_mode imode = int_mode_for_mode (mode);
10730 /* Caution: if we looked through a constant pool memory above,
10731 the operand may actually have a different mode now. That's
10732 ok, since we want to pun this all the way back to an integer. */
10733 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
10734 gcc_assert (operand != NULL);
10740 if (mode == DImode)
10741 split_di (&operand, 1, &parts[0], &parts[1]);
10744 if (REG_P (operand))
10746 gcc_assert (reload_completed);
10747 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10748 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10750 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10752 else if (offsettable_memref_p (operand))
10754 operand = adjust_address (operand, SImode, 0);
10755 parts[0] = operand;
10756 parts[1] = adjust_address (operand, SImode, 4);
10758 parts[2] = adjust_address (operand, SImode, 8);
10760 else if (GET_CODE (operand) == CONST_DOUBLE)
10765 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10769 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10770 parts[2] = gen_int_mode (l[2], SImode);
10773 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10776 gcc_unreachable ();
10778 parts[1] = gen_int_mode (l[1], SImode);
10779 parts[0] = gen_int_mode (l[0], SImode);
10782 gcc_unreachable ();
10787 if (mode == TImode)
10788 split_ti (&operand, 1, &parts[0], &parts[1]);
10789 if (mode == XFmode || mode == TFmode)
10791 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10792 if (REG_P (operand))
10794 gcc_assert (reload_completed);
10795 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10796 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10798 else if (offsettable_memref_p (operand))
10800 operand = adjust_address (operand, DImode, 0);
10801 parts[0] = operand;
10802 parts[1] = adjust_address (operand, upper_mode, 8);
10804 else if (GET_CODE (operand) == CONST_DOUBLE)
10809 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10810 real_to_target (l, &r, mode);
10812 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10813 if (HOST_BITS_PER_WIDE_INT >= 64)
10816 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10817 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10820 parts[0] = immed_double_const (l[0], l[1], DImode);
10822 if (upper_mode == SImode)
10823 parts[1] = gen_int_mode (l[2], SImode);
10824 else if (HOST_BITS_PER_WIDE_INT >= 64)
10827 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10828 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10831 parts[1] = immed_double_const (l[2], l[3], DImode);
10834 gcc_unreachable ();
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */
10847 ix86_split_long_move (rtx operands[])
10852 int collisions = 0;
10853 enum machine_mode mode = GET_MODE (operands[0]);
/* The DFmode expanders may ask us to move a double.
   For a 64-bit target this is a single move.  By hiding the fact
   here we simplify i386.md splitters.  */
10858 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
/* Optimize constant pool references to immediates.  This is used by
   fp moves, which force all constants to memory to allow combining.  */
10863 if (GET_CODE (operands[1]) == MEM
10864 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10865 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10866 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10867 if (push_operand (operands[0], VOIDmode))
10869 operands[0] = copy_rtx (operands[0]);
10870 PUT_MODE (operands[0], Pmode);
10873 operands[0] = gen_lowpart (DImode, operands[0]);
10874 operands[1] = gen_lowpart (DImode, operands[1]);
10875 emit_move_insn (operands[0], operands[1]);
/* The only non-offsettable memory we handle is a push.  */
10880 if (push_operand (operands[0], VOIDmode))
10883 gcc_assert (GET_CODE (operands[0]) != MEM
10884 || offsettable_memref_p (operands[0]));
10886 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10887 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
/* When emitting a push, take care of source operands on the stack.  */
10890 if (push && GET_CODE (operands[1]) == MEM
10891 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10894 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10895 XEXP (part[1][2], 0));
10896 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10897 XEXP (part[1][1], 0));
/* We need to do the copy in the right order in case an address
   register of the source overlaps the destination.  */
10902 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10904 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10906 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10909 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
/* A collision in the middle part can be handled by reordering.  */
10913 if (collisions == 1 && nparts == 3
10914 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10917 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10918 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
/* If there are more collisions, we can't handle them by reordering.
   Do an lea to the last part and use only one colliding move.  */
10923 else if (collisions > 1)
10929 base = part[0][nparts - 1];
10931 /* Handle the case when the last part isn't valid for lea.
10932 Happens in 64-bit mode storing the 12-byte XFmode. */
10933 if (GET_MODE (base) != Pmode)
10934 base = gen_rtx_REG (Pmode, REGNO (base));
10936 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10937 part[1][0] = replace_equiv_address (part[1][0], base);
10938 part[1][1] = replace_equiv_address (part[1][1],
10939 plus_constant (base, UNITS_PER_WORD));
10941 part[1][2] = replace_equiv_address (part[1][2],
10942 plus_constant (base, 8));
10952 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10953 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10954 emit_move_insn (part[0][2], part[1][2]);
/* In 64-bit mode we don't have a 32-bit push available.  In case this is a
   register, it is OK - we will just use the larger counterpart.  We also
   retype memories - these come from an attempt to avoid the REX prefix on
   moving the second half of a TFmode value.  */
10963 if (GET_MODE (part[1][1]) == SImode)
10965 switch (GET_CODE (part[1][1]))
10968 part[1][1] = adjust_address (part[1][1], DImode, 0);
10972 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10976 gcc_unreachable ();
10979 if (GET_MODE (part[1][0]) == SImode)
10980 part[1][0] = part[1][1];
10983 emit_move_insn (part[0][1], part[1][1]);
10984 emit_move_insn (part[0][0], part[1][0]);
/* Choose the correct order so as not to overwrite the source before it is copied.  */
10989 if ((REG_P (part[0][0])
10990 && REG_P (part[1][1])
10991 && (REGNO (part[0][0]) == REGNO (part[1][1])
10993 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10995 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10999 operands[2] = part[0][2];
11000 operands[3] = part[0][1];
11001 operands[4] = part[0][0];
11002 operands[5] = part[1][2];
11003 operands[6] = part[1][1];
11004 operands[7] = part[1][0];
11008 operands[2] = part[0][1];
11009 operands[3] = part[0][0];
11010 operands[5] = part[1][1];
11011 operands[6] = part[1][0];
11018 operands[2] = part[0][0];
11019 operands[3] = part[0][1];
11020 operands[4] = part[0][2];
11021 operands[5] = part[1][0];
11022 operands[6] = part[1][1];
11023 operands[7] = part[1][2];
11027 operands[2] = part[0][0];
11028 operands[3] = part[0][1];
11029 operands[5] = part[1][0];
11030 operands[6] = part[1][1];
11034 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
11037 if (GET_CODE (operands[5]) == CONST_INT
11038 && operands[5] != const0_rtx
11039 && REG_P (operands[2]))
11041 if (GET_CODE (operands[6]) == CONST_INT
11042 && INTVAL (operands[6]) == INTVAL (operands[5]))
11043 operands[6] = operands[2];
11046 && GET_CODE (operands[7]) == CONST_INT
11047 && INTVAL (operands[7]) == INTVAL (operands[5]))
11048 operands[7] = operands[2];
11052 && GET_CODE (operands[6]) == CONST_INT
11053 && operands[6] != const0_rtx
11054 && REG_P (operands[3])
11055 && GET_CODE (operands[7]) == CONST_INT
11056 && INTVAL (operands[7]) == INTVAL (operands[6]))
11057 operands[7] = operands[3];
11060 emit_move_insn (operands[2], operands[5]);
11061 emit_move_insn (operands[3], operands[6]);
11063 emit_move_insn (operands[4], operands[7]);
11068 /* Helper function of ix86_split_ashldi used to generate an SImode
11069 left shift by a constant, either using a single shift or
11070 a sequence of add instructions. */
11073 ix86_expand_ashlsi3_const (rtx operand, int count)
11076 emit_insn (gen_addsi3 (operand, operand, operand));
11077 else if (!optimize_size
11078 && count * ix86_cost->add <= ix86_cost->shift_const)
11081 for (i=0; i<count; i++)
11082 emit_insn (gen_addsi3 (operand, operand, operand));
11085 emit_insn (gen_ashlsi3 (operand, operand, GEN_INT (count)));
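/* E.g. a constant shift left by 2 becomes two addl insns whenever
   2 * ix86_cost->add <= ix86_cost->shift_const and we are not
   optimizing for size; a shift by 1 is always a single add.  */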
11089 ix86_split_ashldi (rtx *operands, rtx scratch)
11091 rtx low[2], high[2];
11094 if (GET_CODE (operands[2]) == CONST_INT)
11096 split_di (operands, 2, low, high);
11097 count = INTVAL (operands[2]) & 63;
11101 emit_move_insn (high[0], low[1]);
11102 emit_move_insn (low[0], const0_rtx);
11105 ix86_expand_ashlsi3_const (high[0], count - 32);
11109 if (!rtx_equal_p (operands[0], operands[1]))
11110 emit_move_insn (operands[0], operands[1]);
11111 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
11112 ix86_expand_ashlsi3_const (low[0], count);
11117 split_di (operands, 1, low, high);
11119 if (operands[1] == const1_rtx)
/* Assuming we've chosen QImode-capable registers, then 1LL << N
   can be done with two 32-bit shifts, no branches, no cmoves.  */
11123 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
11125 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
11127 ix86_expand_clear (low[0]);
11128 ix86_expand_clear (high[0]);
11129 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
11131 d = gen_lowpart (QImode, low[0]);
11132 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
11133 s = gen_rtx_EQ (QImode, flags, const0_rtx);
11134 emit_insn (gen_rtx_SET (VOIDmode, d, s));
11136 d = gen_lowpart (QImode, high[0]);
11137 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
11138 s = gen_rtx_NE (QImode, flags, const0_rtx);
11139 emit_insn (gen_rtx_SET (VOIDmode, d, s));
11142 /* Otherwise, we can get the same results by manually performing
11143 a bit extract operation on bit 5, and then performing the two
11144 shifts. The two methods of getting 0/1 into low/high are exactly
11145 the same size. Avoiding the shift in the bit extract case helps
11146 pentium4 a bit; no one else seems to care much either way. */
11151 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
11152 x = gen_rtx_ZERO_EXTEND (SImode, operands[2]);
11154 x = gen_lowpart (SImode, operands[2]);
11155 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
11157 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (5)));
11158 emit_insn (gen_andsi3 (high[0], high[0], GEN_INT (1)));
11159 emit_move_insn (low[0], high[0]);
11160 emit_insn (gen_xorsi3 (low[0], low[0], GEN_INT (1)));
11163 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
11164 emit_insn (gen_ashlsi3 (high[0], high[0], operands[2]));
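/* The QImode-register variant above therefore emits, approximately:
	xorl	low, low
	xorl	high, high
	testb	$32, %cl
	sete	low_byte	; count < 32: the 1 belongs in low
	setne	high_byte	; count >= 32: the 1 belongs in high
	shll	%cl, low
	shll	%cl, high	; 32-bit shifts use count mod 32
   (register names illustrative).  */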
11168 if (operands[1] == constm1_rtx)
11170 /* For -1LL << N, we can avoid the shld instruction, because we
11171 know that we're shifting 0...31 ones into a -1. */
11172 emit_move_insn (low[0], constm1_rtx);
11174 emit_move_insn (high[0], low[0]);
11176 emit_move_insn (high[0], constm1_rtx);
11180 if (!rtx_equal_p (operands[0], operands[1]))
11181 emit_move_insn (operands[0], operands[1]);
11183 split_di (operands, 1, low, high);
11184 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
11187 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
11189 if (TARGET_CMOVE && scratch)
11191 ix86_expand_clear (scratch);
11192 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
11195 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
11199 ix86_split_ashrdi (rtx *operands, rtx scratch)
11201 rtx low[2], high[2];
11204 if (GET_CODE (operands[2]) == CONST_INT)
11206 split_di (operands, 2, low, high);
11207 count = INTVAL (operands[2]) & 63;
11211 emit_move_insn (high[0], high[1]);
11212 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
11213 emit_move_insn (low[0], high[0]);
11216 else if (count >= 32)
11218 emit_move_insn (low[0], high[1]);
11219 emit_move_insn (high[0], low[0]);
11220 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
11222 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
11226 if (!rtx_equal_p (operands[0], operands[1]))
11227 emit_move_insn (operands[0], operands[1]);
11228 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11229 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
11234 if (!rtx_equal_p (operands[0], operands[1]))
11235 emit_move_insn (operands[0], operands[1]);
11237 split_di (operands, 1, low, high);
11239 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11240 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
11242 if (TARGET_CMOVE && scratch)
11244 emit_move_insn (scratch, high[0]);
11245 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
11246 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11250 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
11255 ix86_split_lshrdi (rtx *operands, rtx scratch)
11257 rtx low[2], high[2];
11260 if (GET_CODE (operands[2]) == CONST_INT)
11262 split_di (operands, 2, low, high);
11263 count = INTVAL (operands[2]) & 63;
11267 emit_move_insn (low[0], high[1]);
11268 ix86_expand_clear (high[0]);
11271 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
11275 if (!rtx_equal_p (operands[0], operands[1]))
11276 emit_move_insn (operands[0], operands[1]);
11277 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11278 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
11283 if (!rtx_equal_p (operands[0], operands[1]))
11284 emit_move_insn (operands[0], operands[1]);
11286 split_di (operands, 1, low, high);
11288 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11289 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
11291 /* Heh. By reversing the arguments, we can reuse this pattern. */
11292 if (TARGET_CMOVE && scratch)
11294 ix86_expand_clear (scratch);
11295 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11299 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
/* Helper function for the string operations below.  Test whether VARIABLE
   is aligned to VALUE bytes; if so, jump to the returned label.  */
11306 ix86_expand_aligntest (rtx variable, int value)
11308 rtx label = gen_label_rtx ();
11309 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
11310 if (GET_MODE (variable) == DImode)
11311 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
11313 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11314 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
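/* Typical use, as in the alignment prologues below:
	rtx label = ix86_expand_aligntest (destreg, 2);
	... copy two bytes and adjust the counter ...
	emit_label (label);
   so the fixup is skipped when bit 1 of the address is already 0.  */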
/* Adjust COUNTREG by VALUE.  */
11321 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
11323 if (GET_MODE (countreg) == DImode)
11324 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11326 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
/* Zero-extend the possibly-SImode EXP to a Pmode register.  */
11331 ix86_zero_extend_to_Pmode (rtx exp)
11334 if (GET_MODE (exp) == VOIDmode)
11335 return force_reg (Pmode, exp);
11336 if (GET_MODE (exp) == Pmode)
11337 return copy_to_mode_reg (Pmode, exp);
11338 r = gen_reg_rtx (Pmode);
11339 emit_insn (gen_zero_extendsidi2 (r, exp));
11343 /* Expand string move (memcpy) operation. Use i386 string operations when
11344 profitable. expand_clrmem contains similar code. */
11346 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11348 rtx srcreg, destreg, countreg, srcexp, destexp;
11349 enum machine_mode counter_mode;
11350 HOST_WIDE_INT align = 0;
11351 unsigned HOST_WIDE_INT count = 0;
11353 if (GET_CODE (align_exp) == CONST_INT)
11354 align = INTVAL (align_exp);
11356 /* Can't use any of this if the user has appropriated esi or edi. */
11357 if (global_regs[4] || global_regs[5])
11360 /* This simple hack avoids all inlining code and simplifies code below. */
11361 if (!TARGET_ALIGN_STRINGOPS)
11364 if (GET_CODE (count_exp) == CONST_INT)
11366 count = INTVAL (count_exp);
11367 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
/* Figure out the proper mode for the counter.  For 32 bits it is always SImode;
   for 64 bits use SImode when possible, otherwise DImode.
   Set count to the number of bytes copied when known at compile time.  */
11375 || GET_MODE (count_exp) == SImode
11376 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
11377 counter_mode = SImode;
11379 counter_mode = DImode;
11381 gcc_assert (counter_mode == SImode || counter_mode == DImode);
11383 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11384 if (destreg != XEXP (dst, 0))
11385 dst = replace_equiv_address_nv (dst, destreg);
11386 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11387 if (srcreg != XEXP (src, 0))
11388 src = replace_equiv_address_nv (src, srcreg);
/* When optimizing for size, emit a simple rep ; movsb instruction for
   counts not divisible by 4, except when a (movsl;)*(movsw;)?(movsb;)?
   sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
   The size of the (movsl;)*(movsw;)?(movsb;)? sequence is
   count / 4 + (count & 3); the other sequence is either 4 or 7 bytes,
   but we don't know whether the upper 24 (resp. 56) bits of %ecx will be
   known to be zero or not.  The rep; movsb sequence causes higher
   register pressure though, so take that into account.  */
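/* E.g. count == 7 gives (count & 3) + count / 4 == 3 + 1 == 4, which
   is not greater than 6, so the inline (movsl;)*(movsw;)?(movsb;)?
   expansion below is preferred over rep; movsb.  */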
11399 if ((!optimize || optimize_size)
11404 || (count & 3) + count / 4 > 6))))
11406 emit_insn (gen_cld ());
11407 countreg = ix86_zero_extend_to_Pmode (count_exp);
11408 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11409 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11410 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11414 /* For constant aligned (or small unaligned) copies use rep movsl
11415 followed by code copying the rest. For PentiumPro ensure 8 byte
11416 alignment to allow rep movsl acceleration. */
11418 else if (count != 0
11420 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11421 || optimize_size || count < (unsigned int) 64))
11423 unsigned HOST_WIDE_INT offset = 0;
11424 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11425 rtx srcmem, dstmem;
11427 emit_insn (gen_cld ());
11428 if (count & ~(size - 1))
11430 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
11432 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
11434 while (offset < (count & ~(size - 1)))
11436 srcmem = adjust_automodify_address_nv (src, movs_mode,
11438 dstmem = adjust_automodify_address_nv (dst, movs_mode,
11440 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11446 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
11447 & (TARGET_64BIT ? -1 : 0x3fffffff));
11448 countreg = copy_to_mode_reg (counter_mode, countreg);
11449 countreg = ix86_zero_extend_to_Pmode (countreg);
11451 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11452 GEN_INT (size == 4 ? 2 : 3));
11453 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11454 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11456 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11457 countreg, destexp, srcexp));
11458 offset = count & ~(size - 1);
11461 if (size == 8 && (count & 0x04))
11463 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11465 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11467 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11472 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11474 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11476 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11481 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11483 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11485 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11488 /* The generic code based on the glibc implementation:
11489 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11490 allowing accelerated copying there)
11491 - copy the data using rep movsl
11492 - copy the rest. */
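/* Rough shape of the code emitted below (an illustrative sketch added
   here, not generated verbatim; the alignment prologue and the tail
   copies are guarded by runtime tests on the count and destination):

        cld
        <1/2/4-byte copies until the destination is aligned>
        shrl  $2, %ecx          ; shrq $3, %rcx on 64-bit
        rep movsl               ; rep movsq on 64-bit
        <tail copies for the remaining bytes>  */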
11497 rtx srcmem, dstmem;
11498 int desired_alignment = (TARGET_PENTIUMPRO
11499 && (count == 0 || count >= (unsigned int) 260)
11500 ? 8 : UNITS_PER_WORD);
11501 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11502 dst = change_address (dst, BLKmode, destreg);
11503 src = change_address (src, BLKmode, srcreg);
11505 /* In case we don't know anything about the alignment, default to
11506 library version, since it is usually equally fast and results in
11509 Also emit a call when we know that the count is large and the call overhead
11510 will not be important. */
11511 if (!TARGET_INLINE_ALL_STRINGOPS
11512 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11515 if (TARGET_SINGLE_STRINGOP)
11516 emit_insn (gen_cld ());
11518 countreg2 = gen_reg_rtx (Pmode);
11519 countreg = copy_to_mode_reg (counter_mode, count_exp);
11521 /* We don't use loops to align destination and to copy parts smaller
11522 than 4 bytes, because gcc is able to optimize such code better (in
11523 the case the destination or the count really is aligned, gcc is often
11524 able to predict the branches) and also it is friendlier to the
11525 hardware branch prediction.
11527 Using loops is beneficial for the generic case, because we can
11528 handle small counts using the loops. Many CPUs (such as Athlon)
11529 have large REP prefix setup costs.
11531 This is quite costly. Maybe we can revisit this decision later or
11532 add some customizability to this code. */
11534 if (count == 0 && align < desired_alignment)
11536 label = gen_label_rtx ();
11537 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11538 LEU, 0, counter_mode, 1, label);
11542 rtx label = ix86_expand_aligntest (destreg, 1);
11543 srcmem = change_address (src, QImode, srcreg);
11544 dstmem = change_address (dst, QImode, destreg);
11545 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11546 ix86_adjust_counter (countreg, 1);
11547 emit_label (label);
11548 LABEL_NUSES (label) = 1;
11552 rtx label = ix86_expand_aligntest (destreg, 2);
11553 srcmem = change_address (src, HImode, srcreg);
11554 dstmem = change_address (dst, HImode, destreg);
11555 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11556 ix86_adjust_counter (countreg, 2);
11557 emit_label (label);
11558 LABEL_NUSES (label) = 1;
11560 if (align <= 4 && desired_alignment > 4)
11562 rtx label = ix86_expand_aligntest (destreg, 4);
11563 srcmem = change_address (src, SImode, srcreg);
11564 dstmem = change_address (dst, SImode, destreg);
11565 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11566 ix86_adjust_counter (countreg, 4);
11567 emit_label (label);
11568 LABEL_NUSES (label) = 1;
11571 if (label && desired_alignment > 4 && !TARGET_64BIT)
11573 emit_label (label);
11574 LABEL_NUSES (label) = 1;
11577 if (!TARGET_SINGLE_STRINGOP)
11578 emit_insn (gen_cld ());
11581 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11583 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11587 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11588 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11590 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11591 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11592 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11593 countreg2, destexp, srcexp));
11597 emit_label (label);
11598 LABEL_NUSES (label) = 1;
11600 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11602 srcmem = change_address (src, SImode, srcreg);
11603 dstmem = change_address (dst, SImode, destreg);
11604 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11606 if ((align <= 4 || count == 0) && TARGET_64BIT)
11608 rtx label = ix86_expand_aligntest (countreg, 4);
11609 srcmem = change_address (src, SImode, srcreg);
11610 dstmem = change_address (dst, SImode, destreg);
11611 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11612 emit_label (label);
11613 LABEL_NUSES (label) = 1;
11615 if (align > 2 && count != 0 && (count & 2))
11617 srcmem = change_address (src, HImode, srcreg);
11618 dstmem = change_address (dst, HImode, destreg);
11619 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11621 if (align <= 2 || count == 0)
11623 rtx label = ix86_expand_aligntest (countreg, 2);
11624 srcmem = change_address (src, HImode, srcreg);
11625 dstmem = change_address (dst, HImode, destreg);
11626 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11627 emit_label (label);
11628 LABEL_NUSES (label) = 1;
11630 if (align > 1 && count != 0 && (count & 1))
11632 srcmem = change_address (src, QImode, srcreg);
11633 dstmem = change_address (dst, QImode, destreg);
11634 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11636 if (align <= 1 || count == 0)
11638 rtx label = ix86_expand_aligntest (countreg, 1);
11639 srcmem = change_address (src, QImode, srcreg);
11640 dstmem = change_address (dst, QImode, destreg);
11641 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11642 emit_label (label);
11643 LABEL_NUSES (label) = 1;
11650 /* Expand string clear operation (bzero). Use i386 string operations when
11651 profitable. ix86_expand_movmem contains similar code. */
11653 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
11655 rtx destreg, zeroreg, countreg, destexp;
11656 enum machine_mode counter_mode;
11657 HOST_WIDE_INT align = 0;
11658 unsigned HOST_WIDE_INT count = 0;
11660 if (GET_CODE (align_exp) == CONST_INT)
11661 align = INTVAL (align_exp);
11663 /* Can't use any of this if the user has appropriated edi, the
destination register of the stos instructions. */
11664 if (global_regs[5])
11667 /* This simple hack avoids all inlining code and simplifies code below. */
11668 if (!TARGET_ALIGN_STRINGOPS)
11671 if (GET_CODE (count_exp) == CONST_INT)
11673 count = INTVAL (count_exp);
11674 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11677 /* Figure out the proper mode for the counter. For 32-bit targets it is
11678 always SImode; for 64-bit targets use SImode when possible, otherwise
11679 DImode. Set count to the number of bytes to clear when known at compile time. */
11681 || GET_MODE (count_exp) == SImode
11682 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
11683 counter_mode = SImode;
11685 counter_mode = DImode;
11687 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11688 if (destreg != XEXP (dst, 0))
11689 dst = replace_equiv_address_nv (dst, destreg);
11692 /* When optimizing for size emit simple rep ; stosb instruction for
11693 counts not divisible by 4. The movl $N, %ecx; rep; stosb
11694 sequence is 7 bytes long, so if optimizing for size and count is
11695 small enough that some stosl, stosw and stosb instructions without
11696 rep are shorter, fall back into the next if. */
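/* Illustrative: for count == 30 an inline tail would need
   (30 & 3) + (30 >> 2) = 2 + 7 = 9 > 7 string ops, so the 7-byte
   movl $30, %ecx; rep; stosb form is smaller and is used here;
   for count == 9 the inline 1 + 2 = 3 ops win and we fall through.  */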
11698 if ((!optimize || optimize_size)
11701 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
11703 emit_insn (gen_cld ());
11705 countreg = ix86_zero_extend_to_Pmode (count_exp);
11706 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11707 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11708 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11710 else if (count != 0
11712 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11713 || optimize_size || count < (unsigned int) 64))
11715 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11716 unsigned HOST_WIDE_INT offset = 0;
11718 emit_insn (gen_cld ());
11720 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11721 if (count & ~(size - 1))
11723 unsigned HOST_WIDE_INT repcount;
11724 unsigned int max_nonrep;
11726 repcount = count >> (size == 4 ? 2 : 3);
11728 repcount &= 0x3fffffff;
11730 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
11731 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
11732 bytes. In both cases the latter seems to be faster for small
11734 max_nonrep = size == 4 ? 7 : 4;
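/* Illustrative reasoning for the values above (added note): N plain
   stosl insns are N bytes and beat the 7-byte rep form up to N == 7;
   N stosq insns are 2*N bytes (REX prefix) and beat the 8-byte rep
   form up to N == 4.  */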
11735 if (!optimize_size)
11738 case PROCESSOR_PENTIUM4:
11739 case PROCESSOR_NOCONA:
11746 if (repcount <= max_nonrep)
11747 while (repcount-- > 0)
11749 rtx mem = adjust_automodify_address_nv (dst,
11750 GET_MODE (zeroreg),
11752 emit_insn (gen_strset (destreg, mem, zeroreg));
11757 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
11758 countreg = ix86_zero_extend_to_Pmode (countreg);
11759 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11760 GEN_INT (size == 4 ? 2 : 3));
11761 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11762 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
11764 offset = count & ~(size - 1);
11767 if (size == 8 && (count & 0x04))
11769 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11771 emit_insn (gen_strset (destreg, mem,
11772 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11777 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11779 emit_insn (gen_strset (destreg, mem,
11780 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11785 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11787 emit_insn (gen_strset (destreg, mem,
11788 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11795 /* Compute desired alignment of the string operation. */
11796 int desired_alignment = (TARGET_PENTIUMPRO
11797 && (count == 0 || count >= (unsigned int) 260)
11798 ? 8 : UNITS_PER_WORD);
11800 /* In case we don't know anything about the alignment, default to
11801 library version, since it is usually equally fast and results in
11804 Also emit a call when we know that the count is large and the call overhead
11805 will not be important. */
11806 if (!TARGET_INLINE_ALL_STRINGOPS
11807 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11810 if (TARGET_SINGLE_STRINGOP)
11811 emit_insn (gen_cld ());
11813 countreg2 = gen_reg_rtx (Pmode);
11814 countreg = copy_to_mode_reg (counter_mode, count_exp);
11815 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11816 /* Get rid of MEM_OFFSET, it won't be accurate. */
11817 dst = change_address (dst, BLKmode, destreg);
11819 if (count == 0 && align < desired_alignment)
11821 label = gen_label_rtx ();
11822 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11823 LEU, 0, counter_mode, 1, label);
11827 rtx label = ix86_expand_aligntest (destreg, 1);
11828 emit_insn (gen_strset (destreg, dst,
11829 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11830 ix86_adjust_counter (countreg, 1);
11831 emit_label (label);
11832 LABEL_NUSES (label) = 1;
11836 rtx label = ix86_expand_aligntest (destreg, 2);
11837 emit_insn (gen_strset (destreg, dst,
11838 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11839 ix86_adjust_counter (countreg, 2);
11840 emit_label (label);
11841 LABEL_NUSES (label) = 1;
11843 if (align <= 4 && desired_alignment > 4)
11845 rtx label = ix86_expand_aligntest (destreg, 4);
11846 emit_insn (gen_strset (destreg, dst,
11848 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11850 ix86_adjust_counter (countreg, 4);
11851 emit_label (label);
11852 LABEL_NUSES (label) = 1;
11855 if (label && desired_alignment > 4 && !TARGET_64BIT)
11857 emit_label (label);
11858 LABEL_NUSES (label) = 1;
11862 if (!TARGET_SINGLE_STRINGOP)
11863 emit_insn (gen_cld ());
11866 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11868 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11872 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11873 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11875 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11876 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11880 emit_label (label);
11881 LABEL_NUSES (label) = 1;
11884 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11885 emit_insn (gen_strset (destreg, dst,
11886 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11887 if (TARGET_64BIT && (align <= 4 || count == 0))
11889 rtx label = ix86_expand_aligntest (countreg, 4);
11890 emit_insn (gen_strset (destreg, dst,
11891 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11892 emit_label (label);
11893 LABEL_NUSES (label) = 1;
11895 if (align > 2 && count != 0 && (count & 2))
11896 emit_insn (gen_strset (destreg, dst,
11897 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11898 if (align <= 2 || count == 0)
11900 rtx label = ix86_expand_aligntest (countreg, 2);
11901 emit_insn (gen_strset (destreg, dst,
11902 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11903 emit_label (label);
11904 LABEL_NUSES (label) = 1;
11906 if (align > 1 && count != 0 && (count & 1))
11907 emit_insn (gen_strset (destreg, dst,
11908 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11909 if (align <= 1 || count == 0)
11911 rtx label = ix86_expand_aligntest (countreg, 1);
11912 emit_insn (gen_strset (destreg, dst,
11913 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11914 emit_label (label);
11915 LABEL_NUSES (label) = 1;
11921 /* Expand strlen. */
11923 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11925 rtx addr, scratch1, scratch2, scratch3, scratch4;
11927 /* The generic case of the strlen expander is long. Avoid expanding
11928 it unless TARGET_INLINE_ALL_STRINGOPS. */
11930 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11931 && !TARGET_INLINE_ALL_STRINGOPS
11933 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11936 addr = force_reg (Pmode, XEXP (src, 0));
11937 scratch1 = gen_reg_rtx (Pmode);
11939 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11942 /* It seems that some optimizers do not combine a call like
11943 foo(strlen(bar), strlen(bar));
11944 when the move and the subtraction are done here. They do calculate
11945 the length just once when these instructions are done inside of
11946 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11947 often used and I use one fewer register for the lifetime of
11948 output_strlen_unroll() this is better. */
11950 emit_move_insn (out, addr);
11952 ix86_expand_strlensi_unroll_1 (out, src, align);
11954 /* strlensi_unroll_1 returns the address of the zero at the end of
11955 the string, like memchr(), so compute the length by subtracting
11956 the start address. */
11958 emit_insn (gen_subdi3 (out, out, addr));
11960 emit_insn (gen_subsi3 (out, out, addr));
11965 scratch2 = gen_reg_rtx (Pmode);
11966 scratch3 = gen_reg_rtx (Pmode);
11967 scratch4 = force_reg (Pmode, constm1_rtx);
11969 emit_move_insn (scratch3, addr);
11970 eoschar = force_reg (QImode, eoschar);
11972 emit_insn (gen_cld ());
11973 src = replace_equiv_address_nv (src, scratch3);
11975 /* If .md starts supporting :P, this can be done in .md. */
11976 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11977 scratch4), UNSPEC_SCAS);
11978 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
11981 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11982 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11986 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11987 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11993 /* Expand the appropriate insns for doing strlen if not just doing
11996 out = result, initialized with the start address
11997 align_rtx = alignment of the address.
11998 scratch = scratch register, initialized with the start address when
11999 not aligned, otherwise undefined
12001 This is just the body. It needs the initializations mentioned above and
12002 some address computing at the end. These things are done in i386.md. */
12005 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
12009 rtx align_2_label = NULL_RTX;
12010 rtx align_3_label = NULL_RTX;
12011 rtx align_4_label = gen_label_rtx ();
12012 rtx end_0_label = gen_label_rtx ();
12014 rtx tmpreg = gen_reg_rtx (SImode);
12015 rtx scratch = gen_reg_rtx (SImode);
12019 if (GET_CODE (align_rtx) == CONST_INT)
12020 align = INTVAL (align_rtx);
12022 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
12024 /* Is there a known alignment and is it less than 4? */
12027 rtx scratch1 = gen_reg_rtx (Pmode);
12028 emit_move_insn (scratch1, out);
12029 /* Is there a known alignment and is it not 2? */
12032 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
12033 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
12035 /* Leave just the 3 lower bits. */
12036 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
12037 NULL_RTX, 0, OPTAB_WIDEN);
12039 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
12040 Pmode, 1, align_4_label);
12041 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
12042 Pmode, 1, align_2_label);
12043 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
12044 Pmode, 1, align_3_label);
12048 /* Since the alignment is 2, we have to check 2 or 0 bytes;
12049 check whether it is aligned to a 4-byte boundary. */
12051 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
12052 NULL_RTX, 0, OPTAB_WIDEN);
12054 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
12055 Pmode, 1, align_4_label);
12058 mem = change_address (src, QImode, out);
12060 /* Now compare the bytes. */
12062 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
12063 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
12064 QImode, 1, end_0_label);
12066 /* Increment the address. */
12068 emit_insn (gen_adddi3 (out, out, const1_rtx));
12070 emit_insn (gen_addsi3 (out, out, const1_rtx));
12072 /* Not needed with an alignment of 2. */
12075 emit_label (align_2_label);
12077 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
12081 emit_insn (gen_adddi3 (out, out, const1_rtx));
12083 emit_insn (gen_addsi3 (out, out, const1_rtx));
12085 emit_label (align_3_label);
12088 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
12092 emit_insn (gen_adddi3 (out, out, const1_rtx));
12094 emit_insn (gen_addsi3 (out, out, const1_rtx));
12097 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
12098 align this loop; it only makes the program larger and does not help to
12100 emit_label (align_4_label);
12102 mem = change_address (src, SImode, out);
12103 emit_move_insn (scratch, mem);
12105 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
12107 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
12109 /* This formula yields a nonzero result iff one of the bytes is zero.
12110 This saves three branches inside the loop and many cycles. */
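/* Worked example (added for illustration): scratch == 0x12340078 has
   a zero byte; (0x12340078 - 0x01010101) == 0x1132ff77,
   ~0x12340078 == 0xedcbff87, and
   0x1132ff77 & 0xedcbff87 & 0x80808080 == 0x00008000 != 0.
   One can check that the result is 0 exactly when no byte of
   SCRATCH is zero.  */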
12112 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
12113 emit_insn (gen_one_cmplsi2 (scratch, scratch));
12114 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
12115 emit_insn (gen_andsi3 (tmpreg, tmpreg,
12116 gen_int_mode (0x80808080, SImode)));
12117 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
12122 rtx reg = gen_reg_rtx (SImode);
12123 rtx reg2 = gen_reg_rtx (Pmode);
12124 emit_move_insn (reg, tmpreg);
12125 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
12127 /* If zero is not in the first two bytes, move two bytes forward. */
12128 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
12129 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
12130 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
12131 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
12132 gen_rtx_IF_THEN_ELSE (SImode, tmp,
12135 /* Emit lea manually to avoid clobbering of flags. */
12136 emit_insn (gen_rtx_SET (SImode, reg2,
12137 gen_rtx_PLUS (Pmode, out, const2_rtx)));
12139 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
12140 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
12141 emit_insn (gen_rtx_SET (VOIDmode, out,
12142 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
12149 rtx end_2_label = gen_label_rtx ();
12150 /* Is zero in the first two bytes? */
12152 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
12153 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
12154 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
12155 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12156 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
12158 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
12159 JUMP_LABEL (tmp) = end_2_label;
12161 /* Not in the first two. Move two bytes forward. */
12162 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
12164 emit_insn (gen_adddi3 (out, out, const2_rtx));
12166 emit_insn (gen_addsi3 (out, out, const2_rtx));
12168 emit_label (end_2_label);
12172 /* Avoid branch in fixing the byte. */
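/* Added explanatory note: the addition below doubles the low byte of
   TMPREG, shifting its 0x80 "zero byte here" flag into the carry
   bit; OUT is 3 bytes past the zero byte when carry is clear and 4
   bytes past it when carry is set, so the subtract-with-borrow of 3
   yields the address of the zero byte without any branch.  */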
12173 tmpreg = gen_lowpart (QImode, tmpreg);
12174 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
12175 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
12177 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
12179 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
12181 emit_label (end_0_label);
12185 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
12186 rtx callarg2 ATTRIBUTE_UNUSED,
12187 rtx pop, int sibcall)
12189 rtx use = NULL, call;
12191 if (pop == const0_rtx)
12193 gcc_assert (!TARGET_64BIT || !pop);
12196 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
12197 fnaddr = machopic_indirect_call_target (fnaddr);
12199 /* Static functions and indirect calls don't need the pic register. */
12200 if (! TARGET_64BIT && flag_pic
12201 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
12202 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
12203 use_reg (&use, pic_offset_table_rtx);
12205 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
12207 rtx al = gen_rtx_REG (QImode, 0);
12208 emit_move_insn (al, callarg2);
12209 use_reg (&use, al);
12211 #endif /* TARGET_MACHO */
12213 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
12215 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
12216 fnaddr = gen_rtx_MEM (QImode, fnaddr);
12218 if (sibcall && TARGET_64BIT
12219 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
12222 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
12223 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
12224 emit_move_insn (fnaddr, addr);
12225 fnaddr = gen_rtx_MEM (QImode, fnaddr);
12228 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
12230 call = gen_rtx_SET (VOIDmode, retval, call);
12233 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
12234 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
12235 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
12238 call = emit_call_insn (call);
12240 CALL_INSN_FUNCTION_USAGE (call) = use;
12244 /* Clear stack slot assignments remembered from previous functions.
12245 This is called from INIT_EXPANDERS once before RTL is emitted for each
12248 static struct machine_function *
12249 ix86_init_machine_status (void)
12251 struct machine_function *f;
12253 f = ggc_alloc_cleared (sizeof (struct machine_function));
12254 f->use_fast_prologue_epilogue_nregs = -1;
12259 /* Return a MEM corresponding to a stack slot with mode MODE.
12260 Allocate a new slot if necessary.
12262 The RTL for a function can have several slots available: N is
12263 which slot to use. */
12266 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
12268 struct stack_local_entry *s;
12270 gcc_assert (n < MAX_386_STACK_LOCALS);
12272 for (s = ix86_stack_locals; s; s = s->next)
12273 if (s->mode == mode && s->n == n)
12276 s = (struct stack_local_entry *)
12277 ggc_alloc (sizeof (struct stack_local_entry));
12280 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
12282 s->next = ix86_stack_locals;
12283 ix86_stack_locals = s;
12287 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12289 static GTY(()) rtx ix86_tls_symbol;
12291 ix86_tls_get_addr (void)
12294 if (!ix86_tls_symbol)
12296 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
12297 (TARGET_GNU_TLS && !TARGET_64BIT)
12298 ? "___tls_get_addr"
12299 : "__tls_get_addr");
12302 return ix86_tls_symbol;
12305 /* Calculate the length of the memory address in the instruction
12306 encoding. Does not include the one-byte modrm, opcode, or prefix. */
12309 memory_address_length (rtx addr)
12311 struct ix86_address parts;
12312 rtx base, index, disp;
12316 if (GET_CODE (addr) == PRE_DEC
12317 || GET_CODE (addr) == POST_INC
12318 || GET_CODE (addr) == PRE_MODIFY
12319 || GET_CODE (addr) == POST_MODIFY)
12322 ok = ix86_decompose_address (addr, &parts);
12325 if (parts.base && GET_CODE (parts.base) == SUBREG)
12326 parts.base = SUBREG_REG (parts.base);
12327 if (parts.index && GET_CODE (parts.index) == SUBREG)
12328 parts.index = SUBREG_REG (parts.index);
12331 index = parts.index;
12336 - esp as the base always wants an index,
12337 - ebp as the base always wants a displacement. */
12339 /* Register Indirect. */
12340 if (base && !index && !disp)
12342 /* esp (for its index) and ebp (for its displacement) need
12343 the two-byte modrm form. */
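/* Illustrative encodings (added example): movl (%esp), %eax is
   8b 04 24 (ModRM plus SIB) and movl (%ebp), %eax is 8b 45 00
   (ModRM plus a zero disp8); both take one byte more than e.g.
   movl (%ebx), %eax, which is just 8b 03.  */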
12344 if (addr == stack_pointer_rtx
12345 || addr == arg_pointer_rtx
12346 || addr == frame_pointer_rtx
12347 || addr == hard_frame_pointer_rtx)
12351 /* Direct Addressing. */
12352 else if (disp && !base && !index)
12357 /* Find the length of the displacement constant. */
12360 if (GET_CODE (disp) == CONST_INT
12361 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
12367 /* ebp always wants a displacement. */
12368 else if (base == hard_frame_pointer_rtx)
12371 /* An index requires the two-byte modrm form.... */
12373 /* ...like esp, which always wants an index. */
12374 || base == stack_pointer_rtx
12375 || base == arg_pointer_rtx
12376 || base == frame_pointer_rtx)
12383 /* Compute default value for "length_immediate" attribute. When SHORTFORM
12384 is set, expect that the insn has an 8-bit immediate alternative. */
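/* Illustrative (added example): with SHORTFORM, addl $3, %eax can be
   encoded as 83 c0 03 (sign-extended imm8) instead of
   05 03 00 00 00 (imm32), so a constant satisfying the 'K'
   constraint contributes 1 byte rather than 4 to the length.  */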
12386 ix86_attr_length_immediate_default (rtx insn, int shortform)
12390 extract_insn_cached (insn);
12391 for (i = recog_data.n_operands - 1; i >= 0; --i)
12392 if (CONSTANT_P (recog_data.operand[i]))
12396 && GET_CODE (recog_data.operand[i]) == CONST_INT
12397 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
12401 switch (get_attr_mode (insn))
12412 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
12417 fatal_insn ("unknown insn mode", insn);
12423 /* Compute default value for "length_address" attribute. */
12425 ix86_attr_length_address_default (rtx insn)
12429 if (get_attr_type (insn) == TYPE_LEA)
12431 rtx set = PATTERN (insn);
12433 if (GET_CODE (set) == PARALLEL)
12434 set = XVECEXP (set, 0, 0);
12436 gcc_assert (GET_CODE (set) == SET);
12438 return memory_address_length (SET_SRC (set));
12441 extract_insn_cached (insn);
12442 for (i = recog_data.n_operands - 1; i >= 0; --i)
12443 if (GET_CODE (recog_data.operand[i]) == MEM)
12445 return memory_address_length (XEXP (recog_data.operand[i], 0));
12451 /* Return the maximum number of instructions a cpu can issue. */
12454 ix86_issue_rate (void)
12458 case PROCESSOR_PENTIUM:
12462 case PROCESSOR_PENTIUMPRO:
12463 case PROCESSOR_PENTIUM4:
12464 case PROCESSOR_ATHLON:
12466 case PROCESSOR_NOCONA:
12474 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12475 by DEP_INSN and nothing else set by DEP_INSN. */
12478 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12482 /* Simplify the test for uninteresting insns. */
12483 if (insn_type != TYPE_SETCC
12484 && insn_type != TYPE_ICMOV
12485 && insn_type != TYPE_FCMOV
12486 && insn_type != TYPE_IBR)
12489 if ((set = single_set (dep_insn)) != 0)
12491 set = SET_DEST (set);
12494 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12495 && XVECLEN (PATTERN (dep_insn), 0) == 2
12496 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12497 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12499 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12500 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
12505 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12508 /* This test is true if the dependent insn reads the flags but
12509 not any other potentially set register. */
12510 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12513 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12519 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12520 address with operands set by DEP_INSN. */
12523 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12527 if (insn_type == TYPE_LEA
12530 addr = PATTERN (insn);
12532 if (GET_CODE (addr) == PARALLEL)
12533 addr = XVECEXP (addr, 0, 0);
12535 gcc_assert (GET_CODE (addr) == SET);
12537 addr = SET_SRC (addr);
12542 extract_insn_cached (insn);
12543 for (i = recog_data.n_operands - 1; i >= 0; --i)
12544 if (GET_CODE (recog_data.operand[i]) == MEM)
12546 addr = XEXP (recog_data.operand[i], 0);
12553 return modified_in_p (addr, dep_insn);
12557 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12559 enum attr_type insn_type, dep_insn_type;
12560 enum attr_memory memory;
12562 int dep_insn_code_number;
12564 /* Anti and output dependencies have zero cost on all CPUs. */
12565 if (REG_NOTE_KIND (link) != 0)
12568 dep_insn_code_number = recog_memoized (dep_insn);
12570 /* If we can't recognize the insns, we can't really do anything. */
12571 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12574 insn_type = get_attr_type (insn);
12575 dep_insn_type = get_attr_type (dep_insn);
12579 case PROCESSOR_PENTIUM:
12580 /* Address Generation Interlock adds a cycle of latency. */
12581 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12584 /* ??? Compares pair with jump/setcc. */
12585 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12588 /* Floating point stores require value to be ready one cycle earlier. */
12589 if (insn_type == TYPE_FMOV
12590 && get_attr_memory (insn) == MEMORY_STORE
12591 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12595 case PROCESSOR_PENTIUMPRO:
12596 memory = get_attr_memory (insn);
12598 /* INT->FP conversion is expensive. */
12599 if (get_attr_fp_int_src (dep_insn))
12602 /* There is one cycle extra latency between an FP op and a store. */
12603 if (insn_type == TYPE_FMOV
12604 && (set = single_set (dep_insn)) != NULL_RTX
12605 && (set2 = single_set (insn)) != NULL_RTX
12606 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12607 && GET_CODE (SET_DEST (set2)) == MEM)
12610 /* Show the ability of the reorder buffer to hide the latency of a load
12611 by executing it in parallel with the previous instruction, in case the
12612 previous instruction is not needed to compute the address. */
12613 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12614 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12616 /* Claim moves take one cycle, as the core can issue one load
12617 at a time and the next load can start a cycle later. */
12618 if (dep_insn_type == TYPE_IMOV
12619 || dep_insn_type == TYPE_FMOV)
12627 memory = get_attr_memory (insn);
12629 /* The esp dependency is resolved before the instruction is really
12631 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12632 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12635 /* INT->FP conversion is expensive. */
12636 if (get_attr_fp_int_src (dep_insn))
12639 /* Show the ability of the reorder buffer to hide the latency of a load
12640 by executing it in parallel with the previous instruction, in case the
12641 previous instruction is not needed to compute the address. */
12642 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12643 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12645 /* Claim moves take one cycle, as the core can issue one load
12646 at a time and the next load can start a cycle later. */
12647 if (dep_insn_type == TYPE_IMOV
12648 || dep_insn_type == TYPE_FMOV)
12657 case PROCESSOR_ATHLON:
12659 memory = get_attr_memory (insn);
12661 /* Show the ability of the reorder buffer to hide the latency of a load
12662 by executing it in parallel with the previous instruction, in case the
12663 previous instruction is not needed to compute the address. */
12664 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12665 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12667 enum attr_unit unit = get_attr_unit (insn);
12670 /* Because of the difference between the length of integer and
12671 floating unit pipeline preparation stages, the memory operands
12672 for floating point are cheaper.
12674 ??? For Athlon the difference is most probably 2. */
12675 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12678 loadcost = TARGET_ATHLON ? 2 : 0;
12680 if (cost >= loadcost)
12693 /* How many alternative schedules to try. This should be as wide as the
12694 scheduling freedom in the DFA, but no wider. Making this value too
12695 large results in extra work for the scheduler. */
12698 ia32_multipass_dfa_lookahead (void)
12700 if (ix86_tune == PROCESSOR_PENTIUM)
12703 if (ix86_tune == PROCESSOR_PENTIUMPRO
12704 || ix86_tune == PROCESSOR_K6)
12712 /* Compute the alignment given to a constant that is being placed in memory.
12713 EXP is the constant and ALIGN is the alignment that the object would
12715 The value of this function is used instead of that alignment to align
12719 ix86_constant_alignment (tree exp, int align)
12721 if (TREE_CODE (exp) == REAL_CST)
12723 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12725 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12728 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12729 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12730 return BITS_PER_WORD;
12735 /* Compute the alignment for a static variable.
12736 TYPE is the data type, and ALIGN is the alignment that
12737 the object would ordinarily have. The value of this function is used
12738 instead of that alignment to align the object. */
12741 ix86_data_alignment (tree type, int align)
12743 if (AGGREGATE_TYPE_P (type)
12744 && TYPE_SIZE (type)
12745 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12746 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12747 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12750 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
12751 to a 16-byte boundary. */
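/* Illustrative consequence (added note): under this rule a global
   char buf[16] is aligned to 16 bytes even though char itself only
   needs byte alignment.  */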
12754 if (AGGREGATE_TYPE_P (type)
12755 && TYPE_SIZE (type)
12756 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12757 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12758 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12762 if (TREE_CODE (type) == ARRAY_TYPE)
12764 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12766 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12769 else if (TREE_CODE (type) == COMPLEX_TYPE)
12772 if (TYPE_MODE (type) == DCmode && align < 64)
12774 if (TYPE_MODE (type) == XCmode && align < 128)
12777 else if ((TREE_CODE (type) == RECORD_TYPE
12778 || TREE_CODE (type) == UNION_TYPE
12779 || TREE_CODE (type) == QUAL_UNION_TYPE)
12780 && TYPE_FIELDS (type))
12782 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12784 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12787 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12788 || TREE_CODE (type) == INTEGER_TYPE)
12790 if (TYPE_MODE (type) == DFmode && align < 64)
12792 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12799 /* Compute the alignment for a local variable.
12800 TYPE is the data type, and ALIGN is the alignment that
12801 the object would ordinarily have. The value of this function is used
12802 instead of that alignment to align the object. */
12805 ix86_local_alignment (tree type, int align)
12807 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
12808 to a 16-byte boundary. */
12811 if (AGGREGATE_TYPE_P (type)
12812 && TYPE_SIZE (type)
12813 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12814 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12815 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12818 if (TREE_CODE (type) == ARRAY_TYPE)
12820 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12822 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12825 else if (TREE_CODE (type) == COMPLEX_TYPE)
12827 if (TYPE_MODE (type) == DCmode && align < 64)
12829 if (TYPE_MODE (type) == XCmode && align < 128)
12832 else if ((TREE_CODE (type) == RECORD_TYPE
12833 || TREE_CODE (type) == UNION_TYPE
12834 || TREE_CODE (type) == QUAL_UNION_TYPE)
12835 && TYPE_FIELDS (type))
12837 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12839 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12842 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12843 || TREE_CODE (type) == INTEGER_TYPE)
12846 if (TYPE_MODE (type) == DFmode && align < 64)
12848 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12854 /* Emit RTL insns to initialize the variable parts of a trampoline.
12855 FNADDR is an RTX for the address of the function's pure code.
12856 CXT is an RTX for the static chain value for the function. */
12858 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12862 /* Compute offset from the end of the jmp to the target function. */
12863 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12864 plus_constant (tramp, 10),
12865 NULL_RTX, 1, OPTAB_DIRECT);
12866 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12867 gen_int_mode (0xb9, QImode));
12868 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12869 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12870 gen_int_mode (0xe9, QImode));
12871 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
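/* Byte layout of the 32-bit trampoline just written (added note):
     0: b9 <cxt:4>    movl $CXT, %ecx
     5: e9 <disp:4>   jmp  <fnaddr> (rel32)
   10 bytes total, which is why DISP is computed relative to
   tramp + 10 above.  */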
12876 /* Try to load address using shorter movl instead of movabs.
12877 We may want to support movq for kernel mode, but the kernel does not use
12878 trampolines at the moment. */
12879 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
12881 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12882 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12883 gen_int_mode (0xbb41, HImode));
12884 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12885 gen_lowpart (SImode, fnaddr));
12890 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12891 gen_int_mode (0xbb49, HImode));
12892 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12896 /* Load static chain using movabs to r10. */
12897 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12898 gen_int_mode (0xba49, HImode));
12899 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12902 /* Jump through r11. */
12903 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12904 gen_int_mode (0xff49, HImode));
12905 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12906 gen_int_mode (0xe3, QImode));
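/* Byte view of the 64-bit path (added note; HImode stores are
   little-endian, so 0xbb49 lands in memory as 49 bb):
     49 bb <fnaddr:8>  movabs $FNADDR, %r11  (or 41 bb <fnaddr:4>,
                       the movl form, when the address zero-extends)
     49 ba <cxt:8>     movabs $CXT, %r10
     49 ff e3          jmp *%r11  */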
12908 gcc_assert (offset <= TRAMPOLINE_SIZE);
12911 #ifdef ENABLE_EXECUTE_STACK
12912 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
12913 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12917 /* Codes for all the SSE/MMX builtins. */
12920 IX86_BUILTIN_ADDPS,
12921 IX86_BUILTIN_ADDSS,
12922 IX86_BUILTIN_DIVPS,
12923 IX86_BUILTIN_DIVSS,
12924 IX86_BUILTIN_MULPS,
12925 IX86_BUILTIN_MULSS,
12926 IX86_BUILTIN_SUBPS,
12927 IX86_BUILTIN_SUBSS,
12929 IX86_BUILTIN_CMPEQPS,
12930 IX86_BUILTIN_CMPLTPS,
12931 IX86_BUILTIN_CMPLEPS,
12932 IX86_BUILTIN_CMPGTPS,
12933 IX86_BUILTIN_CMPGEPS,
12934 IX86_BUILTIN_CMPNEQPS,
12935 IX86_BUILTIN_CMPNLTPS,
12936 IX86_BUILTIN_CMPNLEPS,
12937 IX86_BUILTIN_CMPNGTPS,
12938 IX86_BUILTIN_CMPNGEPS,
12939 IX86_BUILTIN_CMPORDPS,
12940 IX86_BUILTIN_CMPUNORDPS,
12941 IX86_BUILTIN_CMPNEPS,
12942 IX86_BUILTIN_CMPEQSS,
12943 IX86_BUILTIN_CMPLTSS,
12944 IX86_BUILTIN_CMPLESS,
12945 IX86_BUILTIN_CMPNEQSS,
12946 IX86_BUILTIN_CMPNLTSS,
12947 IX86_BUILTIN_CMPNLESS,
12948 IX86_BUILTIN_CMPNGTSS,
12949 IX86_BUILTIN_CMPNGESS,
12950 IX86_BUILTIN_CMPORDSS,
12951 IX86_BUILTIN_CMPUNORDSS,
12952 IX86_BUILTIN_CMPNESS,
12954 IX86_BUILTIN_COMIEQSS,
12955 IX86_BUILTIN_COMILTSS,
12956 IX86_BUILTIN_COMILESS,
12957 IX86_BUILTIN_COMIGTSS,
12958 IX86_BUILTIN_COMIGESS,
12959 IX86_BUILTIN_COMINEQSS,
12960 IX86_BUILTIN_UCOMIEQSS,
12961 IX86_BUILTIN_UCOMILTSS,
12962 IX86_BUILTIN_UCOMILESS,
12963 IX86_BUILTIN_UCOMIGTSS,
12964 IX86_BUILTIN_UCOMIGESS,
12965 IX86_BUILTIN_UCOMINEQSS,
12967 IX86_BUILTIN_CVTPI2PS,
12968 IX86_BUILTIN_CVTPS2PI,
12969 IX86_BUILTIN_CVTSI2SS,
12970 IX86_BUILTIN_CVTSI642SS,
12971 IX86_BUILTIN_CVTSS2SI,
12972 IX86_BUILTIN_CVTSS2SI64,
12973 IX86_BUILTIN_CVTTPS2PI,
12974 IX86_BUILTIN_CVTTSS2SI,
12975 IX86_BUILTIN_CVTTSS2SI64,
12977 IX86_BUILTIN_MAXPS,
12978 IX86_BUILTIN_MAXSS,
12979 IX86_BUILTIN_MINPS,
12980 IX86_BUILTIN_MINSS,
12982 IX86_BUILTIN_LOADUPS,
12983 IX86_BUILTIN_STOREUPS,
12984 IX86_BUILTIN_MOVSS,
12986 IX86_BUILTIN_MOVHLPS,
12987 IX86_BUILTIN_MOVLHPS,
12988 IX86_BUILTIN_LOADHPS,
12989 IX86_BUILTIN_LOADLPS,
12990 IX86_BUILTIN_STOREHPS,
12991 IX86_BUILTIN_STORELPS,
12993 IX86_BUILTIN_MASKMOVQ,
12994 IX86_BUILTIN_MOVMSKPS,
12995 IX86_BUILTIN_PMOVMSKB,
12997 IX86_BUILTIN_MOVNTPS,
12998 IX86_BUILTIN_MOVNTQ,
13000 IX86_BUILTIN_LOADDQU,
13001 IX86_BUILTIN_STOREDQU,
13003 IX86_BUILTIN_PACKSSWB,
13004 IX86_BUILTIN_PACKSSDW,
13005 IX86_BUILTIN_PACKUSWB,
13007 IX86_BUILTIN_PADDB,
13008 IX86_BUILTIN_PADDW,
13009 IX86_BUILTIN_PADDD,
13010 IX86_BUILTIN_PADDQ,
13011 IX86_BUILTIN_PADDSB,
13012 IX86_BUILTIN_PADDSW,
13013 IX86_BUILTIN_PADDUSB,
13014 IX86_BUILTIN_PADDUSW,
13015 IX86_BUILTIN_PSUBB,
13016 IX86_BUILTIN_PSUBW,
13017 IX86_BUILTIN_PSUBD,
13018 IX86_BUILTIN_PSUBQ,
13019 IX86_BUILTIN_PSUBSB,
13020 IX86_BUILTIN_PSUBSW,
13021 IX86_BUILTIN_PSUBUSB,
13022 IX86_BUILTIN_PSUBUSW,
13025 IX86_BUILTIN_PANDN,
13029 IX86_BUILTIN_PAVGB,
13030 IX86_BUILTIN_PAVGW,
13032 IX86_BUILTIN_PCMPEQB,
13033 IX86_BUILTIN_PCMPEQW,
13034 IX86_BUILTIN_PCMPEQD,
13035 IX86_BUILTIN_PCMPGTB,
13036 IX86_BUILTIN_PCMPGTW,
13037 IX86_BUILTIN_PCMPGTD,
13039 IX86_BUILTIN_PMADDWD,
13041 IX86_BUILTIN_PMAXSW,
13042 IX86_BUILTIN_PMAXUB,
13043 IX86_BUILTIN_PMINSW,
13044 IX86_BUILTIN_PMINUB,
13046 IX86_BUILTIN_PMULHUW,
13047 IX86_BUILTIN_PMULHW,
13048 IX86_BUILTIN_PMULLW,
13050 IX86_BUILTIN_PSADBW,
13051 IX86_BUILTIN_PSHUFW,
13053 IX86_BUILTIN_PSLLW,
13054 IX86_BUILTIN_PSLLD,
13055 IX86_BUILTIN_PSLLQ,
13056 IX86_BUILTIN_PSRAW,
13057 IX86_BUILTIN_PSRAD,
13058 IX86_BUILTIN_PSRLW,
13059 IX86_BUILTIN_PSRLD,
13060 IX86_BUILTIN_PSRLQ,
13061 IX86_BUILTIN_PSLLWI,
13062 IX86_BUILTIN_PSLLDI,
13063 IX86_BUILTIN_PSLLQI,
13064 IX86_BUILTIN_PSRAWI,
13065 IX86_BUILTIN_PSRADI,
13066 IX86_BUILTIN_PSRLWI,
13067 IX86_BUILTIN_PSRLDI,
13068 IX86_BUILTIN_PSRLQI,
13070 IX86_BUILTIN_PUNPCKHBW,
13071 IX86_BUILTIN_PUNPCKHWD,
13072 IX86_BUILTIN_PUNPCKHDQ,
13073 IX86_BUILTIN_PUNPCKLBW,
13074 IX86_BUILTIN_PUNPCKLWD,
13075 IX86_BUILTIN_PUNPCKLDQ,
13077 IX86_BUILTIN_SHUFPS,
13079 IX86_BUILTIN_RCPPS,
13080 IX86_BUILTIN_RCPSS,
13081 IX86_BUILTIN_RSQRTPS,
13082 IX86_BUILTIN_RSQRTSS,
13083 IX86_BUILTIN_SQRTPS,
13084 IX86_BUILTIN_SQRTSS,
13086 IX86_BUILTIN_UNPCKHPS,
13087 IX86_BUILTIN_UNPCKLPS,
13089 IX86_BUILTIN_ANDPS,
13090 IX86_BUILTIN_ANDNPS,
13092 IX86_BUILTIN_XORPS,
13095 IX86_BUILTIN_LDMXCSR,
13096 IX86_BUILTIN_STMXCSR,
13097 IX86_BUILTIN_SFENCE,
13099 /* 3DNow! Original */
13100 IX86_BUILTIN_FEMMS,
13101 IX86_BUILTIN_PAVGUSB,
13102 IX86_BUILTIN_PF2ID,
13103 IX86_BUILTIN_PFACC,
13104 IX86_BUILTIN_PFADD,
13105 IX86_BUILTIN_PFCMPEQ,
13106 IX86_BUILTIN_PFCMPGE,
13107 IX86_BUILTIN_PFCMPGT,
13108 IX86_BUILTIN_PFMAX,
13109 IX86_BUILTIN_PFMIN,
13110 IX86_BUILTIN_PFMUL,
13111 IX86_BUILTIN_PFRCP,
13112 IX86_BUILTIN_PFRCPIT1,
13113 IX86_BUILTIN_PFRCPIT2,
13114 IX86_BUILTIN_PFRSQIT1,
13115 IX86_BUILTIN_PFRSQRT,
13116 IX86_BUILTIN_PFSUB,
13117 IX86_BUILTIN_PFSUBR,
13118 IX86_BUILTIN_PI2FD,
13119 IX86_BUILTIN_PMULHRW,
13121 /* 3DNow! Athlon Extensions */
13122 IX86_BUILTIN_PF2IW,
13123 IX86_BUILTIN_PFNACC,
13124 IX86_BUILTIN_PFPNACC,
13125 IX86_BUILTIN_PI2FW,
13126 IX86_BUILTIN_PSWAPDSI,
13127 IX86_BUILTIN_PSWAPDSF,
13130 IX86_BUILTIN_ADDPD,
13131 IX86_BUILTIN_ADDSD,
13132 IX86_BUILTIN_DIVPD,
13133 IX86_BUILTIN_DIVSD,
13134 IX86_BUILTIN_MULPD,
13135 IX86_BUILTIN_MULSD,
13136 IX86_BUILTIN_SUBPD,
13137 IX86_BUILTIN_SUBSD,
13139 IX86_BUILTIN_CMPEQPD,
13140 IX86_BUILTIN_CMPLTPD,
13141 IX86_BUILTIN_CMPLEPD,
13142 IX86_BUILTIN_CMPGTPD,
13143 IX86_BUILTIN_CMPGEPD,
13144 IX86_BUILTIN_CMPNEQPD,
13145 IX86_BUILTIN_CMPNLTPD,
13146 IX86_BUILTIN_CMPNLEPD,
13147 IX86_BUILTIN_CMPNGTPD,
13148 IX86_BUILTIN_CMPNGEPD,
13149 IX86_BUILTIN_CMPORDPD,
13150 IX86_BUILTIN_CMPUNORDPD,
13151 IX86_BUILTIN_CMPNEPD,
13152 IX86_BUILTIN_CMPEQSD,
13153 IX86_BUILTIN_CMPLTSD,
13154 IX86_BUILTIN_CMPLESD,
13155 IX86_BUILTIN_CMPNEQSD,
13156 IX86_BUILTIN_CMPNLTSD,
13157 IX86_BUILTIN_CMPNLESD,
13158 IX86_BUILTIN_CMPORDSD,
13159 IX86_BUILTIN_CMPUNORDSD,
13160 IX86_BUILTIN_CMPNESD,
13162 IX86_BUILTIN_COMIEQSD,
13163 IX86_BUILTIN_COMILTSD,
13164 IX86_BUILTIN_COMILESD,
13165 IX86_BUILTIN_COMIGTSD,
13166 IX86_BUILTIN_COMIGESD,
13167 IX86_BUILTIN_COMINEQSD,
13168 IX86_BUILTIN_UCOMIEQSD,
13169 IX86_BUILTIN_UCOMILTSD,
13170 IX86_BUILTIN_UCOMILESD,
13171 IX86_BUILTIN_UCOMIGTSD,
13172 IX86_BUILTIN_UCOMIGESD,
13173 IX86_BUILTIN_UCOMINEQSD,
13175 IX86_BUILTIN_MAXPD,
13176 IX86_BUILTIN_MAXSD,
13177 IX86_BUILTIN_MINPD,
13178 IX86_BUILTIN_MINSD,
13180 IX86_BUILTIN_ANDPD,
13181 IX86_BUILTIN_ANDNPD,
13183 IX86_BUILTIN_XORPD,
13185 IX86_BUILTIN_SQRTPD,
13186 IX86_BUILTIN_SQRTSD,
13188 IX86_BUILTIN_UNPCKHPD,
13189 IX86_BUILTIN_UNPCKLPD,
13191 IX86_BUILTIN_SHUFPD,
13193 IX86_BUILTIN_LOADUPD,
13194 IX86_BUILTIN_STOREUPD,
13195 IX86_BUILTIN_MOVSD,
13197 IX86_BUILTIN_LOADHPD,
13198 IX86_BUILTIN_LOADLPD,
13200 IX86_BUILTIN_CVTDQ2PD,
13201 IX86_BUILTIN_CVTDQ2PS,
13203 IX86_BUILTIN_CVTPD2DQ,
13204 IX86_BUILTIN_CVTPD2PI,
13205 IX86_BUILTIN_CVTPD2PS,
13206 IX86_BUILTIN_CVTTPD2DQ,
13207 IX86_BUILTIN_CVTTPD2PI,
13209 IX86_BUILTIN_CVTPI2PD,
13210 IX86_BUILTIN_CVTSI2SD,
13211 IX86_BUILTIN_CVTSI642SD,
13213 IX86_BUILTIN_CVTSD2SI,
13214 IX86_BUILTIN_CVTSD2SI64,
13215 IX86_BUILTIN_CVTSD2SS,
13216 IX86_BUILTIN_CVTSS2SD,
13217 IX86_BUILTIN_CVTTSD2SI,
13218 IX86_BUILTIN_CVTTSD2SI64,
13220 IX86_BUILTIN_CVTPS2DQ,
13221 IX86_BUILTIN_CVTPS2PD,
13222 IX86_BUILTIN_CVTTPS2DQ,
13224 IX86_BUILTIN_MOVNTI,
13225 IX86_BUILTIN_MOVNTPD,
13226 IX86_BUILTIN_MOVNTDQ,
13229 IX86_BUILTIN_MASKMOVDQU,
13230 IX86_BUILTIN_MOVMSKPD,
13231 IX86_BUILTIN_PMOVMSKB128,
13233 IX86_BUILTIN_PACKSSWB128,
13234 IX86_BUILTIN_PACKSSDW128,
13235 IX86_BUILTIN_PACKUSWB128,
13237 IX86_BUILTIN_PADDB128,
13238 IX86_BUILTIN_PADDW128,
13239 IX86_BUILTIN_PADDD128,
13240 IX86_BUILTIN_PADDQ128,
13241 IX86_BUILTIN_PADDSB128,
13242 IX86_BUILTIN_PADDSW128,
13243 IX86_BUILTIN_PADDUSB128,
13244 IX86_BUILTIN_PADDUSW128,
13245 IX86_BUILTIN_PSUBB128,
13246 IX86_BUILTIN_PSUBW128,
13247 IX86_BUILTIN_PSUBD128,
13248 IX86_BUILTIN_PSUBQ128,
13249 IX86_BUILTIN_PSUBSB128,
13250 IX86_BUILTIN_PSUBSW128,
13251 IX86_BUILTIN_PSUBUSB128,
13252 IX86_BUILTIN_PSUBUSW128,
13254 IX86_BUILTIN_PAND128,
13255 IX86_BUILTIN_PANDN128,
13256 IX86_BUILTIN_POR128,
13257 IX86_BUILTIN_PXOR128,
13259 IX86_BUILTIN_PAVGB128,
13260 IX86_BUILTIN_PAVGW128,
13262 IX86_BUILTIN_PCMPEQB128,
13263 IX86_BUILTIN_PCMPEQW128,
13264 IX86_BUILTIN_PCMPEQD128,
13265 IX86_BUILTIN_PCMPGTB128,
13266 IX86_BUILTIN_PCMPGTW128,
13267 IX86_BUILTIN_PCMPGTD128,
13269 IX86_BUILTIN_PMADDWD128,
13271 IX86_BUILTIN_PMAXSW128,
13272 IX86_BUILTIN_PMAXUB128,
13273 IX86_BUILTIN_PMINSW128,
13274 IX86_BUILTIN_PMINUB128,
13276 IX86_BUILTIN_PMULUDQ,
13277 IX86_BUILTIN_PMULUDQ128,
13278 IX86_BUILTIN_PMULHUW128,
13279 IX86_BUILTIN_PMULHW128,
13280 IX86_BUILTIN_PMULLW128,
13282 IX86_BUILTIN_PSADBW128,
13283 IX86_BUILTIN_PSHUFHW,
13284 IX86_BUILTIN_PSHUFLW,
13285 IX86_BUILTIN_PSHUFD,
13287 IX86_BUILTIN_PSLLW128,
13288 IX86_BUILTIN_PSLLD128,
13289 IX86_BUILTIN_PSLLQ128,
13290 IX86_BUILTIN_PSRAW128,
13291 IX86_BUILTIN_PSRAD128,
13292 IX86_BUILTIN_PSRLW128,
13293 IX86_BUILTIN_PSRLD128,
13294 IX86_BUILTIN_PSRLQ128,
13295 IX86_BUILTIN_PSLLDQI128,
13296 IX86_BUILTIN_PSLLWI128,
13297 IX86_BUILTIN_PSLLDI128,
13298 IX86_BUILTIN_PSLLQI128,
13299 IX86_BUILTIN_PSRAWI128,
13300 IX86_BUILTIN_PSRADI128,
13301 IX86_BUILTIN_PSRLDQI128,
13302 IX86_BUILTIN_PSRLWI128,
13303 IX86_BUILTIN_PSRLDI128,
13304 IX86_BUILTIN_PSRLQI128,
13306 IX86_BUILTIN_PUNPCKHBW128,
13307 IX86_BUILTIN_PUNPCKHWD128,
13308 IX86_BUILTIN_PUNPCKHDQ128,
13309 IX86_BUILTIN_PUNPCKHQDQ128,
13310 IX86_BUILTIN_PUNPCKLBW128,
13311 IX86_BUILTIN_PUNPCKLWD128,
13312 IX86_BUILTIN_PUNPCKLDQ128,
13313 IX86_BUILTIN_PUNPCKLQDQ128,
13315 IX86_BUILTIN_CLFLUSH,
13316 IX86_BUILTIN_MFENCE,
13317 IX86_BUILTIN_LFENCE,
13319 /* Prescott New Instructions. */
13320 IX86_BUILTIN_ADDSUBPS,
13321 IX86_BUILTIN_HADDPS,
13322 IX86_BUILTIN_HSUBPS,
13323 IX86_BUILTIN_MOVSHDUP,
13324 IX86_BUILTIN_MOVSLDUP,
13325 IX86_BUILTIN_ADDSUBPD,
13326 IX86_BUILTIN_HADDPD,
13327 IX86_BUILTIN_HSUBPD,
13328 IX86_BUILTIN_LDDQU,
13330 IX86_BUILTIN_MONITOR,
13331 IX86_BUILTIN_MWAIT,
13333 IX86_BUILTIN_VEC_INIT_V2SI,
13334 IX86_BUILTIN_VEC_INIT_V4HI,
13335 IX86_BUILTIN_VEC_INIT_V8QI,
13336 IX86_BUILTIN_VEC_EXT_V2DF,
13337 IX86_BUILTIN_VEC_EXT_V2DI,
13338 IX86_BUILTIN_VEC_EXT_V4SF,
13339 IX86_BUILTIN_VEC_EXT_V4SI,
13340 IX86_BUILTIN_VEC_EXT_V8HI,
13341 IX86_BUILTIN_VEC_EXT_V2SI,
13342 IX86_BUILTIN_VEC_EXT_V4HI,
13343 IX86_BUILTIN_VEC_SET_V8HI,
13344 IX86_BUILTIN_VEC_SET_V4HI,
13349 #define def_builtin(MASK, NAME, TYPE, CODE) \
13351 if ((MASK) & target_flags \
13352 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
13353 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
13354 NULL, NULL_TREE); \
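/* Example use (illustrative; the type node name v4sf_ftype_v4sf_v4sf
   is assumed from the elided initialization code):
     def_builtin (MASK_SSE, "__builtin_ia32_addps",
                  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);
   This registers the builtin only when -msse is set in target_flags
   (and, for MASK_64BIT builtins, only on 64-bit targets).  */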
13357 /* Bits for builtin_description.flag. */
13359 /* Set when we don't support the comparison natively, and should
13360 swap the comparison operands in order to support it. */
13361 #define BUILTIN_DESC_SWAP_OPERANDS 1
13363 struct builtin_description
13365 const unsigned int mask;
13366 const enum insn_code icode;
13367 const char *const name;
13368 const enum ix86_builtins code;
13369 const enum rtx_code comparison;
13370 const unsigned int flag;
13373 static const struct builtin_description bdesc_comi[] =
13375 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
13376 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
13377 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
13378 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
13379 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
13380 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
13381 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
13382 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
13383 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
13384 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
13385 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
13386 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
13387 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
13388 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
13389 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
13390 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
13391 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
13392 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
13393 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
13394 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
13395 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
13396 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
13397 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
13398 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
13401 static const struct builtin_description bdesc_2arg[] =
13404 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
13405 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
13406 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
13407 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
13408 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
13409 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
13410 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
13411 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
13413 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
13414 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
13415 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
13416 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
13417 BUILTIN_DESC_SWAP_OPERANDS },
13418 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
13419 BUILTIN_DESC_SWAP_OPERANDS },
13420 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
13421 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
13422 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
13423 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
13424 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
13425 BUILTIN_DESC_SWAP_OPERANDS },
13426 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
13427 BUILTIN_DESC_SWAP_OPERANDS },
13428 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
13429 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
13430 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
13431 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
13432 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
13433 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
13434 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
13435 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
13436 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
13437 BUILTIN_DESC_SWAP_OPERANDS },
13438 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
13439 BUILTIN_DESC_SWAP_OPERANDS },
13440 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
  { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },

  { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
  { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
  { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
  /* MMX */
  { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },

  /* Special.  */
  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
  /* SSE2 */
  { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },

  { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },

  /* SSE2 MMX */
  { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },

  { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
  { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
  { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },

  /* SSE3 */
  { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
};
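
/* For illustration: each named bdesc_2arg row above becomes one
   two-operand builtin.  The ADDPD row, for example, yields

     v2df __builtin_ia32_addpd (v2df, v2df);

   which is what the emmintrin.h intrinsics are built on.  */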
static const struct builtin_description bdesc_1arg[] =
{
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },

  /* SSE3 */
  { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
};
static void
ix86_init_builtins (void)
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */

static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  size_t i;

  tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
  tree V2DI_type_node
    = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
  tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
  tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
  tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
  tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);

  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pcchar_type_node = build_pointer_type (
                            build_type_variant (char_type_node, 1, 0));
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pcfloat_type_node = build_pointer_type (
                             build_type_variant (float_type_node, 1, 0));
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
  /* Comparisons.  */
  tree int_ftype_v4sf_v4sf
    = build_function_type_list (integer_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v4si_ftype_v4sf_v4sf
    = build_function_type_list (V4SI_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type_list (integer_type_node,
                                V4SF_type_node, NULL_TREE);
  tree int64_ftype_v4sf
    = build_function_type_list (long_long_integer_type_node,
                                V4SF_type_node, NULL_TREE);
  tree int_ftype_v8qi
    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, integer_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int64
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, long_long_integer_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v4sf_v2si
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V2SI_type_node, NULL_TREE);
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type_list (V8QI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v2si_v2si
    = build_function_type_list (V4HI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V4SF_type_node,
                                integer_type_node, NULL_TREE);
  tree v2si_ftype_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_di
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree v2si_ftype_v2si_di
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree void_ftype_void
    = build_function_type (void_type_node, void_list_node);
  tree void_ftype_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
  tree void_ftype_unsigned_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node,
                                unsigned_type_node, NULL_TREE);
  tree void_ftype_pcvoid_unsigned_unsigned
    = build_function_type_list (void_type_node, const_ptr_type_node,
                                unsigned_type_node, unsigned_type_node,
                                NULL_TREE);
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, void_list_node);
  tree v2si_ftype_v4sf
    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
  /* Loads/stores.  */
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type_list (void_type_node,
                                V8QI_type_node, V8QI_type_node,
                                pchar_type_node, NULL_TREE);
  tree v4sf_ftype_pcfloat
    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v4sf
    = build_function_type_list (void_type_node,
                                pv2si_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pfloat_v4sf
    = build_function_type_list (void_type_node,
                                pfloat_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pdi_di
    = build_function_type_list (void_type_node,
                                pdi_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree void_ftype_pv2di_v2di
    = build_function_type_list (void_type_node,
                                pv2di_type_node, V2DI_type_node, NULL_TREE);
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type_list (V8QI_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree di_ftype_di_di
    = build_function_type_list (long_long_unsigned_type_node,
                                long_long_unsigned_type_node,
                                long_long_unsigned_type_node, NULL_TREE);

  tree v2si_ftype_v2sf
    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2si
    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf
    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type_list (V2SF_type_node,
                                V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2si_ftype_v2sf_v2sf
    = build_function_type_list (V2SI_type_node,
                                V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree pint_type_node = build_pointer_type (integer_type_node);
  tree pdouble_type_node = build_pointer_type (double_type_node);
  tree pcdouble_type_node = build_pointer_type (
                              build_type_variant (double_type_node, 1, 0));
  tree int_ftype_v2df_v2df
    = build_function_type_list (integer_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);

  tree ti_ftype_ti_ti
    = build_function_type_list (intTI_type_node,
                                intTI_type_node, intTI_type_node, NULL_TREE);
  tree void_ftype_pcvoid
    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
  tree v4sf_ftype_v4si
    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v4sf
    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v4si
    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v2df
    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
  tree v2si_ftype_v2df
    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v2df
    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2si
    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
  tree v2df_ftype_v4sf
    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree int_ftype_v2df
    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
  tree int64_ftype_v2df
    = build_function_type_list (long_long_integer_type_node,
                                V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int64
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, long_long_integer_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v4sf_v2df
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v4sf
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df_int
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node,
                                integer_type_node,
                                NULL_TREE);
  tree v2df_ftype_v2df_pcdouble
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree void_ftype_pdouble_v2df
    = build_function_type_list (void_type_node,
                                pdouble_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pint_int
    = build_function_type_list (void_type_node,
                                pint_type_node, integer_type_node, NULL_TREE);
  tree void_ftype_v16qi_v16qi_pchar
    = build_function_type_list (void_type_node,
                                V16QI_type_node, V16QI_type_node,
                                pchar_type_node, NULL_TREE);
  tree v2df_ftype_pcdouble
    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi
    = build_function_type_list (V16QI_type_node,
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v4si
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree v2di_ftype_v2df_v2df
    = build_function_type_list (V2DI_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df
    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2di_ftype_v2di_int
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v4si_int
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v2di
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v2di
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v8hi_v8hi
    = build_function_type_list (V4SI_type_node,
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree di_ftype_v8qi_v8qi
    = build_function_type_list (long_long_unsigned_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree di_ftype_v2si_v2si
    = build_function_type_list (long_long_unsigned_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2di_ftype_v16qi_v16qi
    = build_function_type_list (V2DI_type_node,
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v2di_ftype_v4si_v4si
    = build_function_type_list (V2DI_type_node,
                                V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree int_ftype_v16qi
    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
  tree v16qi_ftype_pcchar
    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
  tree void_ftype_pchar_v16qi
    = build_function_type_list (void_type_node,
                                pchar_type_node, V16QI_type_node, NULL_TREE);
  tree float80_type;
  tree float128_type;
  tree ftype;

  /* The __float80 type.  */
  if (TYPE_MODE (long_double_type_node) == XFmode)
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                               "__float80");
  else
    {
      /* The __float80 type.  */
      float80_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float80_type) = 80;
      layout_type (float80_type);
      (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
    }

  float128_type = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type) = 128;
  layout_type (float128_type);
  (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
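  /* Once registered, these names act as type specifiers in user code, so
     a declaration such as "__float80 x;" is accepted on targets providing
     the type (illustrative; the types carry no builtin functions of their
     own here).  */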
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
         mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
        continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
        {
        case V16QImode:
          type = v16qi_ftype_v16qi_v16qi;
          break;
        case V8HImode:
          type = v8hi_ftype_v8hi_v8hi;
          break;
        case V4SImode:
          type = v4si_ftype_v4si_v4si;
          break;
        case V2DImode:
          type = v2di_ftype_v2di_v2di;
          break;
        case V2DFmode:
          type = v2df_ftype_v2df_v2df;
          break;
        case TImode:
          type = ti_ftype_ti_ti;
          break;
        case V4SFmode:
          type = v4sf_ftype_v4sf_v4sf;
          break;
        case V8QImode:
          type = v8qi_ftype_v8qi_v8qi;
          break;
        case V4HImode:
          type = v4hi_ftype_v4hi_v4hi;
          break;
        case V2SImode:
          type = v2si_ftype_v2si_v2si;
          break;
        case DImode:
          type = di_ftype_di_di;
          break;

        default:
          gcc_unreachable ();
        }

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_sse_maskcmpv4sf3
          || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
        type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_sse2_maskcmpv2df3
          || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
        type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }
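
  /* Rows whose name field is 0 (the MMX pack, cvt and shift rows above)
     are skipped by this loop; they need hand-picked types and are
     registered explicitly below.  */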
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == MASK_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
14122 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
14123 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
14124 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
14126 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
14127 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
14128 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
14129 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
14130 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
14131 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
14132 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
14133 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
14134 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
14135 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
14136 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
14138 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
14140 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
14141 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
14143 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
14144 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
14145 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
14146 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
14148 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
14149 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
14150 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
14151 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
14153 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
14155 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
14157 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
14158 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
14159 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
14160 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
14161 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
14162 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
14164 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
14197 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
14199 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
14200 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
14202 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
14203 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
14205 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
14206 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
14207 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
14208 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
14209 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
14211 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
14212 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
14213 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
14214 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
14216 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
14217 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
14219 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
14221 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
14222 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
14224 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
14225 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
14226 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
14227 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
14228 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
14230 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
14232 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
14233 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
14234 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
14235 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
14237 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
14238 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
14239 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
14241 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
14242 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
14243 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
14244 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
14246 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
14247 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
14248 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
14250 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
14251 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
14253 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
14254 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
14256 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
14257 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
14258 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
14260 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
14261 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
14262 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
14264 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
14265 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
14267 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
14268 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
14269 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
14270 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
14272 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
14273 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
14274 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
14275 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
14277 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
14278 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
14280 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
  /* Prescott New Instructions.  */
  def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
               void_ftype_pcvoid_unsigned_unsigned,
               IX86_BUILTIN_MONITOR);
  def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
               void_ftype_unsigned_unsigned,
               IX86_BUILTIN_MWAIT);
  def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
               v4sf_ftype_v4sf,
               IX86_BUILTIN_MOVSHDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
               v4sf_ftype_v4sf,
               IX86_BUILTIN_MOVSLDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
               v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
  /* Access to the vec_init patterns.  */
  ftype = build_function_type_list (V2SI_type_node, integer_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
               ftype, IX86_BUILTIN_VEC_INIT_V2SI);

  ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
                                    short_integer_type_node,
                                    short_integer_type_node,
                                    short_integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
               ftype, IX86_BUILTIN_VEC_INIT_V4HI);

  ftype = build_function_type_list (V8QI_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
               ftype, IX86_BUILTIN_VEC_INIT_V8QI);
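
  /* These feed the mmintrin.h-style wrappers; a _mm_set_* style wrapper
     can expand to, e.g., __builtin_ia32_vec_init_v2si (i0, i1).  This is
     an illustrative sketch; the actual wrapper names and their argument
     order live in the intrinsics headers, not here.  */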
  /* Access to the vec_extract patterns.  */
  ftype = build_function_type_list (double_type_node, V2DF_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
               ftype, IX86_BUILTIN_VEC_EXT_V2DF);

  ftype = build_function_type_list (long_long_integer_type_node,
                                    V2DI_type_node, integer_type_node,
                                    NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
               ftype, IX86_BUILTIN_VEC_EXT_V2DI);

  ftype = build_function_type_list (float_type_node, V4SF_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
               ftype, IX86_BUILTIN_VEC_EXT_V4SF);

  ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
               ftype, IX86_BUILTIN_VEC_EXT_V4SI);

  ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
               ftype, IX86_BUILTIN_VEC_EXT_V8HI);

  ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
               ftype, IX86_BUILTIN_VEC_EXT_V4HI);

  ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
               ftype, IX86_BUILTIN_VEC_EXT_V2SI);
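
  /* The element selector passed to the vec_ext builtins must be a
     compile-time constant; get_element_number below rejects anything
     else with an error.  */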
  /* Access to the vec_set patterns.  */
  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
                                    intHI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
               ftype, IX86_BUILTIN_VEC_SET_V8HI);

  ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
                                    intHI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
               ftype, IX86_BUILTIN_VEC_SET_V4HI);
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat, xops[3];
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* The insn must want input operands in the same modes as the
     result.  */
  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
              && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* ??? Using ix86_fixup_binary_operands is problematic when
     we've got mismatched modes.  Fake it.  */

  xops[0] = target;
  xops[1] = op0;
  xops[2] = op1;

  if (tmode == mode0 && tmode == mode1)
    {
      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
      op0 = xops[1];
      op1 = xops[2];
    }
  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
    {
      op0 = force_reg (mode0, op0);
      op1 = force_reg (mode1, op1);
      target = gen_reg_rtx (tmode);
    }

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (enum insn_code icode, tree arglist)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}
/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
                          rtx target, int do_load)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
        op0 = safe_vector_operand (op0, mode0);

      if ((optimize && !register_operand (op0, mode0))
          || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
                         rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
14596 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
14599 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
14603 tree arg0 = TREE_VALUE (arglist);
14604 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14605 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14606 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14608 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
14609 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
14610 enum rtx_code comparison = d->comparison;
14612 if (VECTOR_MODE_P (mode0))
14613 op0 = safe_vector_operand (op0, mode0);
14614 if (VECTOR_MODE_P (mode1))
14615 op1 = safe_vector_operand (op1, mode1);
14617 /* Swap operands if we have a comparison that isn't available in hardware. */
14619 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
14626 target = gen_reg_rtx (SImode);
14627 emit_move_insn (target, const0_rtx);
14628 target = gen_rtx_SUBREG (QImode, target, 0);
14630 if ((optimize && !register_operand (op0, mode0))
14631 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14632 op0 = copy_to_mode_reg (mode0, op0);
14633 if ((optimize && !register_operand (op1, mode1))
14634 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14635 op1 = copy_to_mode_reg (mode1, op1);
14637 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14638 pat = GEN_FCN (d->icode) (op0, op1);
14642 emit_insn (gen_rtx_SET (VOIDmode,
14643 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
14644 gen_rtx_fmt_ee (comparison, QImode,
14648 return SUBREG_REG (target);
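/* Illustrative user-level sketch (not compiled here; assumes
   xmmintrin.h) of the comi expansion above: the compare sets EFLAGS,
   and the flag is materialized via a setcc into the low byte of a
   zeroed SImode register, which is why SUBREG_REG (target) is what
   gets returned.  */
#if 0
#include <xmmintrin.h>

int
example_comieq (__m128 a, __m128 b)
{
  /* Expands to comiss followed by a sete into the result's low byte.  */
  return _mm_comieq_ss (a, b);
}
#endif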
14651 /* Return the integer constant in ARG. Constrain it to be in the range
14652 of the subparts of VEC_TYPE; issue an error if not. */
14655 get_element_number (tree vec_type, tree arg)
14657 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
14659 if (!host_integerp (arg, 1)
14660 || (elt = tree_low_cst (arg, 1), elt > max))
14662 error ("selector must be an integer constant in the range 0..%wi", max);
14669 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
14670 ix86_expand_vector_init. We DO have language-level syntax for this, in
14671 the form of (type){ init-list }. Except that since we can't place emms
14672 instructions from inside the compiler, we can't allow the use of MMX
14673 registers unless the user explicitly asks for it. So we do *not* define
14674 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
14675 we have builtins invoked by mmintrin.h that give us license to emit
14676 these sorts of instructions. */
14679 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
14681 enum machine_mode tmode = TYPE_MODE (type);
14682 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
14683 int i, n_elt = GET_MODE_NUNITS (tmode);
14684 rtvec v = rtvec_alloc (n_elt);
14686 gcc_assert (VECTOR_MODE_P (tmode));
14688 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
14690 rtx x = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14691 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
14694 gcc_assert (arglist == NULL);
14696 if (!target || !register_operand (target, tmode))
14697 target = gen_reg_rtx (tmode);
14699 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
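/* A user-level sketch (not compiled here; assumes mmintrin.h) of how
   this expander is reached: mmintrin.h routes the _mm_set_* intrinsics
   through the __builtin_ia32_vec_init_* builtins, keeping MMX register
   use opt-in as described in the comment above.  */
#if 0
#include <mmintrin.h>

__m64
example_vec_init (int hi, int lo)
{
  /* Funnels into ix86_expand_vec_init_builtin via
     __builtin_ia32_vec_init_v2si.  */
  return _mm_set_pi32 (hi, lo);
}
#endif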
14703 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
14704 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
14705 had a language-level syntax for referencing vector elements. */
14708 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
14710 enum machine_mode tmode, mode0;
14715 arg0 = TREE_VALUE (arglist);
14716 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14718 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14719 elt = get_element_number (TREE_TYPE (arg0), arg1);
14721 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14722 mode0 = TYPE_MODE (TREE_TYPE (arg0));
14723 gcc_assert (VECTOR_MODE_P (mode0));
14725 op0 = force_reg (mode0, op0);
14727 if (optimize || !target || !register_operand (target, tmode))
14728 target = gen_reg_rtx (tmode);
14730 ix86_expand_vector_extract (true, target, op0, elt);
14735 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
14736 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
14737 a language-level syntax for referencing vector elements. */
14740 ix86_expand_vec_set_builtin (tree arglist)
14742 enum machine_mode tmode, mode1;
14743 tree arg0, arg1, arg2;
14747 arg0 = TREE_VALUE (arglist);
14748 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14749 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14751 tmode = TYPE_MODE (TREE_TYPE (arg0));
14752 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14753 gcc_assert (VECTOR_MODE_P (tmode));
14755 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
14756 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
14757 elt = get_element_number (TREE_TYPE (arg0), arg2);
14759 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
14760 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
14762 op0 = force_reg (tmode, op0);
14763 op1 = force_reg (mode1, op1);
14765 ix86_expand_vector_set (true, op0, op1, elt);
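/* User-level sketch (not compiled here; assumes emmintrin.h and SSE2)
   of the constraint enforced by get_element_number above: the element
   selector must be a compile-time integer constant within the vector's
   subparts, or the expander reports an error.  */
#if 0
#include <emmintrin.h>

__m128i
example_vec_set (__m128i v, int x)
{
  /* OK: 3 is a constant in 0..7 for an 8-element HImode vector.  */
  return _mm_insert_epi16 (v, x, 3);
}
#endif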
14770 /* Expand an expression EXP that calls a built-in function,
14771 with result going to TARGET if that's convenient
14772 (and in mode MODE if that's convenient).
14773 SUBTARGET may be used as the target for computing one of EXP's operands.
14774 IGNORE is nonzero if the value is to be ignored. */
14777 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14778 enum machine_mode mode ATTRIBUTE_UNUSED,
14779 int ignore ATTRIBUTE_UNUSED)
14781 const struct builtin_description *d;
14783 enum insn_code icode;
14784 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14785 tree arglist = TREE_OPERAND (exp, 1);
14786 tree arg0, arg1, arg2;
14787 rtx op0, op1, op2, pat;
14788 enum machine_mode tmode, mode0, mode1, mode2;
14789 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14793 case IX86_BUILTIN_EMMS:
14794 emit_insn (gen_mmx_emms ());
14797 case IX86_BUILTIN_SFENCE:
14798 emit_insn (gen_sse_sfence ());
14801 case IX86_BUILTIN_MASKMOVQ:
14802 case IX86_BUILTIN_MASKMOVDQU:
14803 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14804 ? CODE_FOR_mmx_maskmovq
14805 : CODE_FOR_sse2_maskmovdqu);
14806 /* Note the arg order is different from the operand order. */
14807 arg1 = TREE_VALUE (arglist);
14808 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14809 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14810 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14811 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14812 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14813 mode0 = insn_data[icode].operand[0].mode;
14814 mode1 = insn_data[icode].operand[1].mode;
14815 mode2 = insn_data[icode].operand[2].mode;
14817 op0 = force_reg (Pmode, op0);
14818 op0 = gen_rtx_MEM (mode1, op0);
14820 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14821 op0 = copy_to_mode_reg (mode0, op0);
14822 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14823 op1 = copy_to_mode_reg (mode1, op1);
14824 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14825 op2 = copy_to_mode_reg (mode2, op2);
14826 pat = GEN_FCN (icode) (op0, op1, op2);
14832 case IX86_BUILTIN_SQRTSS:
14833 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
14834 case IX86_BUILTIN_RSQRTSS:
14835 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
14836 case IX86_BUILTIN_RCPSS:
14837 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
14839 case IX86_BUILTIN_LOADUPS:
14840 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14842 case IX86_BUILTIN_STOREUPS:
14843 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14845 case IX86_BUILTIN_LOADHPS:
14846 case IX86_BUILTIN_LOADLPS:
14847 case IX86_BUILTIN_LOADHPD:
14848 case IX86_BUILTIN_LOADLPD:
14849 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
14850 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
14851 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
14852 : CODE_FOR_sse2_loadlpd);
14853 arg0 = TREE_VALUE (arglist);
14854 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14855 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14856 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14857 tmode = insn_data[icode].operand[0].mode;
14858 mode0 = insn_data[icode].operand[1].mode;
14859 mode1 = insn_data[icode].operand[2].mode;
14861 op0 = force_reg (mode0, op0);
14862 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14863 if (optimize || target == 0
14864 || GET_MODE (target) != tmode
14865 || !register_operand (target, tmode))
14866 target = gen_reg_rtx (tmode);
14867 pat = GEN_FCN (icode) (target, op0, op1);
14873 case IX86_BUILTIN_STOREHPS:
14874 case IX86_BUILTIN_STORELPS:
14875 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
14876 : CODE_FOR_sse_storelps);
14877 arg0 = TREE_VALUE (arglist);
14878 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14879 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14880 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14881 mode0 = insn_data[icode].operand[0].mode;
14882 mode1 = insn_data[icode].operand[1].mode;
14884 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14885 op1 = force_reg (mode1, op1);
14887 pat = GEN_FCN (icode) (op0, op1);
14893 case IX86_BUILTIN_MOVNTPS:
14894 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14895 case IX86_BUILTIN_MOVNTQ:
14896 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14898 case IX86_BUILTIN_LDMXCSR:
14899 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14900 target = assign_386_stack_local (SImode, SLOT_TEMP);
14901 emit_move_insn (target, op0);
14902 emit_insn (gen_sse_ldmxcsr (target));
14905 case IX86_BUILTIN_STMXCSR:
14906 target = assign_386_stack_local (SImode, SLOT_TEMP);
14907 emit_insn (gen_sse_stmxcsr (target));
14908 return copy_to_mode_reg (SImode, target);
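/* Illustrative user-level sketch (not compiled here; assumes
   xmmintrin.h) of the two cases above: ldmxcsr and stmxcsr only accept
   memory operands, hence the bounce through the SLOT_TEMP stack
   temporary in both directions.  */
#if 0
#include <xmmintrin.h>

void
example_enable_ftz (void)
{
  /* Read MXCSR via stmxcsr, set the flush-to-zero bit (0x8000), and
     write it back via ldmxcsr; each access uses a stack slot.  */
  _mm_setcsr (_mm_getcsr () | 0x8000);
}
#endif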
14910 case IX86_BUILTIN_SHUFPS:
14911 case IX86_BUILTIN_SHUFPD:
14912 icode = (fcode == IX86_BUILTIN_SHUFPS
14913 ? CODE_FOR_sse_shufps
14914 : CODE_FOR_sse2_shufpd);
14915 arg0 = TREE_VALUE (arglist);
14916 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14917 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14918 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14919 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14920 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14921 tmode = insn_data[icode].operand[0].mode;
14922 mode0 = insn_data[icode].operand[1].mode;
14923 mode1 = insn_data[icode].operand[2].mode;
14924 mode2 = insn_data[icode].operand[3].mode;
14926 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14927 op0 = copy_to_mode_reg (mode0, op0);
14928 if ((optimize && !register_operand (op1, mode1))
14929 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
14930 op1 = copy_to_mode_reg (mode1, op1);
14931 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14933 /* @@@ better error message */
14934 error ("mask must be an immediate");
14935 return gen_reg_rtx (tmode);
14937 if (optimize || target == 0
14938 || GET_MODE (target) != tmode
14939 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14940 target = gen_reg_rtx (tmode);
14941 pat = GEN_FCN (icode) (target, op0, op1, op2);
14947 case IX86_BUILTIN_PSHUFW:
14948 case IX86_BUILTIN_PSHUFD:
14949 case IX86_BUILTIN_PSHUFHW:
14950 case IX86_BUILTIN_PSHUFLW:
14951 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14952 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14953 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14954 : CODE_FOR_mmx_pshufw);
14955 arg0 = TREE_VALUE (arglist);
14956 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14957 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14958 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14959 tmode = insn_data[icode].operand[0].mode;
14960 mode1 = insn_data[icode].operand[1].mode;
14961 mode2 = insn_data[icode].operand[2].mode;
14963 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14964 op0 = copy_to_mode_reg (mode1, op0);
14965 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14967 /* @@@ better error message */
14968 error ("mask must be an immediate");
14972 || GET_MODE (target) != tmode
14973 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14974 target = gen_reg_rtx (tmode);
14975 pat = GEN_FCN (icode) (target, op0, op1);
14981 case IX86_BUILTIN_PSLLDQI128:
14982 case IX86_BUILTIN_PSRLDQI128:
14983 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14984 : CODE_FOR_sse2_lshrti3);
14985 arg0 = TREE_VALUE (arglist);
14986 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14987 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14988 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14989 tmode = insn_data[icode].operand[0].mode;
14990 mode1 = insn_data[icode].operand[1].mode;
14991 mode2 = insn_data[icode].operand[2].mode;
14993 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14995 op0 = copy_to_reg (op0);
14996 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14998 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
15000 error ("shift must be an immediate");
15003 target = gen_reg_rtx (V2DImode);
15004 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
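/* User-level sketch (not compiled here; assumes emmintrin.h): the
   TImode shift insns selected above count in bits, so the intrinsic
   wrapper scales its byte count by 8 before the immediate reaches this
   expander.  */
#if 0
#include <emmintrin.h>

__m128i
example_byte_shift (__m128i x)
{
  /* Shift left by 3 bytes; arrives here as an immediate of 24 bits.  */
  return _mm_slli_si128 (x, 3);
}
#endif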
15010 case IX86_BUILTIN_FEMMS:
15011 emit_insn (gen_mmx_femms ());
15014 case IX86_BUILTIN_PAVGUSB:
15015 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
15017 case IX86_BUILTIN_PF2ID:
15018 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
15020 case IX86_BUILTIN_PFACC:
15021 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
15023 case IX86_BUILTIN_PFADD:
15024 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
15026 case IX86_BUILTIN_PFCMPEQ:
15027 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
15029 case IX86_BUILTIN_PFCMPGE:
15030 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
15032 case IX86_BUILTIN_PFCMPGT:
15033 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
15035 case IX86_BUILTIN_PFMAX:
15036 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
15038 case IX86_BUILTIN_PFMIN:
15039 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
15041 case IX86_BUILTIN_PFMUL:
15042 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
15044 case IX86_BUILTIN_PFRCP:
15045 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
15047 case IX86_BUILTIN_PFRCPIT1:
15048 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
15050 case IX86_BUILTIN_PFRCPIT2:
15051 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
15053 case IX86_BUILTIN_PFRSQIT1:
15054 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
15056 case IX86_BUILTIN_PFRSQRT:
15057 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
15059 case IX86_BUILTIN_PFSUB:
15060 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
15062 case IX86_BUILTIN_PFSUBR:
15063 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
15065 case IX86_BUILTIN_PI2FD:
15066 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
15068 case IX86_BUILTIN_PMULHRW:
15069 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
15071 case IX86_BUILTIN_PF2IW:
15072 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
15074 case IX86_BUILTIN_PFNACC:
15075 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
15077 case IX86_BUILTIN_PFPNACC:
15078 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
15080 case IX86_BUILTIN_PI2FW:
15081 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
15083 case IX86_BUILTIN_PSWAPDSI:
15084 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
15086 case IX86_BUILTIN_PSWAPDSF:
15087 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
15089 case IX86_BUILTIN_SQRTSD:
15090 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
15091 case IX86_BUILTIN_LOADUPD:
15092 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
15093 case IX86_BUILTIN_STOREUPD:
15094 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
15096 case IX86_BUILTIN_MFENCE:
15097 emit_insn (gen_sse2_mfence ());
15099 case IX86_BUILTIN_LFENCE:
15100 emit_insn (gen_sse2_lfence ());
15103 case IX86_BUILTIN_CLFLUSH:
15104 arg0 = TREE_VALUE (arglist);
15105 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15106 icode = CODE_FOR_sse2_clflush;
15107 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
15108 op0 = copy_to_mode_reg (Pmode, op0);
15110 emit_insn (gen_sse2_clflush (op0));
15113 case IX86_BUILTIN_MOVNTPD:
15114 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
15115 case IX86_BUILTIN_MOVNTDQ:
15116 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
15117 case IX86_BUILTIN_MOVNTI:
15118 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
15120 case IX86_BUILTIN_LOADDQU:
15121 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
15122 case IX86_BUILTIN_STOREDQU:
15123 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
15125 case IX86_BUILTIN_MONITOR:
15126 arg0 = TREE_VALUE (arglist);
15127 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15128 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15129 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15130 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15131 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
15133 op0 = copy_to_mode_reg (SImode, op0);
15135 op1 = copy_to_mode_reg (SImode, op1);
15137 op2 = copy_to_mode_reg (SImode, op2);
15138 emit_insn (gen_sse3_monitor (op0, op1, op2));
15141 case IX86_BUILTIN_MWAIT:
15142 arg0 = TREE_VALUE (arglist);
15143 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15144 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15145 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15147 op0 = copy_to_mode_reg (SImode, op0);
15149 op1 = copy_to_mode_reg (SImode, op1);
15150 emit_insn (gen_sse3_mwait (op0, op1));
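/* User-level sketch (not compiled here; assumes pmmintrin.h and SSE3)
   of the MONITOR/MWAIT cases above; the hardware takes the monitor
   address and the hint words in fixed registers, which is why each
   operand is forced into a register first.  */
#if 0
#include <pmmintrin.h>

void
example_monitor_wait (void const *p)
{
  _mm_monitor (p, 0, 0);	/* expands via gen_sse3_monitor  */
  _mm_mwait (0, 0);		/* expands via gen_sse3_mwait  */
}
#endif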
15153 case IX86_BUILTIN_LDDQU:
15154 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
15157 case IX86_BUILTIN_VEC_INIT_V2SI:
15158 case IX86_BUILTIN_VEC_INIT_V4HI:
15159 case IX86_BUILTIN_VEC_INIT_V8QI:
15160 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
15162 case IX86_BUILTIN_VEC_EXT_V2DF:
15163 case IX86_BUILTIN_VEC_EXT_V2DI:
15164 case IX86_BUILTIN_VEC_EXT_V4SF:
15165 case IX86_BUILTIN_VEC_EXT_V4SI:
15166 case IX86_BUILTIN_VEC_EXT_V8HI:
15167 case IX86_BUILTIN_VEC_EXT_V2SI:
15168 case IX86_BUILTIN_VEC_EXT_V4HI:
15169 return ix86_expand_vec_ext_builtin (arglist, target);
15171 case IX86_BUILTIN_VEC_SET_V8HI:
15172 case IX86_BUILTIN_VEC_SET_V4HI:
15173 return ix86_expand_vec_set_builtin (arglist);
15179 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15180 if (d->code == fcode)
15182 /* Compares are treated specially. */
15183 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15184 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
15185 || d->icode == CODE_FOR_sse2_maskcmpv2df3
15186 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15187 return ix86_expand_sse_compare (d, arglist, target);
15189 return ix86_expand_binop_builtin (d->icode, arglist, target);
15192 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15193 if (d->code == fcode)
15194 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
15196 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15197 if (d->code == fcode)
15198 return ix86_expand_sse_comi (d, arglist, target);
15200 gcc_unreachable ();
15203 /* Store OPERAND to memory after reload is completed. This means
15204 that we can't easily use assign_stack_local. */
15206 ix86_force_to_memory (enum machine_mode mode, rtx operand)
15210 gcc_assert (reload_completed);
15211 if (TARGET_RED_ZONE)
15213 result = gen_rtx_MEM (mode,
15214 gen_rtx_PLUS (Pmode,
15216 GEN_INT (-RED_ZONE_SIZE)));
15217 emit_move_insn (result, operand);
15219 else if (!TARGET_RED_ZONE && TARGET_64BIT)
15225 operand = gen_lowpart (DImode, operand);
15229 gen_rtx_SET (VOIDmode,
15230 gen_rtx_MEM (DImode,
15231 gen_rtx_PRE_DEC (DImode,
15232 stack_pointer_rtx)),
15236 gcc_unreachable ();
15238 result = gen_rtx_MEM (mode, stack_pointer_rtx);
15247 split_di (&operand, 1, operands, operands + 1);
15249 gen_rtx_SET (VOIDmode,
15250 gen_rtx_MEM (SImode,
15251 gen_rtx_PRE_DEC (Pmode,
15252 stack_pointer_rtx)),
15255 gen_rtx_SET (VOIDmode,
15256 gen_rtx_MEM (SImode,
15257 gen_rtx_PRE_DEC (Pmode,
15258 stack_pointer_rtx)),
15263 /* It is better to store HImode values as SImode. */
15264 if (!TARGET_PARTIAL_REG_STALL)
15265 operand = gen_lowpart (SImode, operand);
15269 gen_rtx_SET (VOIDmode,
15270 gen_rtx_MEM (GET_MODE (operand),
15271 gen_rtx_PRE_DEC (SImode,
15272 stack_pointer_rtx)),
15276 gcc_unreachable ();
15278 result = gen_rtx_MEM (mode, stack_pointer_rtx);
15283 /* Free the operand from memory. */
15285 ix86_free_from_memory (enum machine_mode mode)
15287 if (!TARGET_RED_ZONE)
15291 if (mode == DImode || TARGET_64BIT)
15293 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
15297 /* Use LEA to deallocate stack space. In peephole2 it will be converted
15298 to a pop or add instruction if registers are available. */
15299 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
15300 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
15305 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
15306 QImode must go into class Q_REGS.
15307 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
15308 movdf to do mem-to-mem moves through integer regs. */
15310 ix86_preferred_reload_class (rtx x, enum reg_class class)
15312 /* We're only allowed to return a subclass of CLASS. Many of the
15313 following checks fail for NO_REGS, so eliminate that early. */
15314 if (class == NO_REGS)
15317 /* All classes can load zeros. */
15318 if (x == CONST0_RTX (GET_MODE (x)))
15321 /* Floating-point constants need more complex checks. */
15322 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
15324 /* General regs can load everything. */
15325 if (reg_class_subset_p (class, GENERAL_REGS))
15328 /* Floats can load 0 and 1 plus some others. Note that we eliminated
15329 zero above. We only want to wind up preferring 80387 registers if
15330 we plan on doing computation with them. */
15332 && (TARGET_MIX_SSE_I387
15333 || !(TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (x))))
15334 && standard_80387_constant_p (x))
15336 /* Limit class to non-sse. */
15337 if (class == FLOAT_SSE_REGS)
15339 if (class == FP_TOP_SSE_REGS)
15341 if (class == FP_SECOND_SSE_REGS)
15342 return FP_SECOND_REG;
15343 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
15349 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
15351 if (MAYBE_SSE_CLASS_P (class) && CONSTANT_P (x))
15354 /* Generally when we see PLUS here, it's the function invariant
15355 (plus soft-fp const_int), which can only be computed into general registers. */
15357 if (GET_CODE (x) == PLUS)
15358 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
15360 /* QImode constants are easy to load, but non-constant QImode data
15361 must go into Q_REGS. */
15362 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
15364 if (reg_class_subset_p (class, Q_REGS))
15366 if (reg_class_subset_p (Q_REGS, class))
15374 /* If we are copying between general and FP registers, we need a memory
15375 location. The same is true for SSE and MMX registers.
15377 The macro can't work reliably when one of the CLASSES is a class containing
15378 registers from multiple units (SSE, MMX, integer). We avoid this by never
15379 combining those units in a single alternative in the machine description.
15380 Ensure that this constraint holds to avoid unexpected surprises.
15382 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
15383 enforce these sanity checks. */
15386 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
15387 enum machine_mode mode, int strict)
15389 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
15390 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
15391 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
15392 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
15393 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
15394 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
15396 gcc_assert (!strict);
15400 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
15403 /* ??? This is a lie. We do have moves between mmx/general and
15404 mmx/sse2 registers. But by saying we need secondary memory we discourage the
15405 register allocator from using the mmx registers unless needed. */
15406 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
15409 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
15411 /* SSE1 doesn't have any direct moves from other classes. */
15415 /* If the target says that inter-unit moves are more expensive
15416 than moving through memory, then don't generate them. */
15417 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
15420 /* Between SSE and general, we have moves no larger than word size. */
15421 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
15424 /* ??? For the cost of one register reformat penalty, we could use
15425 the same instructions to move SFmode and DFmode data, but the
15426 relevant move patterns don't support those alternatives. */
15427 if (mode == SFmode || mode == DFmode)
15434 /* Return true if the registers in CLASS cannot represent the change from
15435 modes FROM to TO. */
15438 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
15439 enum reg_class class)
15444 /* x87 registers can't do subreg at all, as all values are reformatted
15445 to extended precision. */
15446 if (MAYBE_FLOAT_CLASS_P (class))
15449 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
15451 /* Vector registers do not support QI or HImode loads. If we don't
15452 disallow a change to these modes, reload will assume it's ok to
15453 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
15454 the vec_dupv4hi pattern. */
15455 if (GET_MODE_SIZE (from) < 4)
15458 /* Vector registers do not support subreg with nonzero offsets, which
15459 are otherwise valid for integer registers. Since we can't see
15460 whether we have a nonzero offset from here, prohibit all
15461 nonparadoxical subregs changing size. */
15462 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
15469 /* Return the cost of moving data from a register in class CLASS1 to
15470 one in class CLASS2.
15472 It is not required that the cost always equal 2 when FROM is the same as TO;
15473 on some machines it is expensive to move between registers if they are not
15474 general registers. */
15477 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
15478 enum reg_class class2)
15480 /* In case we require secondary memory, compute cost of the store followed
15481 by load. In order to avoid bad register allocation choices, we need
15482 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
15484 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
15488 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
15489 MEMORY_MOVE_COST (mode, class1, 1));
15490 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
15491 MEMORY_MOVE_COST (mode, class2, 1));
15493 /* In the case of copying from a general purpose register we may emit multiple
15494 stores followed by a single load, causing a memory size mismatch stall.
15495 Count this as an arbitrarily high cost of 20. */
15496 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
15499 /* In the case of FP/MMX moves, the registers actually overlap, and we
15500 have to switch modes in order to treat them differently. */
15501 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
15502 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
15508 /* Moves between SSE/MMX and integer unit are expensive. */
15509 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
15510 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
15511 return ix86_cost->mmxsse_to_integer;
15512 if (MAYBE_FLOAT_CLASS_P (class1))
15513 return ix86_cost->fp_move;
15514 if (MAYBE_SSE_CLASS_P (class1))
15515 return ix86_cost->sse_move;
15516 if (MAYBE_MMX_CLASS_P (class1))
15517 return ix86_cost->mmx_move;
15521 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
15524 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
15526 /* Flags, and only flags, can hold CCmode values. */
15527 if (CC_REGNO_P (regno))
15528 return GET_MODE_CLASS (mode) == MODE_CC;
15529 if (GET_MODE_CLASS (mode) == MODE_CC
15530 || GET_MODE_CLASS (mode) == MODE_RANDOM
15531 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
15533 if (FP_REGNO_P (regno))
15534 return VALID_FP_MODE_P (mode);
15535 if (SSE_REGNO_P (regno))
15537 /* We implement the move patterns for all vector modes into and
15538 out of SSE registers, even when no operation instructions are available. */
15540 return (VALID_SSE_REG_MODE (mode)
15541 || VALID_SSE2_REG_MODE (mode)
15542 || VALID_MMX_REG_MODE (mode)
15543 || VALID_MMX_REG_MODE_3DNOW (mode));
15545 if (MMX_REGNO_P (regno))
15547 /* We implement the move patterns for 3DNOW modes even in MMX mode,
15548 so if the register is available at all, then we can move data of
15549 the given mode into or out of it. */
15550 return (VALID_MMX_REG_MODE (mode)
15551 || VALID_MMX_REG_MODE_3DNOW (mode));
15554 if (mode == QImode)
15556 /* Take care with QImode values: they can live in non-QI regs,
15557 but then they cause partial register stalls. */
15558 if (regno < 4 || TARGET_64BIT)
15560 if (!TARGET_PARTIAL_REG_STALL)
15562 return reload_in_progress || reload_completed;
15564 /* We handle both integers and floats in the general purpose registers. */
15565 else if (VALID_INT_MODE_P (mode))
15567 else if (VALID_FP_MODE_P (mode))
15569 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
15570 on to use that value in smaller contexts, this can easily force a
15571 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
15572 supporting DImode, allow it. */
15573 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
15579 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
15580 tieable integer mode. */
15583 ix86_tieable_integer_mode_p (enum machine_mode mode)
15592 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
15595 return TARGET_64BIT;
15602 /* Return true if MODE1 is accessible in a register that can hold MODE2
15603 without copying. That is, all register classes that can hold MODE2
15604 can also hold MODE1. */
15607 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
15609 if (mode1 == mode2)
15612 if (ix86_tieable_integer_mode_p (mode1)
15613 && ix86_tieable_integer_mode_p (mode2))
15616 /* MODE2 being XFmode implies fp stack or general regs, which means we
15617 can tie any smaller floating point modes to it. Note that we do not
15618 tie this with TFmode. */
15619 if (mode2 == XFmode)
15620 return mode1 == SFmode || mode1 == DFmode;
15622 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
15623 that we can tie it with SFmode. */
15624 if (mode2 == DFmode)
15625 return mode1 == SFmode;
15627 /* If MODE2 is only appropriate for an SSE register, then tie with
15628 any other mode acceptable to SSE registers. */
15629 if (GET_MODE_SIZE (mode2) >= 8
15630 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
15631 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
15633 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
15634 with any other mode acceptable to MMX registers. */
15635 if (GET_MODE_SIZE (mode2) == 8
15636 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
15637 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
15642 /* Return the cost of moving data of mode M between a
15643 register and memory. A value of 2 is the default; this cost is
15644 relative to those in `REGISTER_MOVE_COST'.
15646 If moving between registers and memory is more expensive than
15647 between two registers, you should define this macro to express the
15650 relative cost. Model also the increased cost of moving QImode registers in non Q_REGS classes. */
15654 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
15656 if (FLOAT_CLASS_P (class))
15673 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
15675 if (SSE_CLASS_P (class))
15678 switch (GET_MODE_SIZE (mode))
15692 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
15694 if (MMX_CLASS_P (class))
15697 switch (GET_MODE_SIZE (mode))
15708 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
15710 switch (GET_MODE_SIZE (mode))
15714 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
15715 : ix86_cost->movzbl_load);
15717 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
15718 : ix86_cost->int_store[0] + 4);
15721 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
15723 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
15724 if (mode == TFmode)
15726 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
15727 * (((int) GET_MODE_SIZE (mode)
15728 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
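/* Worked example of the multi-word fall-through above, assuming a
   32-bit target where UNITS_PER_WORD is 4: loading a DImode value
   costs 2 * ix86_cost->int_load[2], since (8 + 4 - 1) / 4 == 2
   word-sized moves are needed.  */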
15732 /* Compute a (partial) cost for rtx X. Return true if the complete
15733 cost has been computed, and false if subexpressions should be
15734 scanned. In either case, *TOTAL contains the cost result. */
15737 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
15739 enum machine_mode mode = GET_MODE (x);
15747 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
15749 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
15751 else if (flag_pic && SYMBOLIC_CONST (x)
15753 || (GET_CODE (x) != LABEL_REF
15754 && (GET_CODE (x) != SYMBOL_REF
15755 || !SYMBOL_REF_LOCAL_P (x)))))
15762 if (mode == VOIDmode)
15765 switch (standard_80387_constant_p (x))
15770 default: /* Other constants */
15775 /* Start with (MEM (SYMBOL_REF)), since that's where
15776 it'll probably end up. Add a penalty for size. */
15777 *total = (COSTS_N_INSNS (1)
15778 + (flag_pic != 0 && !TARGET_64BIT)
15779 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
15785 /* The zero extension is often completely free on x86_64, so make
15786 it as cheap as possible. */
15787 if (TARGET_64BIT && mode == DImode
15788 && GET_MODE (XEXP (x, 0)) == SImode)
15790 else if (TARGET_ZERO_EXTEND_WITH_AND)
15791 *total = COSTS_N_INSNS (ix86_cost->add);
15793 *total = COSTS_N_INSNS (ix86_cost->movzx);
15797 *total = COSTS_N_INSNS (ix86_cost->movsx);
15801 if (GET_CODE (XEXP (x, 1)) == CONST_INT
15802 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
15804 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15807 *total = COSTS_N_INSNS (ix86_cost->add);
15810 if ((value == 2 || value == 3)
15811 && ix86_cost->lea <= ix86_cost->shift_const)
15813 *total = COSTS_N_INSNS (ix86_cost->lea);
15823 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15825 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15827 if (INTVAL (XEXP (x, 1)) > 32)
15828 *total = COSTS_N_INSNS (ix86_cost->shift_const + 2);
15830 *total = COSTS_N_INSNS (ix86_cost->shift_const * 2);
15834 if (GET_CODE (XEXP (x, 1)) == AND)
15835 *total = COSTS_N_INSNS (ix86_cost->shift_var * 2);
15837 *total = COSTS_N_INSNS (ix86_cost->shift_var * 6 + 2);
15842 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15843 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15845 *total = COSTS_N_INSNS (ix86_cost->shift_var);
15850 if (FLOAT_MODE_P (mode))
15852 *total = COSTS_N_INSNS (ix86_cost->fmul);
15857 rtx op0 = XEXP (x, 0);
15858 rtx op1 = XEXP (x, 1);
15860 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15862 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15863 for (nbits = 0; value != 0; value &= value - 1)
15867 /* This is arbitrary. */
15870 /* Compute costs correctly for widening multiplication. */
15871 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
15872 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
15873 == GET_MODE_SIZE (mode))
15875 int is_mulwiden = 0;
15876 enum machine_mode inner_mode = GET_MODE (XEXP (op0, 0));
15878 if (GET_CODE (op0) == GET_CODE (op1))
15879 is_mulwiden = 1, op1 = XEXP (op1, 0);
15880 else if (GET_CODE (op1) == CONST_INT)
15882 if (GET_CODE (op0) == SIGN_EXTEND)
15883 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
15886 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
15890 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
15893 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15894 + nbits * ix86_cost->mult_bit)
15895 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
15904 if (FLOAT_MODE_P (mode))
15905 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15907 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15911 if (FLOAT_MODE_P (mode))
15912 *total = COSTS_N_INSNS (ix86_cost->fadd);
15913 else if (GET_MODE_CLASS (mode) == MODE_INT
15914 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15916 if (GET_CODE (XEXP (x, 0)) == PLUS
15917 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15918 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15919 && CONSTANT_P (XEXP (x, 1)))
15921 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15922 if (val == 2 || val == 4 || val == 8)
15924 *total = COSTS_N_INSNS (ix86_cost->lea);
15925 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15926 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15928 *total += rtx_cost (XEXP (x, 1), outer_code);
15932 else if (GET_CODE (XEXP (x, 0)) == MULT
15933 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15935 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15936 if (val == 2 || val == 4 || val == 8)
15938 *total = COSTS_N_INSNS (ix86_cost->lea);
15939 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15940 *total += rtx_cost (XEXP (x, 1), outer_code);
15944 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15946 *total = COSTS_N_INSNS (ix86_cost->lea);
15947 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15948 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15949 *total += rtx_cost (XEXP (x, 1), outer_code);
15956 if (FLOAT_MODE_P (mode))
15958 *total = COSTS_N_INSNS (ix86_cost->fadd);
15966 if (!TARGET_64BIT && mode == DImode)
15968 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15969 + (rtx_cost (XEXP (x, 0), outer_code)
15970 << (GET_MODE (XEXP (x, 0)) != DImode))
15971 + (rtx_cost (XEXP (x, 1), outer_code)
15972 << (GET_MODE (XEXP (x, 1)) != DImode)));
15978 if (FLOAT_MODE_P (mode))
15980 *total = COSTS_N_INSNS (ix86_cost->fchs);
15986 if (!TARGET_64BIT && mode == DImode)
15987 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15989 *total = COSTS_N_INSNS (ix86_cost->add);
15993 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
15994 && XEXP (XEXP (x, 0), 1) == const1_rtx
15995 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
15996 && XEXP (x, 1) == const0_rtx)
15998 /* This kind of construct is implemented using test[bwl].
15999 Treat it as if we had an AND. */
16000 *total = (COSTS_N_INSNS (ix86_cost->add)
16001 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
16002 + rtx_cost (const1_rtx, outer_code));
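/* For reference, a sketch of the RTL shape matched above, e.g. for C
   source like "if (x & (1 << 5))":

     (compare (zero_extract (reg:SI x) (const_int 1) (const_int 5))
              (const_int 0))

   which is implemented with a test[bwl] instruction, hence the AND
   costing.  */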
16008 if (!TARGET_SSE_MATH
16010 || (mode == DFmode && !TARGET_SSE2))
16015 if (FLOAT_MODE_P (mode))
16016 *total = COSTS_N_INSNS (ix86_cost->fabs);
16020 if (FLOAT_MODE_P (mode))
16021 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
16025 if (XINT (x, 1) == UNSPEC_TP)
16036 static int current_machopic_label_num;
16038 /* Given a symbol name and its associated stub, write out the
16039 definition of the stub. */
16042 machopic_output_stub (FILE *file, const char *symb, const char *stub)
16044 unsigned int length;
16045 char *binder_name, *symbol_name, lazy_ptr_name[32];
16046 int label = ++current_machopic_label_num;
16048 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
16049 symb = (*targetm.strip_name_encoding) (symb);
16051 length = strlen (stub);
16052 binder_name = alloca (length + 32);
16053 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
16055 length = strlen (symb);
16056 symbol_name = alloca (length + 32);
16057 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
16059 sprintf (lazy_ptr_name, "L%d$lz", label);
16062 machopic_picsymbol_stub_section ();
16064 machopic_symbol_stub_section ();
16066 fprintf (file, "%s:\n", stub);
16067 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
16071 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
16072 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
16073 fprintf (file, "\tjmp %%edx\n");
16076 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
16078 fprintf (file, "%s:\n", binder_name);
16082 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
16083 fprintf (file, "\tpushl %%eax\n");
16086 fprintf (file, "\tpushl $%s\n", lazy_ptr_name);
16088 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
16090 machopic_lazy_symbol_ptr_section ();
16091 fprintf (file, "%s:\n", lazy_ptr_name);
16092 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
16093 fprintf (file, "\t.long %s\n", binder_name);
16095 #endif /* TARGET_MACHO */
16097 /* Order the registers for register allocator. */
16100 x86_order_regs_for_local_alloc (void)
16105 /* First allocate the local general purpose registers. */
16106 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
16107 if (GENERAL_REGNO_P (i) && call_used_regs[i])
16108 reg_alloc_order [pos++] = i;
16110 /* Global general purpose registers. */
16111 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
16112 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
16113 reg_alloc_order [pos++] = i;
16115 /* x87 registers come first in case we are doing FP math using them. */
16117 if (!TARGET_SSE_MATH)
16118 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
16119 reg_alloc_order [pos++] = i;
16121 /* SSE registers. */
16122 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16123 reg_alloc_order [pos++] = i;
16124 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16125 reg_alloc_order [pos++] = i;
16127 /* x87 registers. */
16128 if (TARGET_SSE_MATH)
16129 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
16130 reg_alloc_order [pos++] = i;
16132 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
16133 reg_alloc_order [pos++] = i;
16135 /* Initialize the rest of the array, as we do not allocate some registers at all. */
16137 while (pos < FIRST_PSEUDO_REGISTER)
16138 reg_alloc_order [pos++] = 0;
16141 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
16142 struct attribute_spec.handler. */
16144 ix86_handle_struct_attribute (tree *node, tree name,
16145 tree args ATTRIBUTE_UNUSED,
16146 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
16149 if (DECL_P (*node))
16151 if (TREE_CODE (*node) == TYPE_DECL)
16152 type = &TREE_TYPE (*node);
16157 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
16158 || TREE_CODE (*type) == UNION_TYPE)))
16160 warning (OPT_Wattributes, "%qs attribute ignored",
16161 IDENTIFIER_POINTER (name));
16162 *no_add_attrs = true;
16165 else if ((is_attribute_p ("ms_struct", name)
16166 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
16167 || ((is_attribute_p ("gcc_struct", name)
16168 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
16170 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
16171 IDENTIFIER_POINTER (name));
16172 *no_add_attrs = true;
16179 ix86_ms_bitfield_layout_p (tree record_type)
16181 return (TARGET_MS_BITFIELD_LAYOUT &&
16182 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
16183 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
16186 /* Returns an expression indicating where the this parameter is
16187 located on entry to the FUNCTION. */
16190 x86_this_parameter (tree function)
16192 tree type = TREE_TYPE (function);
16196 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
16197 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
16200 if (ix86_function_regparm (type, function) > 0)
16204 parm = TYPE_ARG_TYPES (type);
16205 /* Figure out whether or not the function has a variable number of arguments. */
16207 for (; parm; parm = TREE_CHAIN (parm))
16208 if (TREE_VALUE (parm) == void_type_node)
16210 /* If not, the this parameter is in the first argument. */
16214 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
16216 return gen_rtx_REG (SImode, regno);
16220 if (aggregate_value_p (TREE_TYPE (type), type))
16221 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
16223 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
16226 /* Determine whether x86_output_mi_thunk can succeed. */
16229 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
16230 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
16231 HOST_WIDE_INT vcall_offset, tree function)
16233 /* 64-bit can handle anything. */
16237 /* For 32-bit, everything's fine if we have one free register. */
16238 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
16241 /* Need a free register for vcall_offset. */
16245 /* Need a free register for GOT references. */
16246 if (flag_pic && !(*targetm.binds_local_p) (function))
16249 /* Otherwise ok. */
16253 /* Output the assembler code for a thunk function. THUNK_DECL is the
16254 declaration for the thunk function itself, FUNCTION is the decl for
16255 the target function. DELTA is an immediate constant offset to be
16256 added to THIS. If VCALL_OFFSET is nonzero, the word at
16257 *(*this + vcall_offset) should be added to THIS. */
16260 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
16261 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
16262 HOST_WIDE_INT vcall_offset, tree function)
16265 rtx this = x86_this_parameter (function);
16268 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
16269 pull it in now and let DELTA benefit. */
16272 else if (vcall_offset)
16274 /* Put the this parameter into %eax. */
16276 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
16277 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
16280 this_reg = NULL_RTX;
16282 /* Adjust the this parameter by a fixed constant. */
16285 xops[0] = GEN_INT (delta);
16286 xops[1] = this_reg ? this_reg : this;
16289 if (!x86_64_general_operand (xops[0], DImode))
16291 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
16293 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
16297 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
16300 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
16303 /* Adjust the this parameter by a value stored in the vtable. */
16307 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
16310 int tmp_regno = 2 /* ECX */;
16311 if (lookup_attribute ("fastcall",
16312 TYPE_ATTRIBUTES (TREE_TYPE (function))))
16313 tmp_regno = 0 /* EAX */;
16314 tmp = gen_rtx_REG (SImode, tmp_regno);
16317 xops[0] = gen_rtx_MEM (Pmode, this_reg);
16320 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
16322 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
16324 /* Adjust the this parameter. */
16325 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
16326 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
16328 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
16329 xops[0] = GEN_INT (vcall_offset);
16331 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
16332 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
16334 xops[1] = this_reg;
16336 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
16338 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
16341 /* If necessary, drop THIS back to its stack slot. */
16342 if (this_reg && this_reg != this)
16344 xops[0] = this_reg;
16346 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
16349 xops[0] = XEXP (DECL_RTL (function), 0);
16352 if (!flag_pic || (*targetm.binds_local_p) (function))
16353 output_asm_insn ("jmp\t%P0", xops);
16356 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
16357 tmp = gen_rtx_CONST (Pmode, tmp);
16358 tmp = gen_rtx_MEM (QImode, tmp);
16360 output_asm_insn ("jmp\t%A0", xops);
16365 if (!flag_pic || (*targetm.binds_local_p) (function))
16366 output_asm_insn ("jmp\t%P0", xops);
16371 rtx sym_ref = XEXP (DECL_RTL (function), 0);
16372 tmp = (gen_rtx_SYMBOL_REF
16374 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
16375 tmp = gen_rtx_MEM (QImode, tmp);
16377 output_asm_insn ("jmp\t%0", xops);
16380 #endif /* TARGET_MACHO */
16382 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
16383 output_set_got (tmp);
16386 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
16387 output_asm_insn ("jmp\t{*}%1", xops);
16393 x86_file_start (void)
16395 default_file_start ();
16396 if (X86_FILE_START_VERSION_DIRECTIVE)
16397 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
16398 if (X86_FILE_START_FLTUSED)
16399 fputs ("\t.global\t__fltused\n", asm_out_file);
16400 if (ix86_asm_dialect == ASM_INTEL)
16401 fputs ("\t.intel_syntax\n", asm_out_file);
16405 x86_field_alignment (tree field, int computed)
16407 enum machine_mode mode;
16408 tree type = TREE_TYPE (field);
16410 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
16412 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
16413 ? get_inner_array_type (type) : type);
16414 if (mode == DFmode || mode == DCmode
16415 || GET_MODE_CLASS (mode) == MODE_INT
16416 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
16417 return MIN (32, computed);
16421 /* Output assembler code to FILE to increment profiler label # LABELNO
16422 for profiling a function entry. */
16424 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
16429 #ifndef NO_PROFILE_COUNTERS
16430 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
16432 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
16436 #ifndef NO_PROFILE_COUNTERS
16437 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
16439 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
16443 #ifndef NO_PROFILE_COUNTERS
16444 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
16445 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
16447 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
16451 #ifndef NO_PROFILE_COUNTERS
16452 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
16453 PROFILE_COUNT_REGISTER);
16455 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
16459 /* We don't have exact information about the insn sizes, but we may assume
16460 quite safely that we are informed about all 1 byte insns and memory
16461 address sizes. This is enough to eliminate unnecessary padding in the vast majority of cases. */
16465 min_insn_size (rtx insn)
16469 if (!INSN_P (insn) || !active_insn_p (insn))
16472 /* Discard alignments we've emitted and jump instructions. */
16473 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
16474 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
16476 if (GET_CODE (insn) == JUMP_INSN
16477 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
16478 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
16481 /* Important case: calls are always 5 bytes.
16482 It is common to have many calls in a row. */
16483 if (GET_CODE (insn) == CALL_INSN
16484 && symbolic_reference_mentioned_p (PATTERN (insn))
16485 && !SIBLING_CALL_P (insn))
16487 if (get_attr_length (insn) <= 1)
16490 /* For normal instructions we may rely on the sizes of addresses
16491 and the presence of a symbol to require 4 bytes of encoding.
16492 This is not the case for jumps, where references are PC relative. */
16493 if (GET_CODE (insn) != JUMP_INSN)
16495 l = get_attr_length_address (insn);
16496 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
16505 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16-byte window. */
16509 ix86_avoid_jump_misspredicts (void)
16511 rtx insn, start = get_insns ();
16512 int nbytes = 0, njumps = 0;
16515 /* Look for all minimal intervals of instructions containing 4 jumps.
16516 The intervals are bounded by START and INSN. NBYTES is the total
16517 size of instructions in the interval including INSN and not including
16518 START. When NBYTES is smaller than 16 bytes, it is possible
16519 that the ends of START and INSN fall into the same 16-byte page.
16521 The smallest offset in the page at which INSN can start is the case where
16522 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
16523 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN). */
16525 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16528 nbytes += min_insn_size (insn);
16530 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
16531 INSN_UID (insn), min_insn_size (insn));
16532 if ((GET_CODE (insn) == JUMP_INSN
16533 && GET_CODE (PATTERN (insn)) != ADDR_VEC
16534 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
16535 || GET_CODE (insn) == CALL_INSN)
16542 start = NEXT_INSN (start);
16543 if ((GET_CODE (start) == JUMP_INSN
16544 && GET_CODE (PATTERN (start)) != ADDR_VEC
16545 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
16546 || GET_CODE (start) == CALL_INSN)
16547 njumps--, isjump = 1;
16550 nbytes -= min_insn_size (start);
16552 gcc_assert (njumps >= 0);
16554 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
16555 INSN_UID (start), INSN_UID (insn), nbytes);
16557 if (njumps == 3 && isjump && nbytes < 16)
16559 int padsize = 15 - nbytes + min_insn_size (insn);
16562 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
16563 INSN_UID (insn), padsize);
16564 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
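/* Illustrative worked example of the padding above: if INSN is the
   fourth jump in the window, the interval spans nbytes == 12 bytes,
   and INSN itself is 2 bytes long, we emit an align of
   15 - 12 + 2 = 5 bytes, so the four jumps can no longer share one
   16-byte window.  */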
16569 /* AMD Athlon works faster
16570 when RET is not the destination of a conditional jump or directly preceded
16571 by another jump instruction. We avoid the penalty by inserting a NOP just
16572 before the RET instructions in such cases. */
16574 ix86_pad_returns (void)
16579 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
16581 basic_block bb = e->src;
16582 rtx ret = BB_END (bb);
16584 bool replace = false;
16586 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
16587 || !maybe_hot_bb_p (bb))
16589 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
16590 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
16592 if (prev && GET_CODE (prev) == CODE_LABEL)
16597 FOR_EACH_EDGE (e, ei, bb->preds)
16598 if (EDGE_FREQUENCY (e) && e->src->index >= 0
16599 && !(e->flags & EDGE_FALLTHRU))
16604 prev = prev_active_insn (ret);
16606 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
16607 || GET_CODE (prev) == CALL_INSN))
16609 /* Empty functions get a branch mispredict even when the jump destination
16610 is not visible to us. */
16611 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
16616 emit_insn_before (gen_return_internal_long (), ret);
16622 /* Implement machine-specific optimizations. We implement padding of returns
16623 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
16627 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
16628 ix86_pad_returns ();
16629 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
16630 ix86_avoid_jump_misspredicts ();
16633 /* Return nonzero when a QImode register that must be represented via a REX prefix is used. */
16636 x86_extended_QIreg_mentioned_p (rtx insn)
16639 extract_insn_cached (insn);
16640 for (i = 0; i < recog_data.n_operands; i++)
16641 if (REG_P (recog_data.operand[i])
16642 && REGNO (recog_data.operand[i]) >= 4)
16647 /* Return nonzero when P points to a register encoded via a REX prefix.
16648 Called via for_each_rtx. */
16650 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
16652 unsigned int regno;
16655 regno = REGNO (*p);
16656 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
16659 /* Return true when INSN mentions a register that must be encoded using a REX prefix. */
16662 x86_extended_reg_mentioned_p (rtx insn)
16664 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */
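/* Editorial note on the negative path below: when the sign bit of IN is set,
   the signed FLOAT operation cannot be used directly.  The code halves the
   value while OR-ing the shifted-out bit back into bit 0
   (i0 = (in >> 1) | (in & 1)); keeping that "sticky" bit makes the
   conversion of i0 followed by the final doubling (f0 + f0) round to the
   same result a true unsigned conversion of IN would produce.  */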
void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  i1 = gen_reg_rtx (Pmode);
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
  expand_float (f0, i0, 0);
  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAR.  Return true if successful.  */
static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
                                   rtx target, rtx val)
{
  enum machine_mode smode, wsmode, wvmode;
  rtx x;

      if (!mmx_ok && !TARGET_SSE)
        return false;

      val = force_reg (GET_MODE_INNER (mode), val);
      x = gen_rtx_VEC_DUPLICATE (mode, val);
      emit_insn (gen_rtx_SET (VOIDmode, target, x));

      if (TARGET_SSE || TARGET_3DNOW_A)
        {
          val = gen_lowpart (SImode, val);
          x = gen_rtx_TRUNCATE (HImode, val);
          x = gen_rtx_VEC_DUPLICATE (mode, x);
          emit_insn (gen_rtx_SET (VOIDmode, target, x));
        }

      /* Replicate the value once into the next wider mode and recurse.  */
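      /* Editorial example: to broadcast a HImode value v into V8HImode,
         first form the SImode value (v << 16) | v and recurse to broadcast
         it into V4SImode; the V8HImode result is then just a lowpart view
         of that wider register.  */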
      val = convert_modes (wsmode, smode, val, true);
      x = expand_simple_binop (wsmode, ASHIFT, val,
                               GEN_INT (GET_MODE_BITSIZE (smode)),
                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
      val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wvmode);
      if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
        gcc_unreachable ();
      emit_move_insn (target, gen_lowpart (mode, x));
      return true;
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose low element is VAR, and other elements are zero.  Return true
   if successful.  */
static bool
ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
                                     rtx target, rtx var)
{
  enum machine_mode vsimode;
  rtx x;

      if (!mmx_ok && !TARGET_SSE)
        return false;

      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));

      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, target, x));

      vsimode = V4SImode;

      vsimode = V2SImode;

      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_low_nonzero (mmx_ok, vsimode, x, var))
        gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */
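/* Editorial example: for VALS = {1, 2, x, 4} with ONE_VAR == 2, the code
   below loads the constant vector {1, 2, 0, 4} from the pool and then
   overwrites element 2 with x via ix86_expand_vector_set.  */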
static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
                                 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  XVECEXP (vals, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));

      /* For the two element vectors, it's just as easy to use
         the general case.  */
      return false;

      /* There's no way to set one QImode entry easily.  Combine
         the variable value with its adjacent constant value, and
         promote to an HImode set.  */
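      /* Editorial example: for a V8QImode vector with ONE_VAR == 5, the
         variable byte and its even neighbour (element 4) are fused into one
         HImode value -- the variable byte shifted into bits 8-15 and the
         constant byte kept in bits 0-7 -- and stored as element
         5 >> 1 == 2 of the corresponding V4HImode vector.  */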
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
        {
          var = convert_modes (HImode, QImode, var, true);
          var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
                                     NULL_RTX, 1, OPTAB_LIB_WIDEN);
          x = GEN_INT (INTVAL (x) & 0xff);
        }
      else
        {
          var = convert_modes (HImode, QImode, var, true);
          x = gen_int_mode (INTVAL (x) << 8, HImode);
        }

      if (x != const0_rtx)
        var = expand_simple_binop (HImode, IOR, var, x, var,
                                   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}
/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */
static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
                                 rtx target, rtx vals)
{
  enum machine_mode half_mode = GET_MODE_INNER (mode);
  rtx op0 = NULL, op1 = NULL;
  bool use_vec_concat = false;

      if (!mmx_ok && !TARGET_SSE)
        break;

      /* For the two element vectors, we always implement VEC_CONCAT.  */
      op0 = XVECEXP (vals, 0, 0);
      op1 = XVECEXP (vals, 0, 1);
      use_vec_concat = true;
      half_mode = V2SFmode;

      half_mode = V2SImode;

      /* For V4SF and V4SI, we implement a concat of two V2 vectors.
         Recurse to load the two halves.  */
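      /* Editorial example: {a, b, c, d} in V4SFmode is built as
         VEC_CONCAT (V4SF, init ({a, b}), init ({c, d})), where each half is
         a V2SFmode register loaded by a recursive call.  */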
      op0 = gen_reg_rtx (half_mode);
      v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
      ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));

      op1 = gen_reg_rtx (half_mode);
      v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
      ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));

      use_vec_concat = true;

      gcc_unreachable ();

  if (use_vec_concat)
    {
      if (!register_operand (op0, half_mode))
        op0 = force_reg (half_mode, op0);
      if (!register_operand (op1, half_mode))
        op1 = force_reg (half_mode, op1);

      emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_rtx_VEC_CONCAT (mode, op0, op1)));
    }
  else
    {
      int i, j, n_elts, n_words, n_elt_per_word;
      enum machine_mode inner_mode;
      rtx words[4], shift;

      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
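      /* Editorial example: for V8QImode on a 32-bit target, n_words == 2
         and n_elt_per_word == 4; each SImode word is built high element
         first, so word 0 becomes (((e3 << 8 | e2) << 8 | e1) << 8 | e0),
         leaving element 0 in the least significant byte as little-endian
         layout requires.  */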
      for (i = 0; i < n_words; ++i)
        {
          rtx word = NULL_RTX;

          for (j = 0; j < n_elt_per_word; ++j)
            {
              rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
              elt = convert_modes (word_mode, inner_mode, elt, true);

              if (j == 0)
                word = elt;
              else
                {
                  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
                                              word, 1, OPTAB_LIB_WIDEN);
                  word = expand_simple_binop (word_mode, IOR, word, elt,
                                              word, 1, OPTAB_LIB_WIDEN);
                }
            }

          words[i] = word;
        }
      if (n_words == 1)
        emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)
        {
          rtx tmp = gen_reg_rtx (mode);
          emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
          emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
          emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
          emit_move_insn (target, tmp);
        }
      else if (n_words == 4)
        {
          rtx tmp = gen_reg_rtx (V4SImode);
          vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
          ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
          emit_move_insn (target, gen_lowpart (mode, tmp));
        }
      else
        gcc_unreachable ();
    }
}
/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */
void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;
  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
        n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
        all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
        all_same = false;
    }
  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
                                            XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */
  if (n_var == 1)
    {
      if (all_const_zero && one_var == 0
          && ix86_expand_vector_init_low_nonzero (mmx_ok, mode, target,
                                                  XVECEXP (vals, 0, 0)))
        return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
        return;
    }

  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}
void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_merge = false;
  rtx tmp;

      tmp = gen_reg_rtx (GET_MODE_INNER (mode));
      ix86_expand_vector_extract (true, tmp, target, 1 - elt);

      tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);

      tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
        /* For the two element vectors, we implement a VEC_CONCAT with
           the extraction of the other element.  */

        tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
        tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

        if (elt == 0)
          op0 = val, op1 = tmp;
        else
          op0 = tmp, op1 = val;

        tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
        emit_insn (gen_rtx_SET (VOIDmode, target, tmp));

      use_vec_merge = true;
          /* tmp = target = A B C D  */
          tmp = copy_to_reg (target);
          /* target = A A B B  */
          emit_insn (gen_sse_unpcklps (target, target, target));
          /* target = X A B B  */
          ix86_expand_vector_set (false, target, val, 0);
          /* target = A X C D  */
          emit_insn (gen_sse_shufps_1 (target, target, tmp,
                                       GEN_INT (1), GEN_INT (0),
                                       GEN_INT (2+4), GEN_INT (3+4)));
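          /* Editorial note: in gen_sse_shufps_1, selector values 0-3 pick
             elements of the first source and 4-7 pick elements of the
             second, so (1, 0, 2+4, 3+4) builds
             { target[1], target[0], tmp[2], tmp[3] } = { A, X, C, D }.  */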
          /* tmp = target = A B C D  */
          tmp = copy_to_reg (target);
          /* tmp = X B C D  */
          ix86_expand_vector_set (false, tmp, val, 0);
          /* target = A B X D  */
          emit_insn (gen_sse_shufps_1 (target, target, tmp,
                                       GEN_INT (0), GEN_INT (1),
                                       GEN_INT (0+4), GEN_INT (3+4)));

          /* tmp = target = A B C D  */
          tmp = copy_to_reg (target);
          /* tmp = X B C D  */
          ix86_expand_vector_set (false, tmp, val, 0);
          /* target = A B C X  */
          emit_insn (gen_sse_shufps_1 (target, target, tmp,
                                       GEN_INT (0), GEN_INT (1),
                                       GEN_INT (2+4), GEN_INT (0+4)));
          gcc_unreachable ();

      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
        {
          use_vec_merge = true;
          break;
        }

      if (TARGET_SSE2)
        {
          /* With SSE2, use integer shuffles to swap element 0 and ELT,
             store into element 0, then shuffle them back.  */
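          /* Editorial example: for ELT == 2 the order array below becomes
             { 2, 1, 0, 3 }, so the first pshufd swaps elements 0 and 2, the
             recursive call stores VAL into element 0, and the second,
             identical pshufd swaps them back into place.  */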
          rtx order[4];

          order[0] = GEN_INT (elt);
          order[1] = const1_rtx;
          order[2] = const2_rtx;
          order[3] = GEN_INT (3);
          order[elt] = const0_rtx;

          emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
                                        order[1], order[2], order[3]));

          ix86_expand_vector_set (false, target, val, 0);

          emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
                                        order[1], order[2], order[3]));
        }
      else
        {
          /* For SSE1, we have to reuse the V4SF code.  */
          ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
                                  gen_lowpart (SFmode, val), elt);
        }
      use_vec_merge = TARGET_SSE2;

      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);

  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}
void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

      use_vec_extr = true;

          tmp = gen_reg_rtx (mode);
          emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
                                       GEN_INT (elt), GEN_INT (elt),
                                       GEN_INT (elt+4), GEN_INT (elt+4)));

          tmp = gen_reg_rtx (mode);
          emit_insn (gen_sse_unpckhps (tmp, vec, vec));

          gcc_unreachable ();

      use_vec_extr = true;
          tmp = gen_reg_rtx (mode);
          emit_insn (gen_sse2_pshufd_1 (tmp, vec,
                                        GEN_INT (elt), GEN_INT (elt),
                                        GEN_INT (elt), GEN_INT (elt)));

          tmp = gen_reg_rtx (mode);
          emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));

          gcc_unreachable ();

      use_vec_extr = true;

      /* For SSE1, we have to reuse the V4SF code.  */
      ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
                                  gen_lowpart (V4SFmode, vec), elt);

      use_vec_extr = TARGET_SSE2;

      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      /* ??? Could extract the appropriate HImode element and shift.  */

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == HImode)
        {
          tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
          target = gen_lowpart (SImode, target);
        }

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}
/* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
   pattern to reduce; DEST is the destination; IN is the input vector.  */
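/* Editorial trace, writing IN = { a, b, c, d } and <> for the FN operation:
   movhlps gives tmp1 = { c, d, c, d }; the first FN makes tmp2 with
   element 0 = a<>c and element 1 = b<>d; the shufps broadcast of element 1
   makes tmp3 = { b<>d, ... }; the final FN leaves the full reduction
   a<>b<>c<>d in element 0 of DEST.  */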
void
ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx tmp1, tmp2, tmp3;

  tmp1 = gen_reg_rtx (V4SFmode);
  tmp2 = gen_reg_rtx (V4SFmode);
  tmp3 = gen_reg_rtx (V4SFmode);

  emit_insn (gen_sse_movhlps (tmp1, in, in));
  emit_insn (fn (tmp2, tmp1, in));

  emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
                               GEN_INT (1), GEN_INT (1),
                               GEN_INT (1+4), GEN_INT (1+4)));
  emit_insn (fn (dest, tmp2, tmp3));
}
/* Implements the target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}
/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */
static tree
ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
                      tree inputs ATTRIBUTE_UNUSED,
                      tree clobbers)
{
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
                        clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
                        clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
                        clobbers);
  return clobbers;
}
/* Worker function for REVERSE_CONDITION.  */
enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  return (mode != CCFPmode && mode != CCFPUmode
          ? reverse_condition (code)
          : reverse_condition_maybe_unordered (code));
}
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */
const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[1])
      && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
    {
      if (REGNO (operands[0]) == FIRST_STACK_REG
          && TARGET_USE_FFREEP)
        return "ffreep\t%y0";
      return "fstp\t%y0";
    }

  if (STACK_TOP_P (operands[0]))
    return "fld%z1\t%y1";
/* Output code to perform a conditional jump to LABEL, if the C2 flag in
   the FP status register is set.  */
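/* Editorial note: fnstsw stores the FPU status word into %ax.  C2 is bit 10
   of the status word, i.e. bit 2 (mask 0x04) of %ah.  With TARGET_USE_SAHF
   the flags are copied into EFLAGS, where the UNORDERED test applies;
   otherwise the 0x04 bit of %ah is tested directly.  */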
void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_USE_SAHF)
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
                               gen_rtx_LABEL_REF (VOIDmode, label),
                               pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
  emit_jump_insn (temp);
}
/* Output code to perform a log1p XFmode calculation.  */
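/* Editorial note: fyl2xp1 computes y * log2 (x + 1) but is only specified
   for |x| < 1 - sqrt(2)/2 ~= 0.2929, which is exactly the threshold tested
   below; outside that range the code falls back to fyl2x on 1 + x.  In both
   cases y is loaded with ln (2) via fldln2, turning the base-2 logarithm
   into the natural logarithm that log1p requires.  */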
void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  emit_insn (gen_absxf2 (tmp, op1));
  emit_insn (gen_cmpxf (tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode)));
  emit_jump_insn (gen_bge (label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));

  emit_label (label2);
}
/* Solaris named-section hook.  Parameters are as for
   named_section_real.  */
static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
                                tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
               flags & SECTION_WRITE ? "aw" : "a");
      return;
    }
  default_elf_asm_named_section (name, flags, decl);
}
/* Return the mangling of TYPE if it is an extended fundamental type.  */
static const char *
ix86_mangle_fundamental_type (tree type)
{
  switch (TYPE_MODE (type))
    {
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}
/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
   register, so it is better to call __stack_chk_fail directly.  */
static tree
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
         ? default_external_stack_protect_fail ()
         : default_hidden_stack_protect_fail ();
}

#include "gt-i386.h"