/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"
#include "tree-gimple.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)		\
  ((mode) == QImode ? 0			\
   : (mode) == HImode ? 1		\
   : (mode) == SImode ? 2		\
   : (mode) == DImode ? 3		\
   : 4)
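
/* Usage sketch (illustrative, not part of the original source): the
   per-processor cost tables below are indexed through this macro, e.g.

     ix86_cost->mult_init[MODE_INDEX (SImode)]

   would be the cost of starting an SImode multiply on the current tuning
   target (field names as declared in i386.h).  QImode..DImode map to
   indices 0..3 and anything else falls through to index 4, which is why
   the multiply and divide cost arrays below have five entries.  */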
/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  2,			/* cost of an add instruction */
  3,			/* cost of a lea instruction */
  2,			/* variable shift costs */
  3,			/* constant shift costs */
  {3, 3, 3, 3, 5},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {3, 3, 3, 3, 5},	/* cost of a divide/mod */
  3,			/* cost of movsx */
  3,			/* cost of movzx */
  2,			/* cost for loading QImode using movzbl */
  {2, 2, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 2, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 2},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {2, 2, 2},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  3,			/* cost of moving MMX register */
  {3, 3},		/* cost of loading MMX registers
			   in SImode and DImode */
  {3, 3},		/* cost of storing MMX registers
			   in SImode and DImode */
  3,			/* cost of moving SSE register */
  {3, 3, 3},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {3, 3, 3},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  2,			/* cost of FADD and FSUB insns.  */
  2,			/* cost of FMUL instruction.  */
  2,			/* cost of FDIV instruction.  */
  2,			/* cost of FABS instruction.  */
  2,			/* cost of FCHS instruction.  */
  2,			/* cost of FSQRT instruction.  */
};
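
/* Note (illustrative sketch of logic that appears in override_options
   later in this file): the active table is selected at option-processing
   time, roughly as

     if (optimize_size)
       ix86_cost = &size_cost;
     else
       ix86_cost = processor_target_table[ix86_tune].cost;
*/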
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  3,			/* variable shift costs */
  2,			/* constant shift costs */
  {6, 6, 6, 6, 6},	/* cost of starting a multiply */
  1,			/* cost of multiply per each bit set */
  {23, 23, 23, 23, 23},	/* cost of a divide/mod */
  3,			/* cost of movsx */
  2,			/* cost of movzx */
  15,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {8, 8, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {4, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  23,			/* cost of FADD and FSUB insns.  */
  27,			/* cost of FMUL instruction.  */
  88,			/* cost of FDIV instruction.  */
  22,			/* cost of FABS instruction.  */
  24,			/* cost of FCHS instruction.  */
  122,			/* cost of FSQRT instruction.  */
};
static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  3,			/* variable shift costs */
  2,			/* constant shift costs */
  {12, 12, 12, 12, 12},	/* cost of starting a multiply */
  1,			/* cost of multiply per each bit set */
  {40, 40, 40, 40, 40},	/* cost of a divide/mod */
  3,			/* cost of movsx */
  2,			/* cost of movzx */
  15,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {8, 8, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {4, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  8,			/* cost of FADD and FSUB insns.  */
  16,			/* cost of FMUL instruction.  */
  73,			/* cost of FDIV instruction.  */
  3,			/* cost of FABS instruction.  */
  3,			/* cost of FCHS instruction.  */
  83,			/* cost of FSQRT instruction.  */
};
static const
struct processor_costs pentium_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  4,			/* variable shift costs */
  1,			/* constant shift costs */
  {11, 11, 11, 11, 11},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {25, 25, 25, 25, 25},	/* cost of a divide/mod */
  3,			/* cost of movsx */
  2,			/* cost of movzx */
  8,			/* "large" insn */
  6,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  8,			/* cost of moving MMX register */
  {8, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {8, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  3,			/* cost of FADD and FSUB insns.  */
  3,			/* cost of FMUL instruction.  */
  39,			/* cost of FDIV instruction.  */
  1,			/* cost of FABS instruction.  */
  1,			/* cost of FCHS instruction.  */
  70,			/* cost of FSQRT instruction.  */
};
static const
struct processor_costs pentiumpro_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  {4, 4, 4, 4, 4},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {17, 17, 17, 17, 17},	/* cost of a divide/mod */
  1,			/* cost of movsx */
  1,			/* cost of movzx */
  8,			/* "large" insn */
  2,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 2, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {2, 2, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  32,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  3,			/* cost of FADD and FSUB insns.  */
  5,			/* cost of FMUL instruction.  */
  56,			/* cost of FDIV instruction.  */
  2,			/* cost of FABS instruction.  */
  2,			/* cost of FCHS instruction.  */
  56,			/* cost of FSQRT instruction.  */
};
static const
struct processor_costs k6_cost = {
  1,			/* cost of an add instruction */
  2,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  {3, 3, 3, 3, 3},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {18, 18, 18, 18, 18},	/* cost of a divide/mod */
  2,			/* cost of movsx */
  2,			/* cost of movzx */
  8,			/* "large" insn */
  3,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {6, 6, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 4},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {2, 2, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  6,			/* MMX or SSE register to integer */
  32,			/* size of prefetch block */
  1,			/* number of parallel prefetches */
  2,			/* cost of FADD and FSUB insns.  */
  2,			/* cost of FMUL instruction.  */
  56,			/* cost of FDIV instruction.  */
  2,			/* cost of FABS instruction.  */
  2,			/* cost of FCHS instruction.  */
  56,			/* cost of FSQRT instruction.  */
};
static const
struct processor_costs athlon_cost = {
  1,			/* cost of an add instruction */
  2,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  {5, 5, 5, 5, 5},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},	/* cost of a divide/mod */
  1,			/* cost of movsx */
  1,			/* cost of movzx */
  8,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {3, 4, 3},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {3, 4, 3},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {4, 4, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {6, 6, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {4, 4},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 4},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 4, 6},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 4, 5},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  5,			/* MMX or SSE register to integer */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  4,			/* cost of FADD and FSUB insns.  */
  4,			/* cost of FMUL instruction.  */
  24,			/* cost of FDIV instruction.  */
  2,			/* cost of FABS instruction.  */
  2,			/* cost of FCHS instruction.  */
  35,			/* cost of FSQRT instruction.  */
};
static const
struct processor_costs k8_cost = {
  1,			/* cost of an add instruction */
  2,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  {3, 4, 3, 4, 5},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},	/* cost of a divide/mod */
  1,			/* cost of movsx */
  1,			/* cost of movzx */
  8,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {3, 4, 3},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {3, 4, 3},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {4, 4, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {6, 6, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {3, 3},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 4},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 3, 6},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 4, 5},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  5,			/* MMX or SSE register to integer */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  4,			/* cost of FADD and FSUB insns.  */
  4,			/* cost of FMUL instruction.  */
  19,			/* cost of FDIV instruction.  */
  2,			/* cost of FABS instruction.  */
  2,			/* cost of FCHS instruction.  */
  35,			/* cost of FSQRT instruction.  */
};
static const
struct processor_costs pentium4_cost = {
  1,			/* cost of an add instruction */
  3,			/* cost of a lea instruction */
  4,			/* variable shift costs */
  4,			/* constant shift costs */
  {15, 15, 15, 15, 15},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {56, 56, 56, 56, 56},	/* cost of a divide/mod */
  1,			/* cost of movsx */
  1,			/* cost of movzx */
  16,			/* "large" insn */
  2,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  12,			/* cost of moving SSE register */
  {12, 12, 12},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  10,			/* MMX or SSE register to integer */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  5,			/* cost of FADD and FSUB insns.  */
  7,			/* cost of FMUL instruction.  */
  43,			/* cost of FDIV instruction.  */
  2,			/* cost of FABS instruction.  */
  2,			/* cost of FCHS instruction.  */
  43,			/* cost of FSQRT instruction.  */
};
static const
struct processor_costs nocona_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  {10, 10, 10, 10, 10},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {66, 66, 66, 66, 66},	/* cost of a divide/mod */
  1,			/* cost of movsx */
  1,			/* cost of movzx */
  16,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {4, 4, 4},		/* cost of storing integer registers */
  3,			/* cost of reg,reg fld/fst */
  {12, 12, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 4},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  6,			/* cost of moving MMX register */
  {12, 12},		/* cost of loading MMX registers
			   in SImode and DImode */
  {12, 12},		/* cost of storing MMX registers
			   in SImode and DImode */
  6,			/* cost of moving SSE register */
  {12, 12, 12},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {12, 12, 12},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  8,			/* MMX or SSE register to integer */
  128,			/* size of prefetch block */
  8,			/* number of parallel prefetches */
  6,			/* cost of FADD and FSUB insns.  */
  8,			/* cost of FMUL instruction.  */
  40,			/* cost of FDIV instruction.  */
  3,			/* cost of FABS instruction.  */
  3,			/* cost of FCHS instruction.  */
  44,			/* cost of FSQRT instruction.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_fisttp = m_NOCONA;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
/* Branch hints were put in P4 based on simulation results, but after the
   P4 shipped, no performance benefit was observed from branch hints; they
   also increase code size.  As a result, icc never generates branch
   hints.  */
const int x86_branch_hints = 0;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
const int x86_decompose_lea = m_PENT4 | m_NOCONA;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just
   the lower part of scalar values in proper format, leaving the upper
   part undefined.  */
const int x86_sse_split_regs = m_ATHLON_K8;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;

/* ??? Allowing interunit moves makes it all too easy for the compiler to put
   integer data in xmm registers, which results in pretty abysmal code.  */
const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;

const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
/* Some CPU cores are not able to predict more than 4 branch instructions
   in a 16-byte window.  */
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
const int x86_use_bt = m_ATHLON_K8;
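
/* How the masks above are consumed (illustrative sketch; the real
   wrappers are the TARGET_* macros in i386.h, along the lines of

     #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)

   where TUNEMASK is (1 << ix86_tune), the bit for the CPU selected by
   -mtune).  So, for example, x86_use_leave above enables "leave" when
   tuning for the 386, K6, Athlon or K8.  */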
/* If the average insn count for a single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */
enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
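
/* Quick illustration (sketch; the actual macro lives in i386.h):

     #define REGNO_REG_CLASS(REGNO) (regclass_map[REGNO])

   so hard register 0 (%eax) maps to AREG, the FP stack top to
   FP_TOP_REG, and the extended integer and SSE registers to the
   NON_Q_REGS and SSE_REGS entries at the end of the table.  */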
624 /* The "default" register map used in 32bit mode. */
626 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
628 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
629 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
630 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
631 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
632 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
633 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
634 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
637 static int const x86_64_int_parameter_registers[6] =
639 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
640 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
643 static int const x86_64_int_return_registers[4] =
645 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
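
/* Illustration (not from the original source): under the x86-64 psABI a
   call such as f (a, b, c, d, e, g) with six integer arguments passes
   them, in order, in %rdi, %rsi, %rdx, %rcx, %r8 and %r9, matching the
   parameter table above; integer return values come back in %rax, with
   %rdx carrying the second half of a 128-bit value.  */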
648 /* The "default" register map used in 64bit mode. */
649 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
651 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
652 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
653 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
654 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
655 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
656 8,9,10,11,12,13,14,15, /* extended integer registers */
657 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8 for %eip (no gcc equivalent)
	9 for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to
   understand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
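
/* Worked example (illustrative): with the 64-bit values REGPARM_MAX == 6,
   SSE_REGPARM_MAX == 8 and UNITS_PER_WORD == 8, this comes to

     6 * 8 + 8 * 16 == 176 bytes,

   the size of the varargs register save area prescribed by the x86-64
   psABI.  */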
/* Define the structure for the machine field in struct function.  */

struct stack_local_entry GTY(())
{
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
   [va_arg registers]   |
   [frame]              |
   > to_allocate                              <- FRAME_POINTER
   [padding2]          /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;

/* Code model option as passed by user.  */
const char *ix86_cmodel_string;

enum cmodel ix86_cmodel;

const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;

const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which CPU we are scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which CPU and instruction set architecture to use.  */
const char *ix86_tune_string;		/* for -mtune=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */

/* Number of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* True if the SSE prefetch instruction is not a NOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number.  */
static int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
static void output_pic_addr_const (FILE *, rtx, int);
static void put_condition_code (enum rtx_code, enum machine_mode,
				int, int, FILE *);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
						   rtx *);
static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
						   enum machine_mode);
static rtx get_thread_pointer (int);
static rtx legitimize_tls_address (rtx, enum tls_model, int);
static void get_pc_thunk_name (char [32], unsigned int);
static rtx gen_push (rtx);
static int ix86_flags_dependant (rtx, rtx, enum attr_type);
static int ix86_agi_dependant (rtx, rtx, enum attr_type);
static struct machine_function * ix86_init_machine_status (void);
static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
static int ix86_nsaved_regs (void);
static void ix86_emit_save_regs (void);
static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
static HOST_WIDE_INT ix86_GOT_alias_set (void);
static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
static rtx ix86_expand_aligntest (rtx, int);
static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
static int ix86_issue_rate (void);
static int ix86_adjust_cost (rtx, rtx, rtx, int);
static int ia32_multipass_dfa_lookahead (void);
static void ix86_init_mmx_sse_builtins (void);
static rtx x86_this_parameter (tree);
static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				 HOST_WIDE_INT, tree);
static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void x86_file_start (void);
static void ix86_reorg (void);
static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
static tree ix86_build_builtin_va_list (void);
static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					 tree, int *, int);
static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
static bool ix86_vector_mode_supported_p (enum machine_mode);

static int ix86_address_cost (rtx);
static bool ix86_cannot_force_const_mem (rtx);
static rtx ix86_delegitimize_address (rtx);

struct builtin_description;
static rtx ix86_expand_sse_comi (const struct builtin_description *,
				 tree, rtx);
static rtx ix86_expand_sse_compare (const struct builtin_description *,
				    tree, rtx);
static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_store_builtin (enum insn_code, tree);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
static int ix86_fp_comparison_cost (enum rtx_code code);
static unsigned int ix86_select_alt_pic_regnum (void);
static int ix86_save_reg (unsigned int, int);
static void ix86_compute_frame_layout (struct ix86_frame *);
static int ix86_comp_type_attributes (tree, tree);
static int ix86_function_regparm (tree, tree);
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall (tree, tree);
static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
static int ix86_value_regno (enum machine_mode);
static bool contains_128bit_aligned_vector_p (tree);
static rtx ix86_struct_value_rtx (tree, int);
static bool ix86_ms_bitfield_layout_p (tree);
static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
static int extended_reg_mentioned_1 (rtx *, void *);
static bool ix86_rtx_costs (rtx, int, int, int *);
static int min_insn_size (rtx);
static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				    tree, bool);
static void ix86_init_builtins (void);
static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);

/* This function is only used on Solaris.  */
static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
     ATTRIBUTE_UNUSED;
/* Register class used for passing a given 64-bit part of the argument.
   These represent classes as documented by the psABI, with the exception
   of the SSESF and SSEDF classes, which are basically SSE class; gcc just
   uses SFmode or DFmode moves instead of DImode to avoid reformatting
   penalties.

   Similarly, we play games with INTEGERSI_CLASS to use cheaper SImode
   moves whenever possible (the upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] = {
  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
  "sseup", "x87", "x87up", "cplx87", "no"
};

#define MAX_CLASSES 4
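
/* Classification example (illustrative, following the psABI rules): a
   16-byte argument

     struct s { double d; int i; };

   classifies as { X86_64_SSEDF_CLASS, X86_64_INTEGERSI_CLASS }: the first
   eightbyte is passed in an SSE register with a DFmode move, the second
   in a general register with an SImode move (its upper half is padding).
   Classification here never needs more than MAX_CLASSES eightbytes.  */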
/* Table of constants used by fldpi, fldln2, etc.  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;
static void init_ext_80387_constants (void);
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif
struct gcc_target targetm = TARGET_INITIALIZER;

/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif

/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */
void
override_options (void)
{
  int i;
  int ix86_tune_defaulted = 0;

  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;	/* Processor costs */
      const int target_enable;			/* Target flags to enable.  */
      const int target_disable;			/* Target flags to disable.  */
      const int align_loop;			/* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
      {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
      {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
      {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
    };
  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;

  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	{
	  PTA_SSE = 1,
	  PTA_SSE2 = 2,
	  PTA_SSE3 = 4,
	  PTA_MMX = 8,
	  PTA_PREFETCH_SSE = 16,
	  PTA_3DNOW = 32,
	  PTA_3DNOW_A = 64,
	  PTA_64BIT = 128
	} flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
				       | PTA_MMX | PTA_PREFETCH_SSE},
      {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
					| PTA_MMX | PTA_PREFETCH_SSE},
      {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
				     | PTA_MMX | PTA_PREFETCH_SSE},
      {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
				   | PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				   | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
					 | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				     | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
			       | PTA_SSE | PTA_SSE2 },
      {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
			   | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
				| PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
				 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
				  | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);
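
  /* Example (illustrative): -march=athlon-xp selects PROCESSOR_ATHLON
     and, via the PTA_* flags in the row above, turns on MMX, 3DNow!, the
     Athlon 3DNow! extensions and SSE, unless the user explicitly disabled
     one of them; the flag handling is in the loop further below.  */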
#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 1;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 0;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 0;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }
  if (!ix86_tune_string && ix86_arch_string)
    ix86_tune_string = ix86_arch_string;
  if (!ix86_tune_string)
    {
      ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
      ix86_tune_defaulted = 1;
    }
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (!strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model %qs not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model %<large%> not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_tune = ix86_arch;
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(target_flags_explicit & MASK_MMX))
	  target_flags |= MASK_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(target_flags_explicit & MASK_3DNOW))
	  target_flags |= MASK_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(target_flags_explicit & MASK_3DNOW_A))
	  target_flags |= MASK_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(target_flags_explicit & MASK_SSE))
	  target_flags |= MASK_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(target_flags_explicit & MASK_SSE2))
	  target_flags |= MASK_SSE2;
	if (processor_alias_table[i].flags & PTA_SSE3
	    && !(target_flags_explicit & MASK_SSE3))
	  target_flags |= MASK_SSE3;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 "
		 "instruction set");
	break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  {
	    if (ix86_tune_defaulted)
	      {
		ix86_tune_string = "x86-64";
		for (i = 0; i < pta_size; i++)
		  if (! strcmp (ix86_tune_string,
				processor_alias_table[i].name))
		    break;
		ix86_tune = processor_alias_table[i].processor;
	      }
	    else
	      error ("CPU you selected does not support x86-64 "
		     "instruction set");
	  }
	/* Intel CPUs have always interpreted SSE prefetch instructions as
	   NOPs; so, we can enable SSE prefetch instructions even when
	   -mtune (rather than -march) points us to a processor that has them.
	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
	   higher processors.  */
	if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
	  x86_prefetch_sse = true;
	break;
      }
  if (i == pta_size)
    error ("bad value (%s) for -mtune= switch", ix86_tune_string);
  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;
  target_flags |= processor_target_table[ix86_tune].target_enable;
  target_flags &= ~processor_target_table[ix86_tune].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;
  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }
  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_tune].align_func;
    }
  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
				   ? TARGET_64BIT ? 128 : 32
				   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
	       TARGET_64BIT ? 4 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }
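
  /* Worked example (illustrative): -mpreferred-stack-boundary=4 yields
     (1 << 4) * BITS_PER_UNIT == 16 * 8 == 128 bits, i.e. the 16-byte
     alignment that SSE's __m128 requires; the minimum accepted is 2
     (4-byte alignment) in 32-bit mode and 4 (16-byte) in 64-bit mode.  */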
  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }
  if (ix86_tls_dialect_string)
    {
      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
	ix86_tls_dialect = TLS_DIALECT_GNU;
      else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
	ix86_tls_dialect = TLS_DIALECT_SUN;
      else
	error ("bad value (%s) for -mtls-dialect= switch",
	       ix86_tls_dialect_string);
    }
  /* Keep nonleaf frame pointers.  */
  if (flag_omit_frame_pointer)
    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_unsafe_math_optimizations)
    target_flags &= ~MASK_IEEE_FP;
  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  /* Likewise, if the target doesn't have a 387, or we've specified
     software floating point, don't use 387 inline intrinsics.  */
  if (!TARGET_80387)
    target_flags |= MASK_NO_FANCY_MATH_387;

  /* Turn on SSE2 builtins for -msse3.  */
  if (TARGET_SSE3)
    target_flags |= MASK_SSE2;

  /* Turn on SSE builtins for -msse2.  */
  if (TARGET_SSE2)
    target_flags |= MASK_SSE;

  /* Turn on MMX builtins for -msse.  */
  if (TARGET_SSE)
    {
      target_flags |= MASK_MMX & ~target_flags_explicit;
      x86_prefetch_sse = true;
    }

  /* Turn on MMX builtins for 3Dnow.  */
  if (TARGET_3DNOW)
    target_flags |= MASK_MMX;
  if (TARGET_64BIT)
    {
      if (TARGET_ALIGN_DOUBLE)
	error ("-malign-double makes no sense in the 64bit mode");
      if (TARGET_RTD)
	error ("-mrtd calling convention not supported in the 64bit mode");

      /* Enable by default the SSE and MMX builtins.  Do allow the user to
	 explicitly disable any of these.  In particular, disabling SSE and
	 MMX for kernel code is extremely useful.  */
      target_flags
	|= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
	    & ~target_flags_explicit);

      if (TARGET_SSE)
	ix86_fpmath = FPMATH_SSE;
    }
  else
    {
      ix86_fpmath = FPMATH_387;
      /* The i386 ABI does not specify a red zone.  It still makes sense to
	 use one when the programmer takes care to keep the stack from
	 being destroyed.  */
      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
	target_flags |= MASK_NO_RED_ZONE;
    }
  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
	ix86_fpmath = FPMATH_387;
      else if (! strcmp (ix86_fpmath_string, "sse"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE;
	}
      else if (! strcmp (ix86_fpmath_string, "387,sse")
	       || ! strcmp (ix86_fpmath_string, "sse,387"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if (!TARGET_80387)
	    {
	      warning ("387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
	}
      else
	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
    }
  /* If the i387 is disabled, then do not return values in it.  */
  if (!TARGET_80387)
    target_flags &= ~MASK_FLOAT_RETURNS;

  if ((x86_accumulate_outgoing_args & TUNEMASK)
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }

  /* When scheduling description is not available, disable scheduler pass
     so it won't slow down the compilation and make x87 code slower.  */
  if (!TARGET_SCHEDULE)
    flag_schedule_insns_after_reload = flag_schedule_insns = 0;
}
void
optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif

  /* The default values of these switches depend on TARGET_64BIT, which is
     not known at this moment.  Mark these values with 2 and let the user
     override them.  In case there is no command line option specifying
     them, we will set the defaults in override_options.  */
  if (optimize >= 1)
    flag_omit_frame_pointer = 2;
  flag_pcc_struct_return = 2;
  flag_asynchronous_unwind_tables = 2;
#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
  SUBTARGET_OPTIMIZATION_OPTIONS;
#endif
}
1613 /* Table of valid machine attributes. */
1614 const struct attribute_spec ix86_attribute_table[] =
1616 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1617 /* Stdcall attribute says callee is responsible for popping arguments
1618 if they are not variable. */
1619 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1620 /* Fastcall attribute says callee is responsible for popping arguments
1621 if they are not variable. */
1622 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1623 /* Cdecl attribute says the callee is a normal C declaration */
1624 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1625 /* Regparm attribute specifies how many integer arguments are to be
1626 passed in registers. */
1627 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1628 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1629 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
1630 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
1631 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1633 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1634 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1635 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1636 SUBTARGET_ATTRIBUTE_TABLE,
1638 { NULL, 0, 0, false, false, false, NULL }
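/* As a usage sketch (hypothetical declarations), the conventions
   registered above are spelled in C like so:

       int __attribute__((stdcall))    f (int a, int b); // callee pops 8 bytes
       int __attribute__((fastcall))   g (int a, int b); // a in %ecx, b in %edx
       int __attribute__((regparm(3))) h (int a, int b, int c);
                                       // a in %eax, b in %edx, c in %ecx
       int __attribute__((cdecl))      k (int a, ...);   // caller pops

   The handlers below reject meaningless combinations such as fastcall
   with stdcall or with regparm.  */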
1641 /* Decide whether we can make a sibling call to a function. DECL is the
1642 declaration of the function being targeted by the call and EXP is the
1643 CALL_EXPR representing the call. */
1646 ix86_function_ok_for_sibcall (tree decl, tree exp)
1648 /* If we are generating position-independent code, we cannot sibcall
1649 optimize any indirect call, or a direct call to a global function,
1650 as the PLT requires %ebx be live. */
1651 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1654 /* If we are returning floats on the 80387 register stack, we cannot
1655 make a sibcall from a function that doesn't return a float to a
1656 function that does or, conversely, from a function that does return
1657 a float to a function that doesn't; the necessary stack adjustment
1658 would not be executed. */
1659 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1660 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1663 /* If this call is indirect, we'll need to be able to use a call-clobbered
1664 register for the address of the target function. Make sure that all
1665 such registers are not used for passing parameters. */
1666 if (!decl && !TARGET_64BIT)
1670 /* We're looking at the CALL_EXPR; we need the type of the function. */
1671 type = TREE_OPERAND (exp, 0); /* pointer expression */
1672 type = TREE_TYPE (type); /* pointer type */
1673 type = TREE_TYPE (type); /* function type */
1675 if (ix86_function_regparm (type, NULL) >= 3)
1677 /* ??? Need to count the actual number of registers to be used,
1678 not the possible number of registers. Fix later. */
1683 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1684 /* Dllimport'd functions are also called indirectly. */
1685 if (decl && lookup_attribute ("dllimport", DECL_ATTRIBUTES (decl))
1686 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
1690 /* Otherwise okay. That also includes certain types of indirect calls. */
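/* As an illustration of the PIC restriction above (example code, not
   from this file): compiling

       extern int g (int);
       int f (int x) { return g (x); }

   with -m32 -fpic cannot turn the call into a jump, because the call
   is routed through the PLT and the PLT entry expects %ebx to hold
   the GOT pointer, which a sibcall would not keep live.  */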
1694 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1695 arguments as in struct attribute_spec.handler. */
1697 ix86_handle_cdecl_attribute (tree *node, tree name,
1698 tree args ATTRIBUTE_UNUSED,
1699 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1701 if (TREE_CODE (*node) != FUNCTION_TYPE
1702 && TREE_CODE (*node) != METHOD_TYPE
1703 && TREE_CODE (*node) != FIELD_DECL
1704 && TREE_CODE (*node) != TYPE_DECL)
1706 warning ("%qs attribute only applies to functions",
1707 IDENTIFIER_POINTER (name));
1708 *no_add_attrs = true;
1712 if (is_attribute_p ("fastcall", name))
1714 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1716 error ("fastcall and stdcall attributes are not compatible");
1718 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1720 error ("fastcall and regparm attributes are not compatible");
1723 else if (is_attribute_p ("stdcall", name))
1725 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1727 error ("fastcall and stdcall attributes are not compatible");
1734 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
1735 *no_add_attrs = true;
1741 /* Handle a "regparm" attribute;
1742 arguments as in struct attribute_spec.handler. */
1744 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1745 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1747 if (TREE_CODE (*node) != FUNCTION_TYPE
1748 && TREE_CODE (*node) != METHOD_TYPE
1749 && TREE_CODE (*node) != FIELD_DECL
1750 && TREE_CODE (*node) != TYPE_DECL)
1752 warning ("%qs attribute only applies to functions",
1753 IDENTIFIER_POINTER (name));
1754 *no_add_attrs = true;
1760 cst = TREE_VALUE (args);
1761 if (TREE_CODE (cst) != INTEGER_CST)
1763 warning ("%qs attribute requires an integer constant argument",
1764 IDENTIFIER_POINTER (name));
1765 *no_add_attrs = true;
1767 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1769 warning ("argument to %qs attribute larger than %d",
1770 IDENTIFIER_POINTER (name), REGPARM_MAX);
1771 *no_add_attrs = true;
1774 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1776 error ("fastcall and regparm attributes are not compatible");
1783 /* Return 0 if the attributes for two types are incompatible, 1 if they
1784 are compatible, and 2 if they are nearly compatible (which causes a
1785 warning to be generated). */
1788 ix86_comp_type_attributes (tree type1, tree type2)
1790 /* Check for mismatch of non-default calling convention. */
1791 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1793 if (TREE_CODE (type1) != FUNCTION_TYPE)
1796 /* Check for mismatched fastcall types */
1797 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1798 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1801 /* Check for mismatched return types (cdecl vs stdcall). */
1802 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1803 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1805 if (ix86_function_regparm (type1, NULL)
1806 != ix86_function_regparm (type2, NULL))
1811 /* Return the regparm value for a function with the indicated TYPE and DECL.
1812 DECL may be NULL when calling function indirectly
1813 or considering a libcall. */
1816 ix86_function_regparm (tree type, tree decl)
1819 int regparm = ix86_regparm;
1820 bool user_convention = false;
1824 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1827 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1828 user_convention = true;
1831 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1834 user_convention = true;
1837 /* Use register calling convention for local functions when possible. */
1838 if (!TARGET_64BIT && !user_convention && decl
1839 && flag_unit_at_a_time && !profile_flag)
1841 struct cgraph_local_info *i = cgraph_local_info (decl);
1844 /* We can't use regparm(3) for nested functions as these use the
1845 static chain pointer in the third argument. */
1846 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1856 /* Return true if EAX is live at the start of the function. Used by
1857 ix86_expand_prologue to determine if we need special help before
1858 calling allocate_stack_worker. */
1861 ix86_eax_live_at_start_p (void)
1863 /* Cheat. Don't bother working forward from ix86_function_regparm
1864 to the function type to whether an actual argument is located in
1865 eax. Instead just look at cfg info, which is still close enough
1866 to correct at this point. This gives false positives for broken
1867 functions that might use uninitialized data that happens to be
1868 allocated in eax, but who cares? */
1869 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1872 /* Value is the number of bytes of arguments automatically
1873 popped when returning from a subroutine call.
1874 FUNDECL is the declaration node of the function (as a tree),
1875 FUNTYPE is the data type of the function (as a tree),
1876 or for a library call it is an identifier node for the subroutine name.
1877 SIZE is the number of bytes of arguments passed on the stack.
1879 On the 80386, the RTD insn may be used to pop them if the number
1880 of args is fixed, but if the number is variable then the caller
1881 must pop them all. RTD can't be used for library calls now
1882 because the library is compiled with the Unix compiler.
1883 Use of RTD is a selectable option, since it is incompatible with
1884 standard Unix calling sequences. If the option is not selected,
1885 the caller must always pop the args.
1887 The attribute stdcall is equivalent to RTD on a per module basis. */
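/* For instance, a stdcall function taking two ints returns with

       ret $8

   popping its own 8 bytes of stack arguments, while the default cdecl
   convention emits a plain "ret" and leaves the cleanup to the caller.  */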
1890 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1892 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1894 /* Cdecl functions override -mrtd, and never pop the stack. */
1895 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1897 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1899 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1900 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1904 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1905 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1906 == void_type_node)))
1910 /* Lose any fake structure return argument if it is passed on the stack. */
1911 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1913 && !KEEP_AGGREGATE_RETURN_POINTER)
1915 int nregs = ix86_function_regparm (funtype, fundecl);
1918 return GET_MODE_SIZE (Pmode);
1924 /* Argument support functions. */
1926 /* Return true when register may be used to pass function parameters. */
1928 ix86_function_arg_regno_p (int regno)
1932 return (regno < REGPARM_MAX
1933 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1934 if (SSE_REGNO_P (regno) && TARGET_SSE)
1936 /* RAX is used as hidden argument to va_arg functions. */
1939 for (i = 0; i < REGPARM_MAX; i++)
1940 if (regno == x86_64_int_parameter_registers[i])
1945 /* Return true if we do not know how to pass TYPE solely in registers. */
1948 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
1950 if (must_pass_in_stack_var_size_or_pad (mode, type))
1953 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1954 The layout_type routine is crafty and tries to trick us into passing
1955 currently unsupported vector types on the stack by using TImode. */
1956 return (!TARGET_64BIT && mode == TImode
1957 && type && TREE_CODE (type) != VECTOR_TYPE);
1960 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1961 for a call to a function whose data type is FNTYPE.
1962 For a library call, FNTYPE is 0. */
1965 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1966 tree fntype, /* tree ptr for function decl */
1967 rtx libname, /* SYMBOL_REF of library name or 0 */
1970 static CUMULATIVE_ARGS zero_cum;
1971 tree param, next_param;
1973 if (TARGET_DEBUG_ARG)
1975 fprintf (stderr, "\ninit_cumulative_args (");
1977 fprintf (stderr, "fntype code = %s, ret code = %s",
1978 tree_code_name[(int) TREE_CODE (fntype)],
1979 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1981 fprintf (stderr, "no fntype");
1984 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1989 /* Set up the number of registers to use for passing arguments. */
1991 cum->nregs = ix86_function_regparm (fntype, fndecl);
1993 cum->nregs = ix86_regparm;
1995 cum->sse_nregs = SSE_REGPARM_MAX;
1997 cum->mmx_nregs = MMX_REGPARM_MAX;
1998 cum->warn_sse = true;
1999 cum->warn_mmx = true;
2000 cum->maybe_vaarg = false;
2002 /* Use ecx and edx registers if function has fastcall attribute */
2003 if (fntype && !TARGET_64BIT)
2005 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2012 /* Determine if this function has variable arguments. This is
2013 indicated by the last argument being 'void_type_node' if there
2014 are no variable arguments. If there are variable arguments, then
2015 we won't pass anything in registers in 32-bit mode. */
2017 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2019 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2020 param != 0; param = next_param)
2022 next_param = TREE_CHAIN (param);
2023 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2034 cum->maybe_vaarg = true;
2038 if ((!fntype && !libname)
2039 || (fntype && !TYPE_ARG_TYPES (fntype)))
2040 cum->maybe_vaarg = 1;
2042 if (TARGET_DEBUG_ARG)
2043 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2048 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2049 But in the case of vector types, it is some vector mode.
2051 When we have only some of our vector ISA extensions enabled, there
2052 are some modes for which vector_mode_supported_p is false. For these
2053 modes, the generic vector support in gcc will choose some non-vector mode
2054 in order to implement the type. By computing the natural mode, we'll
2055 select the proper ABI location for the operand and not depend on whatever
2056 the middle-end decides to do with these vector types. */
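/* A sketch of the situation described above (hypothetical type): the
   generic vector

       typedef int v4si __attribute__ ((vector_size (16)));

   has natural mode V4SImode even when SSE is disabled and the middle
   end would lower the type to some non-vector mode; computing the
   natural mode keeps the ABI slot of such an argument stable across
   -msse settings.  */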
2058 static enum machine_mode
2059 type_natural_mode (tree type)
2061 enum machine_mode mode = TYPE_MODE (type);
2063 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2065 HOST_WIDE_INT size = int_size_in_bytes (type);
2066 if ((size == 8 || size == 16)
2067 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2068 && TYPE_VECTOR_SUBPARTS (type) > 1)
2070 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2072 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2073 mode = MIN_MODE_VECTOR_FLOAT;
2075 mode = MIN_MODE_VECTOR_INT;
2077 /* Get the mode which has this inner mode and number of units. */
2078 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2079 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2080 && GET_MODE_INNER (mode) == innermode)
2090 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2091 this may not agree with the mode that the type system has chosen for the
2092 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2093 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2096 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2101 if (orig_mode != BLKmode)
2102 tmp = gen_rtx_REG (orig_mode, regno);
2105 tmp = gen_rtx_REG (mode, regno);
2106 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2107 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2113 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2114 of this code is to classify each 8-byte chunk of the incoming argument by
2115 register class and assign registers accordingly. */
2117 /* Return the union class of CLASS1 and CLASS2.
2118 See the x86-64 PS ABI for details. */
2120 static enum x86_64_reg_class
2121 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2123 /* Rule #1: If both classes are equal, this is the resulting class. */
2124 if (class1 == class2)
2127 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is the other class. */
2129 if (class1 == X86_64_NO_CLASS)
2131 if (class2 == X86_64_NO_CLASS)
2134 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2135 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2136 return X86_64_MEMORY_CLASS;
2138 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2139 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2140 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2141 return X86_64_INTEGERSI_CLASS;
2142 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2143 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2144 return X86_64_INTEGER_CLASS;
2146 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, MEMORY is used. */
2148 if (class1 == X86_64_X87_CLASS
2149 || class1 == X86_64_X87UP_CLASS
2150 || class1 == X86_64_COMPLEX_X87_CLASS
2151 || class2 == X86_64_X87_CLASS
2152 || class2 == X86_64_X87UP_CLASS
2153 || class2 == X86_64_COMPLEX_X87_CLASS)
2154 return X86_64_MEMORY_CLASS;
2156 /* Rule #6: Otherwise class SSE is used. */
2157 return X86_64_SSE_CLASS;
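/* A worked example of the merge rules (illustrative):

       struct s { double d; long l; };   // 16 bytes, two eightbytes

   classifies the first eightbyte as SSEDF and the second as INTEGER;
   no rule demotes either, so the struct travels in one SSE register
   plus one integer register.  Had an int and a float shared a single
   eightbyte, rule #4 would merge SSESF with INTEGERSI into INTEGERSI.  */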
2160 /* Classify the argument of type TYPE and mode MODE.
2161 CLASSES will be filled by the register class used to pass each word
2162 of the operand. The number of words is returned. In case the parameter
2163 should be passed in memory, 0 is returned. As a special case for zero
2164 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2166 BIT_OFFSET is used internally for handling records; it specifies the
2167 offset in bits modulo 256 to avoid overflow cases.
2169 See the x86-64 PS ABI for details.
2173 classify_argument (enum machine_mode mode, tree type,
2174 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2176 HOST_WIDE_INT bytes =
2177 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2178 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2180 /* Variable sized entities are always passed/returned in memory. */
2184 if (mode != VOIDmode
2185 && targetm.calls.must_pass_in_stack (mode, type))
2188 if (type && AGGREGATE_TYPE_P (type))
2192 enum x86_64_reg_class subclasses[MAX_CLASSES];
2194 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2198 for (i = 0; i < words; i++)
2199 classes[i] = X86_64_NO_CLASS;
2201 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2202 signal the memory class, so handle this as a special case. */
2205 classes[0] = X86_64_NO_CLASS;
2209 /* Classify each field of record and merge classes. */
2210 if (TREE_CODE (type) == RECORD_TYPE)
2212 /* For classes, first merge in the fields of the subclasses. */
2213 if (TYPE_BINFO (type))
2215 tree binfo, base_binfo;
2218 for (binfo = TYPE_BINFO (type), basenum = 0;
2219 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2222 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2223 tree type = BINFO_TYPE (base_binfo);
2225 num = classify_argument (TYPE_MODE (type),
2227 (offset + bit_offset) % 256);
2230 for (i = 0; i < num; i++)
2232 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2234 merge_classes (subclasses[i], classes[i + pos]);
2238 /* And now merge the fields of the structure. */
2239 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2241 if (TREE_CODE (field) == FIELD_DECL)
2245 /* Bitfields are always classified as integer. Handle them
2246 early, since later code would consider them to be
2247 misaligned integers. */
2248 if (DECL_BIT_FIELD (field))
2250 for (i = int_bit_position (field) / 8 / 8;
2251 i < (int_bit_position (field)
2252 + tree_low_cst (DECL_SIZE (field), 0)
2255 merge_classes (X86_64_INTEGER_CLASS,
2260 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2261 TREE_TYPE (field), subclasses,
2262 (int_bit_position (field)
2263 + bit_offset) % 256);
2266 for (i = 0; i < num; i++)
2269 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2271 merge_classes (subclasses[i], classes[i + pos]);
2277 /* Arrays are handled as small records. */
2278 else if (TREE_CODE (type) == ARRAY_TYPE)
2281 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2282 TREE_TYPE (type), subclasses, bit_offset);
2286 /* The partial classes are now full classes. */
2287 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2288 subclasses[0] = X86_64_SSE_CLASS;
2289 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2290 subclasses[0] = X86_64_INTEGER_CLASS;
2292 for (i = 0; i < words; i++)
2293 classes[i] = subclasses[i % num];
2295 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2296 else if (TREE_CODE (type) == UNION_TYPE
2297 || TREE_CODE (type) == QUAL_UNION_TYPE)
2299 /* For classes, first merge in the fields of the subclasses. */
2300 if (TYPE_BINFO (type))
2302 tree binfo, base_binfo;
2305 for (binfo = TYPE_BINFO (type), basenum = 0;
2306 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2309 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2310 tree type = BINFO_TYPE (base_binfo);
2312 num = classify_argument (TYPE_MODE (type),
2314 (offset + (bit_offset % 64)) % 256);
2317 for (i = 0; i < num; i++)
2319 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2321 merge_classes (subclasses[i], classes[i + pos]);
2325 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2327 if (TREE_CODE (field) == FIELD_DECL)
2330 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2331 TREE_TYPE (field), subclasses,
2335 for (i = 0; i < num; i++)
2336 classes[i] = merge_classes (subclasses[i], classes[i]);
2343 /* Final merger cleanup. */
2344 for (i = 0; i < words; i++)
2346 /* If one class is MEMORY, everything should be passed in memory. */
2348 if (classes[i] == X86_64_MEMORY_CLASS)
2351 /* The X86_64_SSEUP_CLASS should always be preceded by
2352 X86_64_SSE_CLASS. */
2353 if (classes[i] == X86_64_SSEUP_CLASS
2354 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2355 classes[i] = X86_64_SSE_CLASS;
2357 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2358 if (classes[i] == X86_64_X87UP_CLASS
2359 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2360 classes[i] = X86_64_SSE_CLASS;
2365 /* Compute the alignment needed. We align all types to natural boundaries,
2366 with the exception of XFmode, which is aligned to 64 bits. */
2367 if (mode != VOIDmode && mode != BLKmode)
2369 int mode_alignment = GET_MODE_BITSIZE (mode);
2372 mode_alignment = 128;
2373 else if (mode == XCmode)
2374 mode_alignment = 256;
2375 if (COMPLEX_MODE_P (mode))
2376 mode_alignment /= 2;
2377 /* Misaligned fields are always returned in memory. */
2378 if (bit_offset % mode_alignment)
2382 /* For V1xx modes, just use the base mode. */
2383 if (VECTOR_MODE_P (mode)
2384 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2385 mode = GET_MODE_INNER (mode);
2387 /* Classification of atomic types. */
2397 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2398 classes[0] = X86_64_INTEGERSI_CLASS;
2400 classes[0] = X86_64_INTEGER_CLASS;
2404 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2409 if (!(bit_offset % 64))
2410 classes[0] = X86_64_SSESF_CLASS;
2412 classes[0] = X86_64_SSE_CLASS;
2415 classes[0] = X86_64_SSEDF_CLASS;
2418 classes[0] = X86_64_X87_CLASS;
2419 classes[1] = X86_64_X87UP_CLASS;
2422 classes[0] = X86_64_SSE_CLASS;
2423 classes[1] = X86_64_SSEUP_CLASS;
2426 classes[0] = X86_64_SSE_CLASS;
2429 classes[0] = X86_64_SSEDF_CLASS;
2430 classes[1] = X86_64_SSEDF_CLASS;
2433 classes[0] = X86_64_COMPLEX_X87_CLASS;
2436 /* These modes are larger than 16 bytes. */
2444 classes[0] = X86_64_SSE_CLASS;
2445 classes[1] = X86_64_SSEUP_CLASS;
2451 classes[0] = X86_64_SSE_CLASS;
2457 if (VECTOR_MODE_P (mode))
2461 if (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT)
2463 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2464 classes[0] = X86_64_INTEGERSI_CLASS;
2466 classes[0] = X86_64_INTEGER_CLASS;
2467 classes[1] = X86_64_INTEGER_CLASS;
2468 return 1 + (bytes > 8);
2475 /* Examine the argument and return the number of registers required in each
2476 class. Return 0 iff the parameter should be passed in memory. */
2478 examine_argument (enum machine_mode mode, tree type, int in_return,
2479 int *int_nregs, int *sse_nregs)
2481 enum x86_64_reg_class class[MAX_CLASSES];
2482 int n = classify_argument (mode, type, class, 0);
2488 for (n--; n >= 0; n--)
2491 case X86_64_INTEGER_CLASS:
2492 case X86_64_INTEGERSI_CLASS:
2495 case X86_64_SSE_CLASS:
2496 case X86_64_SSESF_CLASS:
2497 case X86_64_SSEDF_CLASS:
2500 case X86_64_NO_CLASS:
2501 case X86_64_SSEUP_CLASS:
2503 case X86_64_X87_CLASS:
2504 case X86_64_X87UP_CLASS:
2508 case X86_64_COMPLEX_X87_CLASS:
2509 return in_return ? 2 : 0;
2510 case X86_64_MEMORY_CLASS:
2516 /* Construct container for the argument used by GCC interface. See
2517 FUNCTION_ARG for the detailed description. */
2520 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
2521 tree type, int in_return, int nintregs, int nsseregs,
2522 const int *intreg, int sse_regno)
2524 enum machine_mode tmpmode;
2526 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2527 enum x86_64_reg_class class[MAX_CLASSES];
2531 int needed_sseregs, needed_intregs;
2532 rtx exp[MAX_CLASSES];
2535 n = classify_argument (mode, type, class, 0);
2536 if (TARGET_DEBUG_ARG)
2539 fprintf (stderr, "Memory class\n");
2542 fprintf (stderr, "Classes:");
2543 for (i = 0; i < n; i++)
2545 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2547 fprintf (stderr, "\n");
2552 if (!examine_argument (mode, type, in_return, &needed_intregs,
2555 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2558 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2559 some less clueful developer tries to use floating-point anyway. */
2560 if (needed_sseregs && !TARGET_SSE)
2562 static bool issued_error;
2565 issued_error = true;
2567 error ("SSE register return with SSE disabled");
2569 error ("SSE register argument with SSE disabled");
2574 /* First construct simple cases. Avoid SCmode, since we want to use a
2575 single register to pass this type. */
2576 if (n == 1 && mode != SCmode)
2579 case X86_64_INTEGER_CLASS:
2580 case X86_64_INTEGERSI_CLASS:
2581 return gen_rtx_REG (mode, intreg[0]);
2582 case X86_64_SSE_CLASS:
2583 case X86_64_SSESF_CLASS:
2584 case X86_64_SSEDF_CLASS:
2585 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
2586 case X86_64_X87_CLASS:
2587 case X86_64_COMPLEX_X87_CLASS:
2588 return gen_rtx_REG (mode, FIRST_STACK_REG);
2589 case X86_64_NO_CLASS:
2590 /* Zero sized array, struct or class. */
2595 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2597 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2599 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2600 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2601 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2602 && class[1] == X86_64_INTEGER_CLASS
2603 && (mode == CDImode || mode == TImode || mode == TFmode)
2604 && intreg[0] + 1 == intreg[1])
2605 return gen_rtx_REG (mode, intreg[0]);
2607 /* Otherwise figure out the entries of the PARALLEL. */
2608 for (i = 0; i < n; i++)
2612 case X86_64_NO_CLASS:
2614 case X86_64_INTEGER_CLASS:
2615 case X86_64_INTEGERSI_CLASS:
2616 /* Merge TImodes on aligned occasions here too. */
2617 if (i * 8 + 8 > bytes)
2618 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2619 else if (class[i] == X86_64_INTEGERSI_CLASS)
2623 /* We've requested 24 bytes, which we don't have a mode for. Use DImode. */
2624 if (tmpmode == BLKmode)
2626 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2627 gen_rtx_REG (tmpmode, *intreg),
2631 case X86_64_SSESF_CLASS:
2632 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2633 gen_rtx_REG (SFmode,
2634 SSE_REGNO (sse_regno)),
2638 case X86_64_SSEDF_CLASS:
2639 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2640 gen_rtx_REG (DFmode,
2641 SSE_REGNO (sse_regno)),
2645 case X86_64_SSE_CLASS:
2646 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2650 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2651 gen_rtx_REG (tmpmode,
2652 SSE_REGNO (sse_regno)),
2654 if (tmpmode == TImode)
2662 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2663 for (i = 0; i < nexps; i++)
2664 XVECEXP (ret, 0, i) = exp [i];
2668 /* Update the data in CUM to advance over an argument
2669 of mode MODE and data type TYPE.
2670 (TYPE is null for libcalls where that information may not be available.) */
2673 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2674 tree type, int named)
2677 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2678 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2681 mode = type_natural_mode (type);
2683 if (TARGET_DEBUG_ARG)
2684 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
2685 "mode=%s, named=%d)\n\n",
2686 words, cum->words, cum->nregs, cum->sse_nregs,
2687 GET_MODE_NAME (mode), named);
2691 int int_nregs, sse_nregs;
2692 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2693 cum->words += words;
2694 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2696 cum->nregs -= int_nregs;
2697 cum->sse_nregs -= sse_nregs;
2698 cum->regno += int_nregs;
2699 cum->sse_regno += sse_nregs;
2702 cum->words += words;
2720 cum->words += words;
2721 cum->nregs -= words;
2722 cum->regno += words;
2724 if (cum->nregs <= 0)
2738 if (!type || !AGGREGATE_TYPE_P (type))
2740 cum->sse_words += words;
2741 cum->sse_nregs -= 1;
2742 cum->sse_regno += 1;
2743 if (cum->sse_nregs <= 0)
2755 if (!type || !AGGREGATE_TYPE_P (type))
2757 cum->mmx_words += words;
2758 cum->mmx_nregs -= 1;
2759 cum->mmx_regno += 1;
2760 if (cum->mmx_nregs <= 0)
2771 /* Define where to put the arguments to a function.
2772 Value is zero to push the argument on the stack,
2773 or a hard register in which to store the argument.
2775 MODE is the argument's machine mode.
2776 TYPE is the data type of the argument (as a tree).
2777 This is null for libcalls where that information may
2779 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2780 the preceding args and about the function being called.
2781 NAMED is nonzero if this argument is a named parameter
2782 (otherwise it is an extra parameter matching an ellipsis). */
2785 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
2786 tree type, int named)
2788 enum machine_mode mode = orig_mode;
2791 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2792 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2793 static bool warnedsse, warnedmmx;
2795 /* To simplify the code below, represent vector types with a vector mode
2796 even if MMX/SSE are not active. */
2797 if (type && TREE_CODE (type) == VECTOR_TYPE)
2798 mode = type_natural_mode (type);
2800 /* Handle a hidden AL argument containing the number of registers for varargs
2801 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid any AL settings. */
2803 if (mode == VOIDmode)
2806 return GEN_INT (cum->maybe_vaarg
2807 ? (cum->sse_nregs < 0
2815 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
2817 &x86_64_int_parameter_registers [cum->regno],
2822 /* For now, pass fp/complex values on the stack. */
2834 if (words <= cum->nregs)
2836 int regno = cum->regno;
2838 /* Fastcall allocates the first two DWORD (SImode) or
2839 smaller arguments to ECX and EDX. */
2842 if (mode == BLKmode || mode == DImode)
2845 /* ECX, not EAX, is the first allocated register. */
2849 ret = gen_rtx_REG (mode, regno);
2859 if (!type || !AGGREGATE_TYPE_P (type))
2861 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2864 warning ("SSE vector argument without SSE enabled "
2868 ret = gen_reg_or_parallel (mode, orig_mode,
2869 cum->sse_regno + FIRST_SSE_REG);
2876 if (!type || !AGGREGATE_TYPE_P (type))
2878 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2881 warning ("MMX vector argument without MMX enabled "
2885 ret = gen_reg_or_parallel (mode, orig_mode,
2886 cum->mmx_regno + FIRST_MMX_REG);
2891 if (TARGET_DEBUG_ARG)
2894 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2895 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2898 print_simple_rtl (stderr, ret);
2900 fprintf (stderr, ", stack");
2902 fprintf (stderr, " )\n");
2908 /* A C expression that indicates when an argument must be passed by
2909 reference. If nonzero for an argument, a copy of that argument is
2910 made in memory and a pointer to the argument is passed instead of
2911 the argument itself. The pointer is passed in whatever way is
2912 appropriate for passing a pointer to that type. */
2915 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2916 enum machine_mode mode ATTRIBUTE_UNUSED,
2917 tree type, bool named ATTRIBUTE_UNUSED)
2922 if (type && int_size_in_bytes (type) == -1)
2924 if (TARGET_DEBUG_ARG)
2925 fprintf (stderr, "function_arg_pass_by_reference\n");
2932 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
2933 passing ABI. Only called if TARGET_SSE. */
2935 contains_128bit_aligned_vector_p (tree type)
2937 enum machine_mode mode = TYPE_MODE (type);
2938 if (SSE_REG_MODE_P (mode)
2939 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2941 if (TYPE_ALIGN (type) < 128)
2944 if (AGGREGATE_TYPE_P (type))
2946 /* Walk the aggregates recursively. */
2947 if (TREE_CODE (type) == RECORD_TYPE
2948 || TREE_CODE (type) == UNION_TYPE
2949 || TREE_CODE (type) == QUAL_UNION_TYPE)
2953 if (TYPE_BINFO (type))
2955 tree binfo, base_binfo;
2958 for (binfo = TYPE_BINFO (type), i = 0;
2959 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2960 if (contains_128bit_aligned_vector_p (BINFO_TYPE (base_binfo)))
2963 /* And now merge the fields of the structure. */
2964 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2966 if (TREE_CODE (field) == FIELD_DECL
2967 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2971 /* Just for use if some language passes arrays by value. */
2972 else if (TREE_CODE (type) == ARRAY_TYPE)
2974 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2983 /* Gives the alignment boundary, in bits, of an argument with the
2984 specified mode and type. */
2987 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2991 align = TYPE_ALIGN (type);
2993 align = GET_MODE_ALIGNMENT (mode);
2994 if (align < PARM_BOUNDARY)
2995 align = PARM_BOUNDARY;
2998 /* i386 ABI defines all arguments to be 4-byte aligned. We have to
2999 make an exception for SSE modes since these require 128-bit alignment.
3002 The handling here differs from field_alignment. ICC aligns MMX
3003 arguments to 4 byte boundaries, while structure fields are aligned
3004 to 8 byte boundaries. */
3006 align = PARM_BOUNDARY;
3009 if (!SSE_REG_MODE_P (mode))
3010 align = PARM_BOUNDARY;
3014 if (!contains_128bit_aligned_vector_p (type))
3015 align = PARM_BOUNDARY;
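/* Illustration (hypothetical types): under this 32-bit rule

       struct a { __m128 v; };  // contains a 128-bit vector -> 128-bit slot
       struct b { __m64 v; };   // MMX only -> plain PARM_BOUNDARY (32 bits)

   only types for which contains_128bit_aligned_vector_p holds get the
   128-bit argument boundary.  */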
3023 /* Return true if N is a possible register number for a function value. */
3025 ix86_function_value_regno_p (int regno)
3029 return ((regno) == 0
3030 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3031 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
3033 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
3034 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
3035 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
3038 /* Define how to find the value returned by a function.
3039 VALTYPE is the data type of the value (as a tree).
3040 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3041 otherwise, FUNC is 0. */
3043 ix86_function_value (tree valtype)
3045 enum machine_mode natmode = type_natural_mode (valtype);
3049 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3050 1, REGPARM_MAX, SSE_REGPARM_MAX,
3051 x86_64_int_return_registers, 0);
3052 /* For zero sized structures, construct_container returns NULL, but we
3053 need to keep the rest of the compiler happy by returning a meaningful value. */
3055 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3059 return gen_rtx_REG (TYPE_MODE (valtype), ix86_value_regno (natmode));
3062 /* Return true iff TYPE is returned in memory. */
3064 ix86_return_in_memory (tree type)
3066 int needed_intregs, needed_sseregs, size;
3067 enum machine_mode mode = type_natural_mode (type);
3070 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3072 if (mode == BLKmode)
3075 size = int_size_in_bytes (type);
3077 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3080 if (VECTOR_MODE_P (mode) || mode == TImode)
3082 /* User-created vectors small enough to fit in EAX. */
3086 /* MMX/3dNow values are returned on the stack, since we've
3087 got to EMMS/FEMMS before returning. */
3091 /* SSE values are returned in XMM0, except when it doesn't exist. */
3093 return (TARGET_SSE ? 0 : 1);
3104 /* When returning SSE vector types, we have a choice of either
3105 (1) being ABI incompatible with a -march switch, or
3106 (2) generating an error.
3107 Given no good solution, I think the safest thing is one warning.
3108 The user won't be able to use -Werror, but....
3110 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3111 called in response to actually generating a caller or callee that
3112 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3113 via aggregate_value_p for general type probing from tree-ssa. */
3116 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3120 if (!TARGET_SSE && type && !warned)
3122 /* Look at the return type of the function, not the function type. */
3123 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3126 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3129 warning ("SSE vector return without SSE enabled changes the ABI");
3136 /* Define how to find the value returned by a library function
3137 assuming the value has mode MODE. */
3139 ix86_libcall_value (enum machine_mode mode)
3150 return gen_rtx_REG (mode, FIRST_SSE_REG);
3153 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3157 return gen_rtx_REG (mode, 0);
3161 return gen_rtx_REG (mode, ix86_value_regno (mode));
3164 /* Given a mode, return the register to use for a return value. */
3167 ix86_value_regno (enum machine_mode mode)
3169 /* Floating point return values in %st(0). */
3170 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
3171 return FIRST_FLOAT_REG;
3172 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3173 we prevent this case when SSE is not available. */
3174 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3175 return FIRST_SSE_REG;
3176 /* Everything else in %eax. */
3180 /* Create the va_list data type. */
3183 ix86_build_builtin_va_list (void)
3185 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3187 /* For i386 we use a plain pointer to the argument area. */
3189 return build_pointer_type (char_type_node);
3191 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3192 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3194 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3195 unsigned_type_node);
3196 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3197 unsigned_type_node);
3198 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3200 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3203 DECL_FIELD_CONTEXT (f_gpr) = record;
3204 DECL_FIELD_CONTEXT (f_fpr) = record;
3205 DECL_FIELD_CONTEXT (f_ovf) = record;
3206 DECL_FIELD_CONTEXT (f_sav) = record;
3208 TREE_CHAIN (record) = type_decl;
3209 TYPE_NAME (record) = type_decl;
3210 TYPE_FIELDS (record) = f_gpr;
3211 TREE_CHAIN (f_gpr) = f_fpr;
3212 TREE_CHAIN (f_fpr) = f_ovf;
3213 TREE_CHAIN (f_ovf) = f_sav;
3215 layout_type (record);
3217 /* The correct type is an array type of one element. */
3218 return build_array_type (record, build_index_type (size_zero_node));
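/* What user code sees as va_list therefore matches the x86-64 ABI
   layout (shown here as an explanatory C sketch):

       typedef struct {
         unsigned int gp_offset;    // offset to next GPR slot in reg_save_area
         unsigned int fp_offset;    // offset to next XMM slot in reg_save_area
         void *overflow_arg_area;   // next stack-passed argument
         void *reg_save_area;       // base of the register save block
       } __va_list_tag[1];

   The one-element array gives the value the reference-like semantics
   relied on by va_start/va_arg below.  */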
3221 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3224 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3225 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3228 CUMULATIVE_ARGS next_cum;
3229 rtx save_area = NULL_RTX, mem;
3242 /* Indicate that we need to allocate space on the stack for the varargs save area. */
3243 ix86_save_varrargs_registers = 1;
3245 cfun->stack_alignment_needed = 128;
3247 fntype = TREE_TYPE (current_function_decl);
3248 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3249 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3250 != void_type_node));
3252 /* For varargs, we do not want to skip the dummy va_dcl argument.
3253 For stdargs, we do want to skip the last named argument. */
3256 function_arg_advance (&next_cum, mode, type, 1);
3259 save_area = frame_pointer_rtx;
3261 set = get_varargs_alias_set ();
3263 for (i = next_cum.regno; i < ix86_regparm; i++)
3265 mem = gen_rtx_MEM (Pmode,
3266 plus_constant (save_area, i * UNITS_PER_WORD));
3267 set_mem_alias_set (mem, set);
3268 emit_move_insn (mem, gen_rtx_REG (Pmode,
3269 x86_64_int_parameter_registers[i]));
3272 if (next_cum.sse_nregs)
3274 /* Now emit code to save SSE registers. The AX parameter contains the
3275 number of SSE parameter registers used to call this function. We use the
3276 sse_prologue_save insn template, which produces a computed jump across
3277 the SSE saves. We need some preparation work to get this working. */
3279 label = gen_label_rtx ();
3280 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3282 /* Compute the address to jump to:
3283 label - 5*eax + nnamed_sse_arguments*5 */
3284 tmp_reg = gen_reg_rtx (Pmode);
3285 nsse_reg = gen_reg_rtx (Pmode);
3286 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3287 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3288 gen_rtx_MULT (Pmode, nsse_reg,
3290 if (next_cum.sse_regno)
3293 gen_rtx_CONST (DImode,
3294 gen_rtx_PLUS (DImode,
3296 GEN_INT (next_cum.sse_regno * 4))));
3298 emit_move_insn (nsse_reg, label_ref);
3299 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3301 /* Compute the address of the memory block we save into. We always use a
3302 pointer pointing 127 bytes after the first byte to store; this is needed
3303 to keep the instruction size limited to 4 bytes. */
3304 tmp_reg = gen_reg_rtx (Pmode);
3305 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3306 plus_constant (save_area,
3307 8 * REGPARM_MAX + 127)));
3308 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3309 set_mem_alias_set (mem, set);
3310 set_mem_align (mem, BITS_PER_WORD);
3312 /* And finally do the dirty job! */
3313 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3314 GEN_INT (next_cum.sse_regno), label));
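/* Why the 127-byte bias works (a sketch, assuming the saves are plain
   movaps stores as the template suggests): relative to the biased
   pointer, every save lands at a small negative offset,

       movaps %xmmN, (-127 + N*16)(%rax)   # disp fits in a signed byte

   so each instruction keeps the same short fixed length, which the
   "label - 5*eax" computed-jump arithmetic above depends on.  */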
3319 /* Implement va_start. */
3322 ix86_va_start (tree valist, rtx nextarg)
3324 HOST_WIDE_INT words, n_gpr, n_fpr;
3325 tree f_gpr, f_fpr, f_ovf, f_sav;
3326 tree gpr, fpr, ovf, sav, t;
3328 /* Only the 64-bit target needs something special. */
3331 std_expand_builtin_va_start (valist, nextarg);
3335 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3336 f_fpr = TREE_CHAIN (f_gpr);
3337 f_ovf = TREE_CHAIN (f_fpr);
3338 f_sav = TREE_CHAIN (f_ovf);
3340 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3341 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3342 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3343 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3344 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3346 /* Count number of gp and fp argument registers used. */
3347 words = current_function_args_info.words;
3348 n_gpr = current_function_args_info.regno;
3349 n_fpr = current_function_args_info.sse_regno;
3351 if (TARGET_DEBUG_ARG)
3352 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3353 (int) words, (int) n_gpr, (int) n_fpr);
3355 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3356 build_int_cst (NULL_TREE, n_gpr * 8));
3357 TREE_SIDE_EFFECTS (t) = 1;
3358 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3360 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3361 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
3362 TREE_SIDE_EFFECTS (t) = 1;
3363 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
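/* A worked case (illustrative): for "int f (int a, double b, ...)" one
   GPR and one SSE register are named, so the stores above yield

       gp_offset = 1 * 8 = 8
       fp_offset = 1 * 16 + 8 * REGPARM_MAX = 16 + 48 = 64

   (with REGPARM_MAX == 6): the first variadic integer is read from
   byte 8 of the save area and the first variadic double from byte 64,
   since the 16-byte XMM slots sit above the six 8-byte GPR slots.  */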
3365 /* Find the overflow area. */
3366 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3368 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3369 build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
3370 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3371 TREE_SIDE_EFFECTS (t) = 1;
3372 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3374 /* Find the register save area.
3375 The function prologue saves it right above the stack frame. */
3376 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3377 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3378 TREE_SIDE_EFFECTS (t) = 1;
3379 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3382 /* Implement va_arg. */
3385 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3387 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3388 tree f_gpr, f_fpr, f_ovf, f_sav;
3389 tree gpr, fpr, ovf, sav, t;
3391 tree lab_false, lab_over = NULL_TREE;
3396 enum machine_mode nat_mode;
3398 /* Only the 64-bit target needs something special. */
3400 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3402 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3403 f_fpr = TREE_CHAIN (f_gpr);
3404 f_ovf = TREE_CHAIN (f_fpr);
3405 f_sav = TREE_CHAIN (f_ovf);
3407 valist = build_va_arg_indirect_ref (valist);
3408 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3409 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3410 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3411 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3413 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3415 type = build_pointer_type (type);
3416 size = int_size_in_bytes (type);
3417 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3419 nat_mode = type_natural_mode (type);
3420 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
3421 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3423 /* Pull the value out of the saved registers. */
3425 addr = create_tmp_var (ptr_type_node, "addr");
3426 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3430 int needed_intregs, needed_sseregs;
3432 tree int_addr, sse_addr;
3434 lab_false = create_artificial_label ();
3435 lab_over = create_artificial_label ();
3437 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
3439 need_temp = (!REG_P (container)
3440 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3441 || TYPE_ALIGN (type) > 128));
3443 /* In case we are passing a structure, verify that it is a consecutive block
3444 in the register save area. If not, we need to do moves. */
3445 if (!need_temp && !REG_P (container))
3447 /* Verify that all registers are strictly consecutive */
3448 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3452 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3454 rtx slot = XVECEXP (container, 0, i);
3455 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3456 || INTVAL (XEXP (slot, 1)) != i * 16)
3464 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3466 rtx slot = XVECEXP (container, 0, i);
3467 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3468 || INTVAL (XEXP (slot, 1)) != i * 8)
3480 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3481 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3482 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3483 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3486 /* First ensure that we fit completely in registers. */
3489 t = build_int_cst (TREE_TYPE (gpr),
3490 (REGPARM_MAX - needed_intregs + 1) * 8);
3491 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3492 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3493 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3494 gimplify_and_add (t, pre_p);
3498 t = build_int_cst (TREE_TYPE (fpr),
3499 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3501 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3502 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3503 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3504 gimplify_and_add (t, pre_p);
3507 /* Compute index to start of area used for integer regs. */
3510 /* int_addr = gpr + sav; */
3511 t = fold_convert (ptr_type_node, gpr);
3512 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3513 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3514 gimplify_and_add (t, pre_p);
3518 /* sse_addr = fpr + sav; */
3519 t = fold_convert (ptr_type_node, fpr);
3520 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3521 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3522 gimplify_and_add (t, pre_p);
3527 tree temp = create_tmp_var (type, "va_arg_tmp");
3530 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3531 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3532 gimplify_and_add (t, pre_p);
3534 for (i = 0; i < XVECLEN (container, 0); i++)
3536 rtx slot = XVECEXP (container, 0, i);
3537 rtx reg = XEXP (slot, 0);
3538 enum machine_mode mode = GET_MODE (reg);
3539 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3540 tree addr_type = build_pointer_type (piece_type);
3543 tree dest_addr, dest;
3545 if (SSE_REGNO_P (REGNO (reg)))
3547 src_addr = sse_addr;
3548 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3552 src_addr = int_addr;
3553 src_offset = REGNO (reg) * 8;
3555 src_addr = fold_convert (addr_type, src_addr);
3556 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3557 size_int (src_offset)));
3558 src = build_va_arg_indirect_ref (src_addr);
3560 dest_addr = fold_convert (addr_type, addr);
3561 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3562 size_int (INTVAL (XEXP (slot, 1)))));
3563 dest = build_va_arg_indirect_ref (dest_addr);
3565 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3566 gimplify_and_add (t, pre_p);
3572 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3573 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
3574 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3575 gimplify_and_add (t, pre_p);
3579 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3580 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
3581 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3582 gimplify_and_add (t, pre_p);
3585 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3586 gimplify_and_add (t, pre_p);
3588 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3589 append_to_statement_list (t, pre_p);
3592 /* ... otherwise out of the overflow area. */
3594 /* Care for on-stack alignment if needed. */
3595 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3599 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3600 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
3601 build_int_cst (TREE_TYPE (ovf), align - 1));
3602 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3603 build_int_cst (TREE_TYPE (t), -align));
3605 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3607 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3608 gimplify_and_add (t2, pre_p);
3610 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3611 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
3612 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3613 gimplify_and_add (t, pre_p);
3617 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3618 append_to_statement_list (t, pre_p);
3621 ptrtype = build_pointer_type (type);
3622 addr = fold_convert (ptrtype, addr);
3625 addr = build_va_arg_indirect_ref (addr);
3626 return build_va_arg_indirect_ref (addr);
3629 /* Return nonzero if OPNUM's MEM should be matched
3630 in movabs* patterns. */
3633 ix86_check_movabs (rtx insn, int opnum)
3637 set = PATTERN (insn);
3638 if (GET_CODE (set) == PARALLEL)
3639 set = XVECEXP (set, 0, 0);
3640 if (GET_CODE (set) != SET)
3642 mem = XEXP (set, opnum);
3643 while (GET_CODE (mem) == SUBREG)
3644 mem = SUBREG_REG (mem);
3645 if (GET_CODE (mem) != MEM)
3647 return (volatile_ok || !MEM_VOLATILE_P (mem));
3650 /* Initialize the table of extra 80387 mathematical constants. */
3653 init_ext_80387_constants (void)
3655 static const char * cst[5] =
3657 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
3658 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
3659 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
3660 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
3661 "3.1415926535897932385128089594061862044", /* 4: fldpi */
3665 for (i = 0; i < 5; i++)
3667 real_from_string (&ext_80387_constants_table[i], cst[i]);
3668 /* Ensure each constant is rounded to XFmode precision. */
3669 real_convert (&ext_80387_constants_table[i],
3670 XFmode, &ext_80387_constants_table[i]);
3673 ext_80387_constants_init = 1;
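/* For reference, the resulting index -> insn mapping (assuming the
   usual "i + 3" return convention of standard_80387_constant_p below,
   with 1 and 2 reserved for fldz and fld1):

       3  fldlg2  log10(2)        6  fldl2t  log2(10)
       4  fldln2  ln(2)           7  fldpi   pi
       5  fldl2e  log2(e)
*/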
3676 /* Return true if the constant is something that can be loaded with
3677 a special instruction. */
3680 standard_80387_constant_p (rtx x)
3682 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3685 if (x == CONST0_RTX (GET_MODE (x)))
3687 if (x == CONST1_RTX (GET_MODE (x)))
3690 /* For XFmode constants, try to find a special 80387 instruction when
3691 optimizing for size or on those CPUs that benefit from them. */
3692 if (GET_MODE (x) == XFmode
3693 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
3698 if (! ext_80387_constants_init)
3699 init_ext_80387_constants ();
3701 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3702 for (i = 0; i < 5; i++)
3703 if (real_identical (&r, &ext_80387_constants_table[i]))
3710 /* Return the opcode of the special instruction to be used to load the constant X. */
3714 standard_80387_constant_opcode (rtx x)
3716 switch (standard_80387_constant_p (x))
3736 /* Return the CONST_DOUBLE representing the 80387 constant that is
3737 loaded by the specified special instruction. The argument IDX
3738 matches the return value from standard_80387_constant_p. */
3741 standard_80387_constant_rtx (int idx)
3745 if (! ext_80387_constants_init)
3746 init_ext_80387_constants ();
3762 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
3766 /* Return 1 if X is an FP constant we can load into an SSE register without using memory. */
3769 standard_sse_constant_p (rtx x)
3771 if (x == const0_rtx)
3773 return (x == CONST0_RTX (GET_MODE (x)));
3776 /* Returns 1 if OP contains a symbol reference */
3779 symbolic_reference_mentioned_p (rtx op)
3784 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3787 fmt = GET_RTX_FORMAT (GET_CODE (op));
3788 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3794 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3795 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3799 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3806 /* Return 1 if it is appropriate to emit `ret' instructions in the
3807 body of a function. Do this only if the epilogue is simple, needing a
3808 couple of insns. Prior to reloading, we can't tell how many registers
3809 must be saved, so return 0 then. Return 0 if there is no frame
3810 marker to de-allocate. */
3813 ix86_can_use_return_insn_p (void)
3815 struct ix86_frame frame;
3817 if (! reload_completed || frame_pointer_needed)
3820 /* Don't allow more than 32768 bytes to be popped, since that's all we can
3821 do with one instruction. */
3822 if (current_function_pops_args
3823 && current_function_args_size >= 32768)
3826 ix86_compute_frame_layout (&frame);
3827 return frame.to_allocate == 0 && frame.nregs == 0;
3830 /* Value should be nonzero if functions must have frame pointers.
3831 Zero means the frame pointer need not be set up (and parms may
3832 be accessed via the stack pointer) in functions that seem suitable. */
3835 ix86_frame_pointer_required (void)
3837 /* If we accessed previous frames, then the generated code expects
3838 to be able to access the saved ebp value in our frame. */
3839 if (cfun->machine->accesses_prev_frame)
3842 /* Several x86 OSes need a frame pointer for other reasons,
3843 usually pertaining to setjmp. */
3844 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3847 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3848 the frame pointer by default. Turn it back on now if we've not
3849 got a leaf function. */
3850 if (TARGET_OMIT_LEAF_FRAME_POINTER
3851 && (!current_function_is_leaf))
3854 if (current_function_profile)
3860 /* Record that the current function accesses previous call frames. */
3863 ix86_setup_frame_addresses (void)
3865 cfun->machine->accesses_prev_frame = 1;
3868 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3869 # define USE_HIDDEN_LINKONCE 1
3871 # define USE_HIDDEN_LINKONCE 0
3874 static int pic_labels_used;
3876 /* Fills in the label name that should be used for a pc thunk for
3877 the given register. */
3880 get_pc_thunk_name (char name[32], unsigned int regno)
3882 if (USE_HIDDEN_LINKONCE)
3883 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3885 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3889 /* This function generates code for -fpic that loads %ebx with
3890 the return address of the caller and then returns. */
3893 ix86_file_end (void)
3898 for (regno = 0; regno < 8; ++regno)
3902 if (! ((pic_labels_used >> regno) & 1))
3905 get_pc_thunk_name (name, regno);
3907 if (USE_HIDDEN_LINKONCE)
3911 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3913 TREE_PUBLIC (decl) = 1;
3914 TREE_STATIC (decl) = 1;
3915 DECL_ONE_ONLY (decl) = 1;
3917 (*targetm.asm_out.unique_section) (decl, 0);
3918 named_section (decl, NULL, 0);
3920 (*targetm.asm_out.globalize_label) (asm_out_file, name);
3921 fputs ("\t.hidden\t", asm_out_file);
3922 assemble_name (asm_out_file, name);
3923 fputc ('\n', asm_out_file);
3924 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
3929 ASM_OUTPUT_LABEL (asm_out_file, name);
3932 xops[0] = gen_rtx_REG (SImode, regno);
3933 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3934 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3935 output_asm_insn ("ret", xops);
3938 if (NEED_INDICATE_EXEC_STACK)
3939 file_end_indicate_exec_stack ();
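/* Illustrative sketch of the thunk emitted above for %ebx, assuming
   ATT syntax (the surrounding directives depend on USE_HIDDEN_LINKONCE):

       __i686.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret
*/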
3942 /* Emit code for the SET_GOT patterns. */
3945 output_set_got (rtx dest)
3950 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
3952 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
3954 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
3957 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3959 output_asm_insn ("call\t%a2", xops);
3962 /* Output the "canonical" label name ("Lxx$pb") here too. This
3963 is what will be referred to by the Mach-O PIC subsystem. */
3964 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
3966 (*targetm.asm_out.internal_label) (asm_out_file, "L",
3967 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
3970 output_asm_insn ("pop{l}\t%0", xops);
3975 get_pc_thunk_name (name, REGNO (dest));
3976 pic_labels_used |= 1 << REGNO (dest);
3978 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
3979 xops[2] = gen_rtx_MEM (QImode, xops[2]);
3980 output_asm_insn ("call\t%X2", xops);
3983 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
3984 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
3985 else if (!TARGET_MACHO)
3986 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
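/* Illustrative sketch (assuming ATT syntax and the usual
   "_GLOBAL_OFFSET_TABLE_" GOT symbol): without deep branch prediction
   the sequence above amounts to

       call    .L1
   .L1:    popl    %ebx
           addl    $_GLOBAL_OFFSET_TABLE_+[.-.L1], %ebx

   while the deep-branch-prediction path calls the pc thunk and then
   adds $_GLOBAL_OFFSET_TABLE_ directly.  */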
3991 /* Generate a "push" pattern for input ARG. */
3996 return gen_rtx_SET (VOIDmode,
3998 gen_rtx_PRE_DEC (Pmode,
3999 stack_pointer_rtx)),
4003 /* Return >= 0 if there is an unused call-clobbered register available
4004 for the entire function. */
4007 ix86_select_alt_pic_regnum (void)
4009 if (current_function_is_leaf && !current_function_profile)
4012 for (i = 2; i >= 0; --i)
4013 if (!regs_ever_live[i])
4017 return INVALID_REGNUM;
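/* Illustrative note (assuming the usual ia32 register numbering, where
   hard regs 0..2 are %eax, %edx and %ecx): the loop above looks for a
   call-clobbered register that is never live, so a leaf, non-profiled
   function may keep its PIC pointer there instead of tying up %ebx.  */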
4020 /* Return 1 if we need to save REGNO. */
4022 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4024 if (pic_offset_table_rtx
4025 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4026 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4027 || current_function_profile
4028 || current_function_calls_eh_return
4029 || current_function_uses_const_pool))
4031 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4036 if (current_function_calls_eh_return && maybe_eh_return)
4041 unsigned test = EH_RETURN_DATA_REGNO (i);
4042 if (test == INVALID_REGNUM)
4049 return (regs_ever_live[regno]
4050 && !call_used_regs[regno]
4051 && !fixed_regs[regno]
4052 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
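/* Illustrative reading of the test above: %ebx, being call-saved and
   not fixed, is saved iff it is ever live in this function, while %ebp
   is additionally exempted whenever it acts as the frame pointer,
   since the prologue saves it separately in that case.  */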
4055 /* Return number of registers to be saved on the stack. */
4058 ix86_nsaved_regs (void)
4063 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4064 if (ix86_save_reg (regno, true))
4069 /* Return the offset between two registers, one to be eliminated, and the other
4070 its replacement, at the start of a routine. */
4073 ix86_initial_elimination_offset (int from, int to)
4075 struct ix86_frame frame;
4076 ix86_compute_frame_layout (&frame);
4078 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4079 return frame.hard_frame_pointer_offset;
4080 else if (from == FRAME_POINTER_REGNUM
4081 && to == HARD_FRAME_POINTER_REGNUM)
4082 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4085 if (to != STACK_POINTER_REGNUM)
4087 else if (from == ARG_POINTER_REGNUM)
4088 return frame.stack_pointer_offset;
4089 else if (from != FRAME_POINTER_REGNUM)
4092 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4096 /* Fill the ix86_frame structure describing the frame of the currently compiled function. */
4099 ix86_compute_frame_layout (struct ix86_frame *frame)
4101 HOST_WIDE_INT total_size;
4102 unsigned int stack_alignment_needed;
4103 HOST_WIDE_INT offset;
4104 unsigned int preferred_alignment;
4105 HOST_WIDE_INT size = get_frame_size ();
4107 frame->nregs = ix86_nsaved_regs ();
4110 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4111 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4113 /* During reload iteration the number of registers saved can change.
4114 Recompute the value as needed. Do not recompute when the number of registers
4115 hasn't changed, as reload does multiple calls to the function and does not
4116 expect the decision to change within a single iteration.
4118 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4120 int count = frame->nregs;
4122 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4123 /* The fast prologue uses moves instead of pushes to save registers. This
4124 is significantly longer, but also executes faster, as modern hardware
4125 can execute the moves in parallel but can't do that for push/pop.
4127 Be careful about choosing which prologue to emit: when the function takes
4128 many instructions to execute, we may use the slow version, as well as when
4129 the function is known to be outside a hot spot (this is known from
4130 feedback only). Weight the size of the function by the number of registers
4131 to save, as it is cheap to use one or two push instructions but very
4132 slow to use many of them.
4134 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4135 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4136 || (flag_branch_probabilities
4137 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4138 cfun->machine->use_fast_prologue_epilogue = false;
4140 cfun->machine->use_fast_prologue_epilogue
4141 = !expensive_function_p (count);
4143 if (TARGET_PROLOGUE_USING_MOVE
4144 && cfun->machine->use_fast_prologue_epilogue)
4145 frame->save_regs_using_mov = true;
4147 frame->save_regs_using_mov = false;
4150 /* Skip return address and saved base pointer. */
4151 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4153 frame->hard_frame_pointer_offset = offset;
4155 /* Do some sanity checking of stack_alignment_needed and
4156 preferred_alignment, since the i386 port is the only one using these
4157 features, which may break easily.
4159 if (size && !stack_alignment_needed)
4161 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4163 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4165 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4168 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4169 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4171 /* Register save area */
4172 offset += frame->nregs * UNITS_PER_WORD;
4175 if (ix86_save_varrargs_registers)
4177 offset += X86_64_VARARGS_SIZE;
4178 frame->va_arg_size = X86_64_VARARGS_SIZE;
4181 frame->va_arg_size = 0;
4183 /* Align start of frame for local function. */
4184 frame->padding1 = ((offset + stack_alignment_needed - 1)
4185 & -stack_alignment_needed) - offset;
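/* Worked example (illustrative): with offset == 20 and a 16-byte
   stack_alignment_needed, ((20 + 15) & -16) - 20 == 32 - 20, i.e.
   12 bytes of padding realign the start of the local frame.  */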
4187 offset += frame->padding1;
4189 /* Frame pointer points here. */
4190 frame->frame_pointer_offset = offset;
4194 /* Add the outgoing arguments area. This can be skipped if we eliminated
4195 all the function calls as dead code.
4196 Skipping is, however, impossible when the function calls alloca, as the
4197 alloca expander assumes that the last current_function_outgoing_args_size
4198 bytes of the stack frame are unused. */
4199 if (ACCUMULATE_OUTGOING_ARGS
4200 && (!current_function_is_leaf || current_function_calls_alloca))
4202 offset += current_function_outgoing_args_size;
4203 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4206 frame->outgoing_arguments_size = 0;
4208 /* Align stack boundary. Only needed if we're calling another function
4210 if (!current_function_is_leaf || current_function_calls_alloca)
4211 frame->padding2 = ((offset + preferred_alignment - 1)
4212 & -preferred_alignment) - offset;
4214 frame->padding2 = 0;
4216 offset += frame->padding2;
4218 /* We've reached the end of the stack frame. */
4219 frame->stack_pointer_offset = offset;
4221 /* Size the prologue needs to allocate. */
4222 frame->to_allocate =
4223 (size + frame->padding1 + frame->padding2
4224 + frame->outgoing_arguments_size + frame->va_arg_size);
4226 if ((!frame->to_allocate && frame->nregs <= 1)
4227 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
4228 frame->save_regs_using_mov = false;
4230 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
4231 && current_function_is_leaf)
4233 frame->red_zone_size = frame->to_allocate;
4234 if (frame->save_regs_using_mov)
4235 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
4236 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4237 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4240 frame->red_zone_size = 0;
4241 frame->to_allocate -= frame->red_zone_size;
4242 frame->stack_pointer_offset -= frame->red_zone_size;
4244 fprintf (stderr, "nregs: %i\n", frame->nregs);
4245 fprintf (stderr, "size: %i\n", size);
4246 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4247 fprintf (stderr, "padding1: %i\n", frame->padding1);
4248 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4249 fprintf (stderr, "padding2: %i\n", frame->padding2);
4250 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4251 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4252 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4253 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4254 frame->hard_frame_pointer_offset);
4255 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4259 /* Emit code to save registers in the prologue. */
4262 ix86_emit_save_regs (void)
4267 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4268 if (ix86_save_reg (regno, true))
4270 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4271 RTX_FRAME_RELATED_P (insn) = 1;
4275 /* Emit code to save registers using MOV insns. The first register
4276 is saved at POINTER + OFFSET.
4278 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
4283 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4284 if (ix86_save_reg (regno, true))
4286 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4288 gen_rtx_REG (Pmode, regno));
4289 RTX_FRAME_RELATED_P (insn) = 1;
4290 offset += UNITS_PER_WORD;
4294 /* Expand prologue or epilogue stack adjustment.
4295 The pattern exists to put a dependency on all ebp-based memory accesses.
4296 STYLE should be negative if instructions should be marked as frame related,
4297 zero if the %r11 register is live and cannot be freely used, and positive
4301 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
4306 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
4307 else if (x86_64_immediate_operand (offset, DImode))
4308 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
4312 /* r11 is used by indirect sibcall return as well, set before the
4313 epilogue and used after the epilogue. ATM indirect sibcall
4314 shouldn't be used together with huge frame sizes in one
4315 function because of the frame_size check in sibcall.c. */
4318 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4319 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
4321 RTX_FRAME_RELATED_P (insn) = 1;
4322 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
4326 RTX_FRAME_RELATED_P (insn) = 1;
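/* Illustrative: for adjustments that fit in an immediate this boils
   down to a single add{l,q} to %esp/%rsp; the %r11 path above is only
   taken for 64-bit offsets too large for an immediate operand.  */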
4329 /* Expand the prologue into a bunch of separate insns. */
4332 ix86_expand_prologue (void)
4336 struct ix86_frame frame;
4337 HOST_WIDE_INT allocate;
4339 ix86_compute_frame_layout (&frame);
4341 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4342 slower on all targets. Also sdb doesn't like it. */
4344 if (frame_pointer_needed)
4346 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4347 RTX_FRAME_RELATED_P (insn) = 1;
4349 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4350 RTX_FRAME_RELATED_P (insn) = 1;
4353 allocate = frame.to_allocate;
4355 if (!frame.save_regs_using_mov)
4356 ix86_emit_save_regs ();
4358 allocate += frame.nregs * UNITS_PER_WORD;
4360 /* When using the red zone we may start saving registers before allocating
4361 the stack frame, saving one cycle of the prologue. */
4362 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
4363 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
4364 : stack_pointer_rtx,
4365 -frame.nregs * UNITS_PER_WORD);
4369 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4370 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4371 GEN_INT (-allocate), -1);
4374 /* Only valid for Win32. */
4375 rtx eax = gen_rtx_REG (SImode, 0);
4376 bool eax_live = ix86_eax_live_at_start_p ();
4384 emit_insn (gen_push (eax));
4388 emit_move_insn (eax, GEN_INT (allocate));
4390 insn = emit_insn (gen_allocate_stack_worker (eax));
4391 RTX_FRAME_RELATED_P (insn) = 1;
4392 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
4393 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
4394 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
4395 t, REG_NOTES (insn));
4399 if (frame_pointer_needed)
4400 t = plus_constant (hard_frame_pointer_rtx,
4403 - frame.nregs * UNITS_PER_WORD);
4405 t = plus_constant (stack_pointer_rtx, allocate);
4406 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
4410 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
4412 if (!frame_pointer_needed || !frame.to_allocate)
4413 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4415 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4416 -frame.nregs * UNITS_PER_WORD);
4419 pic_reg_used = false;
4420 if (pic_offset_table_rtx
4421 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4422 || current_function_profile))
4424 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4426 if (alt_pic_reg_used != INVALID_REGNUM)
4427 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4429 pic_reg_used = true;
4434 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4436 /* Even with accurate pre-reload life analysis, we can wind up
4437 deleting all references to the pic register after reload.
4438 Consider if cross-jumping unifies two sides of a branch
4439 controlled by a comparison vs the only read from a global.
4440 In which case, allow the set_got to be deleted, though we're
4441 too late to do anything about the ebx save in the prologue. */
4442 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4445 /* Prevent function calls from being scheduled before the call to mcount.
4446 In the pic_reg_used case, make sure that the got load isn't deleted. */
4447 if (current_function_profile)
4448 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4451 /* Emit code to restore saved registers using MOV insns. First register
4452 is restored from POINTER + OFFSET. */
4454 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
4455 int maybe_eh_return)
4458 rtx base_address = gen_rtx_MEM (Pmode, pointer);
4460 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4461 if (ix86_save_reg (regno, maybe_eh_return))
4463 /* Ensure that adjust_address won't be forced to produce a pointer
4464 outside the range allowed by the x86-64 instruction set. */
4465 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
4469 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4470 emit_move_insn (r11, GEN_INT (offset));
4471 emit_insn (gen_adddi3 (r11, r11, pointer));
4472 base_address = gen_rtx_MEM (Pmode, r11);
4475 emit_move_insn (gen_rtx_REG (Pmode, regno),
4476 adjust_address (base_address, Pmode, offset));
4477 offset += UNITS_PER_WORD;
4481 /* Restore function stack, frame, and registers. */
4484 ix86_expand_epilogue (int style)
4487 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4488 struct ix86_frame frame;
4489 HOST_WIDE_INT offset;
4491 ix86_compute_frame_layout (&frame);
4493 /* Calculate start of saved registers relative to ebp. Special care
4494 must be taken for the normal return case of a function using
4495 eh_return: the eax and edx registers are marked as saved, but not
4496 restored along this path. */
4497 offset = frame.nregs;
4498 if (current_function_calls_eh_return && style != 2)
4500 offset *= -UNITS_PER_WORD;
4502 /* If we're only restoring one register and sp is not valid, then
4503 use a move instruction to restore the register, since it's
4504 less work than reloading sp and popping the register.
4506 The default code results in a stack adjustment using an add/lea instruction,
4507 while this code results in a LEAVE instruction (or discrete equivalent),
4508 so it is profitable in some other cases as well, especially when there
4509 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4510 and there is exactly one register to pop. This heuristic may need some
4511 tuning in the future. */
4512 if ((!sp_valid && frame.nregs <= 1)
4513 || (TARGET_EPILOGUE_USING_MOVE
4514 && cfun->machine->use_fast_prologue_epilogue
4515 && (frame.nregs > 1 || frame.to_allocate))
4516 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4517 || (frame_pointer_needed && TARGET_USE_LEAVE
4518 && cfun->machine->use_fast_prologue_epilogue
4519 && frame.nregs == 1)
4520 || current_function_calls_eh_return)
4522 /* Restore registers. We can use ebp or esp to address the memory
4523 locations. If both are available, default to ebp, since offsets
4524 are known to be small. The only exception is esp pointing directly to the
4525 end of the block of saved registers, where we may simplify addressing.
4528 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4529 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4530 frame.to_allocate, style == 2);
4532 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4533 offset, style == 2);
4535 /* eh_return epilogues need %ecx added to the stack pointer. */
4538 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4540 if (frame_pointer_needed)
4542 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4543 tmp = plus_constant (tmp, UNITS_PER_WORD);
4544 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4546 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4547 emit_move_insn (hard_frame_pointer_rtx, tmp);
4549 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
4554 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4555 tmp = plus_constant (tmp, (frame.to_allocate
4556 + frame.nregs * UNITS_PER_WORD));
4557 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4560 else if (!frame_pointer_needed)
4561 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4562 GEN_INT (frame.to_allocate
4563 + frame.nregs * UNITS_PER_WORD),
4565 /* If not an i386, mov & pop is faster than "leave". */
4566 else if (TARGET_USE_LEAVE || optimize_size
4567 || !cfun->machine->use_fast_prologue_epilogue)
4568 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4571 pro_epilogue_adjust_stack (stack_pointer_rtx,
4572 hard_frame_pointer_rtx,
4575 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4577 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4582 /* First step is to deallocate the stack frame so that we can
4583 pop the registers. */
4586 if (!frame_pointer_needed)
4588 pro_epilogue_adjust_stack (stack_pointer_rtx,
4589 hard_frame_pointer_rtx,
4590 GEN_INT (offset), style);
4592 else if (frame.to_allocate)
4593 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4594 GEN_INT (frame.to_allocate), style);
4596 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4597 if (ix86_save_reg (regno, false))
4600 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4602 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4604 if (frame_pointer_needed)
4606 /* Leave results in shorter dependency chains on CPUs that are
4607 able to grok it fast. */
4608 if (TARGET_USE_LEAVE)
4609 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4610 else if (TARGET_64BIT)
4611 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4613 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4617 /* Sibcall epilogues don't want a return instruction. */
4621 if (current_function_pops_args && current_function_args_size)
4623 rtx popc = GEN_INT (current_function_pops_args);
4625 /* i386 can only pop 64K bytes. If asked to pop more, pop the
4626 return address, do an explicit add, and jump indirectly to the
4629 if (current_function_pops_args >= 65536)
4631 rtx ecx = gen_rtx_REG (SImode, 2);
4633 /* There is no "pascal" calling convention in the 64-bit ABI. */
4637 emit_insn (gen_popsi1 (ecx));
4638 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4639 emit_jump_insn (gen_return_indirect_internal (ecx));
4642 emit_jump_insn (gen_return_pop_internal (popc));
4645 emit_jump_insn (gen_return_internal ());
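/* Illustrative: a 32-bit stdcall-style function popping 12 bytes of
   arguments takes the gen_return_pop_internal path above and emits
   `ret $12'; only requests of 64K or more fall back to the explicit
   pop/add/indirect-jump sequence.  */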
4648 /* Reset from the function's potential modifications. */
4651 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4652 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4654 if (pic_offset_table_rtx)
4655 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4658 /* Extract the parts of an RTL expression that is a valid memory address
4659 for an instruction. Return 0 if the structure of the address is
4660 grossly off. Return -1 if the address contains ASHIFT, so it is not
4661 strictly valid but is still used for computing the length of a lea instruction. */
4664 ix86_decompose_address (rtx addr, struct ix86_address *out)
4666 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
4667 rtx base_reg, index_reg;
4668 HOST_WIDE_INT scale = 1;
4669 rtx scale_rtx = NULL_RTX;
4671 enum ix86_address_seg seg = SEG_DEFAULT;
4673 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4675 else if (GET_CODE (addr) == PLUS)
4685 addends[n++] = XEXP (op, 1);
4688 while (GET_CODE (op) == PLUS);
4693 for (i = n; i >= 0; --i)
4696 switch (GET_CODE (op))
4701 index = XEXP (op, 0);
4702 scale_rtx = XEXP (op, 1);
4706 if (XINT (op, 1) == UNSPEC_TP
4707 && TARGET_TLS_DIRECT_SEG_REFS
4708 && seg == SEG_DEFAULT)
4709 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
4738 else if (GET_CODE (addr) == MULT)
4740 index = XEXP (addr, 0); /* index*scale */
4741 scale_rtx = XEXP (addr, 1);
4743 else if (GET_CODE (addr) == ASHIFT)
4747 /* We're called for lea too, which implements ashift on occasion. */
4748 index = XEXP (addr, 0);
4749 tmp = XEXP (addr, 1);
4750 if (GET_CODE (tmp) != CONST_INT)
4752 scale = INTVAL (tmp);
4753 if ((unsigned HOST_WIDE_INT) scale > 3)
4759 disp = addr; /* displacement */
4761 /* Extract the integral value of scale. */
4764 if (GET_CODE (scale_rtx) != CONST_INT)
4766 scale = INTVAL (scale_rtx);
4769 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
4770 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
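/* Worked example (illustrative): the ia32 address [%ebx+%eax*4+8],
   i.e. (plus (plus (mult %eax 4) %ebx) (const_int 8)) in RTL,
   decomposes into base == %ebx, index == %eax, scale == 4 and
   disp == (const_int 8), with seg left as SEG_DEFAULT.  */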
4772 /* Allow the arg pointer and stack pointer as the index if there is no scaling. */
4773 if (base_reg && index_reg && scale == 1
4774 && (index_reg == arg_pointer_rtx
4775 || index_reg == frame_pointer_rtx
4776 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
4779 tmp = base, base = index, index = tmp;
4780 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
4783 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4784 if ((base_reg == hard_frame_pointer_rtx
4785 || base_reg == frame_pointer_rtx
4786 || base_reg == arg_pointer_rtx) && !disp)
4789 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
4790 Avoid this by transforming it to [%esi+0]. */
4791 if (ix86_tune == PROCESSOR_K6 && !optimize_size
4792 && base_reg && !index_reg && !disp
4794 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
4797 /* Special case: encode reg+reg instead of reg*2. */
4798 if (!base && index && scale && scale == 2)
4799 base = index, base_reg = index_reg, scale = 1;
4801 /* Special case: scaling cannot be encoded without base or displacement. */
4802 if (!base && !disp && index && scale != 1)
4814 /* Return the cost of the memory address X.
4815 For i386, it is better to use a complex address than let gcc copy
4816 the address into a reg and make a new pseudo. But not if the address
4817 requires two regs - that would mean more pseudos with longer
4820 ix86_address_cost (rtx x)
4822 struct ix86_address parts;
4825 if (!ix86_decompose_address (x, &parts))
4828 if (parts.base && GET_CODE (parts.base) == SUBREG)
4829 parts.base = SUBREG_REG (parts.base);
4830 if (parts.index && GET_CODE (parts.index) == SUBREG)
4831 parts.index = SUBREG_REG (parts.index);
4833 /* More complex memory references are better. */
4834 if (parts.disp && parts.disp != const0_rtx)
4836 if (parts.seg != SEG_DEFAULT)
4839 /* Attempt to minimize number of registers in the address. */
4841 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4843 && (!REG_P (parts.index)
4844 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4848 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4850 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4851 && parts.base != parts.index)
4854 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
4855 since its predecode logic can't detect the length of such instructions
4856 and they degenerate to vector decoding. Increase the cost of such
4857 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4858 to split such addresses or even refuse them entirely.
4860 The following addressing modes are affected:
4865 The first and last cases may be avoidable by explicitly coding the zero into
4866 the memory address, but I don't have an AMD-K6 machine handy to check this
4870 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4871 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4872 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4878 /* If X is a machine specific address (i.e. a symbol or label being
4879 referenced as a displacement from the GOT implemented using an
4880 UNSPEC), then return the base term. Otherwise return X. */
4883 ix86_find_base_term (rtx x)
4889 if (GET_CODE (x) != CONST)
4892 if (GET_CODE (term) == PLUS
4893 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4894 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4895 term = XEXP (term, 0);
4896 if (GET_CODE (term) != UNSPEC
4897 || XINT (term, 1) != UNSPEC_GOTPCREL)
4900 term = XVECEXP (term, 0, 0);
4902 if (GET_CODE (term) != SYMBOL_REF
4903 && GET_CODE (term) != LABEL_REF)
4909 term = ix86_delegitimize_address (x);
4911 if (GET_CODE (term) != SYMBOL_REF
4912 && GET_CODE (term) != LABEL_REF)
4918 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
4919 this is used to form addresses of local data when -fPIC is in
4923 darwin_local_data_pic (rtx disp)
4925 if (GET_CODE (disp) == MINUS)
4927 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
4928 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
4929 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
4931 const char *sym_name = XSTR (XEXP (disp, 1), 0);
4932 if (! strcmp (sym_name, "<pic base>"))
4940 /* Determine if a given RTX is a valid constant. We already know this
4941 satisfies CONSTANT_P. */
4944 legitimate_constant_p (rtx x)
4946 switch (GET_CODE (x))
4951 if (GET_CODE (x) == PLUS)
4953 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
4958 if (TARGET_MACHO && darwin_local_data_pic (x))
4961 /* Only some unspecs are valid as "constants". */
4962 if (GET_CODE (x) == UNSPEC)
4963 switch (XINT (x, 1))
4967 return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
4969 return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
4974 /* We must have drilled down to a symbol. */
4975 if (!symbolic_operand (x, Pmode))
4980 /* TLS symbols are never valid. */
4981 if (tls_symbolic_operand (x, Pmode))
4989 /* Otherwise we handle everything else in the move patterns. */
4993 /* Determine if it's legal to put X into the constant pool. This
4994 is not possible for the address of thread-local symbols, which
4995 is checked above. */
4998 ix86_cannot_force_const_mem (rtx x)
5000 return !legitimate_constant_p (x);
5003 /* Determine if a given RTX is a valid constant address. */
5006 constant_address_p (rtx x)
5008 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5011 /* Nonzero if the constant value X is a legitimate general operand
5012 when generating PIC code. It is given that flag_pic is on and
5013 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5016 legitimate_pic_operand_p (rtx x)
5020 switch (GET_CODE (x))
5023 inner = XEXP (x, 0);
5025 /* Only some unspecs are valid as "constants". */
5026 if (GET_CODE (inner) == UNSPEC)
5027 switch (XINT (inner, 1))
5030 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5038 return legitimate_pic_address_disp_p (x);
5045 /* Determine if a given CONST RTX is a valid memory displacement
5049 legitimate_pic_address_disp_p (rtx disp)
5053 /* In 64bit mode we can allow direct addresses of symbols and labels
5054 when they are not dynamic symbols. */
5057 /* TLS references should always be enclosed in UNSPEC. */
5058 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5060 if (GET_CODE (disp) == SYMBOL_REF
5061 && ix86_cmodel == CM_SMALL_PIC
5062 && SYMBOL_REF_LOCAL_P (disp))
5064 if (GET_CODE (disp) == LABEL_REF)
5066 if (GET_CODE (disp) == CONST
5067 && GET_CODE (XEXP (disp, 0)) == PLUS)
5069 rtx op0 = XEXP (XEXP (disp, 0), 0);
5070 rtx op1 = XEXP (XEXP (disp, 0), 1);
5072 /* TLS references should always be enclosed in UNSPEC. */
5073 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5075 if (((GET_CODE (op0) == SYMBOL_REF
5076 && ix86_cmodel == CM_SMALL_PIC
5077 && SYMBOL_REF_LOCAL_P (op0))
5078 || GET_CODE (op0) == LABEL_REF)
5079 && GET_CODE (op1) == CONST_INT
5080 && INTVAL (op1) < 16*1024*1024
5081 && INTVAL (op1) >= -16*1024*1024)
5085 if (GET_CODE (disp) != CONST)
5087 disp = XEXP (disp, 0);
5091 /* It is unsafe to allow PLUS expressions; this limits the allowed distance
5092 of GOT table references. We should not need these anyway. */
5093 if (GET_CODE (disp) != UNSPEC
5094 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5097 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5098 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5104 if (GET_CODE (disp) == PLUS)
5106 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5108 disp = XEXP (disp, 0);
5112 if (TARGET_MACHO && darwin_local_data_pic (disp))
5115 if (GET_CODE (disp) != UNSPEC)
5118 switch (XINT (disp, 1))
5123 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5125 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5126 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5127 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5129 case UNSPEC_GOTTPOFF:
5130 case UNSPEC_GOTNTPOFF:
5131 case UNSPEC_INDNTPOFF:
5134 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5136 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5138 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5144 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5145 memory address for an instruction. The MODE argument is the machine mode
5146 for the MEM expression that wants to use this address.
5148 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5149 convert common non-canonical forms to canonical form so that they will
5153 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5155 struct ix86_address parts;
5156 rtx base, index, disp;
5157 HOST_WIDE_INT scale;
5158 const char *reason = NULL;
5159 rtx reason_rtx = NULL_RTX;
5161 if (TARGET_DEBUG_ADDR)
5164 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5165 GET_MODE_NAME (mode), strict);
5169 if (ix86_decompose_address (addr, &parts) <= 0)
5171 reason = "decomposition failed";
5176 index = parts.index;
5178 scale = parts.scale;
5180 /* Validate base register.
5182 Don't allow SUBREG's that span more than a word here. It can lead to spill
5183 failures when the base is one word out of a two word structure, which is
5184 represented internally as a DImode int. */
5193 else if (GET_CODE (base) == SUBREG
5194 && REG_P (SUBREG_REG (base))
5195 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
5197 reg = SUBREG_REG (base);
5200 reason = "base is not a register";
5204 if (GET_MODE (base) != Pmode)
5206 reason = "base is not in Pmode";
5210 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5211 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5213 reason = "base is not valid";
5218 /* Validate index register.
5220 Don't allow SUBREG's that span more than a word here -- same as above. */
5229 else if (GET_CODE (index) == SUBREG
5230 && REG_P (SUBREG_REG (index))
5231 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
5233 reg = SUBREG_REG (index);
5236 reason = "index is not a register";
5240 if (GET_MODE (index) != Pmode)
5242 reason = "index is not in Pmode";
5246 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5247 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5249 reason = "index is not valid";
5254 /* Validate scale factor. */
5257 reason_rtx = GEN_INT (scale);
5260 reason = "scale without index";
5264 if (scale != 2 && scale != 4 && scale != 8)
5266 reason = "scale is not a valid multiplier";
5271 /* Validate displacement. */
5276 if (GET_CODE (disp) == CONST
5277 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5278 switch (XINT (XEXP (disp, 0), 1))
5282 case UNSPEC_GOTPCREL:
5285 goto is_legitimate_pic;
5287 case UNSPEC_GOTTPOFF:
5288 case UNSPEC_GOTNTPOFF:
5289 case UNSPEC_INDNTPOFF:
5295 reason = "invalid address unspec";
5299 else if (flag_pic && (SYMBOLIC_CONST (disp)
5301 && !machopic_operand_p (disp)
5306 if (TARGET_64BIT && (index || base))
5308 /* foo@dtpoff(%rX) is ok. */
5309 if (GET_CODE (disp) != CONST
5310 || GET_CODE (XEXP (disp, 0)) != PLUS
5311 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5312 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5313 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5314 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5316 reason = "non-constant pic memory reference";
5320 else if (! legitimate_pic_address_disp_p (disp))
5322 reason = "displacement is an invalid pic construct";
5326 /* This code used to verify that a symbolic pic displacement
5327 includes the pic_offset_table_rtx register.
5329 While this is a good idea, unfortunately these constructs may
5330 be created by the "adds using lea" optimization for incorrect
5339 This code is nonsensical, but results in addressing the
5340 GOT table with a pic_offset_table_rtx base. We can't
5341 just refuse it easily, since it gets matched by the
5342 "addsi3" pattern, which later gets split to lea in the
5343 case the output register differs from the input. While this
5344 could be handled by a separate addsi pattern for this case
5345 that never results in lea, disabling this test seems to be
5346 the easier and correct fix for the crash. */
5348 else if (GET_CODE (disp) != LABEL_REF
5349 && GET_CODE (disp) != CONST_INT
5350 && (GET_CODE (disp) != CONST
5351 || !legitimate_constant_p (disp))
5352 && (GET_CODE (disp) != SYMBOL_REF
5353 || !legitimate_constant_p (disp)))
5355 reason = "displacement is not constant";
5358 else if (TARGET_64BIT
5359 && !x86_64_immediate_operand (disp, VOIDmode))
5361 reason = "displacement is out of range";
5366 /* Everything looks valid. */
5367 if (TARGET_DEBUG_ADDR)
5368 fprintf (stderr, "Success.\n");
5372 if (TARGET_DEBUG_ADDR)
5374 fprintf (stderr, "Error: %s\n", reason);
5375 debug_rtx (reason_rtx);
5380 /* Return a unique alias set for the GOT. */
5382 static HOST_WIDE_INT
5383 ix86_GOT_alias_set (void)
5385 static HOST_WIDE_INT set = -1;
5387 set = new_alias_set ();
5391 /* Return a legitimate reference for ORIG (an address) using the
5392 register REG. If REG is 0, a new pseudo is generated.
5394 There are two types of references that must be handled:
5396 1. Global data references must load the address from the GOT, via
5397 the PIC reg. An insn is emitted to do this load, and the reg is
5400 2. Static data references, constant pool addresses, and code labels
5401 compute the address as an offset from the GOT, whose base is in
5402 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5403 differentiate them from global data objects. The returned
5404 address is the PIC reg + an unspec constant.
5406 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5407 reg also appears in the address. */
5410 legitimize_pic_address (rtx orig, rtx reg)
5418 reg = gen_reg_rtx (Pmode);
5419 /* Use the generic Mach-O PIC machinery. */
5420 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5423 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5425 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5427 /* This symbol may be referenced via a displacement from the PIC
5428 base address (@GOTOFF). */
5430 if (reload_in_progress)
5431 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5432 if (GET_CODE (addr) == CONST)
5433 addr = XEXP (addr, 0);
5434 if (GET_CODE (addr) == PLUS)
5436 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
5437 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
5440 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5441 new = gen_rtx_CONST (Pmode, new);
5442 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5446 emit_move_insn (reg, new);
5450 else if (GET_CODE (addr) == SYMBOL_REF)
5454 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5455 new = gen_rtx_CONST (Pmode, new);
5456 new = gen_const_mem (Pmode, new);
5457 set_mem_alias_set (new, ix86_GOT_alias_set ());
5460 reg = gen_reg_rtx (Pmode);
5461 /* Use gen_movsi directly, otherwise the address is loaded
5462 into a register for CSE. We don't want to CSE this address;
5463 instead we CSE addresses from the GOT table, so skip this. */
5464 emit_insn (gen_movsi (reg, new));
5469 /* This symbol must be referenced via a load from the
5470 Global Offset Table (@GOT). */
5472 if (reload_in_progress)
5473 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5474 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5475 new = gen_rtx_CONST (Pmode, new);
5476 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5477 new = gen_const_mem (Pmode, new);
5478 set_mem_alias_set (new, ix86_GOT_alias_set ());
5481 reg = gen_reg_rtx (Pmode);
5482 emit_move_insn (reg, new);
5488 if (GET_CODE (addr) == CONST)
5490 addr = XEXP (addr, 0);
5492 /* We must match stuff we generate before. Assume the only
5493 unspecs that can get here are ours. Not that we could do
5494 anything with them anyway.... */
5495 if (GET_CODE (addr) == UNSPEC
5496 || (GET_CODE (addr) == PLUS
5497 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5499 if (GET_CODE (addr) != PLUS)
5502 if (GET_CODE (addr) == PLUS)
5504 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5506 /* Check first to see if this is a constant offset from a @GOTOFF
5507 symbol reference. */
5508 if (local_symbolic_operand (op0, Pmode)
5509 && GET_CODE (op1) == CONST_INT)
5513 if (reload_in_progress)
5514 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5515 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5517 new = gen_rtx_PLUS (Pmode, new, op1);
5518 new = gen_rtx_CONST (Pmode, new);
5519 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5523 emit_move_insn (reg, new);
5529 if (INTVAL (op1) < -16*1024*1024
5530 || INTVAL (op1) >= 16*1024*1024)
5531 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
5536 base = legitimize_pic_address (XEXP (addr, 0), reg);
5537 new = legitimize_pic_address (XEXP (addr, 1),
5538 base == reg ? NULL_RTX : reg);
5540 if (GET_CODE (new) == CONST_INT)
5541 new = plus_constant (base, INTVAL (new));
5544 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5546 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5547 new = XEXP (new, 1);
5549 new = gen_rtx_PLUS (Pmode, base, new);
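/* Illustrative summary: for ia32 -fpic, a local symbol `sym'
   typically legitimizes to

       (plus %ebx (const (unspec [sym] UNSPEC_GOTOFF)))

   while a global symbol becomes a load from

       (mem (plus %ebx (const (unspec [sym] UNSPEC_GOT))))

   matching the branches above.  */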
5557 /* Load the thread pointer. If TO_REG is true, force it into a register. */
5560 get_thread_pointer (int to_reg)
5564 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5568 reg = gen_reg_rtx (Pmode);
5569 insn = gen_rtx_SET (VOIDmode, reg, tp);
5570 insn = emit_insn (insn);
5575 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
5576 false if we expect this to be used for a memory address and true if
5577 we expect to load the address into a register. */
5580 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
5582 rtx dest, base, off, pic;
5587 case TLS_MODEL_GLOBAL_DYNAMIC:
5588 dest = gen_reg_rtx (Pmode);
5591 rtx rax = gen_rtx_REG (Pmode, 0), insns;
5594 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5595 insns = get_insns ();
5598 emit_libcall_block (insns, dest, rax, x);
5601 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5604 case TLS_MODEL_LOCAL_DYNAMIC:
5605 base = gen_reg_rtx (Pmode);
5608 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5611 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5612 insns = get_insns ();
5615 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5616 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5617 emit_libcall_block (insns, base, rax, note);
5620 emit_insn (gen_tls_local_dynamic_base_32 (base));
5622 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5623 off = gen_rtx_CONST (Pmode, off);
5625 return gen_rtx_PLUS (Pmode, base, off);
5627 case TLS_MODEL_INITIAL_EXEC:
5631 type = UNSPEC_GOTNTPOFF;
5635 if (reload_in_progress)
5636 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5637 pic = pic_offset_table_rtx;
5638 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
5640 else if (!TARGET_GNU_TLS)
5642 pic = gen_reg_rtx (Pmode);
5643 emit_insn (gen_set_got (pic));
5644 type = UNSPEC_GOTTPOFF;
5649 type = UNSPEC_INDNTPOFF;
5652 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
5653 off = gen_rtx_CONST (Pmode, off);
5655 off = gen_rtx_PLUS (Pmode, pic, off);
5656 off = gen_const_mem (Pmode, off);
5657 set_mem_alias_set (off, ix86_GOT_alias_set ());
5659 if (TARGET_64BIT || TARGET_GNU_TLS)
5661 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5662 off = force_reg (Pmode, off);
5663 return gen_rtx_PLUS (Pmode, base, off);
5667 base = get_thread_pointer (true);
5668 dest = gen_reg_rtx (Pmode);
5669 emit_insn (gen_subsi3 (dest, base, off));
5673 case TLS_MODEL_LOCAL_EXEC:
5674 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5675 (TARGET_64BIT || TARGET_GNU_TLS)
5676 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5677 off = gen_rtx_CONST (Pmode, off);
5679 if (TARGET_64BIT || TARGET_GNU_TLS)
5681 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5682 return gen_rtx_PLUS (Pmode, base, off);
5686 base = get_thread_pointer (true);
5687 dest = gen_reg_rtx (Pmode);
5688 emit_insn (gen_subsi3 (dest, base, off));
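/* Illustrative summary (an assumption about typical output): with
   TARGET_GNU_TLS the local-exec result above is `thread pointer +
   sym@NTPOFF', which TARGET_TLS_DIRECT_SEG_REFS can address as
   %gs:sym@NTPOFF on ia32; the non-GNU path instead subtracts
   sym@TPOFF from the thread pointer.  */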
5699 /* Try machine-dependent ways of modifying an illegitimate address
5700 to be legitimate. If we find one, return the new, valid address.
5701 This macro is used in only one place: `memory_address' in explow.c.
5703 OLDX is the address as it was before break_out_memory_refs was called.
5704 In some cases it is useful to look at this to decide what needs to be done.
5706 MODE and WIN are passed so that this macro can use
5707 GO_IF_LEGITIMATE_ADDRESS.
5709 It is always safe for this macro to do nothing. It exists to recognize
5710 opportunities to optimize the output.
5712 For the 80386, we handle X+REG by loading X into a register R and
5713 using R+REG. R will go in a general reg and indexing will be used.
5714 However, if REG is a broken-out memory address or multiplication,
5715 nothing needs to be done because REG can certainly go in a general reg.
5717 When -fpic is used, special handling is needed for symbolic references.
5718 See comments by legitimize_pic_address in i386.c for details. */
5721 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
5726 if (TARGET_DEBUG_ADDR)
5728 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5729 GET_MODE_NAME (mode));
5733 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
5735 return legitimize_tls_address (x, log, false);
5736 if (GET_CODE (x) == CONST
5737 && GET_CODE (XEXP (x, 0)) == PLUS
5738 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
5739 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
5741 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
5742 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
5745 if (flag_pic && SYMBOLIC_CONST (x))
5746 return legitimize_pic_address (x, 0);
5748 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5749 if (GET_CODE (x) == ASHIFT
5750 && GET_CODE (XEXP (x, 1)) == CONST_INT
5751 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5754 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5755 GEN_INT (1 << log));
5758 if (GET_CODE (x) == PLUS)
5760 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5762 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5763 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5764 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5767 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5768 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5769 GEN_INT (1 << log));
5772 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5773 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5774 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5777 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5778 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5779 GEN_INT (1 << log));
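/* Worked example (illustrative): (plus (reg) (ashift (reg) (const_int 3)))
   is rewritten here as (plus (reg) (mult (reg) (const_int 8))), the
   scaled-index form that ix86_decompose_address expects.  */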
5782 /* Put multiply first if it isn't already. */
5783 if (GET_CODE (XEXP (x, 1)) == MULT)
5785 rtx tmp = XEXP (x, 0);
5786 XEXP (x, 0) = XEXP (x, 1);
5791 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5792 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5793 created by virtual register instantiation, register elimination, and
5794 similar optimizations. */
5795 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5798 x = gen_rtx_PLUS (Pmode,
5799 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5800 XEXP (XEXP (x, 1), 0)),
5801 XEXP (XEXP (x, 1), 1));
5805 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5806 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5807 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5808 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5809 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5810 && CONSTANT_P (XEXP (x, 1)))
5813 rtx other = NULL_RTX;
5815 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5817 constant = XEXP (x, 1);
5818 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5820 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5822 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5823 other = XEXP (x, 1);
5831 x = gen_rtx_PLUS (Pmode,
5832 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5833 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5834 plus_constant (other, INTVAL (constant)));
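/* Worked example (illustrative): (plus (plus (mult %eax 4) (plus %ebx 4)) 8)
   reassociates to (plus (plus (mult %eax 4) %ebx) 12), gathering the
   constants where the address encoding wants them.  */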
5838 if (changed && legitimate_address_p (mode, x, FALSE))
5841 if (GET_CODE (XEXP (x, 0)) == MULT)
5844 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5847 if (GET_CODE (XEXP (x, 1)) == MULT)
5850 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5854 && GET_CODE (XEXP (x, 1)) == REG
5855 && GET_CODE (XEXP (x, 0)) == REG)
5858 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5861 x = legitimize_pic_address (x, 0);
5864 if (changed && legitimate_address_p (mode, x, FALSE))
5867 if (GET_CODE (XEXP (x, 0)) == REG)
5869 rtx temp = gen_reg_rtx (Pmode);
5870 rtx val = force_operand (XEXP (x, 1), temp);
5872 emit_move_insn (temp, val);
5878 else if (GET_CODE (XEXP (x, 1)) == REG)
5880 rtx temp = gen_reg_rtx (Pmode);
5881 rtx val = force_operand (XEXP (x, 0), temp);
5883 emit_move_insn (temp, val);
5893 /* Print an integer constant expression in assembler syntax. Addition
5894 and subtraction are the only arithmetic that may appear in these
5895 expressions. FILE is the stdio stream to write to, X is the rtx, and
5896 CODE is the operand print code from the output string. */
5899 output_pic_addr_const (FILE *file, rtx x, int code)
5903 switch (GET_CODE (x))
5913 assemble_name (file, XSTR (x, 0));
5914 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
5915 fputs ("@PLT", file);
5922 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5923 assemble_name (asm_out_file, buf);
5927 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5931 /* This used to output parentheses around the expression,
5932 but that does not work on the 386 (either ATT or BSD assembler). */
5933 output_pic_addr_const (file, XEXP (x, 0), code);
5937 if (GET_MODE (x) == VOIDmode)
5939 /* We can use %d if the number is <32 bits and positive. */
5940 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5941 fprintf (file, "0x%lx%08lx",
5942 (unsigned long) CONST_DOUBLE_HIGH (x),
5943 (unsigned long) CONST_DOUBLE_LOW (x));
5945 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5948 /* We can't handle floating point constants;
5949 PRINT_OPERAND must handle them. */
5950 output_operand_lossage ("floating constant misused");
5954 /* Some assemblers need integer constants to appear first. */
5955 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5957 output_pic_addr_const (file, XEXP (x, 0), code);
5959 output_pic_addr_const (file, XEXP (x, 1), code);
5961 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5963 output_pic_addr_const (file, XEXP (x, 1), code);
5965 output_pic_addr_const (file, XEXP (x, 0), code);
5973 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5974 output_pic_addr_const (file, XEXP (x, 0), code);
5976 output_pic_addr_const (file, XEXP (x, 1), code);
5978 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5982 if (XVECLEN (x, 0) != 1)
5984 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5985 switch (XINT (x, 1))
5988 fputs ("@GOT", file);
5991 fputs ("@GOTOFF", file);
5993 case UNSPEC_GOTPCREL:
5994 fputs ("@GOTPCREL(%rip)", file);
5996 case UNSPEC_GOTTPOFF:
5997 /* FIXME: This might be @TPOFF in Sun ld too. */
5998 fputs ("@GOTTPOFF", file);
6001 fputs ("@TPOFF", file);
6005 fputs ("@TPOFF", file);
6007 fputs ("@NTPOFF", file);
6010 fputs ("@DTPOFF", file);
6012 case UNSPEC_GOTNTPOFF:
6014 fputs ("@GOTTPOFF(%rip)", file);
6016 fputs ("@GOTNTPOFF", file);
6018 case UNSPEC_INDNTPOFF:
6019 fputs ("@INDNTPOFF", file);
6022 output_operand_lossage ("invalid UNSPEC as operand");
6028 output_operand_lossage ("invalid expression as operand");
6032 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6033 We need to emit DTP-relative relocations. */
6036 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6038 fputs (ASM_LONG, file);
6039 output_addr_const (file, x);
6040 fputs ("@DTPOFF", file);
6046 fputs (", 0", file);
6053 /* In the name of slightly smaller debug output, and to cater to
6054 general assembler lossage, recognize PIC+GOTOFF and turn it back
6055 into a direct symbol reference. */
6058 ix86_delegitimize_address (rtx orig_x)
6062 if (GET_CODE (x) == MEM)
6067 if (GET_CODE (x) != CONST
6068 || GET_CODE (XEXP (x, 0)) != UNSPEC
6069 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6070 || GET_CODE (orig_x) != MEM)
6072 return XVECEXP (XEXP (x, 0), 0, 0);
6075 if (GET_CODE (x) != PLUS
6076 || GET_CODE (XEXP (x, 1)) != CONST)
6079 if (GET_CODE (XEXP (x, 0)) == REG
6080 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6081 /* %ebx + GOT/GOTOFF */
6083 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6085 /* %ebx + %reg * scale + GOT/GOTOFF */
6087 if (GET_CODE (XEXP (y, 0)) == REG
6088 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6090 else if (GET_CODE (XEXP (y, 1)) == REG
6091 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6095 if (GET_CODE (y) != REG
6096 && GET_CODE (y) != MULT
6097 && GET_CODE (y) != ASHIFT)
6103 x = XEXP (XEXP (x, 1), 0);
6104 if (GET_CODE (x) == UNSPEC
6105 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6106 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6109 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6110 return XVECEXP (x, 0, 0);
6113 if (GET_CODE (x) == PLUS
6114 && GET_CODE (XEXP (x, 0)) == UNSPEC
6115 && GET_CODE (XEXP (x, 1)) == CONST_INT
6116 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6117 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6118 && GET_CODE (orig_x) != MEM)))
6120 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6122 return gen_rtx_PLUS (Pmode, y, x);
6130 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6135 if (mode == CCFPmode || mode == CCFPUmode)
6137 enum rtx_code second_code, bypass_code;
6138 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6139 if (bypass_code != UNKNOWN || second_code != UNKNOWN)
6141 code = ix86_fp_compare_code_to_integer (code);
6145 code = reverse_condition (code);
6156 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6161 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6162 Those same assemblers have the same but opposite lossage on cmov. */
6165 suffix = fp ? "nbe" : "a";
6168 if (mode == CCNOmode || mode == CCGOCmode)
6170 else if (mode == CCmode || mode == CCGCmode)
6181 if (mode == CCNOmode || mode == CCGOCmode)
6183 else if (mode == CCmode || mode == CCGCmode)
6192 suffix = fp ? "nb" : "ae";
6195 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6205 suffix = fp ? "u" : "p";
6208 suffix = fp ? "nu" : "np";
6213 fputs (suffix, file);
6216 /* Print the name of register X to FILE based on its machine mode and number.
6217 If CODE is 'w', pretend the mode is HImode.
6218 If CODE is 'b', pretend the mode is QImode.
6219 If CODE is 'k', pretend the mode is SImode.
6220 If CODE is 'q', pretend the mode is DImode.
6221 If CODE is 'h', pretend the reg is the `high' byte register.
6222 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op. */
6225 print_reg (rtx x, int code, FILE *file)
6227 if (REGNO (x) == ARG_POINTER_REGNUM
6228 || REGNO (x) == FRAME_POINTER_REGNUM
6229 || REGNO (x) == FLAGS_REG
6230 || REGNO (x) == FPSR_REG)
6233 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6236 if (code == 'w' || MMX_REG_P (x))
6238 else if (code == 'b')
6240 else if (code == 'k')
6242 else if (code == 'q')
6244 else if (code == 'y')
6246 else if (code == 'h')
6249 code = GET_MODE_SIZE (GET_MODE (x));
6251 /* Irritatingly, AMD extended registers use a different naming convention
6252 from the normal registers. */
6253 if (REX_INT_REG_P (x))
6260 error ("extended registers have no high halves");
6263 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6266 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6269 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6272 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6275 error ("unsupported operand size for extended register");
6283 if (STACK_TOP_P (x))
6285 fputs ("st(0)", file);
6292 if (! ANY_FP_REG_P (x))
6293 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6298 fputs (hi_reg_name[REGNO (x)], file);
6301 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
6303 fputs (qi_reg_name[REGNO (x)], file);
6306 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
6308 fputs (qi_high_reg_name[REGNO (x)], file);
6315 /* Locate some local-dynamic symbol still in use by this function
6316 so that we can print its name in some tls_local_dynamic_base
6320 get_some_local_dynamic_name (void)
6324 if (cfun->machine->some_ld_name)
6325 return cfun->machine->some_ld_name;
6327 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6329 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6330 return cfun->machine->some_ld_name;
6336 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
6340 if (GET_CODE (x) == SYMBOL_REF
6341 && local_dynamic_symbolic_operand (x, Pmode))
6343 cfun->machine->some_ld_name = XSTR (x, 0);
6351 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6352 C -- print opcode suffix for set/cmov insn.
6353 c -- like C, but print reversed condition
6354 F,f -- likewise, but for floating-point.
6355 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6357 R -- print the prefix for register names.
6358 z -- print the opcode suffix for the size of the current operand.
6359 * -- print a star (in certain assembler syntax)
6360 A -- print an absolute memory reference.
6361 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6362 s -- print a shift double count, followed by the assembler's argument
6364 b -- print the QImode name of the register for the indicated operand.
6365 %b0 would print %al if operands[0] is reg 0.
6366 w -- likewise, print the HImode name of the register.
6367 k -- likewise, print the SImode name of the register.
6368 q -- likewise, print the DImode name of the register.
6369 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6370 y -- print "st(0)" instead of "st" as a register.
6371 D -- print condition for SSE cmp instruction.
6372 P -- if PIC, print an @PLT suffix.
6373 X -- don't print any sort of PIC '@' suffix for a symbol.
6374 & -- print some in-use local-dynamic symbol name.
6375 H -- print a memory address offset by 8; used for sse high-parts
6379 print_operand (FILE *file, rtx x, int code)
6386 if (ASSEMBLER_DIALECT == ASM_ATT)
6391 assemble_name (file, get_some_local_dynamic_name ());
6395 if (ASSEMBLER_DIALECT == ASM_ATT)
6397 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6399 /* Intel syntax. For absolute addresses, registers should not
6400 be surrounded by braces. */
6401 if (GET_CODE (x) != REG)
6404 PRINT_OPERAND (file, x, 0);
6412 PRINT_OPERAND (file, x, 0);
6417 if (ASSEMBLER_DIALECT == ASM_ATT)
6422 if (ASSEMBLER_DIALECT == ASM_ATT)
6427 if (ASSEMBLER_DIALECT == ASM_ATT)
6432 if (ASSEMBLER_DIALECT == ASM_ATT)
6437 if (ASSEMBLER_DIALECT == ASM_ATT)
6442 if (ASSEMBLER_DIALECT == ASM_ATT)
/* 387 opcodes don't get size suffixes if the operands are
registers.  */
6449 if (STACK_REG_P (x))
6452 /* Likewise if using Intel opcodes. */
6453 if (ASSEMBLER_DIALECT == ASM_INTEL)
/* This is the size of the op, derived from the size of the operand.  */
6457 switch (GET_MODE_SIZE (GET_MODE (x)))
6460 #ifdef HAVE_GAS_FILDS_FISTS
6466 if (GET_MODE (x) == SFmode)
6481 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6483 #ifdef GAS_MNEMONICS
6509 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6511 PRINT_OPERAND (file, x, 0);
/* Little bit of braindamage here.  The SSE compare instructions
use completely different names for the comparisons than the
fp conditional moves do.  */
6520 switch (GET_CODE (x))
6535 fputs ("unord", file);
6539 fputs ("neq", file);
6543 fputs ("nlt", file);
6547 fputs ("nle", file);
6550 fputs ("ord", file);
6558 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6559 if (ASSEMBLER_DIALECT == ASM_ATT)
6561 switch (GET_MODE (x))
6563 case HImode: putc ('w', file); break;
6565 case SFmode: putc ('l', file); break;
6567 case DFmode: putc ('q', file); break;
6575 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6578 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6579 if (ASSEMBLER_DIALECT == ASM_ATT)
6582 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6585 /* Like above, but reverse condition */
6587 /* Check to see if argument to %c is really a constant
6588 and not a condition code which needs to be reversed. */
6589 if (!COMPARISON_P (x))
6591 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6594 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6597 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6598 if (ASSEMBLER_DIALECT == ASM_ATT)
6601 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6605 /* It doesn't actually matter what mode we use here, as we're
6606 only going to use this for printing. */
6607 x = adjust_address_nv (x, DImode, 8);
6614 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6617 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6620 int pred_val = INTVAL (XEXP (x, 0));
6622 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6623 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6625 int taken = pred_val > REG_BR_PROB_BASE / 2;
6626 int cputaken = final_forward_branch_p (current_output_insn) == 0;
/* Emit hints only in the case where the default branch prediction
heuristics would fail.  */
6630 if (taken != cputaken)
6632 /* We use 3e (DS) prefix for taken branches and
6633 2e (CS) prefix for not taken branches. */
6635 fputs ("ds ; ", file);
6637 fputs ("cs ; ", file);
6644 output_operand_lossage ("invalid operand code '%c'", code);
6648 if (GET_CODE (x) == REG)
6649 print_reg (x, code, file);
6651 else if (GET_CODE (x) == MEM)
6653 /* No `byte ptr' prefix for call instructions. */
6654 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6657 switch (GET_MODE_SIZE (GET_MODE (x)))
6659 case 1: size = "BYTE"; break;
6660 case 2: size = "WORD"; break;
6661 case 4: size = "DWORD"; break;
6662 case 8: size = "QWORD"; break;
6663 case 12: size = "XWORD"; break;
6664 case 16: size = "XMMWORD"; break;
6669 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6672 else if (code == 'w')
6674 else if (code == 'k')
6678 fputs (" PTR ", file);
6682 /* Avoid (%rip) for call operands. */
6683 if (CONSTANT_ADDRESS_P (x) && code == 'P'
6684 && GET_CODE (x) != CONST_INT)
6685 output_addr_const (file, x);
6686 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6687 output_operand_lossage ("invalid constraints for operand");
6692 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6697 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6698 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6700 if (ASSEMBLER_DIALECT == ASM_ATT)
6702 fprintf (file, "0x%08lx", l);
6705 /* These float cases don't actually occur as immediate operands. */
6706 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6710 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6711 fprintf (file, "%s", dstr);
6714 else if (GET_CODE (x) == CONST_DOUBLE
6715 && GET_MODE (x) == XFmode)
6719 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6720 fprintf (file, "%s", dstr);
6727 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6729 if (ASSEMBLER_DIALECT == ASM_ATT)
6732 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6733 || GET_CODE (x) == LABEL_REF)
6735 if (ASSEMBLER_DIALECT == ASM_ATT)
6738 fputs ("OFFSET FLAT:", file);
6741 if (GET_CODE (x) == CONST_INT)
6742 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6744 output_pic_addr_const (file, x, code);
6746 output_addr_const (file, x);
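/* Illustrative sketch (an assumed template, not taken from the machine
description): in an output template such as
"mov{b}\t{%b1, %b0|%b0, %b1}", the 'b' operand codes force the QImode
register names, so an operand held in register 0 is printed as "%al"
in AT&T syntax rather than "%eax".  */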
6750 /* Print a memory operand whose address is ADDR. */
6753 print_operand_address (FILE *file, rtx addr)
6755 struct ix86_address parts;
6756 rtx base, index, disp;
6759 if (! ix86_decompose_address (addr, &parts))
6763 index = parts.index;
6765 scale = parts.scale;
6773 if (USER_LABEL_PREFIX[0] == 0)
6775 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
6781 if (!base && !index)
/* A displacement-only address requires special attention.  */
6785 if (GET_CODE (disp) == CONST_INT)
6787 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
6789 if (USER_LABEL_PREFIX[0] == 0)
6791 fputs ("ds:", file);
6793 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
6796 output_pic_addr_const (file, disp, 0);
6798 output_addr_const (file, disp);
/* Use the one-byte-shorter RIP-relative addressing for 64-bit mode.  */
6802 && ((GET_CODE (disp) == SYMBOL_REF
6803 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
6804 || GET_CODE (disp) == LABEL_REF
6805 || (GET_CODE (disp) == CONST
6806 && GET_CODE (XEXP (disp, 0)) == PLUS
6807 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
6808 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
6809 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
6810 fputs ("(%rip)", file);
6814 if (ASSEMBLER_DIALECT == ASM_ATT)
6819 output_pic_addr_const (file, disp, 0);
6820 else if (GET_CODE (disp) == LABEL_REF)
6821 output_asm_label (disp);
6823 output_addr_const (file, disp);
6828 print_reg (base, 0, file);
6832 print_reg (index, 0, file);
6834 fprintf (file, ",%d", scale);
6840 rtx offset = NULL_RTX;
6844 /* Pull out the offset of a symbol; print any symbol itself. */
6845 if (GET_CODE (disp) == CONST
6846 && GET_CODE (XEXP (disp, 0)) == PLUS
6847 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6849 offset = XEXP (XEXP (disp, 0), 1);
6850 disp = gen_rtx_CONST (VOIDmode,
6851 XEXP (XEXP (disp, 0), 0));
6855 output_pic_addr_const (file, disp, 0);
6856 else if (GET_CODE (disp) == LABEL_REF)
6857 output_asm_label (disp);
6858 else if (GET_CODE (disp) == CONST_INT)
6861 output_addr_const (file, disp);
6867 print_reg (base, 0, file);
6870 if (INTVAL (offset) >= 0)
6872 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6876 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6883 print_reg (index, 0, file);
6885 fprintf (file, "*%d", scale);
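/* Illustrative example of the two dialects handled above (the particular
registers are assumptions): for an address with base %ebx, index %esi,
scale 4 and displacement 12, AT&T syntax prints "12(%ebx,%esi,4)" while
Intel syntax prints "[ebx+esi*4+12]".  */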
6893 output_addr_const_extra (FILE *file, rtx x)
6897 if (GET_CODE (x) != UNSPEC)
6900 op = XVECEXP (x, 0, 0);
6901 switch (XINT (x, 1))
6903 case UNSPEC_GOTTPOFF:
6904 output_addr_const (file, op);
6905 /* FIXME: This might be @TPOFF in Sun ld. */
6906 fputs ("@GOTTPOFF", file);
6909 output_addr_const (file, op);
6910 fputs ("@TPOFF", file);
6913 output_addr_const (file, op);
6915 fputs ("@TPOFF", file);
6917 fputs ("@NTPOFF", file);
6920 output_addr_const (file, op);
6921 fputs ("@DTPOFF", file);
6923 case UNSPEC_GOTNTPOFF:
6924 output_addr_const (file, op);
6926 fputs ("@GOTTPOFF(%rip)", file);
6928 fputs ("@GOTNTPOFF", file);
6930 case UNSPEC_INDNTPOFF:
6931 output_addr_const (file, op);
6932 fputs ("@INDNTPOFF", file);
6942 /* Split one or more DImode RTL references into pairs of SImode
6943 references. The RTL can be REG, offsettable MEM, integer constant, or
6944 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6945 split and "num" is its length. lo_half and hi_half are output arrays
6946 that parallel "operands". */
6949 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
6953 rtx op = operands[num];
/* simplify_subreg refuses to split volatile memory addresses,
but we still have to handle them.  */
6957 if (GET_CODE (op) == MEM)
6959 lo_half[num] = adjust_address (op, SImode, 0);
6960 hi_half[num] = adjust_address (op, SImode, 4);
6964 lo_half[num] = simplify_gen_subreg (SImode, op,
6965 GET_MODE (op) == VOIDmode
6966 ? DImode : GET_MODE (op), 0);
6967 hi_half[num] = simplify_gen_subreg (SImode, op,
6968 GET_MODE (op) == VOIDmode
6969 ? DImode : GET_MODE (op), 4);
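/* A minimal usage sketch (assumed, not from a real splitter): split a
pair of DImode operands into SImode halves before emitting word-sized
moves.  The operands array is assumed to come from a splitter
context.  */
#if 0
{
  rtx lo[2], hi[2];
  split_di (operands, 2, lo, hi);   /* operands[0..1] are DImode.  */
  emit_move_insn (lo[0], lo[1]);    /* low 32 bits   */
  emit_move_insn (hi[0], hi[1]);    /* high 32 bits  */
}
#endif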
/* Split one or more TImode RTL references into pairs of DImode
references.  The RTL can be REG, offsettable MEM, integer constant, or
CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
split and "num" is its length.  lo_half and hi_half are output arrays
that parallel "operands".  */
6980 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
6984 rtx op = operands[num];
/* simplify_subreg refuses to split volatile memory addresses, but we
still have to handle them.  */
6988 if (GET_CODE (op) == MEM)
6990 lo_half[num] = adjust_address (op, DImode, 0);
6991 hi_half[num] = adjust_address (op, DImode, 8);
6995 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6996 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7001 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7002 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7003 is the expression of the binary operation. The output may either be
7004 emitted here, or returned to the caller, like all output_* functions.
7006 There is no guarantee that the operands are the same mode, as they
7007 might be within FLOAT or FLOAT_EXTEND expressions. */
7009 #ifndef SYSV386_COMPAT
7010 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7011 wants to fix the assemblers because that causes incompatibility
7012 with gcc. No-one wants to fix gcc because that causes
7013 incompatibility with assemblers... You can use the option of
7014 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7015 #define SYSV386_COMPAT 1
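/* Illustrative note (a sketch of the behavior, not authoritative): the
incompatibility only matters for the non-commutative fsub/fsubr and
fdiv/fdivr mnemonics with a destination other than st(0); SVR3.2-style
assemblers swap the meaning of the plain and 'r' forms there, so
output_387_binary_op below picks the template according to
SYSV386_COMPAT.  */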
7019 output_387_binary_op (rtx insn, rtx *operands)
7021 static char buf[30];
7024 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
7026 #ifdef ENABLE_CHECKING
/* Even if we do not want to check the inputs, this documents the input
constraints, which helps in understanding the following code.  */
7029 if (STACK_REG_P (operands[0])
7030 && ((REG_P (operands[1])
7031 && REGNO (operands[0]) == REGNO (operands[1])
7032 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7033 || (REG_P (operands[2])
7034 && REGNO (operands[0]) == REGNO (operands[2])
7035 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7036 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7042 switch (GET_CODE (operands[3]))
7045 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7046 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7054 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7055 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7063 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7064 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7072 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7073 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7087 if (GET_MODE (operands[0]) == SFmode)
7088 strcat (buf, "ss\t{%2, %0|%0, %2}");
7090 strcat (buf, "sd\t{%2, %0|%0, %2}");
7095 switch (GET_CODE (operands[3]))
7099 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7101 rtx temp = operands[2];
7102 operands[2] = operands[1];
/* We now know operands[0] == operands[1].  */
7108 if (GET_CODE (operands[2]) == MEM)
7114 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7116 if (STACK_TOP_P (operands[0]))
7117 /* How is it that we are storing to a dead operand[2]?
7118 Well, presumably operands[1] is dead too. We can't
7119 store the result to st(0) as st(0) gets popped on this
7120 instruction. Instead store to operands[2] (which I
7121 think has to be st(1)). st(1) will be popped later.
7122 gcc <= 2.8.1 didn't have this check and generated
7123 assembly code that the Unixware assembler rejected. */
7124 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7126 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7130 if (STACK_TOP_P (operands[0]))
7131 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7133 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7138 if (GET_CODE (operands[1]) == MEM)
7144 if (GET_CODE (operands[2]) == MEM)
7150 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7153 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7154 derived assemblers, confusingly reverse the direction of
7155 the operation for fsub{r} and fdiv{r} when the
7156 destination register is not st(0). The Intel assembler
7157 doesn't have this brain damage. Read !SYSV386_COMPAT to
7158 figure out what the hardware really does. */
7159 if (STACK_TOP_P (operands[0]))
7160 p = "{p\t%0, %2|rp\t%2, %0}";
7162 p = "{rp\t%2, %0|p\t%0, %2}";
7164 if (STACK_TOP_P (operands[0]))
7165 /* As above for fmul/fadd, we can't store to st(0). */
7166 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7168 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7173 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7176 if (STACK_TOP_P (operands[0]))
7177 p = "{rp\t%0, %1|p\t%1, %0}";
7179 p = "{p\t%1, %0|rp\t%0, %1}";
7181 if (STACK_TOP_P (operands[0]))
7182 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7184 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7189 if (STACK_TOP_P (operands[0]))
7191 if (STACK_TOP_P (operands[1]))
7192 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7194 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7197 else if (STACK_TOP_P (operands[1]))
7200 p = "{\t%1, %0|r\t%0, %1}";
7202 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7208 p = "{r\t%2, %0|\t%0, %2}";
7210 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Output code to initialize control word copies used by trunc?f?i and
rounding patterns.  CURRENT_MODE receives the current control word,
while NEW_MODE receives the new control word.  */
7228 emit_i387_cw_initialization (rtx current_mode, rtx new_mode, int mode)
7230 rtx reg = gen_reg_rtx (HImode);
7232 emit_insn (gen_x86_fnstcw_1 (current_mode));
7233 emit_move_insn (reg, current_mode);
7235 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7241 /* round down toward -oo */
7242 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
7246 /* round up toward +oo */
7247 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
7251 /* round toward zero (truncate) */
7252 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7255 case I387_CW_MASK_PM:
7256 /* mask precision exception for nearbyint() */
7257 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7269 /* round down toward -oo */
7270 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7271 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
7275 /* round up toward +oo */
7276 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7277 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
7281 /* round toward zero (truncate) */
7282 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
7285 case I387_CW_MASK_PM:
7286 /* mask precision exception for nearbyint() */
7287 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7295 emit_move_insn (new_mode, reg);
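/* For reference, the i387 control word fields manipulated above
(standard x87 layout): bits 10-11 are the rounding control --
00 = to nearest, 01 = down toward -oo (0x0400), 10 = up toward +oo
(0x0800), 11 = toward zero (0x0c00) -- and bit 5 (0x0020) is the
precision exception mask used for nearbyint().  */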
7298 /* Output code for INSN to convert a float to a signed int. OPERANDS
7299 are the insn operands. The output may be [HSD]Imode and the input
7300 operand may be [SDX]Fmode. */
7303 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
7305 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7306 int dimode_p = GET_MODE (operands[0]) == DImode;
7307 int round_mode = get_attr_i387_cw (insn);
7309 /* Jump through a hoop or two for DImode, since the hardware has no
7310 non-popping instruction. We used to do this a different way, but
7311 that was somewhat fragile and broke with post-reload splitters. */
7312 if ((dimode_p || fisttp) && !stack_top_dies)
7313 output_asm_insn ("fld\t%y1", operands);
7315 if (!STACK_TOP_P (operands[1]))
7318 if (GET_CODE (operands[0]) != MEM)
7322 output_asm_insn ("fisttp%z0\t%0", operands);
7325 if (round_mode != I387_CW_ANY)
7326 output_asm_insn ("fldcw\t%3", operands);
7327 if (stack_top_dies || dimode_p)
7328 output_asm_insn ("fistp%z0\t%0", operands);
7330 output_asm_insn ("fist%z0\t%0", operands);
7331 if (round_mode != I387_CW_ANY)
7332 output_asm_insn ("fldcw\t%2", operands);
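/* An illustrative sequence (with assumed operands) for a DImode
truncation without fisttp, as produced by the logic above:

	fld	%st(0)		; duplicate, since fistp will pop
	fldcw	%3		; switch to the truncating control word
	fistpll	%0		; store and pop
	fldcw	%2		; restore the original control word  */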
7338 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7339 should be used. UNORDERED_P is true when fucom should be used. */
7342 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
7345 rtx cmp_op0, cmp_op1;
7346 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
7350 cmp_op0 = operands[0];
7351 cmp_op1 = operands[1];
7355 cmp_op0 = operands[1];
7356 cmp_op1 = operands[2];
7361 if (GET_MODE (operands[0]) == SFmode)
7363 return "ucomiss\t{%1, %0|%0, %1}";
7365 return "comiss\t{%1, %0|%0, %1}";
7368 return "ucomisd\t{%1, %0|%0, %1}";
7370 return "comisd\t{%1, %0|%0, %1}";
7373 if (! STACK_TOP_P (cmp_op0))
7376 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7378 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
7382 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
7383 return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
7386 return "ftst\n\tfnstsw\t%0";
7389 if (STACK_REG_P (cmp_op1)
7391 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7392 && REGNO (cmp_op1) != FIRST_STACK_REG)
/* If the top of the 387 stack dies, and the other operand is also
a stack register that dies, then this must be an `fcompp' float
compare.  */
/* There is no double popping fcomi variant.  Fortunately,
eflags is immune to the fstp's cc clobbering.  */
7403 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7405 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7406 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
7411 return "fucompp\n\tfnstsw\t%0";
7413 return "fcompp\n\tfnstsw\t%0";
7418 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7420 static const char * const alt[16] =
7422 "fcom%z2\t%y2\n\tfnstsw\t%0",
7423 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7424 "fucom%z2\t%y2\n\tfnstsw\t%0",
7425 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7427 "ficom%z2\t%y2\n\tfnstsw\t%0",
7428 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7432 "fcomi\t{%y1, %0|%0, %y1}",
7433 "fcomip\t{%y1, %0|%0, %y1}",
7434 "fucomi\t{%y1, %0|%0, %y1}",
7435 "fucomip\t{%y1, %0|%0, %y1}",
7446 mask = eflags_p << 3;
7447 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
7448 mask |= unordered_p << 1;
7449 mask |= stack_top_dies;
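/* Worked example of the encoding above (illustrative): for a popping
fcomip -- eflags_p = 1, an fp operand (the intmode bit is 0), ordered
(unordered_p = 0), and a dying stack top -- the mask is
(1 << 3) | 0 | 0 | 1 = 9, selecting "fcomip\t{%y1, %0|%0, %y1}" from
the alt table.  */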
7462 ix86_output_addr_vec_elt (FILE *file, int value)
7464 const char *directive = ASM_LONG;
7469 directive = ASM_QUAD;
7475 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7479 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
7482 fprintf (file, "%s%s%d-%s%d\n",
7483 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7484 else if (HAVE_AS_GOTOFF_IN_DATA)
7485 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7487 else if (TARGET_MACHO)
7489 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
7490 machopic_output_function_base_name (file);
7491 fprintf(file, "\n");
7495 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7496 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
for the target.  */
7503 ix86_expand_clear (rtx dest)
7507 /* We play register width games, which are only valid after reload. */
7508 if (!reload_completed)
7511 /* Avoid HImode and its attendant prefix byte. */
7512 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7513 dest = gen_rtx_REG (SImode, REGNO (dest));
7515 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7517 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7518 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7520 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7521 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
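/* A minimal usage sketch (the register number is an assumption):
clearing %eax after reload emits the xor form unless TARGET_USE_MOV0
prefers "mov $0".  */
#if 0
ix86_expand_clear (gen_rtx_REG (SImode, 0));  /* xorl %eax, %eax */
#endif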
7527 /* X is an unchanging MEM. If it is a constant pool reference, return
7528 the constant pool rtx, else NULL. */
7531 maybe_get_pool_constant (rtx x)
7533 x = ix86_delegitimize_address (XEXP (x, 0));
7535 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7536 return get_pool_constant (x);
7542 ix86_expand_move (enum machine_mode mode, rtx operands[])
7544 int strict = (reload_in_progress || reload_completed);
7546 enum tls_model model;
7551 if (GET_CODE (op1) == SYMBOL_REF)
7553 model = SYMBOL_REF_TLS_MODEL (op1);
7556 op1 = legitimize_tls_address (op1, model, true);
7557 op1 = force_operand (op1, op0);
7562 else if (GET_CODE (op1) == CONST
7563 && GET_CODE (XEXP (op1, 0)) == PLUS
7564 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
7566 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
7569 rtx addend = XEXP (XEXP (op1, 0), 1);
7570 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
7571 op1 = force_operand (op1, NULL);
7572 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
7573 op0, 1, OPTAB_DIRECT);
7579 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7584 rtx temp = ((reload_in_progress
7585 || ((op0 && GET_CODE (op0) == REG)
7587 ? op0 : gen_reg_rtx (Pmode));
7588 op1 = machopic_indirect_data_reference (op1, temp);
7589 op1 = machopic_legitimize_pic_address (op1, mode,
7590 temp == op1 ? 0 : temp);
7592 else if (MACHOPIC_INDIRECT)
7593 op1 = machopic_indirect_data_reference (op1, 0);
7597 if (GET_CODE (op0) == MEM)
7598 op1 = force_reg (Pmode, op1);
7600 op1 = legitimize_address (op1, op1, Pmode);
7601 #endif /* TARGET_MACHO */
7605 if (GET_CODE (op0) == MEM
7606 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7607 || !push_operand (op0, mode))
7608 && GET_CODE (op1) == MEM)
7609 op1 = force_reg (mode, op1);
7611 if (push_operand (op0, mode)
7612 && ! general_no_elim_operand (op1, mode))
7613 op1 = copy_to_mode_reg (mode, op1);
/* Force large constants in 64-bit compilation into a register
so they get CSEed.  */
7617 if (TARGET_64BIT && mode == DImode
7618 && immediate_operand (op1, mode)
7619 && !x86_64_zext_immediate_operand (op1, VOIDmode)
7620 && !register_operand (op0, mode)
7621 && optimize && !reload_completed && !reload_in_progress)
7622 op1 = copy_to_mode_reg (mode, op1);
7624 if (FLOAT_MODE_P (mode))
/* If we are loading a floating point constant to a register,
force the value to memory now, since we'll get better code
out of the back end.  */
7632 else if (GET_CODE (op1) == CONST_DOUBLE)
7634 op1 = validize_mem (force_const_mem (mode, op1));
7635 if (!register_operand (op0, mode))
7637 rtx temp = gen_reg_rtx (mode);
7638 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
7639 emit_move_insn (op0, temp);
7646 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7650 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
7652 rtx op0 = operands[0], op1 = operands[1];
/* Force constants other than zero into memory.  We do not know how
the instructions used to build constants modify the upper 64 bits
of the register; once we have that information we may be able
to handle some of them more efficiently.  */
7658 if ((reload_in_progress | reload_completed) == 0
7659 && register_operand (op0, mode)
7660 && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
7661 op1 = validize_mem (force_const_mem (mode, op1));
7663 /* Make operand1 a register if it isn't already. */
7665 && !register_operand (op0, mode)
7666 && !register_operand (op1, mode))
7668 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
7672 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7675 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
7676 straight to ix86_expand_vector_move. */
7679 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
7688 /* If we're optimizing for size, movups is the smallest. */
7691 op0 = gen_lowpart (V4SFmode, op0);
7692 op1 = gen_lowpart (V4SFmode, op1);
7693 emit_insn (gen_sse_movups (op0, op1));
/* ??? If we have typed data, then it would appear that using
movdqu is the only way to get unaligned data loaded with
integer type.  */
7700 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
7702 op0 = gen_lowpart (V16QImode, op0);
7703 op1 = gen_lowpart (V16QImode, op1);
7704 emit_insn (gen_sse2_movdqu (op0, op1));
7708 if (TARGET_SSE2 && mode == V2DFmode)
7712 /* When SSE registers are split into halves, we can avoid
7713 writing to the top half twice. */
7714 if (TARGET_SSE_SPLIT_REGS)
7716 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
7721 /* ??? Not sure about the best option for the Intel chips.
7722 The following would seem to satisfy; the register is
7723 entirely cleared, breaking the dependency chain. We
7724 then store to the upper half, with a dependency depth
7725 of one. A rumor has it that Intel recommends two movsd
7726 followed by an unpacklpd, but this is unconfirmed. And
7727 given that the dependency depth of the unpacklpd would
7728 still be one, I'm not sure why this would be better. */
7729 zero = CONST0_RTX (V2DFmode);
7732 m = adjust_address (op1, DFmode, 0);
7733 emit_insn (gen_sse2_loadlpd (op0, zero, m));
7734 m = adjust_address (op1, DFmode, 8);
7735 emit_insn (gen_sse2_loadhpd (op0, op0, m));
7739 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
7740 emit_move_insn (op0, CONST0_RTX (mode));
7742 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
7744 if (mode != V4SFmode)
7745 op0 = gen_lowpart (V4SFmode, op0);
7746 m = adjust_address (op1, V2SFmode, 0);
7747 emit_insn (gen_sse_loadlps (op0, op0, m));
7748 m = adjust_address (op1, V2SFmode, 8);
7749 emit_insn (gen_sse_loadhps (op0, op0, m));
7752 else if (MEM_P (op0))
7754 /* If we're optimizing for size, movups is the smallest. */
7757 op0 = gen_lowpart (V4SFmode, op0);
7758 op1 = gen_lowpart (V4SFmode, op1);
7759 emit_insn (gen_sse_movups (op0, op1));
/* ??? Similar to above, only less clear because of "typeless
stores".  */
7765 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
7766 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
7768 op0 = gen_lowpart (V16QImode, op0);
7769 op1 = gen_lowpart (V16QImode, op1);
7770 emit_insn (gen_sse2_movdqu (op0, op1));
7774 if (TARGET_SSE2 && mode == V2DFmode)
7776 m = adjust_address (op0, DFmode, 0);
7777 emit_insn (gen_sse2_storelpd (m, op1));
7778 m = adjust_address (op0, DFmode, 8);
7779 emit_insn (gen_sse2_storehpd (m, op1));
7783 if (mode != V4SFmode)
7784 op1 = gen_lowpart (V4SFmode, op1);
7785 m = adjust_address (op0, V2SFmode, 0);
7786 emit_insn (gen_sse_storelps (m, op1));
7787 m = adjust_address (op0, V2SFmode, 8);
7788 emit_insn (gen_sse_storehps (m, op1));
7795 /* Expand a push in MODE. This is some mode for which we do not support
7796 proper push instructions, at least from the registers that we expect
7797 the value to live in. */
7800 ix86_expand_push (enum machine_mode mode, rtx x)
7804 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
7805 GEN_INT (-GET_MODE_SIZE (mode)),
7806 stack_pointer_rtx, 1, OPTAB_DIRECT);
7807 if (tmp != stack_pointer_rtx)
7808 emit_move_insn (stack_pointer_rtx, tmp);
7810 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
7811 emit_move_insn (tmp, x);
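/* Illustratively, for a 16-byte mode pushed this way the emitted
sequence is equivalent to (AT&T syntax; the mode and register are
assumptions for the example):

	subl	$16, %esp
	movdqa	%xmm0, (%esp)

rather than a real push instruction.  */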
7814 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
7815 destination to use for the operation. If different from the true
7816 destination in operands[0], a copy operation will be required. */
7819 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
7822 int matching_memory;
7823 rtx src1, src2, dst;
7829 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7830 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7831 && (rtx_equal_p (dst, src2)
7832 || immediate_operand (src1, mode)))
7839 /* If the destination is memory, and we do not have matching source
7840 operands, do things in registers. */
7841 matching_memory = 0;
7842 if (GET_CODE (dst) == MEM)
7844 if (rtx_equal_p (dst, src1))
7845 matching_memory = 1;
7846 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7847 && rtx_equal_p (dst, src2))
7848 matching_memory = 2;
7850 dst = gen_reg_rtx (mode);
7853 /* Both source operands cannot be in memory. */
7854 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7856 if (matching_memory != 2)
7857 src2 = force_reg (mode, src2);
7859 src1 = force_reg (mode, src1);
/* If the operation is not commutative, source 1 cannot be a constant
or non-matching memory.  */
7864 if ((CONSTANT_P (src1)
7865 || (!matching_memory && GET_CODE (src1) == MEM))
7866 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7867 src1 = force_reg (mode, src1);
/* If optimizing, copy to regs to improve CSE.  */
7870 if (optimize && ! no_new_pseudos)
7872 if (GET_CODE (dst) == MEM)
7873 dst = gen_reg_rtx (mode);
7874 if (GET_CODE (src1) == MEM)
7875 src1 = force_reg (mode, src1);
7876 if (GET_CODE (src2) == MEM)
7877 src2 = force_reg (mode, src2);
operands[1] = src1;
operands[2] = src2;
/* Similarly, but assume that the destination has already been
set up correctly.  */
7889 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
7890 enum machine_mode mode, rtx operands[])
7892 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
7893 gcc_assert (dst == operands[0]);
/* Attempt to expand a binary operator.  Make the expansion closer to the
actual machine than just general_operand, which would allow 3 separate
memory references (one output, two input) in a single insn.  */
7901 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
7904 rtx src1, src2, dst, op, clob;
7906 dst = ix86_fixup_binary_operands (code, mode, operands);
7910 /* Emit the instruction. */
7912 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7913 if (reload_in_progress)
7915 /* Reload doesn't know about the flags register, and doesn't know that
7916 it doesn't want to clobber it. We can only do this with PLUS. */
7923 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7924 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7927 /* Fix up the destination if needed. */
7928 if (dst != operands[0])
7929 emit_move_insn (operands[0], dst);
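/* A minimal sketch (assuming a standard named pattern) of how an md
expander uses this helper:

     (define_expand "addsi3"
       ...
       "ix86_expand_binary_operator (PLUS, SImode, operands); DONE;")  */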
7932 /* Return TRUE or FALSE depending on whether the binary operator meets the
7933 appropriate constraints. */
7936 ix86_binary_operator_ok (enum rtx_code code,
7937 enum machine_mode mode ATTRIBUTE_UNUSED,
7940 /* Both source operands cannot be in memory. */
7941 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
/* If the operation is not commutative, source 1 cannot be a constant.  */
7944 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7946 /* If the destination is memory, we must have a matching source operand. */
7947 if (GET_CODE (operands[0]) == MEM
7948 && ! (rtx_equal_p (operands[0], operands[1])
7949 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7950 && rtx_equal_p (operands[0], operands[2]))))
/* If the operation is not commutative and source 1 is memory, we must
have a matching destination.  */
7954 if (GET_CODE (operands[1]) == MEM
7955 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
7956 && ! rtx_equal_p (operands[0], operands[1]))
/* Attempt to expand a unary operator.  Make the expansion closer to the
actual machine than just general_operand, which would allow 2 separate
memory references (one output, one input) in a single insn.  */
7966 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
7969 int matching_memory;
7970 rtx src, dst, op, clob;
7975 /* If the destination is memory, and we do not have matching source
7976 operands, do things in registers. */
7977 matching_memory = 0;
7980 if (rtx_equal_p (dst, src))
7981 matching_memory = 1;
7983 dst = gen_reg_rtx (mode);
7986 /* When source operand is memory, destination must match. */
7987 if (MEM_P (src) && !matching_memory)
7988 src = force_reg (mode, src);
7990 /* If optimizing, copy to regs to improve CSE. */
7991 if (optimize && ! no_new_pseudos)
7993 if (GET_CODE (dst) == MEM)
7994 dst = gen_reg_rtx (mode);
7995 if (GET_CODE (src) == MEM)
7996 src = force_reg (mode, src);
7999 /* Emit the instruction. */
8001 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8002 if (reload_in_progress || code == NOT)
8004 /* Reload doesn't know about the flags register, and doesn't know that
8005 it doesn't want to clobber it. */
8012 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8013 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8016 /* Fix up the destination if needed. */
8017 if (dst != operands[0])
8018 emit_move_insn (operands[0], dst);
8021 /* Return TRUE or FALSE depending on whether the unary operator meets the
8022 appropriate constraints. */
8025 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8026 enum machine_mode mode ATTRIBUTE_UNUSED,
8027 rtx operands[2] ATTRIBUTE_UNUSED)
/* If one of the operands is memory, the source and destination must match.  */
8030 if ((GET_CODE (operands[0]) == MEM
8031 || GET_CODE (operands[1]) == MEM)
8032 && ! rtx_equal_p (operands[0], operands[1]))
8037 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
8038 Create a mask for the sign bit in MODE for an SSE register. If VECT is
8039 true, then replicate the mask for all elements of the vector register.
8040 If INVERT is true, then create a mask excluding the sign bit. */
8043 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
8045 enum machine_mode vec_mode;
8046 HOST_WIDE_INT hi, lo;
8051 /* Find the sign bit, sign extended to 2*HWI. */
8053 lo = 0x80000000, hi = lo < 0;
8054 else if (HOST_BITS_PER_WIDE_INT >= 64)
8055 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
8057 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
8062 /* Force this value into the low part of a fp vector constant. */
8063 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
8064 mask = gen_lowpart (mode, mask);
8069 v = gen_rtvec (4, mask, mask, mask, mask);
8071 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
8072 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
8073 vec_mode = V4SFmode;
8078 v = gen_rtvec (2, mask, mask);
8080 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
8081 vec_mode = V2DFmode;
8084 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
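/* Worked example of the mask built above: for DFmode the sign bit is
bit 63, so the scalar mask is 0x8000000000000000 (or its complement
0x7fffffffffffffff when INVERT); for SFmode it is 0x80000000.  NEG is
then an XOR with the mask and ABS an AND with the inverted mask, as
used by ix86_expand_fp_absneg_operator below.  */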
8087 /* Generate code for floating point ABS or NEG. */
8090 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
8093 rtx mask, set, use, clob, dst, src;
8094 bool matching_memory;
8095 bool use_sse = false;
8096 bool vector_mode = VECTOR_MODE_P (mode);
8097 enum machine_mode elt_mode = mode;
8101 elt_mode = GET_MODE_INNER (mode);
8104 else if (TARGET_SSE_MATH)
8105 use_sse = SSE_FLOAT_MODE_P (mode);
8107 /* NEG and ABS performed with SSE use bitwise mask operations.
8108 Create the appropriate mask now. */
8110 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
8113 /* When not using SSE, we don't use the mask, but prefer to keep the
8114 same general form of the insn pattern to reduce duplication when
8115 it comes time to split. */
8122 /* If the destination is memory, and we don't have matching source
8123 operands, do things in registers. */
8124 matching_memory = false;
8127 if (rtx_equal_p (dst, src) && (!optimize || no_new_pseudos))
8128 matching_memory = true;
8130 dst = gen_reg_rtx (mode);
8132 if (MEM_P (src) && !matching_memory)
8133 src = force_reg (mode, src);
8137 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
8138 set = gen_rtx_SET (VOIDmode, dst, set);
8143 set = gen_rtx_fmt_e (code, mode, src);
8144 set = gen_rtx_SET (VOIDmode, dst, set);
8145 use = gen_rtx_USE (VOIDmode, mask);
8146 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8147 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
8150 if (dst != operands[0])
8151 emit_move_insn (operands[0], dst);
8154 /* Expand a copysign operation. Special case operand 0 being a constant. */
8157 ix86_expand_copysign (rtx operands[])
8159 enum machine_mode mode, vmode;
8160 rtx dest, op0, op1, mask, nmask;
8166 mode = GET_MODE (dest);
8167 vmode = mode == SFmode ? V4SFmode : V2DFmode;
8169 if (GET_CODE (op0) == CONST_DOUBLE)
8173 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
8174 op0 = simplify_unary_operation (ABS, mode, op0, mode);
8176 if (op0 == CONST0_RTX (mode))
8177 op0 = CONST0_RTX (vmode);
8181 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
8182 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
8184 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
8185 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
8188 mask = ix86_build_signbit_mask (mode, 0, 0);
8191 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
8193 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
8197 nmask = ix86_build_signbit_mask (mode, 0, 1);
8198 mask = ix86_build_signbit_mask (mode, 0, 0);
8201 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
8203 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
8207 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
8208 be a constant, and so has already been expanded into a vector constant. */
8211 ix86_split_copysign_const (rtx operands[])
8213 enum machine_mode mode, vmode;
8214 rtx dest, op0, op1, mask, x;
8221 mode = GET_MODE (dest);
8222 vmode = GET_MODE (mask);
8224 dest = simplify_gen_subreg (vmode, dest, mode, 0);
8225 x = gen_rtx_AND (vmode, dest, mask);
8226 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8228 if (op0 != CONST0_RTX (vmode))
8230 x = gen_rtx_IOR (vmode, dest, op0);
8231 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8235 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
8236 so we have to do two masks. */
8239 ix86_split_copysign_var (rtx operands[])
8241 enum machine_mode mode, vmode;
8242 rtx dest, scratch, op0, op1, mask, nmask, x;
8245 scratch = operands[1];
8248 nmask = operands[4];
8251 mode = GET_MODE (dest);
8252 vmode = GET_MODE (mask);
8254 if (rtx_equal_p (op0, op1))
8256 /* Shouldn't happen often (it's useless, obviously), but when it does
8257 we'd generate incorrect code if we continue below. */
8258 emit_move_insn (dest, op0);
8262 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
8264 gcc_assert (REGNO (op1) == REGNO (scratch));
8266 x = gen_rtx_AND (vmode, scratch, mask);
8267 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
8270 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
8271 x = gen_rtx_NOT (vmode, dest);
8272 x = gen_rtx_AND (vmode, x, op0);
8273 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8277 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
8279 x = gen_rtx_AND (vmode, scratch, mask);
8281 else /* alternative 2,4 */
8283 gcc_assert (REGNO (mask) == REGNO (scratch));
8284 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
8285 x = gen_rtx_AND (vmode, scratch, op1);
8287 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
8289 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
8291 dest = simplify_gen_subreg (vmode, op0, mode, 0);
8292 x = gen_rtx_AND (vmode, dest, nmask);
8294 else /* alternative 3,4 */
8296 gcc_assert (REGNO (nmask) == REGNO (dest));
8298 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
8299 x = gen_rtx_AND (vmode, dest, op0);
8301 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8304 x = gen_rtx_IOR (vmode, dest, scratch);
8305 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Return TRUE or FALSE depending on whether the first SET in INSN
has source and destination with matching CC modes, and whether the
CC mode is at least as constrained as REQ_MODE.  */
8313 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8316 enum machine_mode set_mode;
8318 set = PATTERN (insn);
8319 if (GET_CODE (set) == PARALLEL)
8320 set = XVECEXP (set, 0, 0);
8321 if (GET_CODE (set) != SET)
8323 if (GET_CODE (SET_SRC (set)) != COMPARE)
8326 set_mode = GET_MODE (SET_DEST (set));
8330 if (req_mode != CCNOmode
8331 && (req_mode != CCmode
8332 || XEXP (SET_SRC (set), 1) != const0_rtx))
8336 if (req_mode == CCGCmode)
8340 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8344 if (req_mode == CCZmode)
8354 return (GET_MODE (SET_SRC (set)) == set_mode);
8357 /* Generate insn patterns to do an integer compare of OPERANDS. */
8360 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8362 enum machine_mode cmpmode;
8365 cmpmode = SELECT_CC_MODE (code, op0, op1);
8366 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8368 /* This is very simple, but making the interface the same as in the
8369 FP case makes the rest of the code easier. */
8370 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8371 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8373 /* Return the test that should be put into the flags user, i.e.
8374 the bcc, scc, or cmov instruction. */
8375 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8378 /* Figure out whether to use ordered or unordered fp comparisons.
8379 Return the appropriate mode to use. */
8382 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
/* ??? In order to make all comparisons reversible, we do all comparisons
non-trapping when compiling for IEEE.  Once gcc is able to distinguish
all forms of trapping and nontrapping comparisons, we can make inequality
comparisons trapping again, since it results in better code when using
FCOM based compares.  */
8389 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8393 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8395 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8396 return ix86_fp_compare_mode (code);
8399 /* Only zero flag is needed. */
8401 case NE: /* ZF!=0 */
8403 /* Codes needing carry flag. */
8404 case GEU: /* CF=0 */
8405 case GTU: /* CF=0 & ZF=0 */
8406 case LTU: /* CF=1 */
8407 case LEU: /* CF=1 | ZF=1 */
8409 /* Codes possibly doable only with sign flag when
8410 comparing against zero. */
8411 case GE: /* SF=OF or SF=0 */
8412 case LT: /* SF<>OF or SF=1 */
8413 if (op1 == const0_rtx)
8416 /* For other cases Carry flag is not required. */
/* Codes doable only with the sign flag when comparing
against zero, but we lack a jump instruction for them,
so we need to use relational tests against the overflow
flag, which thus needs to be zero.  */
8422 case GT: /* ZF=0 & SF=OF */
8423 case LE: /* ZF=1 | SF<>OF */
8424 if (op1 == const0_rtx)
/* The strcmp pattern does (use flags), and combine may ask us for a
proper mode.  */
8437 /* Return the fixed registers used for condition codes. */
8440 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8447 /* If two condition code modes are compatible, return a condition code
8448 mode which is compatible with both. Otherwise, return
8451 static enum machine_mode
8452 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8457 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8460 if ((m1 == CCGCmode && m2 == CCGOCmode)
8461 || (m1 == CCGOCmode && m2 == CCGCmode))
/* These are only compatible with themselves, which we already
checked above.  */
8495 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8498 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8500 enum rtx_code swapped_code = swap_condition (code);
8501 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8502 || (ix86_fp_comparison_cost (swapped_code)
8503 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8506 /* Swap, force into registers, or otherwise massage the two operands
8507 to a fp comparison. The operands are updated in place; the new
8508 comparison code is returned. */
8510 static enum rtx_code
8511 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8513 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8514 rtx op0 = *pop0, op1 = *pop1;
8515 enum machine_mode op_mode = GET_MODE (op0);
8516 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
8518 /* All of the unordered compare instructions only work on registers.
8519 The same is true of the fcomi compare instructions. The same is
8520 true of the XFmode compare instructions if not comparing with
8521 zero (ftst insn is used in this case). */
8524 && (fpcmp_mode == CCFPUmode
8525 || (op_mode == XFmode
8526 && ! (standard_80387_constant_p (op0) == 1
8527 || standard_80387_constant_p (op1) == 1))
8528 || ix86_use_fcomi_compare (code)))
8530 op0 = force_reg (op_mode, op0);
8531 op1 = force_reg (op_mode, op1);
/* %%% We only allow op1 in memory; op0 must be st(0).  So swap
things around if they appear profitable, otherwise force op0
into a register.  */
8539 if (standard_80387_constant_p (op0) == 0
8540 || (GET_CODE (op0) == MEM
8541 && ! (standard_80387_constant_p (op1) == 0
8542 || GET_CODE (op1) == MEM)))
8545 tmp = op0, op0 = op1, op1 = tmp;
8546 code = swap_condition (code);
8549 if (GET_CODE (op0) != REG)
8550 op0 = force_reg (op_mode, op0);
8552 if (CONSTANT_P (op1))
8554 int tmp = standard_80387_constant_p (op1);
8556 op1 = validize_mem (force_const_mem (op_mode, op1));
8560 op1 = force_reg (op_mode, op1);
8563 op1 = force_reg (op_mode, op1);
8567 /* Try to rearrange the comparison to make it cheaper. */
8568 if (ix86_fp_comparison_cost (code)
8569 > ix86_fp_comparison_cost (swap_condition (code))
8570 && (GET_CODE (op1) == REG || !no_new_pseudos))
8573 tmp = op0, op0 = op1, op1 = tmp;
8574 code = swap_condition (code);
8575 if (GET_CODE (op0) != REG)
8576 op0 = force_reg (op_mode, op0);
/* Convert the comparison codes we use to represent FP comparisons to the
integer code that will result in a proper branch.  Return UNKNOWN if no
such code is available.  */
8589 ix86_fp_compare_code_to_integer (enum rtx_code code)
8618 /* Split comparison code CODE into comparisons we can do using branch
instructions.  BYPASS_CODE is the comparison code for the branch that will
branch around FIRST_CODE and SECOND_CODE.  If one of the branches
is not required, its code is set to UNKNOWN.
8622 We never require more than two branches. */
8625 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8626 enum rtx_code *first_code,
8627 enum rtx_code *second_code)
8630 *bypass_code = UNKNOWN;
8631 *second_code = UNKNOWN;
/* The fcomi comparison sets flags as follows:

       cmp    ZF PF CF
       >      0  0  0
       <      0  0  1
       =      1  0  0
       un     1  1  1
*/
8643 case GT: /* GTU - CF=0 & ZF=0 */
8644 case GE: /* GEU - CF=0 */
8645 case ORDERED: /* PF=0 */
8646 case UNORDERED: /* PF=1 */
8647 case UNEQ: /* EQ - ZF=1 */
8648 case UNLT: /* LTU - CF=1 */
8649 case UNLE: /* LEU - CF=1 | ZF=1 */
8650 case LTGT: /* EQ - ZF=0 */
8652 case LT: /* LTU - CF=1 - fails on unordered */
8654 *bypass_code = UNORDERED;
8656 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8658 *bypass_code = UNORDERED;
8660 case EQ: /* EQ - ZF=1 - fails on unordered */
8662 *bypass_code = UNORDERED;
8664 case NE: /* NE - ZF=0 - fails on unordered */
8666 *second_code = UNORDERED;
8668 case UNGE: /* GEU - CF=0 - fails on unordered */
8670 *second_code = UNORDERED;
8672 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8674 *second_code = UNORDERED;
8679 if (!TARGET_IEEE_FP)
8681 *second_code = UNKNOWN;
8682 *bypass_code = UNKNOWN;
/* Return the cost of a comparison done using fcom + arithmetic operations
on AX.  All following functions use the number of instructions as a cost
metric.  In the future this should be tweaked to compute bytes for
optimize_size and take into account the performance of various
instructions on various CPUs.  */
8691 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8693 if (!TARGET_IEEE_FP)
8695 /* The cost of code output by ix86_expand_fp_compare. */
8723 /* Return cost of comparison done using fcomi operation.
8724 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8726 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
8728 enum rtx_code bypass_code, first_code, second_code;
/* Return an arbitrarily high cost when the instruction is not supported -
this prevents gcc from using it.  */
8733 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8734 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
8737 /* Return cost of comparison done using sahf operation.
8738 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8740 ix86_fp_comparison_sahf_cost (enum rtx_code code)
8742 enum rtx_code bypass_code, first_code, second_code;
/* Return an arbitrarily high cost when the instruction is not preferred -
this prevents gcc from using it.  */
8745 if (!TARGET_USE_SAHF && !optimize_size)
8747 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8748 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
8751 /* Compute cost of the comparison done using any method.
8752 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8754 ix86_fp_comparison_cost (enum rtx_code code)
8756 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8759 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8760 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8762 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8763 if (min > sahf_cost)
8765 if (min > fcomi_cost)
8770 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8773 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8774 rtx *second_test, rtx *bypass_test)
8776 enum machine_mode fpcmp_mode, intcmp_mode;
8778 int cost = ix86_fp_comparison_cost (code);
8779 enum rtx_code bypass_code, first_code, second_code;
8781 fpcmp_mode = ix86_fp_compare_mode (code);
8782 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8785 *second_test = NULL_RTX;
8787 *bypass_test = NULL_RTX;
8789 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8791 /* Do fcomi/sahf based test when profitable. */
8792 if ((bypass_code == UNKNOWN || bypass_test)
8793 && (second_code == UNKNOWN || second_test)
8794 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8798 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8799 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8805 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8806 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8808 scratch = gen_reg_rtx (HImode);
8809 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8810 emit_insn (gen_x86_sahf_1 (scratch));
8813 /* The FP codes work out to act like unsigned. */
8814 intcmp_mode = fpcmp_mode;
8816 if (bypass_code != UNKNOWN)
8817 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8818 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8820 if (second_code != UNKNOWN)
8821 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8822 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8827 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8828 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8829 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8831 scratch = gen_reg_rtx (HImode);
8832 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8834 /* In the unordered case, we have to check C2 for NaN's, which
8835 doesn't happen to work out to anything nice combination-wise.
8836 So do some bit twiddling on the value we've got in AH to come
8837 up with an appropriate set of condition codes. */
8839 intcmp_mode = CCNOmode;
8844 if (code == GT || !TARGET_IEEE_FP)
8846 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8851 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8852 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8853 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8854 intcmp_mode = CCmode;
8860 if (code == LT && TARGET_IEEE_FP)
8862 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8863 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8864 intcmp_mode = CCmode;
8869 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8875 if (code == GE || !TARGET_IEEE_FP)
8877 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8882 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8883 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8890 if (code == LE && TARGET_IEEE_FP)
8892 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8893 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8894 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8895 intcmp_mode = CCmode;
8900 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8906 if (code == EQ && TARGET_IEEE_FP)
8908 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8909 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8910 intcmp_mode = CCmode;
8915 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8922 if (code == NE && TARGET_IEEE_FP)
8924 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8925 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8931 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8937 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8941 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8950 /* Return the test that should be put into the flags user, i.e.
8951 the bcc, scc, or cmov instruction. */
8952 return gen_rtx_fmt_ee (code, VOIDmode,
8953 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8958 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
8961 op0 = ix86_compare_op0;
8962 op1 = ix86_compare_op1;
8965 *second_test = NULL_RTX;
8967 *bypass_test = NULL_RTX;
8969 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8970 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8971 second_test, bypass_test);
8973 ret = ix86_expand_int_compare (code, op0, op1);
/* Return true if CODE will result in a nontrivial jump sequence.  */
8980 ix86_fp_jump_nontrivial_p (enum rtx_code code)
8982 enum rtx_code bypass_code, first_code, second_code;
8985 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8986 return bypass_code != UNKNOWN || second_code != UNKNOWN;
8990 ix86_expand_branch (enum rtx_code code, rtx label)
8994 switch (GET_MODE (ix86_compare_op0))
9000 tmp = ix86_expand_compare (code, NULL, NULL);
9001 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9002 gen_rtx_LABEL_REF (VOIDmode, label),
9004 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9013 enum rtx_code bypass_code, first_code, second_code;
9015 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9018 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Check whether we will use the natural sequence with one jump.  If
so, we can expand the jump early.  Otherwise delay expansion by
creating a compound insn so as not to confuse optimizers.  */
9023 if (bypass_code == UNKNOWN && second_code == UNKNOWN
9026 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9027 gen_rtx_LABEL_REF (VOIDmode, label),
9028 pc_rtx, NULL_RTX, NULL_RTX);
9032 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9033 ix86_compare_op0, ix86_compare_op1);
9034 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9035 gen_rtx_LABEL_REF (VOIDmode, label),
9037 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9039 use_fcomi = ix86_use_fcomi_compare (code);
9040 vec = rtvec_alloc (3 + !use_fcomi);
9041 RTVEC_ELT (vec, 0) = tmp;
9043 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9045 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9048 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9050 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9058 /* Expand DImode branch into multiple compare+branch. */
9060 rtx lo[2], hi[2], label2;
9061 enum rtx_code code1, code2, code3;
9063 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9065 tmp = ix86_compare_op0;
9066 ix86_compare_op0 = ix86_compare_op1;
9067 ix86_compare_op1 = tmp;
9068 code = swap_condition (code);
9070 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9071 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9073 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9074 avoid two branches. This costs one extra insn, so disable when
9075 optimizing for size. */
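/* Illustrative sketch (not part of GCC): at the C level the rewrite is

     (a == b)  <=>  ((hi0 ^ hi1) | (lo0 ^ lo1)) == 0

   so one compare of the OR result against zero replaces the two
   half-word compare-and-branch pairs.  */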
9077 if ((code == EQ || code == NE)
9079 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9084 if (hi[1] != const0_rtx)
9085 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9086 NULL_RTX, 0, OPTAB_WIDEN);
9089 if (lo[1] != const0_rtx)
9090 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9091 NULL_RTX, 0, OPTAB_WIDEN);
9093 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9094 NULL_RTX, 0, OPTAB_WIDEN);
9096 ix86_compare_op0 = tmp;
9097 ix86_compare_op1 = const0_rtx;
9098 ix86_expand_branch (code, label);
9102 /* Otherwise, if we are doing a less-than or greater-or-equal-than
9103 comparison, op1 is a constant, and the low word is zero, then we can
9104 just examine the high word.  */
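/* E.g. (illustration only): for the signed test a < 0x200000000LL the
   constant's low word is zero, so hi(a) < 2 decides the whole compare
   and lo(a) never needs to be examined.  */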
9106 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9109 case LT: case LTU: case GE: case GEU:
9110 ix86_compare_op0 = hi[0];
9111 ix86_compare_op1 = hi[1];
9112 ix86_expand_branch (code, label);
9118 /* Otherwise, we need two or three jumps. */
9120 label2 = gen_label_rtx ();
9123 code2 = swap_condition (code);
9124 code3 = unsigned_condition (code);
9128 case LT: case GT: case LTU: case GTU:
9131 case LE: code1 = LT; code2 = GT; break;
9132 case GE: code1 = GT; code2 = LT; break;
9133 case LEU: code1 = LTU; code2 = GTU; break;
9134 case GEU: code1 = GTU; code2 = LTU; break;
9136 case EQ: code1 = UNKNOWN; code2 = NE; break;
9137 case NE: code2 = UNKNOWN; break;
9145 * if (hi(a) < hi(b)) goto true;
9146 * if (hi(a) > hi(b)) goto false;
9147 * if (lo(a) < lo(b)) goto true;
9151 ix86_compare_op0 = hi[0];
9152 ix86_compare_op1 = hi[1];
9154 if (code1 != UNKNOWN)
9155 ix86_expand_branch (code1, label);
9156 if (code2 != UNKNOWN)
9157 ix86_expand_branch (code2, label2);
9159 ix86_compare_op0 = lo[0];
9160 ix86_compare_op1 = lo[1];
9161 ix86_expand_branch (code3, label);
9163 if (code2 != UNKNOWN)
9164 emit_label (label2);
9173 /* Split branch based on floating point condition. */
9175 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9176 rtx target1, rtx target2, rtx tmp, rtx pushed)
9179 rtx label = NULL_RTX;
9181 int bypass_probability = -1, second_probability = -1, probability = -1;
9184 if (target2 != pc_rtx)
9187 code = reverse_condition_maybe_unordered (code);
9192 condition = ix86_expand_fp_compare (code, op1, op2,
9193 tmp, &second, &bypass);
9195 /* Remove pushed operand from stack. */
9197 ix86_free_from_memory (GET_MODE (pushed));
9199 if (split_branch_probability >= 0)
9201 /* Distribute the probabilities across the jumps.
9202 Assume the BYPASS and SECOND jumps always test for UNORDERED.  */
9204 probability = split_branch_probability;
9206 /* A value of 1 is low enough that the probability does not need
9207 to be updated.  Later we may run some experiments and see
9208 if unordered values are more frequent in practice.  */
9210 bypass_probability = 1;
9212 second_probability = 1;
9214 if (bypass != NULL_RTX)
9216 label = gen_label_rtx ();
9217 i = emit_jump_insn (gen_rtx_SET
9219 gen_rtx_IF_THEN_ELSE (VOIDmode,
9221 gen_rtx_LABEL_REF (VOIDmode,
9224 if (bypass_probability >= 0)
9226 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9227 GEN_INT (bypass_probability),
9230 i = emit_jump_insn (gen_rtx_SET
9232 gen_rtx_IF_THEN_ELSE (VOIDmode,
9233 condition, target1, target2)));
9234 if (probability >= 0)
9236 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9237 GEN_INT (probability),
9239 if (second != NULL_RTX)
9241 i = emit_jump_insn (gen_rtx_SET
9243 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9245 if (second_probability >= 0)
9247 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9248 GEN_INT (second_probability),
9251 if (label != NULL_RTX)
9256 ix86_expand_setcc (enum rtx_code code, rtx dest)
9258 rtx ret, tmp, tmpreg, equiv;
9259 rtx second_test, bypass_test;
9261 if (GET_MODE (ix86_compare_op0) == DImode
9263 return 0; /* FAIL */
9265 if (GET_MODE (dest) != QImode)
9268 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9269 PUT_MODE (ret, QImode);
9274 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9275 if (bypass_test || second_test)
9277 rtx test = second_test;
9279 rtx tmp2 = gen_reg_rtx (QImode);
9286 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9288 PUT_MODE (test, QImode);
9289 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9292 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9294 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9297 /* Attach a REG_EQUAL note describing the comparison result. */
9298 equiv = simplify_gen_relational (code, QImode,
9299 GET_MODE (ix86_compare_op0),
9300 ix86_compare_op0, ix86_compare_op1);
9301 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9303 return 1; /* DONE */
9306 /* Expand a comparison setting or clearing the carry flag.  Return true
9307 when successful, and set *pop to the comparison for the operation.  */
9309 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9311 enum machine_mode mode =
9312 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9314 /* Do not handle DImode compares that go through a special path.  Also we
9315 can't deal with FP compares yet; this would be possible to add.  */
9316 if ((mode == DImode && !TARGET_64BIT))
9318 if (FLOAT_MODE_P (mode))
9320 rtx second_test = NULL, bypass_test = NULL;
9321 rtx compare_op, compare_seq;
9323 /* Shortcut: the following common codes never translate into carry-flag compares.  */
9324 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9325 || code == ORDERED || code == UNORDERED)
9328 /* These comparisons require the zero flag; swap the operands so they won't.  */
9329 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9335 code = swap_condition (code);
9338 /* Try to expand the comparison and verify that we end up with a carry-flag
9339 based comparison.  This fails to be true only when we decide to expand the
9340 comparison using arithmetic, which is not a common scenario.  */
9342 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9343 &second_test, &bypass_test);
9344 compare_seq = get_insns ();
9347 if (second_test || bypass_test)
9349 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9350 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9351 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9353 code = GET_CODE (compare_op);
9354 if (code != LTU && code != GEU)
9356 emit_insn (compare_seq);
9360 if (!INTEGRAL_MODE_P (mode))
9368 /* Convert a==0 into (unsigned)a<1. */
9371 if (op1 != const0_rtx)
9374 code = (code == EQ ? LTU : GEU);
9377 /* Convert a>b into b<a or a>=b-1. */
9380 if (GET_CODE (op1) == CONST_INT)
9382 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9383 /* Bail out on overflow. We still can swap operands but that
9384 would force loading of the constant into register. */
9385 if (op1 == const0_rtx
9386 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9388 code = (code == GTU ? GEU : LTU);
9395 code = (code == GTU ? LTU : GEU);
9399 /* Convert a>=0 into (unsigned)a<0x80000000. */
9402 if (mode == DImode || op1 != const0_rtx)
9404 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9405 code = (code == LT ? GEU : LTU);
9409 if (mode == DImode || op1 != constm1_rtx)
9411 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9412 code = (code == LE ? GEU : LTU);
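/* A worked summary of the conversions above (SImode assumed; the values
   are illustrative):

     a == 0  ->  (unsigned) a < 1
     a >  b  ->  b < a,  or  a >= b + 1  when b + 1 does not overflow
     a >= 0  ->  (unsigned) a < 0x80000000
     a <  0  ->  (unsigned) a >= 0x80000000

   After a cmp, each right-hand side is decided by the carry flag alone.  */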
9418 /* Swapping the operands may cause a constant to appear as the first operand.  */
9419 if (!nonimmediate_operand (op0, VOIDmode))
9423 op0 = force_reg (mode, op0);
9425 ix86_compare_op0 = op0;
9426 ix86_compare_op1 = op1;
9427 *pop = ix86_expand_compare (code, NULL, NULL);
9428 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9434 ix86_expand_int_movcc (rtx operands[])
9436 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9437 rtx compare_seq, compare_op;
9438 rtx second_test, bypass_test;
9439 enum machine_mode mode = GET_MODE (operands[0]);
9440 bool sign_bit_compare_p = false;
9443 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9444 compare_seq = get_insns ();
9447 compare_code = GET_CODE (compare_op);
9449 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9450 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9451 sign_bit_compare_p = true;
9453 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9454 HImode insns, we'd be swallowed in word prefix ops. */
9456 if ((mode != HImode || TARGET_FAST_PREFIX)
9457 && (mode != DImode || TARGET_64BIT)
9458 && GET_CODE (operands[2]) == CONST_INT
9459 && GET_CODE (operands[3]) == CONST_INT)
9461 rtx out = operands[0];
9462 HOST_WIDE_INT ct = INTVAL (operands[2]);
9463 HOST_WIDE_INT cf = INTVAL (operands[3]);
9467 /* Sign bit compares are better done using shifts than by using sbb.  */
9469 if (sign_bit_compare_p
9470 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9471 ix86_compare_op1, &compare_op))
9473 /* Detect overlap between destination and compare sources. */
9476 if (!sign_bit_compare_p)
9480 compare_code = GET_CODE (compare_op);
9482 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9483 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9486 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9489 /* To simplify the rest of the code, restrict to the GEU case.  */
9490 if (compare_code == LTU)
9492 HOST_WIDE_INT tmp = ct;
9495 compare_code = reverse_condition (compare_code);
9496 code = reverse_condition (code);
9501 PUT_CODE (compare_op,
9502 reverse_condition_maybe_unordered
9503 (GET_CODE (compare_op)));
9505 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9509 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9510 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9511 tmp = gen_reg_rtx (mode);
9514 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9516 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9520 if (code == GT || code == GE)
9521 code = reverse_condition (code);
9524 HOST_WIDE_INT tmp = ct;
9529 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9530 ix86_compare_op1, VOIDmode, 0, -1);
9543 tmp = expand_simple_binop (mode, PLUS,
9545 copy_rtx (tmp), 1, OPTAB_DIRECT);
9556 tmp = expand_simple_binop (mode, IOR,
9558 copy_rtx (tmp), 1, OPTAB_DIRECT);
9560 else if (diff == -1 && ct)
9570 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9572 tmp = expand_simple_binop (mode, PLUS,
9573 copy_rtx (tmp), GEN_INT (cf),
9574 copy_rtx (tmp), 1, OPTAB_DIRECT);
9582 * andl cf - ct, dest
9592 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9595 tmp = expand_simple_binop (mode, AND,
9597 gen_int_mode (cf - ct, mode),
9598 copy_rtx (tmp), 1, OPTAB_DIRECT);
9600 tmp = expand_simple_binop (mode, PLUS,
9601 copy_rtx (tmp), GEN_INT (ct),
9602 copy_rtx (tmp), 1, OPTAB_DIRECT);
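/* C-level model of the sbb-based select above (a sketch, not the emitted
   RTL): TMP is -1 when the condition holds and 0 otherwise, so

     dest = (tmp & (cf - ct)) + ct;

   yields cf when the condition holds and ct when it does not.  */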
9605 if (!rtx_equal_p (tmp, out))
9606 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9608 return 1; /* DONE */
9614 tmp = ct, ct = cf, cf = tmp;
9616 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9618 /* We may be reversing an unordered compare to a normal compare, which
9619 is not valid in general (we may convert a non-trapping condition
9620 to a trapping one); however, on i386 we currently emit all
9621 comparisons unordered.  */
9622 compare_code = reverse_condition_maybe_unordered (compare_code);
9623 code = reverse_condition_maybe_unordered (code);
9627 compare_code = reverse_condition (compare_code);
9628 code = reverse_condition (code);
9632 compare_code = UNKNOWN;
9633 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9634 && GET_CODE (ix86_compare_op1) == CONST_INT)
9636 if (ix86_compare_op1 == const0_rtx
9637 && (code == LT || code == GE))
9638 compare_code = code;
9639 else if (ix86_compare_op1 == constm1_rtx)
9643 else if (code == GT)
9648 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9649 if (compare_code != UNKNOWN
9650 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9651 && (cf == -1 || ct == -1))
9653 /* If the lea code below could be used, only optimize
9654 if it results in a 2-insn sequence.  */
9656 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9657 || diff == 3 || diff == 5 || diff == 9)
9658 || (compare_code == LT && ct == -1)
9659 || (compare_code == GE && cf == -1))
9662 * notl op1 (if necessary)
9670 code = reverse_condition (code);
9673 out = emit_store_flag (out, code, ix86_compare_op0,
9674 ix86_compare_op1, VOIDmode, 0, -1);
9676 out = expand_simple_binop (mode, IOR,
9678 out, 1, OPTAB_DIRECT);
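/* Model of the IOR variant (illustration only): OUT is -1 when the
   condition holds and 0 otherwise, so

     out = out | cf;

   produces -1 (the constant arm) or cf, with no branch.  */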
9679 if (out != operands[0])
9680 emit_move_insn (operands[0], out);
9682 return 1; /* DONE */
9687 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9688 || diff == 3 || diff == 5 || diff == 9)
9689 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9691 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
9697 * lea cf(dest*(ct-cf)),dest
9701 * This also catches the degenerate setcc-only case.
9707 out = emit_store_flag (out, code, ix86_compare_op0,
9708 ix86_compare_op1, VOIDmode, 0, 1);
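/* Sketch of the setcc + lea computation above: OUT holds 0 or 1 from the
   setcc, and

     dest = cf + out * (ct - cf);

   gives cf for out == 0 and ct for out == 1; a single lea can compute
   this whenever ct - cf is a usable scale.  */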
9711 /* On x86_64 the lea instruction operates on Pmode, so we need
9712 to get the arithmetic done in the proper mode to match.  */
9714 tmp = copy_rtx (out);
9718 out1 = copy_rtx (out);
9719 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9723 tmp = gen_rtx_PLUS (mode, tmp, out1);
9729 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9732 if (!rtx_equal_p (tmp, out))
9735 out = force_operand (tmp, copy_rtx (out));
9737 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9739 if (!rtx_equal_p (out, operands[0]))
9740 emit_move_insn (operands[0], copy_rtx (out));
9742 return 1; /* DONE */
9746 * General case: Jumpful:
9747 * xorl dest,dest cmpl op1, op2
9748 * cmpl op1, op2 movl ct, dest
9750 * decl dest movl cf, dest
9751 * andl (cf-ct),dest 1:
9756 * This is reasonably steep, but branch mispredict costs are
9757 * high on modern CPUs, so consider failing only if optimizing for space.  */
9761 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9762 && BRANCH_COST >= 2)
9768 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9769 /* We may be reversing an unordered compare to a normal compare,
9770 which is not valid in general (we may convert a non-trapping
9771 condition to a trapping one); however, on i386 we currently
9772 emit all comparisons unordered.  */
9773 code = reverse_condition_maybe_unordered (code);
9776 code = reverse_condition (code);
9777 if (compare_code != UNKNOWN)
9778 compare_code = reverse_condition (compare_code);
9782 if (compare_code != UNKNOWN)
9784 /* notl op1 (if needed)
9789 For x < 0 (resp. x <= -1) there will be no notl,
9790 so if possible swap the constants to get rid of the complement.
9792 True/false will be -1/0 while code below (store flag
9793 followed by decrement) is 0/-1, so the constants need
9794 to be exchanged once more. */
9796 if (compare_code == GE || !cf)
9798 code = reverse_condition (code);
9803 HOST_WIDE_INT tmp = cf;
9808 out = emit_store_flag (out, code, ix86_compare_op0,
9809 ix86_compare_op1, VOIDmode, 0, -1);
9813 out = emit_store_flag (out, code, ix86_compare_op0,
9814 ix86_compare_op1, VOIDmode, 0, 1);
9816 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9817 copy_rtx (out), 1, OPTAB_DIRECT);
9820 out = expand_simple_binop (mode, AND, copy_rtx (out),
9821 gen_int_mode (cf - ct, mode),
9822 copy_rtx (out), 1, OPTAB_DIRECT);
9824 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9825 copy_rtx (out), 1, OPTAB_DIRECT);
9826 if (!rtx_equal_p (out, operands[0]))
9827 emit_move_insn (operands[0], copy_rtx (out));
9829 return 1; /* DONE */
9833 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9835 /* Try a few things more with specific constants and a variable. */
9838 rtx var, orig_out, out, tmp;
9840 if (BRANCH_COST <= 2)
9841 return 0; /* FAIL */
9843 /* If one of the two operands is an interesting constant, load a
9844 constant with the above and mask it in with a logical operation. */
9846 if (GET_CODE (operands[2]) == CONST_INT)
9849 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9850 operands[3] = constm1_rtx, op = and_optab;
9851 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9852 operands[3] = const0_rtx, op = ior_optab;
9854 return 0; /* FAIL */
9856 else if (GET_CODE (operands[3]) == CONST_INT)
9859 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9860 operands[2] = constm1_rtx, op = and_optab;
9861 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
9862 operands[2] = const0_rtx, op = ior_optab;
9864 return 0; /* FAIL */
9867 return 0; /* FAIL */
9869 orig_out = operands[0];
9870 tmp = gen_reg_rtx (mode);
9873 /* Recurse to get the constant loaded. */
9874 if (ix86_expand_int_movcc (operands) == 0)
9875 return 0; /* FAIL */
9877 /* Mask in the interesting variable. */
9878 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9880 if (!rtx_equal_p (out, orig_out))
9881 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9883 return 1; /* DONE */
9887 * For comparison with above,
9897 if (! nonimmediate_operand (operands[2], mode))
9898 operands[2] = force_reg (mode, operands[2]);
9899 if (! nonimmediate_operand (operands[3], mode))
9900 operands[3] = force_reg (mode, operands[3]);
9902 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9904 rtx tmp = gen_reg_rtx (mode);
9905 emit_move_insn (tmp, operands[3]);
9908 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9910 rtx tmp = gen_reg_rtx (mode);
9911 emit_move_insn (tmp, operands[2]);
9915 if (! register_operand (operands[2], VOIDmode)
9917 || ! register_operand (operands[3], VOIDmode)))
9918 operands[2] = force_reg (mode, operands[2]);
9921 && ! register_operand (operands[3], VOIDmode))
9922 operands[3] = force_reg (mode, operands[3]);
9924 emit_insn (compare_seq);
9925 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9926 gen_rtx_IF_THEN_ELSE (mode,
9927 compare_op, operands[2],
9930 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9931 gen_rtx_IF_THEN_ELSE (mode,
9933 copy_rtx (operands[3]),
9934 copy_rtx (operands[0]))));
9936 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9937 gen_rtx_IF_THEN_ELSE (mode,
9939 copy_rtx (operands[2]),
9940 copy_rtx (operands[0]))));
9942 return 1; /* DONE */
9946 ix86_expand_fp_movcc (rtx operands[])
9948 enum machine_mode mode = GET_MODE (operands[0]);
9949 enum rtx_code code = GET_CODE (operands[1]);
9950 rtx tmp, compare_op, second_test, bypass_test;
9952 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
9954 rtx cmp_op0, cmp_op1, if_true, if_false;
9956 enum machine_mode vmode, cmode;
9957 bool is_minmax = false;
9959 cmp_op0 = ix86_compare_op0;
9960 cmp_op1 = ix86_compare_op1;
9961 if_true = operands[2];
9962 if_false = operands[3];
9964 /* Since we have no cmove for SSE registers, don't force bad register
9965 allocation just to gain access to it.  Deny movcc when the
9966 comparison mode doesn't match the move mode.  */
9967 cmode = GET_MODE (cmp_op0);
9968 if (cmode == VOIDmode)
9969 cmode = GET_MODE (cmp_op1);
9973 /* We have no LTGT as an operator. We could implement it with
9974 NE & ORDERED, but this requires an extra temporary. It's
9975 not clear that it's worth it. */
9976 if (code == LTGT || code == UNEQ)
9979 /* Massage condition to satisfy sse_comparison_operator. Try
9980 to canonicalize the destination operand to be first in the
9981 comparison - this helps reload to avoid extra moves. */
9982 if (!sse_comparison_operator (operands[1], VOIDmode)
9983 || (COMMUTATIVE_P (operands[1])
9984 && rtx_equal_p (operands[0], cmp_op1)))
9989 code = swap_condition (code);
9992 /* Detect conditional moves that exactly match min/max operational
9993 semantics. Note that this is IEEE safe, as long as we don't
9994 interchange the operands, which is why we keep this in the form
9995 of an IF_THEN_ELSE instead of reducing it to SMIN/SMAX.  */
9996 if ((code == LT || code == UNGE) && REG_P (cmp_op0) && REG_P (cmp_op1))
9998 if (((cmp_op0 == if_true && cmp_op1 == if_false)
9999 || (cmp_op0 == if_false && cmp_op1 == if_true)))
10006 if_true = if_false;
10012 if (mode == SFmode)
10014 else if (mode == DFmode)
10017 gcc_unreachable ();
10019 cmp_op0 = force_reg (mode, cmp_op0);
10020 if (!nonimmediate_operand (cmp_op1, mode))
10021 cmp_op1 = force_reg (mode, cmp_op1);
10023 tmp = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
10024 gcc_assert (sse_comparison_operator (tmp, VOIDmode));
10026 tmp = gen_rtx_IF_THEN_ELSE (mode, tmp, if_true, if_false);
10027 tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
10031 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (vmode));
10032 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
10039 /* The floating point conditional move instructions don't directly
10040 support conditions resulting from a signed integer comparison. */
10042 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10047 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10049 if (second_test != NULL || bypass_test != NULL)
10051 tmp = gen_reg_rtx (QImode);
10052 ix86_expand_setcc (code, tmp);
10054 ix86_compare_op0 = tmp;
10055 ix86_compare_op1 = const0_rtx;
10056 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10058 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10060 tmp = gen_reg_rtx (mode);
10061 emit_move_insn (tmp, operands[3]);
10064 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10066 tmp = gen_reg_rtx (mode);
10067 emit_move_insn (tmp, operands[2]);
10071 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10072 gen_rtx_IF_THEN_ELSE (mode, compare_op,
10073 operands[2], operands[3])));
10075 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10076 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
10077 operands[3], operands[0])));
10079 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10080 gen_rtx_IF_THEN_ELSE (mode, second_test,
10081 operands[2], operands[0])));
10087 ix86_split_sse_movcc (rtx operands[])
10089 rtx dest, scratch, cmp, op_true, op_false, x;
10090 enum machine_mode mode, vmode;
10092 /* Note that the operator CMP has been set up with matching constraints
10093 such that dest is valid for the comparison. Unless one of the true
10094 or false operands is zero, the true operand has already been placed in SCRATCH.  */
10096 dest = operands[0];
10097 scratch = operands[1];
10098 op_true = operands[2];
10099 op_false = operands[3];
10102 mode = GET_MODE (dest);
10103 vmode = GET_MODE (scratch);
10105 /* We need to make sure that the TRUE and FALSE operands are out of the
10106 way of the destination. Marking the destination earlyclobber doesn't
10107 work, since we want matching constraints for the actual comparison, so
10108 at some point we always wind up having to do a copy ourselves here.
10109 We very much prefer the TRUE value to be in SCRATCH. If it turns out
10110 that FALSE overlaps DEST, then we invert the comparison so that we
10111 still only have to do one move. */
10112 if (rtx_equal_p (op_false, dest))
10114 enum rtx_code code;
10116 if (rtx_equal_p (op_true, dest))
10118 /* ??? This really ought not to happen.  It means some optimizer managed
10119 to prove the operands were identical, but failed to fold the
10120 conditional move to a straight move. Do so here, because
10121 otherwise we'll generate incorrect code. And since they're
10122 both already in the destination register, nothing to do. */
10126 x = gen_rtx_REG (mode, REGNO (scratch));
10127 emit_move_insn (x, op_false);
10128 op_false = op_true;
10131 code = GET_CODE (cmp);
10132 code = reverse_condition_maybe_unordered (code);
10133 cmp = gen_rtx_fmt_ee (code, mode, XEXP (cmp, 0), XEXP (cmp, 1));
10135 else if (op_true == CONST0_RTX (mode))
10137 else if (op_false == CONST0_RTX (mode) && !rtx_equal_p (op_true, dest))
10141 x = gen_rtx_REG (mode, REGNO (scratch));
10142 emit_move_insn (x, op_true);
10146 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
10147 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10149 if (op_false == CONST0_RTX (mode))
10151 op_true = simplify_gen_subreg (vmode, op_true, mode, 0);
10152 x = gen_rtx_AND (vmode, dest, op_true);
10153 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10157 op_false = simplify_gen_subreg (vmode, op_false, mode, 0);
10159 if (op_true == CONST0_RTX (mode))
10161 x = gen_rtx_NOT (vmode, dest);
10162 x = gen_rtx_AND (vmode, x, op_false);
10163 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10167 x = gen_rtx_AND (vmode, scratch, dest);
10168 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10170 x = gen_rtx_NOT (vmode, dest);
10171 x = gen_rtx_AND (vmode, x, op_false);
10172 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10174 x = gen_rtx_IOR (vmode, dest, scratch);
10175 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
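/* Illustrative C model of the general blend emitted above (a sketch):

     mask = cmp (op0, op1);                        all-ones or all-zeros
     dest = (mask & op_true) | (~mask & op_false);

   which is exactly what the and / andnot / or sequence computes.  */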
10180 /* Expand a conditional increment or decrement using adc/sbb instructions.
10181 The default case using setcc followed by the conditional move can be
10182 done by generic code.  */
10184 ix86_expand_int_addcc (rtx operands[])
10186 enum rtx_code code = GET_CODE (operands[1]);
10188 rtx val = const0_rtx;
10189 bool fpcmp = false;
10190 enum machine_mode mode = GET_MODE (operands[0]);
10192 if (operands[3] != const1_rtx
10193 && operands[3] != constm1_rtx)
10195 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10196 ix86_compare_op1, &compare_op))
10198 code = GET_CODE (compare_op);
10200 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10201 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10204 code = ix86_fp_compare_code_to_integer (code);
10211 PUT_CODE (compare_op,
10212 reverse_condition_maybe_unordered
10213 (GET_CODE (compare_op)));
10215 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10217 PUT_MODE (compare_op, mode);
10219 /* Construct either adc or sbb insn. */
10220 if ((code == LTU) == (operands[3] == constm1_rtx))
10222 switch (GET_MODE (operands[0]))
10225 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10228 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10231 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10234 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10242 switch (GET_MODE (operands[0]))
10245 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10248 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10251 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10254 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10260 return 1; /* DONE */
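/* A minimal C-level model of the expansion above (illustration only):
   after comparing a against b, the carry flag is set exactly when
   a < b as unsigned values, so

     dest = src + (a < b);     maps to  cmp; adc $0, dest
     dest = src - (a < b);     maps to  cmp; sbb $0, dest

   and the conditional increment or decrement needs no branch or cmove.  */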
10264 /* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
10265 works for floating point parameters and non-offsettable memories.
10266 For pushes, it returns just the stack offsets; the values will be saved
10267 in the right order.  At most three parts are generated.  */
10270 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10275 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10277 size = (GET_MODE_SIZE (mode) + 4) / 8;
10279 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10281 if (size < 2 || size > 3)
10284 /* Optimize constant pool references to immediates.  This is used by fp
10285 moves that force all constants to memory to allow combining.  */
10286 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
10288 rtx tmp = maybe_get_pool_constant (operand);
10293 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10295 /* The only non-offsettable memories we handle are pushes.  */
10296 if (! push_operand (operand, VOIDmode))
10299 operand = copy_rtx (operand);
10300 PUT_MODE (operand, Pmode);
10301 parts[0] = parts[1] = parts[2] = operand;
10303 else if (!TARGET_64BIT)
10305 if (mode == DImode)
10306 split_di (&operand, 1, &parts[0], &parts[1]);
10309 if (REG_P (operand))
10311 if (!reload_completed)
10313 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10314 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10316 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10318 else if (offsettable_memref_p (operand))
10320 operand = adjust_address (operand, SImode, 0);
10321 parts[0] = operand;
10322 parts[1] = adjust_address (operand, SImode, 4);
10324 parts[2] = adjust_address (operand, SImode, 8);
10326 else if (GET_CODE (operand) == CONST_DOUBLE)
10331 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10335 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10336 parts[2] = gen_int_mode (l[2], SImode);
10339 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10344 parts[1] = gen_int_mode (l[1], SImode);
10345 parts[0] = gen_int_mode (l[0], SImode);
10353 if (mode == TImode)
10354 split_ti (&operand, 1, &parts[0], &parts[1]);
10355 if (mode == XFmode || mode == TFmode)
10357 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
10358 if (REG_P (operand))
10360 if (!reload_completed)
10362 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10363 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10365 else if (offsettable_memref_p (operand))
10367 operand = adjust_address (operand, DImode, 0);
10368 parts[0] = operand;
10369 parts[1] = adjust_address (operand, upper_mode, 8);
10371 else if (GET_CODE (operand) == CONST_DOUBLE)
10376 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10377 real_to_target (l, &r, mode);
10379 /* Do not use a shift by 32, to avoid a warning on 32-bit systems.  */
10380 if (HOST_BITS_PER_WIDE_INT >= 64)
10383 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10384 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10387 parts[0] = immed_double_const (l[0], l[1], DImode);
10389 if (upper_mode == SImode)
10390 parts[1] = gen_int_mode (l[2], SImode);
10391 else if (HOST_BITS_PER_WIDE_INT >= 64)
10394 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10395 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10398 parts[1] = immed_double_const (l[2], l[3], DImode);
10408 /* Emit insns to perform a move or push of DI, DF, and XF values.
10409 Return false when normal moves are needed; true when all required
10410 insns have been emitted. Operands 2-4 contain the input values
10411 in the correct order; operands 5-7 contain the output values.  */
10414 ix86_split_long_move (rtx operands[])
10419 int collisions = 0;
10420 enum machine_mode mode = GET_MODE (operands[0]);
10422 /* The DFmode expanders may ask us to move a double.
10423 For a 64-bit target this is a single move.  By hiding this fact
10424 here we simplify the i386.md splitters.  */
10425 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10427 /* Optimize constant pool references to immediates.  This is used by
10428 fp moves that force all constants to memory to allow combining.  */
10430 if (GET_CODE (operands[1]) == MEM
10431 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10432 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10433 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10434 if (push_operand (operands[0], VOIDmode))
10436 operands[0] = copy_rtx (operands[0]);
10437 PUT_MODE (operands[0], Pmode);
10440 operands[0] = gen_lowpart (DImode, operands[0]);
10441 operands[1] = gen_lowpart (DImode, operands[1]);
10442 emit_move_insn (operands[0], operands[1]);
10446 /* The only non-offsettable memory we handle is push. */
10447 if (push_operand (operands[0], VOIDmode))
10449 else if (GET_CODE (operands[0]) == MEM
10450 && ! offsettable_memref_p (operands[0]))
10453 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10454 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10456 /* When emitting a push, take care of source operands living on the stack.  */
10457 if (push && GET_CODE (operands[1]) == MEM
10458 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10461 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10462 XEXP (part[1][2], 0));
10463 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10464 XEXP (part[1][1], 0));
10467 /* We need to do the copy in the right order in case an address register
10468 of the source overlaps the destination.  */
10469 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10471 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10473 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10476 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10479 /* Collision in the middle part can be handled by reordering. */
10480 if (collisions == 1 && nparts == 3
10481 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10484 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10485 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10488 /* If there are more collisions, we can't handle them by reordering.
10489 Do an lea to the last part and use only one colliding move.  */
10490 else if (collisions > 1)
10496 base = part[0][nparts - 1];
10498 /* Handle the case when the last part isn't valid for lea.
10499 Happens in 64-bit mode storing the 12-byte XFmode. */
10500 if (GET_MODE (base) != Pmode)
10501 base = gen_rtx_REG (Pmode, REGNO (base));
10503 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10504 part[1][0] = replace_equiv_address (part[1][0], base);
10505 part[1][1] = replace_equiv_address (part[1][1],
10506 plus_constant (base, UNITS_PER_WORD));
10508 part[1][2] = replace_equiv_address (part[1][2],
10509 plus_constant (base, 8));
10519 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10520 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10521 emit_move_insn (part[0][2], part[1][2]);
10526 /* In 64-bit mode we don't have a 32-bit push available.  In case this is a
10527 register, that is OK - we will just use the larger counterpart.  We also
10528 retype the memory - this comes from an attempt to avoid the REX prefix on
10529 moving the second half of a TFmode value.  */
10530 if (GET_MODE (part[1][1]) == SImode)
10532 if (GET_CODE (part[1][1]) == MEM)
10533 part[1][1] = adjust_address (part[1][1], DImode, 0);
10534 else if (REG_P (part[1][1]))
10535 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10538 if (GET_MODE (part[1][0]) == SImode)
10539 part[1][0] = part[1][1];
10542 emit_move_insn (part[0][1], part[1][1]);
10543 emit_move_insn (part[0][0], part[1][0]);
10547 /* Choose the correct order so as not to overwrite the source before it is copied.  */
10548 if ((REG_P (part[0][0])
10549 && REG_P (part[1][1])
10550 && (REGNO (part[0][0]) == REGNO (part[1][1])
10552 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10554 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10558 operands[2] = part[0][2];
10559 operands[3] = part[0][1];
10560 operands[4] = part[0][0];
10561 operands[5] = part[1][2];
10562 operands[6] = part[1][1];
10563 operands[7] = part[1][0];
10567 operands[2] = part[0][1];
10568 operands[3] = part[0][0];
10569 operands[5] = part[1][1];
10570 operands[6] = part[1][0];
10577 operands[2] = part[0][0];
10578 operands[3] = part[0][1];
10579 operands[4] = part[0][2];
10580 operands[5] = part[1][0];
10581 operands[6] = part[1][1];
10582 operands[7] = part[1][2];
10586 operands[2] = part[0][0];
10587 operands[3] = part[0][1];
10588 operands[5] = part[1][0];
10589 operands[6] = part[1][1];
10593 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
10596 if (GET_CODE (operands[5]) == CONST_INT
10597 && operands[5] != const0_rtx
10598 && REG_P (operands[2]))
10600 if (GET_CODE (operands[6]) == CONST_INT
10601 && INTVAL (operands[6]) == INTVAL (operands[5]))
10602 operands[6] = operands[2];
10605 && GET_CODE (operands[7]) == CONST_INT
10606 && INTVAL (operands[7]) == INTVAL (operands[5]))
10607 operands[7] = operands[2];
10611 && GET_CODE (operands[6]) == CONST_INT
10612 && operands[6] != const0_rtx
10613 && REG_P (operands[3])
10614 && GET_CODE (operands[7]) == CONST_INT
10615 && INTVAL (operands[7]) == INTVAL (operands[6]))
10616 operands[7] = operands[3];
10619 emit_move_insn (operands[2], operands[5]);
10620 emit_move_insn (operands[3], operands[6]);
10622 emit_move_insn (operands[4], operands[7]);
10627 /* Helper function of ix86_split_ashldi used to generate an SImode
10628 left shift by a constant, either using a single shift or
10629 a sequence of add instructions. */
10632 ix86_expand_ashlsi3_const (rtx operand, int count)
10635 emit_insn (gen_addsi3 (operand, operand, operand));
10636 else if (!optimize_size
10637 && count * ix86_cost->add <= ix86_cost->shift_const)
10640 for (i=0; i<count; i++)
10641 emit_insn (gen_addsi3 (operand, operand, operand));
10644 emit_insn (gen_ashlsi3 (operand, operand, GEN_INT (count)));
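/* Usage illustration (hypothetical cost values, and assuming we are not
   optimizing for size): with ix86_cost->add == 1 and
   ix86_cost->shift_const == 3, a shift by 2 is emitted as two addl
   instructions (2 * 1 <= 3), while a shift by 4 falls back to a single
   shll $4.  */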
10648 ix86_split_ashldi (rtx *operands, rtx scratch)
10650 rtx low[2], high[2];
10653 if (GET_CODE (operands[2]) == CONST_INT)
10655 split_di (operands, 2, low, high);
10656 count = INTVAL (operands[2]) & 63;
10660 emit_move_insn (high[0], low[1]);
10661 emit_move_insn (low[0], const0_rtx);
10664 ix86_expand_ashlsi3_const (high[0], count - 32);
10668 if (!rtx_equal_p (operands[0], operands[1]))
10669 emit_move_insn (operands[0], operands[1]);
10670 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10671 ix86_expand_ashlsi3_const (low[0], count);
10676 split_di (operands, 1, low, high);
10678 if (operands[1] == const1_rtx)
10680 /* Assuming we've chosen QImode-capable registers, 1LL << N
10681 can be done with two 32-bit shifts, no branches, no cmoves.  */
10682 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
10684 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
10686 ix86_expand_clear (low[0]);
10687 ix86_expand_clear (high[0]);
10688 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
10690 d = gen_lowpart (QImode, low[0]);
10691 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10692 s = gen_rtx_EQ (QImode, flags, const0_rtx);
10693 emit_insn (gen_rtx_SET (VOIDmode, d, s));
10695 d = gen_lowpart (QImode, high[0]);
10696 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10697 s = gen_rtx_NE (QImode, flags, const0_rtx);
10698 emit_insn (gen_rtx_SET (VOIDmode, d, s));
10701 /* Otherwise, we can get the same results by manually performing
10702 a bit extract operation on bit 5, and then performing the two
10703 shifts. The two methods of getting 0/1 into low/high are exactly
10704 the same size. Avoiding the shift in the bit extract case helps
10705 pentium4 a bit; no one else seems to care much either way. */
10710 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
10711 x = gen_rtx_ZERO_EXTEND (SImode, operands[2]);
10713 x = gen_lowpart (SImode, operands[2]);
10714 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
10716 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (5)));
10717 emit_insn (gen_andsi3 (high[0], high[0], GEN_INT (1)));
10718 emit_move_insn (low[0], high[0]);
10719 emit_insn (gen_xorsi3 (low[0], low[0], GEN_INT (1)));
10722 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10723 emit_insn (gen_ashlsi3 (high[0], high[0], operands[2]));
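/* C-level model of the 1LL << n expansion above (a sketch; n is the
   runtime shift count):

     lo = (n & 32) == 0;       bit 5 selects the destination half
     hi = (n & 32) != 0;
     lo <<= (n & 31);          32-bit shifts mask their count to 5 bits
     hi <<= (n & 31);

   which yields the correct low/high words for any n in 0..63.  */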
10727 if (operands[1] == constm1_rtx)
10729 /* For -1LL << N, we can avoid the shld instruction, because we
10730 know that we're shifting 0...31 ones into a -1. */
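/* Illustration: for 0 <= n < 32, hi (-1LL << n) is still all ones and
   lo (-1LL << n) == 0xffffffff << n, so no bits ever cross from the low
   word into the high word.  */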
10731 emit_move_insn (low[0], constm1_rtx);
10733 emit_move_insn (high[0], low[0]);
10735 emit_move_insn (high[0], constm1_rtx);
10739 if (!rtx_equal_p (operands[0], operands[1]))
10740 emit_move_insn (operands[0], operands[1]);
10742 split_di (operands, 1, low, high);
10743 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10746 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10748 if (TARGET_CMOVE && scratch)
10750 ix86_expand_clear (scratch);
10751 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
10754 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10758 ix86_split_ashrdi (rtx *operands, rtx scratch)
10760 rtx low[2], high[2];
10763 if (GET_CODE (operands[2]) == CONST_INT)
10765 split_di (operands, 2, low, high);
10766 count = INTVAL (operands[2]) & 63;
10770 emit_move_insn (high[0], high[1]);
10771 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10772 emit_move_insn (low[0], high[0]);
10775 else if (count >= 32)
10777 emit_move_insn (low[0], high[1]);
10778 emit_move_insn (high[0], low[0]);
10779 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10781 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
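/* Model of the count >= 32 case above (sketch): for 32 <= n < 64,

     lo = (int) hi >> (n - 32);
     hi = (int) hi >> 31;          sign fill

   the low word receives the shifted high word and the high word becomes
   a copy of the sign bit.  */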
10785 if (!rtx_equal_p (operands[0], operands[1]))
10786 emit_move_insn (operands[0], operands[1]);
10787 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10788 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10793 if (!rtx_equal_p (operands[0], operands[1]))
10794 emit_move_insn (operands[0], operands[1]);
10796 split_di (operands, 1, low, high);
10798 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10799 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10801 if (TARGET_CMOVE && scratch)
10803 emit_move_insn (scratch, high[0]);
10804 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10805 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10809 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10814 ix86_split_lshrdi (rtx *operands, rtx scratch)
10816 rtx low[2], high[2];
10819 if (GET_CODE (operands[2]) == CONST_INT)
10821 split_di (operands, 2, low, high);
10822 count = INTVAL (operands[2]) & 63;
10826 emit_move_insn (low[0], high[1]);
10827 ix86_expand_clear (high[0]);
10830 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10834 if (!rtx_equal_p (operands[0], operands[1]))
10835 emit_move_insn (operands[0], operands[1]);
10836 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10837 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10842 if (!rtx_equal_p (operands[0], operands[1]))
10843 emit_move_insn (operands[0], operands[1]);
10845 split_di (operands, 1, low, high);
10847 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10848 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10850 /* Heh. By reversing the arguments, we can reuse this pattern. */
10851 if (TARGET_CMOVE && scratch)
10853 ix86_expand_clear (scratch);
10854 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10858 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10862 /* Helper function for the string operations below.  Test whether VARIABLE
10863 is aligned to VALUE bytes.  If so, jump to the returned label.  */
10865 ix86_expand_aligntest (rtx variable, int value)
10867 rtx label = gen_label_rtx ();
10868 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10869 if (GET_MODE (variable) == DImode)
10870 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10872 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10873 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10878 /* Adjust COUNTREG by VALUE.  */
10880 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10882 if (GET_MODE (countreg) == DImode)
10883 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10885 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10888 /* Zero-extend the possibly-SImode EXP into a Pmode register.  */
10890 ix86_zero_extend_to_Pmode (rtx exp)
10893 if (GET_MODE (exp) == VOIDmode)
10894 return force_reg (Pmode, exp);
10895 if (GET_MODE (exp) == Pmode)
10896 return copy_to_mode_reg (Pmode, exp);
10897 r = gen_reg_rtx (Pmode);
10898 emit_insn (gen_zero_extendsidi2 (r, exp));
10902 /* Expand string move (memcpy) operation. Use i386 string operations when
10903 profitable. expand_clrmem contains similar code. */
10905 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
10907 rtx srcreg, destreg, countreg, srcexp, destexp;
10908 enum machine_mode counter_mode;
10909 HOST_WIDE_INT align = 0;
10910 unsigned HOST_WIDE_INT count = 0;
10912 if (GET_CODE (align_exp) == CONST_INT)
10913 align = INTVAL (align_exp);
10915 /* Can't use any of this if the user has appropriated esi or edi. */
10916 if (global_regs[4] || global_regs[5])
10919 /* This simple hack avoids all inlining code and simplifies code below. */
10920 if (!TARGET_ALIGN_STRINGOPS)
10923 if (GET_CODE (count_exp) == CONST_INT)
10925 count = INTVAL (count_exp);
10926 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10930 /* Figure out the proper mode for the counter.  For 32 bits it is always
10931 SImode; for 64 bits use SImode when possible, otherwise DImode.
10932 Set count to the number of bytes copied when known at compile time.  */
10934 || GET_MODE (count_exp) == SImode
10935 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
10936 counter_mode = SImode;
10938 counter_mode = DImode;
10940 if (counter_mode != SImode && counter_mode != DImode)
10943 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10944 if (destreg != XEXP (dst, 0))
10945 dst = replace_equiv_address_nv (dst, destreg);
10946 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10947 if (srcreg != XEXP (src, 0))
10948 src = replace_equiv_address_nv (src, srcreg);
10950 /* When optimizing for size, emit a simple rep ; movsb instruction for
10951 counts not divisible by 4.  */
10953 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10955 emit_insn (gen_cld ());
10956 countreg = ix86_zero_extend_to_Pmode (count_exp);
10957 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
10958 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
10959 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
10963 /* For constant aligned (or small unaligned) copies, use rep movsl
10964 followed by code copying the rest.  For PentiumPro, ensure 8-byte
10965 alignment to allow rep movsl acceleration.  */
10967 else if (count != 0
10969 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10970 || optimize_size || count < (unsigned int) 64))
10972 unsigned HOST_WIDE_INT offset = 0;
10973 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10974 rtx srcmem, dstmem;
10976 emit_insn (gen_cld ());
10977 if (count & ~(size - 1))
10979 countreg = copy_to_mode_reg (counter_mode,
10980 GEN_INT ((count >> (size == 4 ? 2 : 3))
10981 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10982 countreg = ix86_zero_extend_to_Pmode (countreg);
10984 destexp = gen_rtx_ASHIFT (Pmode, countreg,
10985 GEN_INT (size == 4 ? 2 : 3));
10986 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
10987 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10989 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
10990 countreg, destexp, srcexp));
10991 offset = count & ~(size - 1);
10993 if (size == 8 && (count & 0x04))
10995 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
10997 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
10999 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11004 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11006 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11008 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11013 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11015 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11017 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
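/* C-level outline of this constant-count branch (illustration only):

     words = count >> (size == 4 ? 2 : 3);   bulk part, via rep movs
     if (size == 8 && (count & 4))  copy 4 bytes;
     if (count & 2)                 copy 2 bytes;
     if (count & 1)                 copy 1 byte;

   so at most one movsl, one movsw and one movsb follow the rep.  */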
11020 /* The generic code based on the glibc implementation:
11021 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11022 allowing accelerated copying there)
11023 - copy the data using rep movsl
11024 - copy the rest. */
11029 rtx srcmem, dstmem;
11030 int desired_alignment = (TARGET_PENTIUMPRO
11031 && (count == 0 || count >= (unsigned int) 260)
11032 ? 8 : UNITS_PER_WORD);
11033 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11034 dst = change_address (dst, BLKmode, destreg);
11035 src = change_address (src, BLKmode, srcreg);
11037 /* In case we don't know anything about the alignment, default to
11038 the library version, since it is usually equally fast and results in
11041 shorter code.  Also emit a call when we know that the count is large and
11042 the call overhead will not be important.  */
11043 if (!TARGET_INLINE_ALL_STRINGOPS
11044 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11047 if (TARGET_SINGLE_STRINGOP)
11048 emit_insn (gen_cld ());
11050 countreg2 = gen_reg_rtx (Pmode);
11051 countreg = copy_to_mode_reg (counter_mode, count_exp);
11053 /* We don't use loops to align the destination or to copy parts smaller
11054 than 4 bytes, because gcc is able to optimize such code better (in
11055 case the destination or the count really is aligned, gcc is often
11056 able to predict the branches) and it is also friendlier to the
11057 hardware branch predictor.
11059 Using loops is beneficial for the generic case, because we can
11060 handle small counts using the loops.  Many CPUs (such as Athlon)
11061 have large REP prefix setup costs.
11063 This is quite costly.  Maybe we can revisit this decision later or
11064 add some customizability to this code.  */
11066 if (count == 0 && align < desired_alignment)
11068 label = gen_label_rtx ();
11069 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11070 LEU, 0, counter_mode, 1, label);
11074 rtx label = ix86_expand_aligntest (destreg, 1);
11075 srcmem = change_address (src, QImode, srcreg);
11076 dstmem = change_address (dst, QImode, destreg);
11077 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11078 ix86_adjust_counter (countreg, 1);
11079 emit_label (label);
11080 LABEL_NUSES (label) = 1;
11084 rtx label = ix86_expand_aligntest (destreg, 2);
11085 srcmem = change_address (src, HImode, srcreg);
11086 dstmem = change_address (dst, HImode, destreg);
11087 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11088 ix86_adjust_counter (countreg, 2);
11089 emit_label (label);
11090 LABEL_NUSES (label) = 1;
11092 if (align <= 4 && desired_alignment > 4)
11094 rtx label = ix86_expand_aligntest (destreg, 4);
11095 srcmem = change_address (src, SImode, srcreg);
11096 dstmem = change_address (dst, SImode, destreg);
11097 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11098 ix86_adjust_counter (countreg, 4);
11099 emit_label (label);
11100 LABEL_NUSES (label) = 1;
11103 if (label && desired_alignment > 4 && !TARGET_64BIT)
11105 emit_label (label);
11106 LABEL_NUSES (label) = 1;
11109 if (!TARGET_SINGLE_STRINGOP)
11110 emit_insn (gen_cld ());
11113 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11115 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11119 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11120 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11122 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11123 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11124 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11125 countreg2, destexp, srcexp));
11129 emit_label (label);
11130 LABEL_NUSES (label) = 1;
11132 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11134 srcmem = change_address (src, SImode, srcreg);
11135 dstmem = change_address (dst, SImode, destreg);
11136 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11138 if ((align <= 4 || count == 0) && TARGET_64BIT)
11140 rtx label = ix86_expand_aligntest (countreg, 4);
11141 srcmem = change_address (src, SImode, srcreg);
11142 dstmem = change_address (dst, SImode, destreg);
11143 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11144 emit_label (label);
11145 LABEL_NUSES (label) = 1;
11147 if (align > 2 && count != 0 && (count & 2))
11149 srcmem = change_address (src, HImode, srcreg);
11150 dstmem = change_address (dst, HImode, destreg);
11151 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11153 if (align <= 2 || count == 0)
11155 rtx label = ix86_expand_aligntest (countreg, 2);
11156 srcmem = change_address (src, HImode, srcreg);
11157 dstmem = change_address (dst, HImode, destreg);
11158 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11159 emit_label (label);
11160 LABEL_NUSES (label) = 1;
11162 if (align > 1 && count != 0 && (count & 1))
11164 srcmem = change_address (src, QImode, srcreg);
11165 dstmem = change_address (dst, QImode, destreg);
11166 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11168 if (align <= 1 || count == 0)
11170 rtx label = ix86_expand_aligntest (countreg, 1);
11171 srcmem = change_address (src, QImode, srcreg);
11172 dstmem = change_address (dst, QImode, destreg);
11173 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11174 emit_label (label);
11175 LABEL_NUSES (label) = 1;
11182 /* Expand string clear operation (bzero). Use i386 string operations when
11183 profitable. expand_movmem contains similar code. */
11185 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
11187 rtx destreg, zeroreg, countreg, destexp;
11188 enum machine_mode counter_mode;
11189 HOST_WIDE_INT align = 0;
11190 unsigned HOST_WIDE_INT count = 0;
11192 if (GET_CODE (align_exp) == CONST_INT)
11193 align = INTVAL (align_exp);
11195 /* Can't use any of this if the user has appropriated esi. */
11196 if (global_regs[4])
11199 /* This simple hack avoids all inlining code and simplifies code below. */
11200 if (!TARGET_ALIGN_STRINGOPS)
11203 if (GET_CODE (count_exp) == CONST_INT)
11205 count = INTVAL (count_exp);
11206 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11209 /* Figure out the proper mode for the counter.  For 32 bits it is always
11210 SImode; for 64 bits use SImode when possible, otherwise DImode.
11211 Set count to the number of bytes to clear when known at compile time.  */
11213 || GET_MODE (count_exp) == SImode
11214 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
11215 counter_mode = SImode;
11217 counter_mode = DImode;
11219 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11220 if (destreg != XEXP (dst, 0))
11221 dst = replace_equiv_address_nv (dst, destreg);
11224 /* When optimizing for size, emit a simple rep ; stosb instruction for
11225 counts not divisible by 4.  The movl $N, %ecx; rep; stosb
11226 sequence is 7 bytes long, so if optimizing for size and the count is
11227 small enough that some stosl, stosw and stosb instructions without
11228 rep are shorter, fall through into the next if.  */
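/* Worked size check (illustration): for count == 11,
   (11 & 3) + (11 >> 2) == 3 + 2 == 5 <= 7, so two stosl, one stosw and
   one stosb (5 bytes total) beat the 7-byte movl $11, %ecx; rep; stosb.  */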
11230 if ((!optimize || optimize_size)
11233 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
11235 emit_insn (gen_cld ());
11237 countreg = ix86_zero_extend_to_Pmode (count_exp);
11238 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11239 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11240 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11242 else if (count != 0
11244 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11245 || optimize_size || count < (unsigned int) 64))
11247 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11248 unsigned HOST_WIDE_INT offset = 0;
11250 emit_insn (gen_cld ());
11252 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11253 if (count & ~(size - 1))
11255 unsigned HOST_WIDE_INT repcount;
11256 unsigned int max_nonrep;
11258 repcount = count >> (size == 4 ? 2 : 3);
11260 repcount &= 0x3fffffff;
11262 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
11263 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
11264 bytes.  In both cases the latter seems to be faster for small values of N.  */
11266 max_nonrep = size == 4 ? 7 : 4;
11267 if (!optimize_size)
11270 case PROCESSOR_PENTIUM4:
11271 case PROCESSOR_NOCONA:
11278 if (repcount <= max_nonrep)
11279 while (repcount-- > 0)
11281 rtx mem = adjust_automodify_address_nv (dst,
11282 GET_MODE (zeroreg),
11284 emit_insn (gen_strset (destreg, mem, zeroreg));
11289 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
11290 countreg = ix86_zero_extend_to_Pmode (countreg);
11291 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11292 GEN_INT (size == 4 ? 2 : 3));
11293 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11294 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
11296 offset = count & ~(size - 1);
11299 if (size == 8 && (count & 0x04))
11301 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11303 emit_insn (gen_strset (destreg, mem,
11304 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11309 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11311 emit_insn (gen_strset (destreg, mem,
11312 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11317 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11319 emit_insn (gen_strset (destreg, mem,
11320 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11327 /* Compute desired alignment of the string operation. */
11328 int desired_alignment = (TARGET_PENTIUMPRO
11329 && (count == 0 || count >= (unsigned int) 260)
11330 ? 8 : UNITS_PER_WORD);
/* In case we don't know anything about the alignment, default to
   the library version, since it is usually equally fast and results in
   shorter code.

   Also emit a call when we know that the count is large and call
   overhead will not be important.  */
11338 if (!TARGET_INLINE_ALL_STRINGOPS
11339 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11342 if (TARGET_SINGLE_STRINGOP)
11343 emit_insn (gen_cld ());
11345 countreg2 = gen_reg_rtx (Pmode);
11346 countreg = copy_to_mode_reg (counter_mode, count_exp);
11347 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11348 /* Get rid of MEM_OFFSET, it won't be accurate. */
11349 dst = change_address (dst, BLKmode, destreg);
11351 if (count == 0 && align < desired_alignment)
11353 label = gen_label_rtx ();
11354 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11355 LEU, 0, counter_mode, 1, label);
11359 rtx label = ix86_expand_aligntest (destreg, 1);
11360 emit_insn (gen_strset (destreg, dst,
11361 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11362 ix86_adjust_counter (countreg, 1);
11363 emit_label (label);
11364 LABEL_NUSES (label) = 1;
11368 rtx label = ix86_expand_aligntest (destreg, 2);
11369 emit_insn (gen_strset (destreg, dst,
11370 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11371 ix86_adjust_counter (countreg, 2);
11372 emit_label (label);
11373 LABEL_NUSES (label) = 1;
11375 if (align <= 4 && desired_alignment > 4)
11377 rtx label = ix86_expand_aligntest (destreg, 4);
11378 emit_insn (gen_strset (destreg, dst,
11380 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11382 ix86_adjust_counter (countreg, 4);
11383 emit_label (label);
11384 LABEL_NUSES (label) = 1;
11387 if (label && desired_alignment > 4 && !TARGET_64BIT)
11389 emit_label (label);
11390 LABEL_NUSES (label) = 1;
11394 if (!TARGET_SINGLE_STRINGOP)
11395 emit_insn (gen_cld ());
11398 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11400 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11404 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11405 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11407 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11408 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11412 emit_label (label);
11413 LABEL_NUSES (label) = 1;
11416 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11417 emit_insn (gen_strset (destreg, dst,
11418 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11419 if (TARGET_64BIT && (align <= 4 || count == 0))
11421 rtx label = ix86_expand_aligntest (countreg, 4);
11422 emit_insn (gen_strset (destreg, dst,
11423 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11424 emit_label (label);
11425 LABEL_NUSES (label) = 1;
11427 if (align > 2 && count != 0 && (count & 2))
11428 emit_insn (gen_strset (destreg, dst,
11429 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11430 if (align <= 2 || count == 0)
11432 rtx label = ix86_expand_aligntest (countreg, 2);
11433 emit_insn (gen_strset (destreg, dst,
11434 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11435 emit_label (label);
11436 LABEL_NUSES (label) = 1;
11438 if (align > 1 && count != 0 && (count & 1))
11439 emit_insn (gen_strset (destreg, dst,
11440 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11441 if (align <= 1 || count == 0)
11443 rtx label = ix86_expand_aligntest (countreg, 1);
11444 emit_insn (gen_strset (destreg, dst,
11445 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11446 emit_label (label);
11447 LABEL_NUSES (label) = 1;
11453 /* Expand strlen. */
11455 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11457 rtx addr, scratch1, scratch2, scratch3, scratch4;
/* The generic case of the strlen expander is long.  Avoid expanding it
   unless TARGET_INLINE_ALL_STRINGOPS.  */
11462 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11463 && !TARGET_INLINE_ALL_STRINGOPS
11465 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11468 addr = force_reg (Pmode, XEXP (src, 0));
11469 scratch1 = gen_reg_rtx (Pmode);
11471 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
/* It seems that some optimizers do not combine a call such as
   foo (strlen (bar), strlen (bar));
   when the move and the subtraction are done here; they do compute
   the length just once when these instructions are done inside
   output_strlen_unroll().  But since &bar[strlen (bar)] is often
   used, and this way one register fewer is live for the lifetime of
   output_strlen_unroll(), this is better.  */
11482 emit_move_insn (out, addr);
11484 ix86_expand_strlensi_unroll_1 (out, src, align);
11486 /* strlensi_unroll_1 returns the address of the zero at the end of
11487 the string, like memchr(), so compute the length by subtracting
11488 the start address. */
11490 emit_insn (gen_subdi3 (out, out, addr));
11492 emit_insn (gen_subsi3 (out, out, addr));
11497 scratch2 = gen_reg_rtx (Pmode);
11498 scratch3 = gen_reg_rtx (Pmode);
11499 scratch4 = force_reg (Pmode, constm1_rtx);
11501 emit_move_insn (scratch3, addr);
11502 eoschar = force_reg (QImode, eoschar);
11504 emit_insn (gen_cld ());
11505 src = replace_equiv_address_nv (src, scratch3);
11507 /* If .md starts supporting :P, this can be done in .md. */
11508 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11509 scratch4), UNSPEC_SCAS);
11510 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
11513 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11514 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11518 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11519 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
   not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above
   and some address computing at the end.  These things are done in
   i386.md.  */
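/* As a rough C sketch of the code emitted below (illustrative only;
   it assumes a 32-bit little-endian target, and the real byte fix-up
   at the end is branchless rather than the scan shown):

     while ((uintptr_t) p & 3)
       if (*p++ == 0)
         goto done;
     do
       {
         w = *(const unsigned int *) p;
         p += 4;
       }
     while (!((w - 0x01010101U) & ~w & 0x80808080U));
     p -= 4;
     while (*p)
       p++;
   done:
     return p - start;  */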
11537 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11541 rtx align_2_label = NULL_RTX;
11542 rtx align_3_label = NULL_RTX;
11543 rtx align_4_label = gen_label_rtx ();
11544 rtx end_0_label = gen_label_rtx ();
11546 rtx tmpreg = gen_reg_rtx (SImode);
11547 rtx scratch = gen_reg_rtx (SImode);
11551 if (GET_CODE (align_rtx) == CONST_INT)
11552 align = INTVAL (align_rtx);
11554 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11556 /* Is there a known alignment and is it less than 4? */
11559 rtx scratch1 = gen_reg_rtx (Pmode);
11560 emit_move_insn (scratch1, out);
11561 /* Is there a known alignment and is it not 2? */
11564 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11565 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
/* Mask off all but the two low bits of the address.  */
11568 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11569 NULL_RTX, 0, OPTAB_WIDEN);
11571 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11572 Pmode, 1, align_4_label);
11573 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
11574 Pmode, 1, align_2_label);
11575 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
11576 Pmode, 1, align_3_label);
/* Since the alignment is 2, we have to check 2 or 0 bytes;
   check whether the address is aligned to a 4-byte boundary.  */
11583 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
11584 NULL_RTX, 0, OPTAB_WIDEN);
11586 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11587 Pmode, 1, align_4_label);
11590 mem = change_address (src, QImode, out);
11592 /* Now compare the bytes. */
/* Compare the first n unaligned bytes on a byte-by-byte basis.  */
11595 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11596 QImode, 1, end_0_label);
11598 /* Increment the address. */
11600 emit_insn (gen_adddi3 (out, out, const1_rtx));
11602 emit_insn (gen_addsi3 (out, out, const1_rtx));
11604 /* Not needed with an alignment of 2 */
11607 emit_label (align_2_label);
11609 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11613 emit_insn (gen_adddi3 (out, out, const1_rtx));
11615 emit_insn (gen_addsi3 (out, out, const1_rtx));
11617 emit_label (align_3_label);
11620 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11624 emit_insn (gen_adddi3 (out, out, const1_rtx));
11626 emit_insn (gen_addsi3 (out, out, const1_rtx));
/* Generate a loop to check 4 bytes at a time.  It is not a good idea
   to align this loop: it only produces larger code without speeding
   it up.  */
11632 emit_label (align_4_label);
11634 mem = change_address (src, SImode, out);
11635 emit_move_insn (scratch, mem);
11637 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11639 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
/* This formula yields a nonzero result iff one of the bytes is zero.
   This saves three branches inside the loop and many cycles.  */
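/* Concretely, the insns emitted below compute, in C terms,

     tmp = (w - 0x01010101) & ~w & 0x80808080

   on the word W just loaded.  For each byte B of W, (B - 1) has its
   top bit set either when B was zero or when B was at least 0x81; the
   ~W factor rejects the latter, whose own top bit was already set.  A
   borrow propagates into the next byte only past a zero byte, which
   has set its flag already, so TMP ends up nonzero exactly when some
   byte of W is zero.  */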
11644 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11645 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11646 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11647 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11648 gen_int_mode (0x80808080, SImode)));
11649 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11654 rtx reg = gen_reg_rtx (SImode);
11655 rtx reg2 = gen_reg_rtx (Pmode);
11656 emit_move_insn (reg, tmpreg);
11657 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11659 /* If zero is not in the first two bytes, move two bytes forward. */
11660 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11661 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11662 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11663 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11664 gen_rtx_IF_THEN_ELSE (SImode, tmp,
/* Emit the lea manually to avoid clobbering the flags.  */
11668 emit_insn (gen_rtx_SET (SImode, reg2,
11669 gen_rtx_PLUS (Pmode, out, const2_rtx)));
11671 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11672 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11673 emit_insn (gen_rtx_SET (VOIDmode, out,
11674 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11681 rtx end_2_label = gen_label_rtx ();
11682 /* Is zero in the first two bytes? */
11684 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11685 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11686 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11687 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11688 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11690 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11691 JUMP_LABEL (tmp) = end_2_label;
11693 /* Not in the first two. Move two bytes forward. */
11694 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11696 emit_insn (gen_adddi3 (out, out, const2_rtx));
11698 emit_insn (gen_addsi3 (out, out, const2_rtx));
11700 emit_label (end_2_label);
/* Avoid a branch when fixing up the exact byte position.  */
11705 tmpreg = gen_lowpart (QImode, tmpreg);
11706 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11707 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11709 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11711 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
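/* The fix-up above is branchless: at this point OUT is 4 bytes past
   the first of the two remaining candidate bytes, and bit 7 of
   TMPREG's low byte is set iff that first candidate is the zero.
   addqi3_cc doubles the low byte, shifting bit 7 into the carry flag,
   and the sbb-style subtract then computes out = out - 3 - carry:
   back up 4 bytes when the first candidate was the zero, 3 bytes
   otherwise.  */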
11713 emit_label (end_0_label);
11717 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11718 rtx callarg2 ATTRIBUTE_UNUSED,
11719 rtx pop, int sibcall)
11721 rtx use = NULL, call;
11723 if (pop == const0_rtx)
11725 if (TARGET_64BIT && pop)
11729 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11730 fnaddr = machopic_indirect_call_target (fnaddr);
11732 /* Static functions and indirect calls don't need the pic register. */
11733 if (! TARGET_64BIT && flag_pic
11734 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11735 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11736 use_reg (&use, pic_offset_table_rtx);
11738 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11740 rtx al = gen_rtx_REG (QImode, 0);
11741 emit_move_insn (al, callarg2);
11742 use_reg (&use, al);
11744 #endif /* TARGET_MACHO */
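/* On 64-bit targets the psABI requires %al to hold an upper bound on
   the number of SSE registers used to pass the variable arguments of
   a varargs call; CALLARG2 carries that bound, and a negative value
   means no bound needs to be set up.  */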
11746 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11748 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11749 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11751 if (sibcall && TARGET_64BIT
11752 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11755 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11756 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11757 emit_move_insn (fnaddr, addr);
11758 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11761 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11763 call = gen_rtx_SET (VOIDmode, retval, call);
11766 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11767 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11768 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11771 call = emit_call_insn (call);
11773 CALL_INSN_FUNCTION_USAGE (call) = use;
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for
   each function.  */
11781 static struct machine_function *
11782 ix86_init_machine_status (void)
11784 struct machine_function *f;
11786 f = ggc_alloc_cleared (sizeof (struct machine_function));
11787 f->use_fast_prologue_epilogue_nregs = -1;
11792 /* Return a MEM corresponding to a stack slot with mode MODE.
11793 Allocate a new slot if necessary.
11795 The RTL for a function can have several slots available: N is
11796 which slot to use. */
11799 assign_386_stack_local (enum machine_mode mode, int n)
11801 struct stack_local_entry *s;
11803 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11806 for (s = ix86_stack_locals; s; s = s->next)
11807 if (s->mode == mode && s->n == n)
11810 s = (struct stack_local_entry *)
11811 ggc_alloc (sizeof (struct stack_local_entry));
11814 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11816 s->next = ix86_stack_locals;
11817 ix86_stack_locals = s;
11821 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11823 static GTY(()) rtx ix86_tls_symbol;
11825 ix86_tls_get_addr (void)
11828 if (!ix86_tls_symbol)
11830 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11831 (TARGET_GNU_TLS && !TARGET_64BIT)
11832 ? "___tls_get_addr"
11833 : "__tls_get_addr");
11836 return ix86_tls_symbol;
11839 /* Calculate the length of the memory address in the instruction
11840 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11843 memory_address_length (rtx addr)
11845 struct ix86_address parts;
11846 rtx base, index, disp;
11849 if (GET_CODE (addr) == PRE_DEC
11850 || GET_CODE (addr) == POST_INC
11851 || GET_CODE (addr) == PRE_MODIFY
11852 || GET_CODE (addr) == POST_MODIFY)
11855 if (! ix86_decompose_address (addr, &parts))
11858 if (parts.base && GET_CODE (parts.base) == SUBREG)
11859 parts.base = SUBREG_REG (parts.base);
11860 if (parts.index && GET_CODE (parts.index) == SUBREG)
11861 parts.index = SUBREG_REG (parts.index);
11864 index = parts.index;
/* Rule of thumb:
     - esp as the base always wants an index,
     - ebp as the base always wants a displacement.  */
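/* For example, movl %eax, (%ebx) gets by with the bare one-byte
   modrm, while movl %eax, (%esp) needs a SIB byte (the esp r/m
   encoding selects SIB) and movl %eax, (%ebp) needs a zero disp8 (the
   ebp encoding with mod 00 means disp32 instead), so each of the
   latter two costs one extra byte, as counted below.  */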
11872 /* Register Indirect. */
11873 if (base && !index && !disp)
11875 /* esp (for its index) and ebp (for its displacement) need
11876 the two-byte modrm form. */
11877 if (addr == stack_pointer_rtx
11878 || addr == arg_pointer_rtx
11879 || addr == frame_pointer_rtx
11880 || addr == hard_frame_pointer_rtx)
11884 /* Direct Addressing. */
11885 else if (disp && !base && !index)
11890 /* Find the length of the displacement constant. */
11893 if (GET_CODE (disp) == CONST_INT
11894 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11900 /* ebp always wants a displacement. */
11901 else if (base == hard_frame_pointer_rtx)
/* An index requires the two-byte modrm form....  */
if (index
    /* ...like esp, which always wants an index.  */
11907 || base == stack_pointer_rtx
11908 || base == arg_pointer_rtx
11909 || base == frame_pointer_rtx)
/* Compute the default value for the "length_immediate" attribute.
   When SHORTFORM is set, expect that the insn has an 8-bit immediate
   alternative.  */
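/* For instance, addl $-1, %eax can be encoded as 83 c0 ff using the
   sign-extended 8-bit immediate form, so a SHORTFORM insn with a
   constant in [-128, 127] (the 'K' constraint below) reports an
   immediate length of 1, while addl $1000, %eax needs the full-width
   immediate counted by the switch that follows.  */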
11919 ix86_attr_length_immediate_default (rtx insn, int shortform)
11923 extract_insn_cached (insn);
11924 for (i = recog_data.n_operands - 1; i >= 0; --i)
11925 if (CONSTANT_P (recog_data.operand[i]))
11930 && GET_CODE (recog_data.operand[i]) == CONST_INT
11931 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11935 switch (get_attr_mode (insn))
/* Immediates for DImode instructions are encoded as 32-bit
   sign-extended values.  */
11951 fatal_insn ("unknown insn mode", insn);
11957 /* Compute default value for "length_address" attribute. */
11959 ix86_attr_length_address_default (rtx insn)
11963 if (get_attr_type (insn) == TYPE_LEA)
11965 rtx set = PATTERN (insn);
11966 if (GET_CODE (set) == SET)
11968 else if (GET_CODE (set) == PARALLEL
11969 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11970 set = XVECEXP (set, 0, 0);
11973 #ifdef ENABLE_CHECKING
11979 return memory_address_length (SET_SRC (set));
11982 extract_insn_cached (insn);
11983 for (i = recog_data.n_operands - 1; i >= 0; --i)
11984 if (GET_CODE (recog_data.operand[i]) == MEM)
11986 return memory_address_length (XEXP (recog_data.operand[i], 0));
11992 /* Return the maximum number of instructions a cpu can issue. */
11995 ix86_issue_rate (void)
11999 case PROCESSOR_PENTIUM:
12003 case PROCESSOR_PENTIUMPRO:
12004 case PROCESSOR_PENTIUM4:
12005 case PROCESSOR_ATHLON:
12007 case PROCESSOR_NOCONA:
/* A subroutine of ix86_adjust_cost -- return true iff INSN reads the
   flags set by DEP_INSN and no other register that DEP_INSN sets.  */
12019 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12023 /* Simplify the test for uninteresting insns. */
12024 if (insn_type != TYPE_SETCC
12025 && insn_type != TYPE_ICMOV
12026 && insn_type != TYPE_FCMOV
12027 && insn_type != TYPE_IBR)
12030 if ((set = single_set (dep_insn)) != 0)
12032 set = SET_DEST (set);
12035 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12036 && XVECLEN (PATTERN (dep_insn), 0) == 2
12037 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12038 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12040 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
12046 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12049 /* This test is true if the dependent insn reads the flags but
12050 not any other potentially set register. */
12051 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12054 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12060 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12061 address with operands set by DEP_INSN. */
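/* For example, on Pentium the pair

     movl %ecx, %eax
     movl 4(%eax), %ebx

   hits an address generation interlock: the load's address needs the
   %eax written by the previous insn, which ix86_adjust_cost below
   charges as an extra cycle of latency.  */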
12064 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12068 if (insn_type == TYPE_LEA
12071 addr = PATTERN (insn);
12072 if (GET_CODE (addr) == SET)
12074 else if (GET_CODE (addr) == PARALLEL
12075 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12076 addr = XVECEXP (addr, 0, 0);
12079 addr = SET_SRC (addr);
12084 extract_insn_cached (insn);
12085 for (i = recog_data.n_operands - 1; i >= 0; --i)
12086 if (GET_CODE (recog_data.operand[i]) == MEM)
12088 addr = XEXP (recog_data.operand[i], 0);
12095 return modified_in_p (addr, dep_insn);
12099 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12101 enum attr_type insn_type, dep_insn_type;
12102 enum attr_memory memory;
12104 int dep_insn_code_number;
12106 /* Anti and output dependencies have zero cost on all CPUs. */
12107 if (REG_NOTE_KIND (link) != 0)
12110 dep_insn_code_number = recog_memoized (dep_insn);
12112 /* If we can't recognize the insns, we can't really do anything. */
12113 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12116 insn_type = get_attr_type (insn);
12117 dep_insn_type = get_attr_type (dep_insn);
12121 case PROCESSOR_PENTIUM:
12122 /* Address Generation Interlock adds a cycle of latency. */
12123 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12126 /* ??? Compares pair with jump/setcc. */
12127 if (ix86_flags_dependant (insn, dep_insn, insn_type))
/* Floating point stores require the value to be ready one cycle
   earlier.  */
12131 if (insn_type == TYPE_FMOV
12132 && get_attr_memory (insn) == MEMORY_STORE
12133 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12137 case PROCESSOR_PENTIUMPRO:
12138 memory = get_attr_memory (insn);
12140 /* INT->FP conversion is expensive. */
12141 if (get_attr_fp_int_src (dep_insn))
12144 /* There is one cycle extra latency between an FP op and a store. */
12145 if (insn_type == TYPE_FMOV
12146 && (set = single_set (dep_insn)) != NULL_RTX
12147 && (set2 = single_set (insn)) != NULL_RTX
12148 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12149 && GET_CODE (SET_DEST (set2)) == MEM)
/* Show the ability of the reorder buffer to hide the latency of a
   load by executing it in parallel with the previous instruction when
   the previous instruction is not needed to compute the address.  */
12155 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12156 && !ix86_agi_dependant (insn, dep_insn, insn_type))
/* Claim that moves take one cycle, as the core can issue one load at
   a time and the next load can start a cycle later.  */
12160 if (dep_insn_type == TYPE_IMOV
12161 || dep_insn_type == TYPE_FMOV)
12169 memory = get_attr_memory (insn);
/* The esp dependency is resolved before the instruction is really
   finished.  */
12173 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12174 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12177 /* INT->FP conversion is expensive. */
12178 if (get_attr_fp_int_src (dep_insn))
/* Show the ability of the reorder buffer to hide the latency of a
   load by executing it in parallel with the previous instruction when
   the previous instruction is not needed to compute the address.  */
12184 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12185 && !ix86_agi_dependant (insn, dep_insn, insn_type))
/* Claim that moves take one cycle, as the core can issue one load at
   a time and the next load can start a cycle later.  */
12189 if (dep_insn_type == TYPE_IMOV
12190 || dep_insn_type == TYPE_FMOV)
12199 case PROCESSOR_ATHLON:
12201 memory = get_attr_memory (insn);
/* Show the ability of the reorder buffer to hide the latency of a
   load by executing it in parallel with the previous instruction when
   the previous instruction is not needed to compute the address.  */
12206 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12207 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12209 enum attr_unit unit = get_attr_unit (insn);
/* Because of the difference between the length of the integer and
   floating point unit pipeline preparation stages, memory operands
   for floating point are cheaper.

   ??? For Athlon the difference is most probably 2.  */
12217 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12220 loadcost = TARGET_ATHLON ? 2 : 0;
12222 if (cost >= loadcost)
/* How many alternative schedules to try.  This should be as wide as
   the scheduling freedom in the DFA, but no wider.  Making this value
   too large results in extra work for the scheduler.  */
12240 ia32_multipass_dfa_lookahead (void)
12242 if (ix86_tune == PROCESSOR_PENTIUM)
12245 if (ix86_tune == PROCESSOR_PENTIUMPRO
12246 || ix86_tune == PROCESSOR_K6)
/* Compute the alignment given to a constant that is being placed in
   memory.  EXP is the constant and ALIGN is the alignment that the
   object would ordinarily have.
   The value of this function is used instead of that alignment to
   align the object.  */
12261 ix86_constant_alignment (tree exp, int align)
12263 if (TREE_CODE (exp) == REAL_CST)
12265 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12267 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12270 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12271 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12272 return BITS_PER_WORD;
12277 /* Compute the alignment for a static variable.
12278 TYPE is the data type, and ALIGN is the alignment that
12279 the object would ordinarily have. The value of this function is used
12280 instead of that alignment to align the object. */
12283 ix86_data_alignment (tree type, int align)
12285 if (AGGREGATE_TYPE_P (type)
12286 && TYPE_SIZE (type)
12287 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12288 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12289 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
/* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
   to a 16-byte boundary.  */
12296 if (AGGREGATE_TYPE_P (type)
12297 && TYPE_SIZE (type)
12298 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12299 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12300 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12304 if (TREE_CODE (type) == ARRAY_TYPE)
12306 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12308 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12311 else if (TREE_CODE (type) == COMPLEX_TYPE)
12314 if (TYPE_MODE (type) == DCmode && align < 64)
12316 if (TYPE_MODE (type) == XCmode && align < 128)
12319 else if ((TREE_CODE (type) == RECORD_TYPE
12320 || TREE_CODE (type) == UNION_TYPE
12321 || TREE_CODE (type) == QUAL_UNION_TYPE)
12322 && TYPE_FIELDS (type))
12324 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12326 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12329 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12330 || TREE_CODE (type) == INTEGER_TYPE)
12332 if (TYPE_MODE (type) == DFmode && align < 64)
12334 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12341 /* Compute the alignment for a local variable.
12342 TYPE is the data type, and ALIGN is the alignment that
12343 the object would ordinarily have. The value of this macro is used
12344 instead of that alignment to align the object. */
12347 ix86_local_alignment (tree type, int align)
/* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
   to a 16-byte boundary.  */
12353 if (AGGREGATE_TYPE_P (type)
12354 && TYPE_SIZE (type)
12355 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12356 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12357 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12360 if (TREE_CODE (type) == ARRAY_TYPE)
12362 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12364 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12367 else if (TREE_CODE (type) == COMPLEX_TYPE)
12369 if (TYPE_MODE (type) == DCmode && align < 64)
12371 if (TYPE_MODE (type) == XCmode && align < 128)
12374 else if ((TREE_CODE (type) == RECORD_TYPE
12375 || TREE_CODE (type) == UNION_TYPE
12376 || TREE_CODE (type) == QUAL_UNION_TYPE)
12377 && TYPE_FIELDS (type))
12379 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12381 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12384 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12385 || TREE_CODE (type) == INTEGER_TYPE)
12388 if (TYPE_MODE (type) == DFmode && align < 64)
12390 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12396 /* Emit RTL insns to initialize the variable parts of a trampoline.
12397 FNADDR is an RTX for the address of the function's pure code.
12398 CXT is an RTX for the static chain value for the function. */
12400 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12404 /* Compute offset from the end of the jmp to the target function. */
12405 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12406 plus_constant (tramp, 10),
12407 NULL_RTX, 1, OPTAB_DIRECT);
12408 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12409 gen_int_mode (0xb9, QImode));
12410 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12411 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12412 gen_int_mode (0xe9, QImode));
12413 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
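/* Schematically, the ten bytes stored above are

     b9 <cxt:4>    movl $CXT, %ecx
     e9 <disp:4>   jmp  FNADDR

   where DISP is relative to the end of the jmp (TRAMP + 10), since
   the 0xe9 opcode takes a rel32 operand.  */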
/* Try to load the address using the shorter movl instead of movabs.
   We may want to support movq for kernel mode, but the kernel does
   not use trampolines at the moment.  */
12421 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
12423 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12424 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12425 gen_int_mode (0xbb41, HImode));
12426 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12427 gen_lowpart (SImode, fnaddr));
12432 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12433 gen_int_mode (0xbb49, HImode));
12434 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12438 /* Load static chain using movabs to r10. */
12439 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12440 gen_int_mode (0xba49, HImode));
12441 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
/* Jump to r11.  */
12445 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12446 gen_int_mode (0xff49, HImode));
12447 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12448 gen_int_mode (0xe3, QImode));
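/* Schematically, the bytes stored above are

     41 bb <imm:4>   movl   $FNADDR, %r11d  (zero-extending short form)
   or
     49 bb <imm:8>   movabs $FNADDR, %r11
   followed by
     49 ba <imm:8>   movabs $CXT, %r10
     49 ff e3        jmpq   *%r11

   Note that the HImode stores write their two opcode bytes
   little-endian, so e.g. gen_int_mode (0xff49, HImode) lays down
   49 ff.  */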
12450 if (offset > TRAMPOLINE_SIZE)
12454 #ifdef ENABLE_EXECUTE_STACK
12455 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
12456 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12460 /* Codes for all the SSE/MMX builtins. */
12463 IX86_BUILTIN_ADDPS,
12464 IX86_BUILTIN_ADDSS,
12465 IX86_BUILTIN_DIVPS,
12466 IX86_BUILTIN_DIVSS,
12467 IX86_BUILTIN_MULPS,
12468 IX86_BUILTIN_MULSS,
12469 IX86_BUILTIN_SUBPS,
12470 IX86_BUILTIN_SUBSS,
12472 IX86_BUILTIN_CMPEQPS,
12473 IX86_BUILTIN_CMPLTPS,
12474 IX86_BUILTIN_CMPLEPS,
12475 IX86_BUILTIN_CMPGTPS,
12476 IX86_BUILTIN_CMPGEPS,
12477 IX86_BUILTIN_CMPNEQPS,
12478 IX86_BUILTIN_CMPNLTPS,
12479 IX86_BUILTIN_CMPNLEPS,
12480 IX86_BUILTIN_CMPNGTPS,
12481 IX86_BUILTIN_CMPNGEPS,
12482 IX86_BUILTIN_CMPORDPS,
12483 IX86_BUILTIN_CMPUNORDPS,
12484 IX86_BUILTIN_CMPNEPS,
12485 IX86_BUILTIN_CMPEQSS,
12486 IX86_BUILTIN_CMPLTSS,
12487 IX86_BUILTIN_CMPLESS,
12488 IX86_BUILTIN_CMPNEQSS,
12489 IX86_BUILTIN_CMPNLTSS,
12490 IX86_BUILTIN_CMPNLESS,
12491 IX86_BUILTIN_CMPNGTSS,
12492 IX86_BUILTIN_CMPNGESS,
12493 IX86_BUILTIN_CMPORDSS,
12494 IX86_BUILTIN_CMPUNORDSS,
12495 IX86_BUILTIN_CMPNESS,
12497 IX86_BUILTIN_COMIEQSS,
12498 IX86_BUILTIN_COMILTSS,
12499 IX86_BUILTIN_COMILESS,
12500 IX86_BUILTIN_COMIGTSS,
12501 IX86_BUILTIN_COMIGESS,
12502 IX86_BUILTIN_COMINEQSS,
12503 IX86_BUILTIN_UCOMIEQSS,
12504 IX86_BUILTIN_UCOMILTSS,
12505 IX86_BUILTIN_UCOMILESS,
12506 IX86_BUILTIN_UCOMIGTSS,
12507 IX86_BUILTIN_UCOMIGESS,
12508 IX86_BUILTIN_UCOMINEQSS,
12510 IX86_BUILTIN_CVTPI2PS,
12511 IX86_BUILTIN_CVTPS2PI,
12512 IX86_BUILTIN_CVTSI2SS,
12513 IX86_BUILTIN_CVTSI642SS,
12514 IX86_BUILTIN_CVTSS2SI,
12515 IX86_BUILTIN_CVTSS2SI64,
12516 IX86_BUILTIN_CVTTPS2PI,
12517 IX86_BUILTIN_CVTTSS2SI,
12518 IX86_BUILTIN_CVTTSS2SI64,
12520 IX86_BUILTIN_MAXPS,
12521 IX86_BUILTIN_MAXSS,
12522 IX86_BUILTIN_MINPS,
12523 IX86_BUILTIN_MINSS,
12525 IX86_BUILTIN_LOADUPS,
12526 IX86_BUILTIN_STOREUPS,
12527 IX86_BUILTIN_MOVSS,
12529 IX86_BUILTIN_MOVHLPS,
12530 IX86_BUILTIN_MOVLHPS,
12531 IX86_BUILTIN_LOADHPS,
12532 IX86_BUILTIN_LOADLPS,
12533 IX86_BUILTIN_STOREHPS,
12534 IX86_BUILTIN_STORELPS,
12536 IX86_BUILTIN_MASKMOVQ,
12537 IX86_BUILTIN_MOVMSKPS,
12538 IX86_BUILTIN_PMOVMSKB,
12540 IX86_BUILTIN_MOVNTPS,
12541 IX86_BUILTIN_MOVNTQ,
12543 IX86_BUILTIN_LOADDQU,
12544 IX86_BUILTIN_STOREDQU,
12546 IX86_BUILTIN_PACKSSWB,
12547 IX86_BUILTIN_PACKSSDW,
12548 IX86_BUILTIN_PACKUSWB,
12550 IX86_BUILTIN_PADDB,
12551 IX86_BUILTIN_PADDW,
12552 IX86_BUILTIN_PADDD,
12553 IX86_BUILTIN_PADDQ,
12554 IX86_BUILTIN_PADDSB,
12555 IX86_BUILTIN_PADDSW,
12556 IX86_BUILTIN_PADDUSB,
12557 IX86_BUILTIN_PADDUSW,
12558 IX86_BUILTIN_PSUBB,
12559 IX86_BUILTIN_PSUBW,
12560 IX86_BUILTIN_PSUBD,
12561 IX86_BUILTIN_PSUBQ,
12562 IX86_BUILTIN_PSUBSB,
12563 IX86_BUILTIN_PSUBSW,
12564 IX86_BUILTIN_PSUBUSB,
12565 IX86_BUILTIN_PSUBUSW,
12568 IX86_BUILTIN_PANDN,
12572 IX86_BUILTIN_PAVGB,
12573 IX86_BUILTIN_PAVGW,
12575 IX86_BUILTIN_PCMPEQB,
12576 IX86_BUILTIN_PCMPEQW,
12577 IX86_BUILTIN_PCMPEQD,
12578 IX86_BUILTIN_PCMPGTB,
12579 IX86_BUILTIN_PCMPGTW,
12580 IX86_BUILTIN_PCMPGTD,
12582 IX86_BUILTIN_PMADDWD,
12584 IX86_BUILTIN_PMAXSW,
12585 IX86_BUILTIN_PMAXUB,
12586 IX86_BUILTIN_PMINSW,
12587 IX86_BUILTIN_PMINUB,
12589 IX86_BUILTIN_PMULHUW,
12590 IX86_BUILTIN_PMULHW,
12591 IX86_BUILTIN_PMULLW,
12593 IX86_BUILTIN_PSADBW,
12594 IX86_BUILTIN_PSHUFW,
12596 IX86_BUILTIN_PSLLW,
12597 IX86_BUILTIN_PSLLD,
12598 IX86_BUILTIN_PSLLQ,
12599 IX86_BUILTIN_PSRAW,
12600 IX86_BUILTIN_PSRAD,
12601 IX86_BUILTIN_PSRLW,
12602 IX86_BUILTIN_PSRLD,
12603 IX86_BUILTIN_PSRLQ,
12604 IX86_BUILTIN_PSLLWI,
12605 IX86_BUILTIN_PSLLDI,
12606 IX86_BUILTIN_PSLLQI,
12607 IX86_BUILTIN_PSRAWI,
12608 IX86_BUILTIN_PSRADI,
12609 IX86_BUILTIN_PSRLWI,
12610 IX86_BUILTIN_PSRLDI,
12611 IX86_BUILTIN_PSRLQI,
12613 IX86_BUILTIN_PUNPCKHBW,
12614 IX86_BUILTIN_PUNPCKHWD,
12615 IX86_BUILTIN_PUNPCKHDQ,
12616 IX86_BUILTIN_PUNPCKLBW,
12617 IX86_BUILTIN_PUNPCKLWD,
12618 IX86_BUILTIN_PUNPCKLDQ,
12620 IX86_BUILTIN_SHUFPS,
12622 IX86_BUILTIN_RCPPS,
12623 IX86_BUILTIN_RCPSS,
12624 IX86_BUILTIN_RSQRTPS,
12625 IX86_BUILTIN_RSQRTSS,
12626 IX86_BUILTIN_SQRTPS,
12627 IX86_BUILTIN_SQRTSS,
12629 IX86_BUILTIN_UNPCKHPS,
12630 IX86_BUILTIN_UNPCKLPS,
12632 IX86_BUILTIN_ANDPS,
12633 IX86_BUILTIN_ANDNPS,
12635 IX86_BUILTIN_XORPS,
12638 IX86_BUILTIN_LDMXCSR,
12639 IX86_BUILTIN_STMXCSR,
12640 IX86_BUILTIN_SFENCE,
12642 /* 3DNow! Original */
12643 IX86_BUILTIN_FEMMS,
12644 IX86_BUILTIN_PAVGUSB,
12645 IX86_BUILTIN_PF2ID,
12646 IX86_BUILTIN_PFACC,
12647 IX86_BUILTIN_PFADD,
12648 IX86_BUILTIN_PFCMPEQ,
12649 IX86_BUILTIN_PFCMPGE,
12650 IX86_BUILTIN_PFCMPGT,
12651 IX86_BUILTIN_PFMAX,
12652 IX86_BUILTIN_PFMIN,
12653 IX86_BUILTIN_PFMUL,
12654 IX86_BUILTIN_PFRCP,
12655 IX86_BUILTIN_PFRCPIT1,
12656 IX86_BUILTIN_PFRCPIT2,
12657 IX86_BUILTIN_PFRSQIT1,
12658 IX86_BUILTIN_PFRSQRT,
12659 IX86_BUILTIN_PFSUB,
12660 IX86_BUILTIN_PFSUBR,
12661 IX86_BUILTIN_PI2FD,
12662 IX86_BUILTIN_PMULHRW,
12664 /* 3DNow! Athlon Extensions */
12665 IX86_BUILTIN_PF2IW,
12666 IX86_BUILTIN_PFNACC,
12667 IX86_BUILTIN_PFPNACC,
12668 IX86_BUILTIN_PI2FW,
12669 IX86_BUILTIN_PSWAPDSI,
12670 IX86_BUILTIN_PSWAPDSF,
12673 IX86_BUILTIN_ADDPD,
12674 IX86_BUILTIN_ADDSD,
12675 IX86_BUILTIN_DIVPD,
12676 IX86_BUILTIN_DIVSD,
12677 IX86_BUILTIN_MULPD,
12678 IX86_BUILTIN_MULSD,
12679 IX86_BUILTIN_SUBPD,
12680 IX86_BUILTIN_SUBSD,
12682 IX86_BUILTIN_CMPEQPD,
12683 IX86_BUILTIN_CMPLTPD,
12684 IX86_BUILTIN_CMPLEPD,
12685 IX86_BUILTIN_CMPGTPD,
12686 IX86_BUILTIN_CMPGEPD,
12687 IX86_BUILTIN_CMPNEQPD,
12688 IX86_BUILTIN_CMPNLTPD,
12689 IX86_BUILTIN_CMPNLEPD,
12690 IX86_BUILTIN_CMPNGTPD,
12691 IX86_BUILTIN_CMPNGEPD,
12692 IX86_BUILTIN_CMPORDPD,
12693 IX86_BUILTIN_CMPUNORDPD,
12694 IX86_BUILTIN_CMPNEPD,
12695 IX86_BUILTIN_CMPEQSD,
12696 IX86_BUILTIN_CMPLTSD,
12697 IX86_BUILTIN_CMPLESD,
12698 IX86_BUILTIN_CMPNEQSD,
12699 IX86_BUILTIN_CMPNLTSD,
12700 IX86_BUILTIN_CMPNLESD,
12701 IX86_BUILTIN_CMPORDSD,
12702 IX86_BUILTIN_CMPUNORDSD,
12703 IX86_BUILTIN_CMPNESD,
12705 IX86_BUILTIN_COMIEQSD,
12706 IX86_BUILTIN_COMILTSD,
12707 IX86_BUILTIN_COMILESD,
12708 IX86_BUILTIN_COMIGTSD,
12709 IX86_BUILTIN_COMIGESD,
12710 IX86_BUILTIN_COMINEQSD,
12711 IX86_BUILTIN_UCOMIEQSD,
12712 IX86_BUILTIN_UCOMILTSD,
12713 IX86_BUILTIN_UCOMILESD,
12714 IX86_BUILTIN_UCOMIGTSD,
12715 IX86_BUILTIN_UCOMIGESD,
12716 IX86_BUILTIN_UCOMINEQSD,
12718 IX86_BUILTIN_MAXPD,
12719 IX86_BUILTIN_MAXSD,
12720 IX86_BUILTIN_MINPD,
12721 IX86_BUILTIN_MINSD,
12723 IX86_BUILTIN_ANDPD,
12724 IX86_BUILTIN_ANDNPD,
12726 IX86_BUILTIN_XORPD,
12728 IX86_BUILTIN_SQRTPD,
12729 IX86_BUILTIN_SQRTSD,
12731 IX86_BUILTIN_UNPCKHPD,
12732 IX86_BUILTIN_UNPCKLPD,
12734 IX86_BUILTIN_SHUFPD,
12736 IX86_BUILTIN_LOADUPD,
12737 IX86_BUILTIN_STOREUPD,
12738 IX86_BUILTIN_MOVSD,
12740 IX86_BUILTIN_LOADHPD,
12741 IX86_BUILTIN_LOADLPD,
12743 IX86_BUILTIN_CVTDQ2PD,
12744 IX86_BUILTIN_CVTDQ2PS,
12746 IX86_BUILTIN_CVTPD2DQ,
12747 IX86_BUILTIN_CVTPD2PI,
12748 IX86_BUILTIN_CVTPD2PS,
12749 IX86_BUILTIN_CVTTPD2DQ,
12750 IX86_BUILTIN_CVTTPD2PI,
12752 IX86_BUILTIN_CVTPI2PD,
12753 IX86_BUILTIN_CVTSI2SD,
12754 IX86_BUILTIN_CVTSI642SD,
12756 IX86_BUILTIN_CVTSD2SI,
12757 IX86_BUILTIN_CVTSD2SI64,
12758 IX86_BUILTIN_CVTSD2SS,
12759 IX86_BUILTIN_CVTSS2SD,
12760 IX86_BUILTIN_CVTTSD2SI,
12761 IX86_BUILTIN_CVTTSD2SI64,
12763 IX86_BUILTIN_CVTPS2DQ,
12764 IX86_BUILTIN_CVTPS2PD,
12765 IX86_BUILTIN_CVTTPS2DQ,
12767 IX86_BUILTIN_MOVNTI,
12768 IX86_BUILTIN_MOVNTPD,
12769 IX86_BUILTIN_MOVNTDQ,
12772 IX86_BUILTIN_MASKMOVDQU,
12773 IX86_BUILTIN_MOVMSKPD,
12774 IX86_BUILTIN_PMOVMSKB128,
12776 IX86_BUILTIN_PACKSSWB128,
12777 IX86_BUILTIN_PACKSSDW128,
12778 IX86_BUILTIN_PACKUSWB128,
12780 IX86_BUILTIN_PADDB128,
12781 IX86_BUILTIN_PADDW128,
12782 IX86_BUILTIN_PADDD128,
12783 IX86_BUILTIN_PADDQ128,
12784 IX86_BUILTIN_PADDSB128,
12785 IX86_BUILTIN_PADDSW128,
12786 IX86_BUILTIN_PADDUSB128,
12787 IX86_BUILTIN_PADDUSW128,
12788 IX86_BUILTIN_PSUBB128,
12789 IX86_BUILTIN_PSUBW128,
12790 IX86_BUILTIN_PSUBD128,
12791 IX86_BUILTIN_PSUBQ128,
12792 IX86_BUILTIN_PSUBSB128,
12793 IX86_BUILTIN_PSUBSW128,
12794 IX86_BUILTIN_PSUBUSB128,
12795 IX86_BUILTIN_PSUBUSW128,
12797 IX86_BUILTIN_PAND128,
12798 IX86_BUILTIN_PANDN128,
12799 IX86_BUILTIN_POR128,
12800 IX86_BUILTIN_PXOR128,
12802 IX86_BUILTIN_PAVGB128,
12803 IX86_BUILTIN_PAVGW128,
12805 IX86_BUILTIN_PCMPEQB128,
12806 IX86_BUILTIN_PCMPEQW128,
12807 IX86_BUILTIN_PCMPEQD128,
12808 IX86_BUILTIN_PCMPGTB128,
12809 IX86_BUILTIN_PCMPGTW128,
12810 IX86_BUILTIN_PCMPGTD128,
12812 IX86_BUILTIN_PMADDWD128,
12814 IX86_BUILTIN_PMAXSW128,
12815 IX86_BUILTIN_PMAXUB128,
12816 IX86_BUILTIN_PMINSW128,
12817 IX86_BUILTIN_PMINUB128,
12819 IX86_BUILTIN_PMULUDQ,
12820 IX86_BUILTIN_PMULUDQ128,
12821 IX86_BUILTIN_PMULHUW128,
12822 IX86_BUILTIN_PMULHW128,
12823 IX86_BUILTIN_PMULLW128,
12825 IX86_BUILTIN_PSADBW128,
12826 IX86_BUILTIN_PSHUFHW,
12827 IX86_BUILTIN_PSHUFLW,
12828 IX86_BUILTIN_PSHUFD,
12830 IX86_BUILTIN_PSLLW128,
12831 IX86_BUILTIN_PSLLD128,
12832 IX86_BUILTIN_PSLLQ128,
12833 IX86_BUILTIN_PSRAW128,
12834 IX86_BUILTIN_PSRAD128,
12835 IX86_BUILTIN_PSRLW128,
12836 IX86_BUILTIN_PSRLD128,
12837 IX86_BUILTIN_PSRLQ128,
12838 IX86_BUILTIN_PSLLDQI128,
12839 IX86_BUILTIN_PSLLWI128,
12840 IX86_BUILTIN_PSLLDI128,
12841 IX86_BUILTIN_PSLLQI128,
12842 IX86_BUILTIN_PSRAWI128,
12843 IX86_BUILTIN_PSRADI128,
12844 IX86_BUILTIN_PSRLDQI128,
12845 IX86_BUILTIN_PSRLWI128,
12846 IX86_BUILTIN_PSRLDI128,
12847 IX86_BUILTIN_PSRLQI128,
12849 IX86_BUILTIN_PUNPCKHBW128,
12850 IX86_BUILTIN_PUNPCKHWD128,
12851 IX86_BUILTIN_PUNPCKHDQ128,
12852 IX86_BUILTIN_PUNPCKHQDQ128,
12853 IX86_BUILTIN_PUNPCKLBW128,
12854 IX86_BUILTIN_PUNPCKLWD128,
12855 IX86_BUILTIN_PUNPCKLDQ128,
12856 IX86_BUILTIN_PUNPCKLQDQ128,
12858 IX86_BUILTIN_CLFLUSH,
12859 IX86_BUILTIN_MFENCE,
12860 IX86_BUILTIN_LFENCE,
12862 /* Prescott New Instructions. */
12863 IX86_BUILTIN_ADDSUBPS,
12864 IX86_BUILTIN_HADDPS,
12865 IX86_BUILTIN_HSUBPS,
12866 IX86_BUILTIN_MOVSHDUP,
12867 IX86_BUILTIN_MOVSLDUP,
12868 IX86_BUILTIN_ADDSUBPD,
12869 IX86_BUILTIN_HADDPD,
12870 IX86_BUILTIN_HSUBPD,
12871 IX86_BUILTIN_LDDQU,
12873 IX86_BUILTIN_MONITOR,
12874 IX86_BUILTIN_MWAIT,
12876 IX86_BUILTIN_VEC_INIT_V2SI,
12877 IX86_BUILTIN_VEC_INIT_V4HI,
12878 IX86_BUILTIN_VEC_INIT_V8QI,
12879 IX86_BUILTIN_VEC_EXT_V2DF,
12880 IX86_BUILTIN_VEC_EXT_V2DI,
12881 IX86_BUILTIN_VEC_EXT_V4SF,
12882 IX86_BUILTIN_VEC_EXT_V4SI,
12883 IX86_BUILTIN_VEC_EXT_V8HI,
12884 IX86_BUILTIN_VEC_EXT_V2SI,
12885 IX86_BUILTIN_VEC_EXT_V4HI,
12886 IX86_BUILTIN_VEC_SET_V8HI,
12887 IX86_BUILTIN_VEC_SET_V4HI,
#define def_builtin(MASK, NAME, TYPE, CODE)				\
do {									\
  if ((MASK) & target_flags						\
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))			\
    lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
				 NULL, NULL_TREE);			\
} while (0)
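/* A typical use looks like this (illustrative; the real calls appear
   later in this file, where function-type trees such as the
   void_ftype_void used here are built):

     def_builtin (MASK_SSE, "__builtin_ia32_sfence",
                  void_ftype_void, IX86_BUILTIN_SFENCE);

   A builtin whose MASK includes MASK_64BIT is registered only when
   compiling for a 64-bit target, on top of the target_flags test.  */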
12900 /* Bits for builtin_description.flag. */
/* Set when we don't support the comparison natively, and should
   swap the comparison operands in order to support it.  */
12904 #define BUILTIN_DESC_SWAP_OPERANDS 1
12906 struct builtin_description
12908 const unsigned int mask;
12909 const enum insn_code icode;
12910 const char *const name;
12911 const enum ix86_builtins code;
12912 const enum rtx_code comparison;
12913 const unsigned int flag;
12916 static const struct builtin_description bdesc_comi[] =
12918 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12919 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12920 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12921 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12922 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12923 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12924 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12925 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12926 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12927 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12928 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12929 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12930 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12931 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12932 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12933 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12934 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12935 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12936 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12937 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12938 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12939 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12940 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12941 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12944 static const struct builtin_description bdesc_2arg[] =
12947 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12948 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12949 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12950 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12951 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12952 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12953 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12954 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12956 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12957 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12958 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12959 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
12960 BUILTIN_DESC_SWAP_OPERANDS },
12961 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
12962 BUILTIN_DESC_SWAP_OPERANDS },
12963 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12964 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
12965 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
12966 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
12967 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
12968 BUILTIN_DESC_SWAP_OPERANDS },
12969 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
12970 BUILTIN_DESC_SWAP_OPERANDS },
12971 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
12972 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12973 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12974 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12975 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12976 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
12977 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
12978 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
12979 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
12980 BUILTIN_DESC_SWAP_OPERANDS },
12981 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
12982 BUILTIN_DESC_SWAP_OPERANDS },
12983 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12985 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12986 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12987 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12988 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12990 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12991 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12992 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12993 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12995 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12996 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12997 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12998 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12999 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
13002 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
13003 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
13004 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
13005 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
13006 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
13007 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
13008 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
13009 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
13011 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
13012 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
13013 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
13014 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
13015 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
13016 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
13017 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
13018 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
13020 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
13021 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
13022 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
13024 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
13025 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
13026 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
13027 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
13029 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
13030 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
13032 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
13033 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
13034 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
13035 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
13036 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
13037 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
13039 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
13040 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
13041 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
13042 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
13044 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
13045 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
13046 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
13047 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
13048 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
13049 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
13052 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
13053 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
13054 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
13056 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
13057 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
13058 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
13060 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
13061 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
13062 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
13063 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
13064 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
13065 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
13067 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
13068 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
13069 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
13070 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
13071 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
13072 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
13074 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
13075 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
13076 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
13077 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
13079 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
13080 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
13083 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
13084 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
13085 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
13086 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
13087 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
13088 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
13089 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
13090 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
13092 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
13093 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
13094 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
13095 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
13096 BUILTIN_DESC_SWAP_OPERANDS },
13097 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
13098 BUILTIN_DESC_SWAP_OPERANDS },
13099 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
13100 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
13101 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
13102 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
13103 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
13104 BUILTIN_DESC_SWAP_OPERANDS },
13105 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
13106 BUILTIN_DESC_SWAP_OPERANDS },
13107 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
13108 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
13109 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
13110 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
13111 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
13112 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
13113 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
13114 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
13115 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
13117 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
13118 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
13119 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
13120 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
13122 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
13123 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
13124 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
13125 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
13127 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
13128 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
13129 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
13132 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
13133 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
13134 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
13135 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
13136 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
13137 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
13138 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
13139 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
13141 { MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
13142 { MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
13143 { MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
13144 { MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
13145 { MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
13146 { MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
13147 { MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
13148 { MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
13150 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
13151 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
13153 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
13154 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
13155 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
13156 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
13158 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
13159 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
13161 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
13162 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
13163 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
13164 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
13165 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
13166 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
13168 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
13169 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
13170 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
13171 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
13173 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
13174 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
13175 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
13176 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
13177 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
13178 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
13179 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
13180 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
13182 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
13183 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
13184 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
13186 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
13187 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
13189 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
13190 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
13192 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13193 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13194 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13196 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13197 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13198 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13200 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13201 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13203 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13205 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
13206 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
13207 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13208 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
13211 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
13212 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
13213 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
13214 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
13215 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
13216 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
13217 };
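/* Editor's sketch: each entry above ties a builtin to an insn pattern as
   { mask, icode, name, code, comparison, flag }.  Once registered, the
   named ones are directly callable; e.g. the IX86_BUILTIN_ADDPD entry
   behaves roughly like this (the __v2df typedef is the sketch's
   assumption, not defined in this file):

     typedef double __v2df __attribute__ ((__vector_size__ (16)));

     __v2df add_pd (__v2df a, __v2df b)
     {
       return __builtin_ia32_addpd (a, b);    // expands via CODE_FOR_addv2df3
     }
*/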
13219 static const struct builtin_description bdesc_1arg[] =
13220 {
13221 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13222 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13224 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13225 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13226 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13228 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13229 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13230 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13231 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13232 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13233 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13235 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13236 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13238 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13240 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13241 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13243 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13244 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13245 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13246 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13247 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13249 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13251 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13252 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13253 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13254 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13256 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13257 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13258 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13261 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13262 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 }
13263 };
13265 static void
13266 ix86_init_builtins (void)
13267 {
13268 if (TARGET_MMX)
13269 ix86_init_mmx_sse_builtins ();
13270 }
13272 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13273 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13274 part of the builtins. */
13275 static void
13276 ix86_init_mmx_sse_builtins (void)
13277 {
13278 const struct builtin_description * d;
13279 size_t i;
13281 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
13282 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
13283 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
13284 tree V2DI_type_node
13285 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
13286 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
13287 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
13288 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
13289 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
13290 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
13291 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
13293 tree pchar_type_node = build_pointer_type (char_type_node);
13294 tree pcchar_type_node = build_pointer_type (
13295 build_type_variant (char_type_node, 1, 0));
13296 tree pfloat_type_node = build_pointer_type (float_type_node);
13297 tree pcfloat_type_node = build_pointer_type (
13298 build_type_variant (float_type_node, 1, 0));
13299 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13300 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13301 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13303 /* Comparisons. */
13304 tree int_ftype_v4sf_v4sf
13305 = build_function_type_list (integer_type_node,
13306 V4SF_type_node, V4SF_type_node, NULL_TREE);
13307 tree v4si_ftype_v4sf_v4sf
13308 = build_function_type_list (V4SI_type_node,
13309 V4SF_type_node, V4SF_type_node, NULL_TREE);
13310 /* MMX/SSE/integer conversions. */
13311 tree int_ftype_v4sf
13312 = build_function_type_list (integer_type_node,
13313 V4SF_type_node, NULL_TREE);
13314 tree int64_ftype_v4sf
13315 = build_function_type_list (long_long_integer_type_node,
13316 V4SF_type_node, NULL_TREE);
13317 tree int_ftype_v8qi
13318 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13319 tree v4sf_ftype_v4sf_int
13320 = build_function_type_list (V4SF_type_node,
13321 V4SF_type_node, integer_type_node, NULL_TREE);
13322 tree v4sf_ftype_v4sf_int64
13323 = build_function_type_list (V4SF_type_node,
13324 V4SF_type_node, long_long_integer_type_node,
13325 NULL_TREE);
13326 tree v4sf_ftype_v4sf_v2si
13327 = build_function_type_list (V4SF_type_node,
13328 V4SF_type_node, V2SI_type_node, NULL_TREE);
13330 /* Miscellaneous. */
13331 tree v8qi_ftype_v4hi_v4hi
13332 = build_function_type_list (V8QI_type_node,
13333 V4HI_type_node, V4HI_type_node, NULL_TREE);
13334 tree v4hi_ftype_v2si_v2si
13335 = build_function_type_list (V4HI_type_node,
13336 V2SI_type_node, V2SI_type_node, NULL_TREE);
13337 tree v4sf_ftype_v4sf_v4sf_int
13338 = build_function_type_list (V4SF_type_node,
13339 V4SF_type_node, V4SF_type_node,
13340 integer_type_node, NULL_TREE);
13341 tree v2si_ftype_v4hi_v4hi
13342 = build_function_type_list (V2SI_type_node,
13343 V4HI_type_node, V4HI_type_node, NULL_TREE);
13344 tree v4hi_ftype_v4hi_int
13345 = build_function_type_list (V4HI_type_node,
13346 V4HI_type_node, integer_type_node, NULL_TREE);
13347 tree v4hi_ftype_v4hi_di
13348 = build_function_type_list (V4HI_type_node,
13349 V4HI_type_node, long_long_unsigned_type_node,
13350 NULL_TREE);
13351 tree v2si_ftype_v2si_di
13352 = build_function_type_list (V2SI_type_node,
13353 V2SI_type_node, long_long_unsigned_type_node,
13354 NULL_TREE);
13355 tree void_ftype_void
13356 = build_function_type (void_type_node, void_list_node);
13357 tree void_ftype_unsigned
13358 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13359 tree void_ftype_unsigned_unsigned
13360 = build_function_type_list (void_type_node, unsigned_type_node,
13361 unsigned_type_node, NULL_TREE);
13362 tree void_ftype_pcvoid_unsigned_unsigned
13363 = build_function_type_list (void_type_node, const_ptr_type_node,
13364 unsigned_type_node, unsigned_type_node,
13365 NULL_TREE);
13366 tree unsigned_ftype_void
13367 = build_function_type (unsigned_type_node, void_list_node);
13368 tree v2si_ftype_v4sf
13369 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13370 /* Loads/stores. */
13371 tree void_ftype_v8qi_v8qi_pchar
13372 = build_function_type_list (void_type_node,
13373 V8QI_type_node, V8QI_type_node,
13374 pchar_type_node, NULL_TREE);
13375 tree v4sf_ftype_pcfloat
13376 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13377 /* @@@ the type is bogus */
13378 tree v4sf_ftype_v4sf_pv2si
13379 = build_function_type_list (V4SF_type_node,
13380 V4SF_type_node, pv2si_type_node, NULL_TREE);
13381 tree void_ftype_pv2si_v4sf
13382 = build_function_type_list (void_type_node,
13383 pv2si_type_node, V4SF_type_node, NULL_TREE);
13384 tree void_ftype_pfloat_v4sf
13385 = build_function_type_list (void_type_node,
13386 pfloat_type_node, V4SF_type_node, NULL_TREE);
13387 tree void_ftype_pdi_di
13388 = build_function_type_list (void_type_node,
13389 pdi_type_node, long_long_unsigned_type_node,
13390 NULL_TREE);
13391 tree void_ftype_pv2di_v2di
13392 = build_function_type_list (void_type_node,
13393 pv2di_type_node, V2DI_type_node, NULL_TREE);
13394 /* Normal vector unops. */
13395 tree v4sf_ftype_v4sf
13396 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13398 /* Normal vector binops. */
13399 tree v4sf_ftype_v4sf_v4sf
13400 = build_function_type_list (V4SF_type_node,
13401 V4SF_type_node, V4SF_type_node, NULL_TREE);
13402 tree v8qi_ftype_v8qi_v8qi
13403 = build_function_type_list (V8QI_type_node,
13404 V8QI_type_node, V8QI_type_node, NULL_TREE);
13405 tree v4hi_ftype_v4hi_v4hi
13406 = build_function_type_list (V4HI_type_node,
13407 V4HI_type_node, V4HI_type_node, NULL_TREE);
13408 tree v2si_ftype_v2si_v2si
13409 = build_function_type_list (V2SI_type_node,
13410 V2SI_type_node, V2SI_type_node, NULL_TREE);
13411 tree di_ftype_di_di
13412 = build_function_type_list (long_long_unsigned_type_node,
13413 long_long_unsigned_type_node,
13414 long_long_unsigned_type_node, NULL_TREE);
13416 tree v2si_ftype_v2sf
13417 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13418 tree v2sf_ftype_v2si
13419 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13420 tree v2si_ftype_v2si
13421 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13422 tree v2sf_ftype_v2sf
13423 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13424 tree v2sf_ftype_v2sf_v2sf
13425 = build_function_type_list (V2SF_type_node,
13426 V2SF_type_node, V2SF_type_node, NULL_TREE);
13427 tree v2si_ftype_v2sf_v2sf
13428 = build_function_type_list (V2SI_type_node,
13429 V2SF_type_node, V2SF_type_node, NULL_TREE);
13430 tree pint_type_node = build_pointer_type (integer_type_node);
13431 tree pdouble_type_node = build_pointer_type (double_type_node);
13432 tree pcdouble_type_node = build_pointer_type (
13433 build_type_variant (double_type_node, 1, 0));
13434 tree int_ftype_v2df_v2df
13435 = build_function_type_list (integer_type_node,
13436 V2DF_type_node, V2DF_type_node, NULL_TREE);
13438 tree ti_ftype_ti_ti
13439 = build_function_type_list (intTI_type_node,
13440 intTI_type_node, intTI_type_node, NULL_TREE);
13441 tree void_ftype_pcvoid
13442 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13443 tree v4sf_ftype_v4si
13444 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13445 tree v4si_ftype_v4sf
13446 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13447 tree v2df_ftype_v4si
13448 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13449 tree v4si_ftype_v2df
13450 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13451 tree v2si_ftype_v2df
13452 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13453 tree v4sf_ftype_v2df
13454 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13455 tree v2df_ftype_v2si
13456 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13457 tree v2df_ftype_v4sf
13458 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13459 tree int_ftype_v2df
13460 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13461 tree int64_ftype_v2df
13462 = build_function_type_list (long_long_integer_type_node,
13463 V2DF_type_node, NULL_TREE);
13464 tree v2df_ftype_v2df_int
13465 = build_function_type_list (V2DF_type_node,
13466 V2DF_type_node, integer_type_node, NULL_TREE);
13467 tree v2df_ftype_v2df_int64
13468 = build_function_type_list (V2DF_type_node,
13469 V2DF_type_node, long_long_integer_type_node,
13470 NULL_TREE);
13471 tree v4sf_ftype_v4sf_v2df
13472 = build_function_type_list (V4SF_type_node,
13473 V4SF_type_node, V2DF_type_node, NULL_TREE);
13474 tree v2df_ftype_v2df_v4sf
13475 = build_function_type_list (V2DF_type_node,
13476 V2DF_type_node, V4SF_type_node, NULL_TREE);
13477 tree v2df_ftype_v2df_v2df_int
13478 = build_function_type_list (V2DF_type_node,
13479 V2DF_type_node, V2DF_type_node,
13480 integer_type_node,
13481 NULL_TREE);
13482 tree v2df_ftype_v2df_pcdouble
13483 = build_function_type_list (V2DF_type_node,
13484 V2DF_type_node, pcdouble_type_node, NULL_TREE);
13485 tree void_ftype_pdouble_v2df
13486 = build_function_type_list (void_type_node,
13487 pdouble_type_node, V2DF_type_node, NULL_TREE);
13488 tree void_ftype_pint_int
13489 = build_function_type_list (void_type_node,
13490 pint_type_node, integer_type_node, NULL_TREE);
13491 tree void_ftype_v16qi_v16qi_pchar
13492 = build_function_type_list (void_type_node,
13493 V16QI_type_node, V16QI_type_node,
13494 pchar_type_node, NULL_TREE);
13495 tree v2df_ftype_pcdouble
13496 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13497 tree v2df_ftype_v2df_v2df
13498 = build_function_type_list (V2DF_type_node,
13499 V2DF_type_node, V2DF_type_node, NULL_TREE);
13500 tree v16qi_ftype_v16qi_v16qi
13501 = build_function_type_list (V16QI_type_node,
13502 V16QI_type_node, V16QI_type_node, NULL_TREE);
13503 tree v8hi_ftype_v8hi_v8hi
13504 = build_function_type_list (V8HI_type_node,
13505 V8HI_type_node, V8HI_type_node, NULL_TREE);
13506 tree v4si_ftype_v4si_v4si
13507 = build_function_type_list (V4SI_type_node,
13508 V4SI_type_node, V4SI_type_node, NULL_TREE);
13509 tree v2di_ftype_v2di_v2di
13510 = build_function_type_list (V2DI_type_node,
13511 V2DI_type_node, V2DI_type_node, NULL_TREE);
13512 tree v2di_ftype_v2df_v2df
13513 = build_function_type_list (V2DI_type_node,
13514 V2DF_type_node, V2DF_type_node, NULL_TREE);
13515 tree v2df_ftype_v2df
13516 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13517 tree v2di_ftype_v2di_int
13518 = build_function_type_list (V2DI_type_node,
13519 V2DI_type_node, integer_type_node, NULL_TREE);
13520 tree v4si_ftype_v4si_int
13521 = build_function_type_list (V4SI_type_node,
13522 V4SI_type_node, integer_type_node, NULL_TREE);
13523 tree v8hi_ftype_v8hi_int
13524 = build_function_type_list (V8HI_type_node,
13525 V8HI_type_node, integer_type_node, NULL_TREE);
13526 tree v8hi_ftype_v8hi_v2di
13527 = build_function_type_list (V8HI_type_node,
13528 V8HI_type_node, V2DI_type_node, NULL_TREE);
13529 tree v4si_ftype_v4si_v2di
13530 = build_function_type_list (V4SI_type_node,
13531 V4SI_type_node, V2DI_type_node, NULL_TREE);
13532 tree v4si_ftype_v8hi_v8hi
13533 = build_function_type_list (V4SI_type_node,
13534 V8HI_type_node, V8HI_type_node, NULL_TREE);
13535 tree di_ftype_v8qi_v8qi
13536 = build_function_type_list (long_long_unsigned_type_node,
13537 V8QI_type_node, V8QI_type_node, NULL_TREE);
13538 tree di_ftype_v2si_v2si
13539 = build_function_type_list (long_long_unsigned_type_node,
13540 V2SI_type_node, V2SI_type_node, NULL_TREE);
13541 tree v2di_ftype_v16qi_v16qi
13542 = build_function_type_list (V2DI_type_node,
13543 V16QI_type_node, V16QI_type_node, NULL_TREE);
13544 tree v2di_ftype_v4si_v4si
13545 = build_function_type_list (V2DI_type_node,
13546 V4SI_type_node, V4SI_type_node, NULL_TREE);
13547 tree int_ftype_v16qi
13548 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13549 tree v16qi_ftype_pcchar
13550 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13551 tree void_ftype_pchar_v16qi
13552 = build_function_type_list (void_type_node,
13553 pchar_type_node, V16QI_type_node, NULL_TREE);
13556 tree float128_type;
13557 tree float80_type;
13558 tree ftype;
13559 /* The __float80 type. */
13560 if (TYPE_MODE (long_double_type_node) == XFmode)
13561 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13562 "__float80");
13563 else
13564 {
13565 /* The __float80 type. */
13566 float80_type = make_node (REAL_TYPE);
13567 TYPE_PRECISION (float80_type) = 80;
13568 layout_type (float80_type);
13569 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13570 }
13572 float128_type = make_node (REAL_TYPE);
13573 TYPE_PRECISION (float128_type) = 128;
13574 layout_type (float128_type);
13575 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13577 /* Add all builtins that are more or less simple operations on two
13578 operands. */
13579 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13580 {
13581 /* Use one of the operands; the target can have a different mode for
13582 mask-generating compares. */
13583 enum machine_mode mode;
13584 tree type;
13586 if (d->name == 0)
13587 continue;
13588 mode = insn_data[d->icode].operand[1].mode;
13590 switch (mode)
13591 {
13592 case V16QImode:
13593 type = v16qi_ftype_v16qi_v16qi;
13594 break;
13595 case V8HImode:
13596 type = v8hi_ftype_v8hi_v8hi;
13597 break;
13598 case V4SImode:
13599 type = v4si_ftype_v4si_v4si;
13600 break;
13601 case V2DImode:
13602 type = v2di_ftype_v2di_v2di;
13603 break;
13604 case V2DFmode:
13605 type = v2df_ftype_v2df_v2df;
13606 break;
13607 case TImode:
13608 type = ti_ftype_ti_ti;
13609 break;
13610 case V4SFmode:
13611 type = v4sf_ftype_v4sf_v4sf;
13612 break;
13613 case V8QImode:
13614 type = v8qi_ftype_v8qi_v8qi;
13615 break;
13616 case V4HImode:
13617 type = v4hi_ftype_v4hi_v4hi;
13618 break;
13619 case V2SImode:
13620 type = v2si_ftype_v2si_v2si;
13621 break;
13622 case DImode:
13623 type = di_ftype_di_di;
13624 break;
13626 default:
13627 gcc_unreachable ();
13628 }
13630 /* Override for comparisons. */
13631 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
13632 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
13633 type = v4si_ftype_v4sf_v4sf;
13635 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
13636 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
13637 type = v2di_ftype_v2df_v2df;
13639 def_builtin (d->mask, d->name, type, d->code);
13640 }
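/* Editor's note on the override above: the SSE mask compares produce an
   all-ones/all-zeros element mask, so those builtins are typed here as
   returning an integer vector even though the insn nominally operates on
   floats.  A minimal sketch, assuming the usual vector typedefs (not
   defined in this file):

     typedef double    __v2df __attribute__ ((__vector_size__ (16)));
     typedef long long __v2di __attribute__ ((__vector_size__ (16)));

     __v2di lt_mask (__v2df a, __v2df b)
     {
       return __builtin_ia32_cmpltpd (a, b);   // each lane: ~0LL or 0LL
     }
*/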
13642 /* Add the remaining MMX insns with somewhat more complicated types. */
13643 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13644 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13645 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13646 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13648 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13649 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13650 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13652 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13653 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13655 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13656 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13658 /* comi/ucomi insns. */
13659 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13660 if (d->mask == MASK_SSE2)
13661 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13662 else
13663 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13665 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13666 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13667 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13669 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13670 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13671 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13672 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13673 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13674 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13675 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13676 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13677 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13678 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13679 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13681 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13683 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13684 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13686 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13687 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13688 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13689 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13691 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13692 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13693 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13694 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13696 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13698 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13700 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13701 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13702 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13703 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13704 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13705 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13707 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13709 /* Original 3DNow! */
13710 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13711 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13712 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13713 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13714 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13715 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13716 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13717 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13718 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13719 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13720 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13721 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13722 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13723 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13724 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13725 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13726 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13727 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13728 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13729 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13731 /* 3DNow! extension as used in the Athlon CPU. */
13732 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13733 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13734 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13735 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13736 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13737 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13740 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13742 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13743 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13745 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
13746 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
13748 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13749 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13750 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13751 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13752 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13754 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13755 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13756 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13757 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13759 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13760 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13762 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13764 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13765 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13767 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13768 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13769 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13770 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13771 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13773 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13775 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13776 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13777 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13778 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13780 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13781 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13782 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13784 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13785 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13786 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13787 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13789 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13790 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13791 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13793 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13794 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13796 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
13797 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
13799 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13800 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13801 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13803 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13804 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13805 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13807 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13808 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13810 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13811 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13812 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13813 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13815 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13816 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13817 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13818 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13820 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13821 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13823 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13825 /* Prescott New Instructions. */
13826 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
13827 void_ftype_pcvoid_unsigned_unsigned,
13828 IX86_BUILTIN_MONITOR);
13829 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
13830 void_ftype_unsigned_unsigned,
13831 IX86_BUILTIN_MWAIT);
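/* Editor's sketch of the MONITOR/MWAIT pair registered above (these back
   pmmintrin.h's _mm_monitor/_mm_mwait); the zero extension/hint arguments
   are simply the most basic legal values:

     volatile char flag;
     __builtin_ia32_monitor ((const void *) &flag, 0, 0);
     if (!flag)
       __builtin_ia32_mwait (0, 0);
*/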
13832 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
13834 IX86_BUILTIN_MOVSHDUP);
13835 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
13837 IX86_BUILTIN_MOVSLDUP);
13838 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
13839 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13841 /* Access to the vec_init patterns. */
13842 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
13843 integer_type_node, NULL_TREE);
13844 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
13845 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
13847 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
13848 short_integer_type_node,
13849 short_integer_type_node,
13850 short_integer_type_node, NULL_TREE);
13851 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
13852 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
13854 ftype = build_function_type_list (V8QI_type_node, char_type_node,
13855 char_type_node, char_type_node,
13856 char_type_node, char_type_node,
13857 char_type_node, char_type_node,
13858 char_type_node, NULL_TREE);
13859 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
13860 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
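/* Editor's sketch: mmintrin.h builds the _mm_set_* intrinsics on top of
   these wrappers, element 0 first (per the argument loop in
   ix86_expand_vec_init_builtin below); the __m64 typedef is assumed:

     typedef int __m64 __attribute__ ((__vector_size__ (8)));

     __m64 pair (int lo, int hi)
     {
       return (__m64) __builtin_ia32_vec_init_v2si (lo, hi);
     }
*/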
13862 /* Access to the vec_extract patterns. */
13863 ftype = build_function_type_list (double_type_node, V2DF_type_node,
13864 integer_type_node, NULL_TREE);
13865 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
13866 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
13868 ftype = build_function_type_list (long_long_integer_type_node,
13869 V2DI_type_node, integer_type_node,
13870 NULL_TREE);
13871 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
13872 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
13874 ftype = build_function_type_list (float_type_node, V4SF_type_node,
13875 integer_type_node, NULL_TREE);
13876 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
13877 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
13879 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
13880 integer_type_node, NULL_TREE);
13881 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
13882 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
13884 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
13885 integer_type_node, NULL_TREE);
13886 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
13887 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
13889 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
13890 integer_type_node, NULL_TREE);
13891 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
13892 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
13894 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
13895 integer_type_node, NULL_TREE);
13896 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
13897 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
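/* Editor's sketch of the extract builtins just registered; the selector
   must be a constant in range (enforced by get_element_number below):

     typedef float __v4sf __attribute__ ((__vector_size__ (16)));

     float second (__v4sf v)
     {
       return __builtin_ia32_vec_ext_v4sf (v, 1);   // ok: 1 is within 0..3
     }
*/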
13899 /* Access to the vec_set patterns. */
13900 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
13901 intHI_type_node,
13902 integer_type_node, NULL_TREE);
13903 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
13904 ftype, IX86_BUILTIN_VEC_SET_V8HI);
13906 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
13907 intHI_type_node,
13908 integer_type_node, NULL_TREE);
13909 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
13910 ftype, IX86_BUILTIN_VEC_SET_V4HI);
13911 }
13913 /* Errors in the source file can cause expand_expr to return const0_rtx
13914 where we expect a vector. To avoid crashing, use one of the vector
13915 clear instructions. */
13916 static rtx
13917 safe_vector_operand (rtx x, enum machine_mode mode)
13918 {
13919 if (x == const0_rtx)
13920 x = CONST0_RTX (mode);
13921 return x;
13922 }
13924 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13926 static rtx
13927 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13928 {
13929 rtx pat, xops[3];
13930 tree arg0 = TREE_VALUE (arglist);
13931 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13932 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13933 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13934 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13935 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13936 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13938 if (VECTOR_MODE_P (mode0))
13939 op0 = safe_vector_operand (op0, mode0);
13940 if (VECTOR_MODE_P (mode1))
13941 op1 = safe_vector_operand (op1, mode1);
13943 if (optimize || !target
13944 || GET_MODE (target) != tmode
13945 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13946 target = gen_reg_rtx (tmode);
13948 if (GET_MODE (op1) == SImode && mode1 == TImode)
13949 {
13950 rtx x = gen_reg_rtx (V4SImode);
13951 emit_insn (gen_sse2_loadd (x, op1));
13952 op1 = gen_lowpart (TImode, x);
13953 }
13955 /* In case the insn wants input operands in modes different from
13956 the result, abort. */
13957 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13958 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
13959 abort ();
13961 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
13962 op0 = copy_to_mode_reg (mode0, op0);
13963 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
13964 op1 = copy_to_mode_reg (mode1, op1);
13966 /* ??? Using ix86_fixup_binary_operands is problematic when
13967 we've got mismatched modes. Fake it. */
13969 xops[0] = target;
13970 xops[1] = op0;
13971 xops[2] = op1;
13973 if (tmode == mode0 && tmode == mode1)
13974 {
13975 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
13976 op0 = xops[1];
13977 op1 = xops[2];
13978 }
13979 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
13980 {
13981 op0 = force_reg (mode0, op0);
13982 op1 = force_reg (mode1, op1);
13983 target = gen_reg_rtx (tmode);
13984 }
13986 pat = GEN_FCN (icode) (target, op0, op1);
13987 if (! pat)
13988 return 0;
13989 emit_insn (pat);
13990 return target;
13991 }
13993 /* Subroutine of ix86_expand_builtin to take care of stores. */
13995 static rtx
13996 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13997 {
13998 rtx pat;
13999 tree arg0 = TREE_VALUE (arglist);
14000 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14001 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14002 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14003 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
14004 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
14006 if (VECTOR_MODE_P (mode1))
14007 op1 = safe_vector_operand (op1, mode1);
14009 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14010 op1 = copy_to_mode_reg (mode1, op1);
14012 pat = GEN_FCN (icode) (op0, op1);
14013 if (pat)
14014 emit_insn (pat);
14015 return 0;
14016 }
14018 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
14020 static rtx
14021 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
14022 rtx target, int do_load)
14023 {
14024 rtx pat;
14025 tree arg0 = TREE_VALUE (arglist);
14026 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14027 enum machine_mode tmode = insn_data[icode].operand[0].mode;
14028 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
14030 if (optimize || !target
14031 || GET_MODE (target) != tmode
14032 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14033 target = gen_reg_rtx (tmode);
14034 if (do_load)
14035 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14036 else
14037 {
14038 if (VECTOR_MODE_P (mode0))
14039 op0 = safe_vector_operand (op0, mode0);
14041 if ((optimize && !register_operand (op0, mode0))
14042 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14043 op0 = copy_to_mode_reg (mode0, op0);
14044 }
14046 pat = GEN_FCN (icode) (target, op0);
14047 if (! pat)
14048 return 0;
14049 emit_insn (pat);
14050 return target;
14051 }
14053 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
14054 sqrtss, rsqrtss, rcpss. */
14056 static rtx
14057 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
14058 {
14059 rtx pat;
14060 tree arg0 = TREE_VALUE (arglist);
14061 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14062 enum machine_mode tmode = insn_data[icode].operand[0].mode;
14063 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
14065 if (optimize || !target
14066 || GET_MODE (target) != tmode
14067 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14068 target = gen_reg_rtx (tmode);
14070 if (VECTOR_MODE_P (mode0))
14071 op0 = safe_vector_operand (op0, mode0);
14073 if ((optimize && !register_operand (op0, mode0))
14074 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14075 op0 = copy_to_mode_reg (mode0, op0);
14077 op1 = op0;
14078 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
14079 op1 = copy_to_mode_reg (mode0, op1);
14081 pat = GEN_FCN (icode) (target, op0, op1);
14082 if (! pat)
14083 return 0;
14084 emit_insn (pat);
14085 return target;
14086 }
14088 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
14090 static rtx
14091 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
14092 rtx target)
14093 {
14094 rtx pat;
14095 tree arg0 = TREE_VALUE (arglist);
14096 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14097 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14098 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14099 rtx op2;
14100 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
14101 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
14102 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
14103 enum rtx_code comparison = d->comparison;
14105 if (VECTOR_MODE_P (mode0))
14106 op0 = safe_vector_operand (op0, mode0);
14107 if (VECTOR_MODE_P (mode1))
14108 op1 = safe_vector_operand (op1, mode1);
14110 /* Swap operands if we have a comparison that isn't available in
14111 hardware. */
14112 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
14113 {
14114 rtx tmp = gen_reg_rtx (mode1);
14115 emit_move_insn (tmp, op1);
14116 op1 = op0;
14117 op0 = tmp;
14118 }
14120 if (optimize || !target
14121 || GET_MODE (target) != tmode
14122 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
14123 target = gen_reg_rtx (tmode);
14125 if ((optimize && !register_operand (op0, mode0))
14126 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
14127 op0 = copy_to_mode_reg (mode0, op0);
14128 if ((optimize && !register_operand (op1, mode1))
14129 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
14130 op1 = copy_to_mode_reg (mode1, op1);
14132 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14133 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
14134 if (! pat)
14135 return 0;
14136 emit_insn (pat);
14137 return target;
14138 }
14140 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
14142 static rtx
14143 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
14144 rtx target)
14145 {
14146 rtx pat;
14147 tree arg0 = TREE_VALUE (arglist);
14148 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14149 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14150 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14151 rtx op2;
14152 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
14153 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
14154 enum rtx_code comparison = d->comparison;
14156 if (VECTOR_MODE_P (mode0))
14157 op0 = safe_vector_operand (op0, mode0);
14158 if (VECTOR_MODE_P (mode1))
14159 op1 = safe_vector_operand (op1, mode1);
14161 /* Swap operands if we have a comparison that isn't available in
14162 hardware. */
14163 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
14164 {
14165 rtx tmp = op1;
14166 op1 = op0;
14167 op0 = tmp;
14168 }
14170 target = gen_reg_rtx (SImode);
14171 emit_move_insn (target, const0_rtx);
14172 target = gen_rtx_SUBREG (QImode, target, 0);
14174 if ((optimize && !register_operand (op0, mode0))
14175 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14176 op0 = copy_to_mode_reg (mode0, op0);
14177 if ((optimize && !register_operand (op1, mode1))
14178 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14179 op1 = copy_to_mode_reg (mode1, op1);
14181 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14182 pat = GEN_FCN (d->icode) (op0, op1);
14183 if (! pat)
14184 return 0;
14185 emit_insn (pat);
14186 emit_insn (gen_rtx_SET (VOIDmode,
14187 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
14188 gen_rtx_fmt_ee (comparison, QImode,
14189 SET_DEST (pat),
14190 const0_rtx)));
14192 return SUBREG_REG (target);
14193 }
14195 /* Return the integer constant in ARG. Constrain it to be in the range
14196 of the subparts of VEC_TYPE; issue an error if not. */
14198 static unsigned HOST_WIDE_INT
14199 get_element_number (tree vec_type, tree arg)
14200 {
14201 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
14203 if (!host_integerp (arg, 1)
14204 || (elt = tree_low_cst (arg, 1), elt > max))
14205 {
14206 error ("selector must be an integer constant in the range 0..%wi", max);
14207 return 0;
14208 }
14210 return elt;
14211 }
14213 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
14214 ix86_expand_vector_init. We DO have language-level syntax for this, in
14215 the form of (type){ init-list }. Except that since we can't place emms
14216 instructions from inside the compiler, we can't allow the use of MMX
14217 registers unless the user explicitly asks for it. So we do *not* define
14218 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
14219 we have builtins invoked by mmintrin.h that give us license to emit
14220 these sorts of instructions. */
14222 static rtx
14223 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
14224 {
14225 enum machine_mode tmode = TYPE_MODE (type);
14226 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
14227 int i, n_elt = GET_MODE_NUNITS (tmode);
14228 rtvec v = rtvec_alloc (n_elt);
14230 gcc_assert (VECTOR_MODE_P (tmode));
14232 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
14234 rtx x = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14235 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
14236 }
14238 gcc_assert (arglist == NULL);
14240 if (!target || !register_operand (target, tmode))
14241 target = gen_reg_rtx (tmode);
14243 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
14244 return target;
14245 }
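/* Editor's note: the language-level form mentioned in the comment above is
   the vector constructor, which the front end lowers to the same
   ix86_expand_vector_init path (typedef assumed by the sketch):

     typedef int __v2si __attribute__ ((__vector_size__ (8)));
     __v2si two = (__v2si) { 1, 2 };   // equivalent of the builtin wrapper
*/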
14247 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
14248 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
14249 had a language-level syntax for referencing vector elements. */
14251 static rtx
14252 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
14253 {
14254 enum machine_mode tmode, mode0;
14255 tree arg0, arg1;
14256 int elt;
14257 rtx op0;
14259 arg0 = TREE_VALUE (arglist);
14260 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14262 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14263 elt = get_element_number (TREE_TYPE (arg0), arg1);
14265 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14266 mode0 = TYPE_MODE (TREE_TYPE (arg0));
14267 gcc_assert (VECTOR_MODE_P (mode0));
14269 op0 = force_reg (mode0, op0);
14271 if (optimize || !target || !register_operand (target, tmode))
14272 target = gen_reg_rtx (tmode);
14274 ix86_expand_vector_extract (true, target, op0, elt);
14276 return target;
14277 }
14279 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
14280 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
14281 a language-level syntax for referencing vector elements. */
14283 static rtx
14284 ix86_expand_vec_set_builtin (tree arglist)
14285 {
14286 enum machine_mode tmode, mode1;
14287 tree arg0, arg1, arg2;
14288 rtx op0, op1;
14289 int elt;
14291 arg0 = TREE_VALUE (arglist);
14292 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14293 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14295 tmode = TYPE_MODE (TREE_TYPE (arg0));
14296 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14297 gcc_assert (VECTOR_MODE_P (tmode));
14299 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
14300 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
14301 elt = get_element_number (TREE_TYPE (arg0), arg2);
14303 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
14304 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
14306 op0 = force_reg (tmode, op0);
14307 op1 = force_reg (mode1, op1);
14309 ix86_expand_vector_set (true, op0, op1, elt);
14311 return op0;
14312 }
14314 /* Expand an expression EXP that calls a built-in function,
14315 with result going to TARGET if that's convenient
14316 (and in mode MODE if that's convenient).
14317 SUBTARGET may be used as the target for computing one of EXP's operands.
14318 IGNORE is nonzero if the value is to be ignored. */
14320 static rtx
14321 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14322 enum machine_mode mode ATTRIBUTE_UNUSED,
14323 int ignore ATTRIBUTE_UNUSED)
14324 {
14325 const struct builtin_description *d;
14326 size_t i;
14327 enum insn_code icode;
14328 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14329 tree arglist = TREE_OPERAND (exp, 1);
14330 tree arg0, arg1, arg2;
14331 rtx op0, op1, op2, pat;
14332 enum machine_mode tmode, mode0, mode1, mode2;
14333 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14335 switch (fcode)
14336 {
14337 case IX86_BUILTIN_EMMS:
14338 emit_insn (gen_mmx_emms ());
14339 return 0;
14341 case IX86_BUILTIN_SFENCE:
14342 emit_insn (gen_sse_sfence ());
14343 return 0;
14345 case IX86_BUILTIN_MASKMOVQ:
14346 case IX86_BUILTIN_MASKMOVDQU:
14347 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14348 ? CODE_FOR_mmx_maskmovq
14349 : CODE_FOR_sse2_maskmovdqu);
14350 /* Note the arg order is different from the operand order. */
14351 arg1 = TREE_VALUE (arglist);
14352 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14353 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14354 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14355 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14356 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14357 mode0 = insn_data[icode].operand[0].mode;
14358 mode1 = insn_data[icode].operand[1].mode;
14359 mode2 = insn_data[icode].operand[2].mode;
14361 op0 = force_reg (Pmode, op0);
14362 op0 = gen_rtx_MEM (mode1, op0);
14364 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14365 op0 = copy_to_mode_reg (mode0, op0);
14366 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14367 op1 = copy_to_mode_reg (mode1, op1);
14368 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14369 op2 = copy_to_mode_reg (mode2, op2);
14370 pat = GEN_FCN (icode) (op0, op1, op2);
14371 if (! pat)
14372 return 0;
14373 emit_insn (pat);
14374 return 0;
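/* Illustrative call of the masked store just expanded (editor's sketch;
   xmmintrin.h's _mm_maskmove_si64 wraps it).  As remarked above, the
   builtin's (data, mask, address) order differs from the insn's
   (address, data, mask):

     typedef char __v8qi __attribute__ ((__vector_size__ (8)));
     void store (__v8qi d, __v8qi m, char *p)
     {
       __builtin_ia32_maskmovq (d, m, p);
     }
*/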
14376 case IX86_BUILTIN_SQRTSS:
14377 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
14378 case IX86_BUILTIN_RSQRTSS:
14379 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
14380 case IX86_BUILTIN_RCPSS:
14381 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
14383 case IX86_BUILTIN_LOADUPS:
14384 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14386 case IX86_BUILTIN_STOREUPS:
14387 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14389 case IX86_BUILTIN_LOADHPS:
14390 case IX86_BUILTIN_LOADLPS:
14391 case IX86_BUILTIN_LOADHPD:
14392 case IX86_BUILTIN_LOADLPD:
14393 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
14394 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
14395 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
14396 : CODE_FOR_sse2_loadlpd);
14397 arg0 = TREE_VALUE (arglist);
14398 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14399 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14400 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14401 tmode = insn_data[icode].operand[0].mode;
14402 mode0 = insn_data[icode].operand[1].mode;
14403 mode1 = insn_data[icode].operand[2].mode;
14405 op0 = force_reg (mode0, op0);
14406 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14407 if (optimize || target == 0
14408 || GET_MODE (target) != tmode
14409 || !register_operand (target, tmode))
14410 target = gen_reg_rtx (tmode);
14411 pat = GEN_FCN (icode) (target, op0, op1);
14412 if (! pat)
14413 return 0;
14414 emit_insn (pat);
14415 return target;
14417 case IX86_BUILTIN_STOREHPS:
14418 case IX86_BUILTIN_STORELPS:
14419 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
14420 : CODE_FOR_sse_storelps);
14421 arg0 = TREE_VALUE (arglist);
14422 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14423 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14424 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14425 mode0 = insn_data[icode].operand[0].mode;
14426 mode1 = insn_data[icode].operand[1].mode;
14428 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14429 op1 = force_reg (mode1, op1);
14431 pat = GEN_FCN (icode) (op0, op1);
14437 case IX86_BUILTIN_MOVNTPS:
14438 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14439 case IX86_BUILTIN_MOVNTQ:
14440 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14442 case IX86_BUILTIN_LDMXCSR:
14443 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14444 target = assign_386_stack_local (SImode, 0);
14445 emit_move_insn (target, op0);
14446 emit_insn (gen_sse_ldmxcsr (target));
14449 case IX86_BUILTIN_STMXCSR:
14450 target = assign_386_stack_local (SImode, 0);
14451 emit_insn (gen_sse_stmxcsr (target));
14452 return copy_to_mode_reg (SImode, target);
14454 case IX86_BUILTIN_SHUFPS:
14455 case IX86_BUILTIN_SHUFPD:
14456 icode = (fcode == IX86_BUILTIN_SHUFPS
14457 ? CODE_FOR_sse_shufps
14458 : CODE_FOR_sse2_shufpd);
14459 arg0 = TREE_VALUE (arglist);
14460 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14461 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14462 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14463 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14464 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14465 tmode = insn_data[icode].operand[0].mode;
14466 mode0 = insn_data[icode].operand[1].mode;
14467 mode1 = insn_data[icode].operand[2].mode;
14468 mode2 = insn_data[icode].operand[3].mode;
14470 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14471 op0 = copy_to_mode_reg (mode0, op0);
14472 if ((optimize && !register_operand (op1, mode1))
14473 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
14474 op1 = copy_to_mode_reg (mode1, op1);
14475 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14477 /* @@@ better error message */
14478 error ("mask must be an immediate");
14479 return gen_reg_rtx (tmode);
14481 if (optimize || target == 0
14482 || GET_MODE (target) != tmode
14483 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14484 target = gen_reg_rtx (tmode);
14485 pat = GEN_FCN (icode) (target, op0, op1, op2);
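
      /* For readers: the shufps/shufpd mask must already be a CONST_INT
         here.  For example (illustrative only), a user-level
         _mm_shuffle_ps (a, b, 0xb1) arrives with 0xb1 as operand 3;
         anything non-immediate falls into the error path above.  */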
    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
               : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
               : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
               : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSLLDQI128:
    case IX86_BUILTIN_PSRLDQI128:
      icode = (  fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
               : CODE_FOR_sse2_lshrti3);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        {
          op0 = copy_to_reg (op0);
          op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
        }
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          error ("shift must be an immediate");
          return const0_rtx;
        }
      target = gen_reg_rtx (V2DImode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
                             op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;
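
      /* Illustrative note: ashlti3/lshrti3 shift the TImode value by a
         bit count.  The byte-oriented _mm_slli_si128/_mm_srli_si128
         wrappers are assumed to scale their byte argument by 8 before
         the count reaches this expansion (an assumption about the
         intrinsic headers, not something enforced here).  */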
    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_mmx_femms ());
      return 0;

    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);

    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;

    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      icode = CODE_FOR_sse2_clflush;
      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
        op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    case IX86_BUILTIN_LOADDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
    case IX86_BUILTIN_STOREDQU:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);

    case IX86_BUILTIN_MONITOR:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      if (!REG_P (op0))
        op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
        op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
        op2 = copy_to_mode_reg (SImode, op2);
      emit_insn (gen_sse3_monitor (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      if (!REG_P (op0))
        op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
        op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;
    case IX86_BUILTIN_LDDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
                                       target, 1);

    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
      return ix86_expand_vec_ext_builtin (arglist, target);

    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
      return ix86_expand_vec_set_builtin (arglist);

    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
        /* Compares are treated specially.  */
        if (d->icode == CODE_FOR_sse_maskcmpv4sf3
            || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
            || d->icode == CODE_FOR_sse2_maskcmpv2df3
            || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
          return ix86_expand_sse_compare (d, arglist, target);

        return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  gcc_unreachable ();
}
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  if (!reload_completed)
    abort ();
  if (TARGET_RED_ZONE)
    {
      result = gen_rtx_MEM (mode,
                            gen_rtx_PLUS (Pmode,
                                          stack_pointer_rtx,
                                          GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (!TARGET_RED_ZONE && TARGET_64BIT)
    {
      switch (mode)
        {
        case HImode:
        case SImode:
          operand = gen_lowpart (DImode, operand);
          /* FALLTHRU */
        case DImode:
          emit_insn (gen_rtx_SET (VOIDmode,
                                  gen_rtx_MEM (DImode,
                                               gen_rtx_PRE_DEC (DImode,
                                                                stack_pointer_rtx)),
                                  operand));
          break;
        default:
          abort ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
        {
        case DImode:
          {
            rtx operands[2];

            split_di (&operand, 1, operands, operands + 1);
            emit_insn (gen_rtx_SET (VOIDmode,
                                    gen_rtx_MEM (SImode,
                                                 gen_rtx_PRE_DEC (Pmode,
                                                                  stack_pointer_rtx)),
                                    operands[1]));
            emit_insn (gen_rtx_SET (VOIDmode,
                                    gen_rtx_MEM (SImode,
                                                 gen_rtx_PRE_DEC (Pmode,
                                                                  stack_pointer_rtx)),
                                    operands[0]));
          }
          break;
        case HImode:
          /* It is better to store HImodes as SImodes.  */
          if (!TARGET_PARTIAL_REG_STALL)
            operand = gen_lowpart (SImode, operand);
          /* FALLTHRU */
        case SImode:
          emit_insn (gen_rtx_SET (VOIDmode,
                                  gen_rtx_MEM (GET_MODE (operand),
                                               gen_rtx_PRE_DEC (SImode,
                                                                stack_pointer_rtx)),
                                  operand));
          break;
        default:
          abort ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
/* Free operand from the memory.  */
void
ix86_free_from_memory (enum machine_mode mode)
{
  if (!TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
        size = 8;
      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
        size = 2;
      else
        size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
         to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                            GEN_INT (size))));
    }
}
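
/* An illustrative sketch of the peephole opportunity mentioned above:
   the emitted insn prints roughly as

        leal    4(%esp), %esp

   and peephole2 may turn it into "popl %ecx" when a scratch register is
   free; lea is used here because, unlike add, it leaves the flags
   untouched.  */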
/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (rtx x, enum reg_class class)
{
  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (class == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (GET_MODE (x)))
    return class;

  /* Floating-point constants need more complex checks.  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
        return class;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
         zero above.  We only want to wind up preferring 80387 registers if
         we plan on doing computation with them.  */
      if (TARGET_80387
          && (TARGET_MIX_SSE_I387
              || !(TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (x))))
          && standard_80387_constant_p (x))
        {
          /* Limit class to non-sse.  */
          if (class == FLOAT_SSE_REGS)
            return FLOAT_REGS;
          if (class == FP_TOP_SSE_REGS)
            return FP_TOP_REG;
          if (class == FP_SECOND_SSE_REGS)
            return FP_SECOND_REG;
          if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
            return class;
        }

      return NO_REGS;
    }
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  if (MAYBE_SSE_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (reg_class_subset_p (class, Q_REGS))
        return class;
      if (reg_class_subset_p (Q_REGS, class))
        return Q_REGS;
      return NO_REGS;
    }

  return class;
}
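
/* Worked example (illustrative): reloading the DFmode constant 1.0 into
   FLOAT_REGS is acceptable because standard_80387_constant_p recognizes
   it (fld1), while an arbitrary constant such as 3.14 fails that check,
   the function returns NO_REGS, and the constant ends up in the pool.  */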
/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is a class
   containing registers from multiple units (SSE, MMX, integer).  We avoid
   this by never combining those units in a single alternative in the
   machine description.  Ensure that this constraint holds to avoid
   unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do
   not enforce these sanity checks.  */
int
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
                              enum machine_mode mode, int strict)
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      if (strict)
        abort ();
      return true;
    }

  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
        return true;

      /* If the target says that inter-unit moves are more expensive
         than moving through memory, then don't generate them.  */
      if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
        return true;

      /* Between SSE and general, we have moves no larger than word size.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
        return true;

      /* ??? For the cost of one register reformat penalty, we could use
         the same instructions to move SFmode and DFmode data, but the
         relevant move patterns don't support those alternatives.  */
      if (mode == SFmode || mode == DFmode)
        return true;
    }

  return false;
}
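
/* For instance (illustrative), a 32-bit DImode move between general and
   SSE registers has GET_MODE_SIZE (8) larger than UNITS_PER_WORD (4), so
   the word-size check above forces it through a stack slot instead of a
   direct movd.  */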
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as
   TO; on some machines it is expensive to move between registers if they
   are not general registers.  */
int
ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
                         enum reg_class class2)
{
  /* In case we require secondary memory, compute the cost of the store
     followed by a load.  In order to avoid bad register allocation choices,
     we need for this to be *at least* as high as the symmetric
     MEMORY_MOVE_COST.  */
  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
                   MEMORY_MOVE_COST (mode, class1, 1));
      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
                   MEMORY_MOVE_COST (mode, class2, 1));

      /* In case of copying from general_purpose_register we may emit
         multiple stores followed by a single load causing a memory size
         mismatch stall.  Count this as an arbitrarily high cost of 20.  */
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
        cost += 20;

      return cost;
    }

  /* In the case of FP/MMX moves, the registers actually overlap, and we
     have to switch modes in order to treat them differently.  */
  if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
      || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
    return 20;

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;

  return 2;
}
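
/* Worked example with made-up table values: if secondary memory is
   needed and MEMORY_MOVE_COST is 4 in both directions for both classes,
   the returned cost is at least 4 + 4, plus the extra 20 whenever CLASS1
   occupies more hard registers than CLASS2 for MODE.  */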
/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
int
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
         out of SSE registers, even when no operation instructions
         are available.  */
      return (VALID_SSE_REG_MODE (mode)
              || VALID_SSE2_REG_MODE (mode)
              || VALID_MMX_REG_MODE (mode)
              || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
         so if the register is available at all, then we can move data of
         the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
              || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  /* We handle both integer and floats in the general purpose registers.
     In future we should be able to handle vector modes as well.  */
  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
    return 0;
  /* Take care for QImode values - they can be in non-QI regs, but then
     they do cause partial register stalls.  */
  if (regno < 4 || mode != QImode || TARGET_64BIT)
    return 1;
  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
}
/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   tieable integer mode.  */

static bool
ix86_tieable_integer_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case HImode:
    case SImode:
      return true;

    case QImode:
      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;

    case DImode:
      return TARGET_64BIT;

    default:
      return false;
    }
}
/* Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

static bool
ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  if (mode1 == mode2)
    return true;

  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any other mode acceptable to SSE registers.  */
  if (SSE_REG_MODE_P (mode2))
    return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);

  /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
     with any other mode acceptable to MMX registers.  */
  if (MMX_REG_MODE_P (mode2))
    return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);

  return false;
}
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.  */
int
ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
{
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
        {
        case SFmode:
          index = 0;
          break;
        case DFmode:
          index = 1;
          break;
        case XFmode:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        case 16:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (in)
        return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
                : ix86_cost->movzbl_load);
      else
        return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
                : ix86_cost->int_store[0] + 4);
      break;
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
        mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
              * (((int) GET_MODE_SIZE (mode)
                  + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}
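
/* Illustrative arithmetic for the default branch above: an 8-byte
   integer mode moved to or from a 32-bit general register class needs
   (8 + 4 - 1) / 4 = 2 word-sized moves, i.e. twice int_load[2] or
   int_store[2].  */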
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
{
  enum machine_mode mode = GET_MODE (x);

  switch (code)
    {
    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
        *total = 3;
      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
        *total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x)
               && (!TARGET_64BIT
                   || (GET_CODE (x) != LABEL_REF
                       && (GET_CODE (x) != SYMBOL_REF
                           || !SYMBOL_REF_LOCAL_P (x)))))
        *total = 1;
      else
        *total = 0;
      break;

    case CONST_DOUBLE:
      if (mode == VOIDmode)
        *total = 0;
      else
        switch (standard_80387_constant_p (x))
          {
          case 1: /* 0.0 */
            *total = 1;
            break;
          default: /* Other constants */
            *total = 2;
            break;
          case 0:
          case -1:
            /* Start with (MEM (SYMBOL_REF)), since that's where
               it'll probably end up.  Add a penalty for size.  */
            *total = (COSTS_N_INSNS (1)
                      + (flag_pic != 0 && !TARGET_64BIT)
                      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
            break;
          }
      break;

    case ZERO_EXTEND:
      /* The zero extension is often completely free on x86_64, so make
         it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
          && GET_MODE (XEXP (x, 0)) == SImode)
        *total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
        *total = COSTS_N_INSNS (ix86_cost->add);
      else
        *total = COSTS_N_INSNS (ix86_cost->movzx);
      break;

    case SIGN_EXTEND:
      *total = COSTS_N_INSNS (ix86_cost->movsx);
      break;

    case ASHIFT:
      if (GET_CODE (XEXP (x, 1)) == CONST_INT
          && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
        {
          HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
          if (value == 1)
            {
              *total = COSTS_N_INSNS (ix86_cost->add);
              return false;
            }
          if ((value == 2 || value == 3)
              && ix86_cost->lea <= ix86_cost->shift_const)
            {
              *total = COSTS_N_INSNS (ix86_cost->lea);
              return false;
            }
        }
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
        {
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            {
              if (INTVAL (XEXP (x, 1)) > 32)
                *total = COSTS_N_INSNS (ix86_cost->shift_const + 2);
              else
                *total = COSTS_N_INSNS (ix86_cost->shift_const * 2);
            }
          else
            {
              if (GET_CODE (XEXP (x, 1)) == AND)
                *total = COSTS_N_INSNS (ix86_cost->shift_var * 2);
              else
                *total = COSTS_N_INSNS (ix86_cost->shift_var * 6 + 2);
            }
        }
      else
        {
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            *total = COSTS_N_INSNS (ix86_cost->shift_const);
          else
            *total = COSTS_N_INSNS (ix86_cost->shift_var);
        }
      break;

    case MULT:
      if (FLOAT_MODE_P (mode))
        {
          *total = COSTS_N_INSNS (ix86_cost->fmul);
          return false;
        }
      else
        {
          rtx op0 = XEXP (x, 0);
          rtx op1 = XEXP (x, 1);
          int nbits;
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            {
              unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
              for (nbits = 0; value != 0; value &= value - 1)
                nbits++;
            }
          else
            /* This is arbitrary.  */
            nbits = 7;

          /* Compute costs correctly for widening multiplication.  */
          if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
              && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
                 == GET_MODE_SIZE (mode))
            {
              int is_mulwiden = 0;
              enum machine_mode inner_mode = GET_MODE (op0);

              if (GET_CODE (op0) == GET_CODE (op1))
                is_mulwiden = 1, op1 = XEXP (op1, 0);
              else if (GET_CODE (op1) == CONST_INT)
                {
                  if (GET_CODE (op0) == SIGN_EXTEND)
                    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
                                  == INTVAL (op1);
                  else
                    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
                }

              if (is_mulwiden)
                op0 = XEXP (op0, 0), mode = GET_MODE (op0);
            }

          *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
                                  + nbits * ix86_cost->mult_bit)
                   + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);

          return true;
        }

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fdiv);
      else
        *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
      break;

    case PLUS:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fadd);
      else if (GET_MODE_CLASS (mode) == MODE_INT
               && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
        {
          if (GET_CODE (XEXP (x, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
              && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
              && CONSTANT_P (XEXP (x, 1)))
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = COSTS_N_INSNS (ix86_cost->lea);
                  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
                  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
                                      outer_code);
                  *total += rtx_cost (XEXP (x, 1), outer_code);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == MULT
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = COSTS_N_INSNS (ix86_cost->lea);
                  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
                  *total += rtx_cost (XEXP (x, 1), outer_code);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == PLUS)
            {
              *total = COSTS_N_INSNS (ix86_cost->lea);
              *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
              *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
              *total += rtx_cost (XEXP (x, 1), outer_code);
              return true;
            }
        }
      /* FALLTHRU */

    case MINUS:
      if (FLOAT_MODE_P (mode))
        {
          *total = COSTS_N_INSNS (ix86_cost->fadd);
          return false;
        }
      /* FALLTHRU */

    case AND:
    case IOR:
    case XOR:
      if (!TARGET_64BIT && mode == DImode)
        {
          *total = (COSTS_N_INSNS (ix86_cost->add) * 2
                    + (rtx_cost (XEXP (x, 0), outer_code)
                       << (GET_MODE (XEXP (x, 0)) != DImode))
                    + (rtx_cost (XEXP (x, 1), outer_code)
                       << (GET_MODE (XEXP (x, 1)) != DImode)));
          return true;
        }
      /* FALLTHRU */

    case NEG:
      if (FLOAT_MODE_P (mode))
        {
          *total = COSTS_N_INSNS (ix86_cost->fchs);
          return false;
        }
      /* FALLTHRU */

    case NOT:
      if (!TARGET_64BIT && mode == DImode)
        *total = COSTS_N_INSNS (ix86_cost->add * 2);
      else
        *total = COSTS_N_INSNS (ix86_cost->add);
      break;

    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
          && XEXP (XEXP (x, 0), 1) == const1_rtx
          && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
          && XEXP (x, 1) == const0_rtx)
        {
          /* This kind of construct is implemented using test[bwl].
             Treat it as if we had an AND.  */
          *total = (COSTS_N_INSNS (ix86_cost->add)
                    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
                    + rtx_cost (const1_rtx, outer_code));
          return true;
        }
      break;

    case FLOAT_EXTEND:
      if (!TARGET_SSE_MATH
          || mode == XFmode
          || (mode == DFmode && !TARGET_SSE2))
        *total = 0;
      break;

    case ABS:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fabs);
      break;

    case SQRT:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fsqrt);
      break;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TP)
        *total = 0;
      break;

    default:
      break;
    }

  return false;
}
#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
#endif /* TARGET_MACHO */
/* Order the registers for the register allocator.  */

void
x86_order_regs_for_local_alloc (void)
{
  int pos = 0;
  int i;

  /* First allocate the local general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* Global general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && !call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* x87 registers come first in case we are doing FP math
     using them.  */
  if (!TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  /* SSE registers.  */
  for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
    reg_alloc_order [pos++] = i;
  for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
    reg_alloc_order [pos++] = i;

  /* x87 registers.  */
  if (TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
    reg_alloc_order [pos++] = i;

  /* Initialize the rest of array as we do not allocate some registers
     at all.  */
  while (pos < FIRST_PSEUDO_REGISTER)
    reg_alloc_order [pos++] = 0;
}
#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
#define TARGET_USE_MS_BITFIELD_LAYOUT 0
#endif

/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */
static tree
ix86_handle_struct_attribute (tree *node, tree name,
                              tree args ATTRIBUTE_UNUSED,
                              int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
        type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
                 || TREE_CODE (*type) == UNION_TYPE)))
    {
      warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }
  else if ((is_attribute_p ("ms_struct", name)
            && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
           || ((is_attribute_p ("gcc_struct", name)
                && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning ("%qs incompatible attribute ignored",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

static bool
ix86_ms_bitfield_layout_p (tree record_type)
{
  return (TARGET_USE_MS_BITFIELD_LAYOUT &&
          !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
    || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
}
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);

  if (TARGET_64BIT)
    {
      int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
    }

  if (ix86_function_regparm (type, function) > 0)
    {
      tree parm;

      parm = TYPE_ARG_TYPES (type);
      /* Figure out whether or not the function has a variable number of
         arguments.  */
      for (; parm; parm = TREE_CHAIN (parm))
        if (TREE_VALUE (parm) == void_type_node)
          break;
      /* If not, the this parameter is in the first argument.  */
      if (parm)
        {
          int regno = 0;
          if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
            regno = 2;
          return gen_rtx_REG (SImode, regno);
        }
    }

  if (aggregate_value_p (TREE_TYPE (type), type))
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));

  return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
}
/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
                         HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
                         HOST_WIDE_INT vcall_offset, tree function)
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !(*targetm.binds_local_p) (function))
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
                     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
                     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx xops[3];
  rtx this = x86_this_parameter (function);
  rtx this_reg, tmp;

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this))
    this_reg = this;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      xops[0] = this;
      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      xops[0] = GEN_INT (delta);
      xops[1] = this_reg ? this_reg : this;
      if (TARGET_64BIT)
        {
          if (!x86_64_general_operand (xops[0], DImode))
            {
              tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
              xops[1] = tmp;
              output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
              xops[0] = tmp;
              xops[1] = this;
            }
          output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
        }
      else
        output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      if (TARGET_64BIT)
        tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
      else
        {
          int tmp_regno = 2 /* ECX */;
          if (lookup_attribute ("fastcall",
                                TYPE_ATTRIBUTES (TREE_TYPE (function))))
            tmp_regno = 0 /* EAX */;
          tmp = gen_rtx_REG (SImode, tmp_regno);
        }

      xops[0] = gen_rtx_MEM (Pmode, this_reg);
      xops[1] = tmp;
      if (TARGET_64BIT)
        output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
      else
        output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);

      /* Adjust the this parameter.  */
      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
        {
          rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
          xops[0] = GEN_INT (vcall_offset);
          xops[1] = tmp2;
          output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
          xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
        }
      xops[1] = this_reg;
      if (TARGET_64BIT)
        output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
      else
        output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this)
    {
      xops[0] = this_reg;
      xops[1] = this;
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }

  xops[0] = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
        output_asm_insn ("jmp\t%P0", xops);
      else
        {
          tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
          tmp = gen_rtx_CONST (Pmode, tmp);
          tmp = gen_rtx_MEM (QImode, tmp);
          xops[0] = tmp;
          output_asm_insn ("jmp\t%A0", xops);
        }
    }
  else
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
        output_asm_insn ("jmp\t%P0", xops);
      else
#if TARGET_MACHO
        if (TARGET_MACHO)
          {
            rtx sym_ref = XEXP (DECL_RTL (function), 0);
            tmp = (gen_rtx_SYMBOL_REF
                   (Pmode,
                    machopic_indirection_name (sym_ref, /*stub_p=*/true)));
            tmp = gen_rtx_MEM (QImode, tmp);
            xops[0] = tmp;
            output_asm_insn ("jmp\t%0", xops);
          }
        else
#endif /* TARGET_MACHO */
        {
          tmp = gen_rtx_REG (SImode, 2 /* ECX */);
          output_set_got (tmp);

          xops[1] = tmp;
          output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
          output_asm_insn ("jmp\t{*}%1", xops);
        }
    }
}
static void
x86_file_start (void)
{
  default_file_start ();
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax\n", asm_out_file);
}
int
x86_field_alignment (tree field, int computed)
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
                    ? get_inner_array_type (type) : type);
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  if (TARGET_64BIT)
    if (flag_pic)
      {
#ifndef NO_PROFILE_COUNTERS
        fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
#endif
        fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
      }
    else
      {
#ifndef NO_PROFILE_COUNTERS
        fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
#endif
        fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
      }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
               LPREFIX, labelno, PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
               PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
    }
}
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  */

static int
min_insn_size (rtx insn)
{
  int l;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emitted and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;
  if (GET_CODE (insn) == JUMP_INSN
      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
          || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in a row.  */
  if (GET_CODE (insn) == CALL_INSN
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  if (get_attr_length (insn) <= 1)
    return 1;

  /* For normal instructions we may rely on the sizes of addresses
     and the presence of a symbol to require 4 bytes of encoding.
     This is not the case for jumps where references are PC relative.  */
  if (GET_CODE (insn) != JUMP_INSN)
    {
      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
        l = 4;
      return 1 + l;
    }
  else
    return 2;
}
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in a
   16 byte window.  */

static void
ix86_avoid_jump_misspredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
     */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      nbytes += min_insn_size (insn);
      if (dump_file)
        fprintf (dump_file, "Insn %i estimated to %i bytes\n",
                 INSN_UID (insn), min_insn_size (insn));
      if ((GET_CODE (insn) == JUMP_INSN
           && GET_CODE (PATTERN (insn)) != ADDR_VEC
           && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
          || GET_CODE (insn) == CALL_INSN)
        njumps++;
      else
        continue;

      while (njumps > 3)
        {
          start = NEXT_INSN (start);
          if ((GET_CODE (start) == JUMP_INSN
               && GET_CODE (PATTERN (start)) != ADDR_VEC
               && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
              || GET_CODE (start) == CALL_INSN)
            njumps--, isjump = 1;
          else
            isjump = 0;
          nbytes -= min_insn_size (start);
        }
      if (njumps < 0)
        abort ();
      if (dump_file)
        fprintf (dump_file, "Interval %i to %i has %i bytes\n",
                 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
        {
          int padsize = 15 - nbytes + min_insn_size (insn);

          if (dump_file)
            fprintf (dump_file, "Padding insn %i by %i bytes!\n",
                     INSN_UID (insn), padsize);
          emit_insn_before (gen_align (GEN_INT (padsize)), insn);
        }
    }
}
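
/* Worked example (illustrative): if a fourth jump closes an interval
   where NBYTES is 14, the pass pads by 15 - 14 + sizeof (INSN) bytes,
   which guarantees that no 16-byte window can contain all four jumps.  */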
/* AMD Athlon works faster
   when RET is not the destination of a conditional jump or directly
   preceded by another jump instruction.  We avoid the penalty by
   inserting NOP just before the RET instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      rtx prev;
      bool replace = false;

      if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
          || !maybe_hot_bb_p (bb))
        continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
        if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
          break;
      if (prev && GET_CODE (prev) == CODE_LABEL)
        {
          edge e;
          edge_iterator ei;

          FOR_EACH_EDGE (e, ei, bb->preds)
            if (EDGE_FREQUENCY (e) && e->src->index >= 0
                && !(e->flags & EDGE_FALLTHRU))
              replace = true;
        }
      else
        {
          prev = prev_active_insn (ret);
          if (prev
              && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
                  || GET_CODE (prev) == CALL_INSN))
            replace = true;
          /* Empty functions get branch mispredict even when the jump
             destination is not visible to us.  */
          if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
            replace = true;
        }
      if (replace)
        {
          emit_insn_before (gen_return_internal_long (), ret);
          delete_insn (ret);
        }
    }
}
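
/* The return_internal_long pattern used above is assumed to expand to
   the two-byte "rep ; ret" idiom; the extra prefix byte keeps the
   return from immediately following (or being the target of) the
   offending jump.  */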
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window.  */
static void
ix86_reorg (void)
{
  if (TARGET_ATHLON_K8 && optimize && !optimize_size)
    ix86_pad_returns ();
  if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
    ix86_avoid_jump_misspredicts ();
}
/* Return nonzero when QImode register that must be represented via REX
   prefix is used.  */
bool
x86_extended_QIreg_mentioned_p (rtx insn)
{
  int i;
  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (REG_P (recog_data.operand[i])
        && REGNO (recog_data.operand[i]) >= 4)
      return true;
  return false;
}

/* Return nonzero when P points to register encoded via REX prefix.
   Called via for_each_rtx.  */
static int
extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno;
  if (!REG_P (*p))
    return 0;
  regno = REGNO (*p);
  return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}

/* Return true when INSN mentions register that must be encoded using REX
   prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
}
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  if (inmode != SImode
      && inmode != DImode)
    abort ();

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  i1 = gen_reg_rtx (Pmode);
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
  expand_float (f0, i0, 0);
  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
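
/* In C terms, the negative path above computes (sketch only):

     f = (FLOAT_TYPE) ((x >> 1) | (x & 1));
     result = f + f;

   halving the value before the signed conversion, folding the low bit
   in so rounding stays correct, and doubling afterwards.  */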
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAR.  Return true if successful.  */

static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
                                   rtx target, rtx val)
{
  enum machine_mode smode, wsmode, wvmode;
  rtx x;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok && !TARGET_SSE)
        return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      val = force_reg (GET_MODE_INNER (mode), val);
      x = gen_rtx_VEC_DUPLICATE (mode, val);
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4HImode:
      if (!mmx_ok)
        return false;
      if (TARGET_SSE || TARGET_3DNOW_A)
        {
          val = gen_lowpart (SImode, val);
          x = gen_rtx_TRUNCATE (HImode, val);
          x = gen_rtx_VEC_DUPLICATE (mode, x);
          emit_insn (gen_rtx_SET (VOIDmode, target, x));
          return true;
        }
      else
        {
          smode = HImode;
          wsmode = SImode;
          wvmode = V2SImode;
          goto widen;
        }

    case V8QImode:
      if (!mmx_ok)
        return false;
      smode = QImode;
      wsmode = HImode;
      wvmode = V4HImode;
      goto widen;
    case V8HImode:
      smode = HImode;
      wsmode = SImode;
      wvmode = V4SImode;
      goto widen;
    case V16QImode:
      smode = QImode;
      wsmode = HImode;
      wvmode = V8HImode;
      goto widen;
    widen:
      /* Replicate the value once into the next wider mode and recurse.  */
      val = convert_modes (wsmode, smode, val, true);
      x = expand_simple_binop (wsmode, ASHIFT, val,
                               GEN_INT (GET_MODE_BITSIZE (smode)),
                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
      val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wvmode);
      if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
        gcc_unreachable ();
      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}
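
/* Example of the widening step (illustrative): duplicating a QImode
   value v across V8QImode first builds the HImode value (v << 8) | v,
   after which the recursive call only needs to duplicate an HImode
   scalar across V4HImode.  */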
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose low element is VAR, and other elements are zero.  Return true
   if successful.  */

static bool
ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
                                     rtx target, rtx var)
{
  enum machine_mode vsimode;
  rtx x;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
        return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4SFmode:
    case V4SImode:
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V8HImode:
    case V16QImode:
      vsimode = V4SImode;
      goto widen;
    case V4HImode:
    case V8QImode:
      vsimode = V2SImode;
      goto widen;
    widen:
      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_low_nonzero (mmx_ok, vsimode, x, var))
        gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
                                 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  XVECEXP (vals, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
    case V2SFmode:
    case V2SImode:
      /* For the two element vectors, it's just as easy to use
         the general case.  */
      return false;

    case V4SFmode:
    case V4SImode:
    case V8HImode:
    case V4HImode:
      break;

    case V16QImode:
      wmode = V8HImode;
      goto widen;
    case V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
         the variable value with its adjacent constant value, and
         promote to an HImode set.  */
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
        {
          var = convert_modes (HImode, QImode, var, true);
          var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
                                     NULL_RTX, 1, OPTAB_LIB_WIDEN);
          x = GEN_INT (INTVAL (x) & 0xff);
        }
      else
        {
          var = convert_modes (HImode, QImode, var, true);
          x = gen_int_mode (INTVAL (x) << 8, HImode);
        }
      if (x != const0_rtx)
        var = expand_simple_binop (HImode, IOR, var, x, var,
                                   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}
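
/* Example (illustrative): setting variable byte 3 of a V8QImode vector
   pairs it with constant byte 2, combines the two with the shift/IOR
   sequence above into one HImode value, and stores that as element 1 of
   the V4HImode view of the vector.  */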
/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
                                 rtx target, rtx vals)
{
  enum machine_mode half_mode = GET_MODE_INNER (mode);
  rtx op0 = NULL, op1 = NULL;
  bool use_vec_concat = false;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
        break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      /* For the two element vectors, we always implement VEC_CONCAT.  */
      op0 = XVECEXP (vals, 0, 0);
      op1 = XVECEXP (vals, 0, 1);
      use_vec_concat = true;
      break;

    case V4SFmode:
      half_mode = V2SFmode;
      goto half;
    case V4SImode:
      half_mode = V2SImode;
      goto half;
    half:
      {
        rtx v;

        /* For V4SF and V4SI, we implement a concat of two V2 vectors.
           Recurse to load the two halves.  */

        op0 = gen_reg_rtx (half_mode);
        v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
        ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));

        op1 = gen_reg_rtx (half_mode);
        v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
        ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));

        use_vec_concat = true;
      }
      break;

    case V8HImode:
    case V16QImode:
    case V4HImode:
    case V8QImode:
      break;

    default:
      gcc_unreachable ();
    }

  if (use_vec_concat)
    {
      if (!register_operand (op0, half_mode))
        op0 = force_reg (half_mode, op0);
      if (!register_operand (op1, half_mode))
        op1 = force_reg (half_mode, op1);

      emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_rtx_VEC_CONCAT (mode, op0, op1)));
    }
  else
    {
      int i, j, n_elts, n_words, n_elt_per_word;
      enum machine_mode inner_mode;
      rtx words[4], shift;

      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

      for (i = 0; i < n_words; ++i)
        {
          rtx word = NULL_RTX;

          for (j = 0; j < n_elt_per_word; ++j)
            {
              rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
              elt = convert_modes (word_mode, inner_mode, elt, true);

              if (j == 0)
                word = elt;
              else
                {
                  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
                                              word, 1, OPTAB_LIB_WIDEN);
                  word = expand_simple_binop (word_mode, IOR, word, elt,
                                              word, 1, OPTAB_LIB_WIDEN);
                }
            }

          words[i] = word;
        }

      if (n_words == 1)
        emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)
        {
          rtx tmp = gen_reg_rtx (mode);
          emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
          emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
          emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
          emit_move_insn (target, tmp);
        }
      else if (n_words == 4)
        {
          rtx tmp = gen_reg_rtx (V4SImode);
          vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
          ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
          emit_move_insn (target, gen_lowpart (mode, tmp));
        }
      else
        gcc_unreachable ();
    }
}
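
/* Illustrative walk-through of the word-building branch: a V8HImode
   vector on a 32-bit target yields 4 words of 2 elements each; each
   word is assembled high element first, shifting and IORing in the
   lower one, and the four SImode words are then combined through the
   V4SImode recursion above.  */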
/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
        n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
        all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
        all_same = false;
    }

  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
                                            XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */
  if (n_var == 1)
    {
      if (all_const_zero && one_var == 0
          && ix86_expand_vector_init_low_nonzero (mmx_ok, mode, target,
                                                  XVECEXP (vals, 0, 0)))
        return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
        return;
    }

  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}
16603 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
16605 enum machine_mode mode = GET_MODE (target);
16606 enum machine_mode inner_mode = GET_MODE_INNER (mode);
16607 bool use_vec_merge = false;
16616 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
16617 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
16619 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
16621 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
16622 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
16632 /* For the two element vectors, we implement a VEC_CONCAT with
16633 the extraction of the other element. */
16635 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
16636 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
16639 op0 = val, op1 = tmp;
16641 op0 = tmp, op1 = val;
16643 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
16644 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    case V4SFmode:
      switch (elt)
	{
	case 0:
	  use_vec_merge = true;
	  break;

	case 1:
	  /* tmp = op0 = A B C D */
	  tmp = copy_to_reg (target);
	  /* op0 = C C D D */
	  emit_insn (gen_sse_unpcklps (target, target, target));
	  /* op0 = C C D X */
	  ix86_expand_vector_set (false, target, val, 0);
	  /* op0 = A B X D */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (1), GEN_INT (0),
				       GEN_INT (2+4), GEN_INT (3+4)));
	  return;

	case 2:
	  tmp = copy_to_reg (target);
	  /* tmp = A B C X */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* op0 = A X C D */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (0), GEN_INT (1),
				       GEN_INT (0+4), GEN_INT (3+4)));
	  return;

	case 3:
	  tmp = copy_to_reg (target);
	  /* tmp = A B C X */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* op0 = X B C D */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (0), GEN_INT (1),
				       GEN_INT (2+4), GEN_INT (0+4)));
	  return;

	default:
	  gcc_unreachable ();
	}
      break;
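
    /* Background gloss (not an original comment): in the sse_shufps_1
       pattern the four selectors index the concatenation of both
       sources; 0-3 pick elements of the first source and 4-7 pick
       elements of the second, and shufps can only place first-source
       elements in the low two result slots and second-source elements
       in the high two.  For the ELT == 3 case above:

	   result[0] = target[0]	(selector 0)
	   result[1] = target[1]	(selector 1)
	   result[2] = tmp[2]		(selector 2+4)
	   result[3] = tmp[0] = X	(selector 0+4)

       so the new value lands in element 3 and the rest is preserved.  */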
    case V4SImode:
      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
	{
	  use_vec_merge = true;
	  break;
	}

      if (TARGET_SSE2)
	{
	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
	     store into element 0, then shuffle them back.  */
	  rtx order[4];

	  order[0] = GEN_INT (elt);
	  order[1] = const1_rtx;
	  order[2] = const2_rtx;
	  order[3] = GEN_INT (3);
	  order[elt] = const0_rtx;

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));

	  ix86_expand_vector_set (false, target, val, 0);

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
				  gen_lowpart (SFmode, val), elt);
	}
      return;
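
    /* Worked example (derived from the code above): for ELT == 2 the
       order array becomes { 2, 1, 0, 3 }, a plain transposition of
       elements 0 and 2.  A transposition is its own inverse, so the
       identical pshufd immediate both brings the wanted element into
       slot 0 and, after the store into slot 0, puts everything back
       in place.  */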
    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;

    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    default:
      break;
    }
  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, target);
      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);
      emit_move_insn (target, mem);
    }
}
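
/* Illustration (the assembly is indicative only; the actual code
   depends on register allocation): for V4HImode and ELT == 2 the
   memory fallback spills the vector to a stack slot, rewrites one
   HImode element at offset 4, and reloads:

       movq	%mm0, -8(%ebp)
       movw	%ax, -4(%ebp)
       movq	-8(%ebp), %mm0

   Avoiding this memory round trip is one reason the shuffle-based
   paths above are tried first.  */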
void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;
    case V4SFmode:
      switch (elt)
	{
	case 0:
	  tmp = vec;
	  break;

	case 1:
	case 3:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
				       GEN_INT (elt), GEN_INT (elt),
				       GEN_INT (elt+4), GEN_INT (elt+4)));
	  break;

	case 2:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_unpckhps (tmp, vec, vec));
	  break;

	default:
	  gcc_unreachable ();
	}
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;
    case V4SImode:
      if (TARGET_SSE2)
	{
	  switch (elt)
	    {
	    case 0:
	      tmp = vec;
	      break;

	    case 1:
	    case 3:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_sse2_pshufd_1 (tmp, vec,
					    GEN_INT (elt), GEN_INT (elt),
					    GEN_INT (elt), GEN_INT (elt)));
	      break;

	    case 2:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  vec = tmp;
	  use_vec_extr = true;
	  elt = 0;
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
				      gen_lowpart (V4SFmode, vec), elt);
	  return;
	}
      break;
    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;

    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }
  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == HImode)
	{
	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
	  target = gen_lowpart (SImode, target);
	}

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);
      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}
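
/* Illustration (derived from the code above): pextrw always deposits
   the selected 16-bit element zero-extended in a 32-bit register, so
   for V8HImode the insn emitted above has roughly the shape

       (set (reg:SI target)
	    (zero_extend:SI
	      (vec_select:HI (reg:V8HI vec)
			     (parallel [(const_int elt)]))))

   Making the extension explicit lets later RTL passes delete a
   movzwl the source program would otherwise need.  */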
/* Implements target hook vector_mode_supported_p.  */

static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;

  return false;
}
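
/* Illustration (behavior inferred from the macros, not an original
   comment): a query for V2DFmode succeeds only once TARGET_SSE2 is
   enabled, via VALID_SSE2_REG_MODE, while a bare -mmmx compilation
   accepts only the 64-bit modes admitted by VALID_MMX_REG_MODE.  */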
/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
		      tree inputs ATTRIBUTE_UNUSED,
		      tree clobbers)
{
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
			clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
			clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
			clobbers);
  return clobbers;
}
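
/* Effect, illustrated (not an original comment): with this hook every
   asm statement is treated as if the user had written

       asm volatile ("..." : : : "flags", "fpsr", "dirflag");

   matching what the old cc0-based backend guaranteed implicitly, so
   existing inline assembly does not need to be updated.  */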
/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  return (mode != CCFPmode && mode != CCFPUmode
	  ? reverse_condition (code)
	  : reverse_condition_maybe_unordered (code));
}
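
/* Worked example (not an original comment): an integer GT reverses to
   LE, but for CCFPmode and CCFPUmode comparisons GT reverses to UNLE:
   when either operand is a NaN, GT is false, so its reversal must be
   true, and only the unordered-or-LE condition provides that.  */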
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[1])
      && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
    {
      if (REGNO (operands[0]) == FIRST_STACK_REG
	  && TARGET_USE_FFREEP)
	return "ffreep\t%y0";
      return "fstp\t%y0";
    }
  if (STACK_TOP_P (operands[0]))
    return "fld%z1\t%y1";
  return "fst\t%y0";
}
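
/* Illustration (derived from the code above): a move whose source
   register dies is emitted as a store-and-pop, "fstp %y0", or as
   "ffreep %y0" when the destination is the top of the stack on
   targets that prefer FFREEP; a move to the top of the stack loads
   with "fld"; any remaining register-to-register move is a plain
   "fst".  */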
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_USE_SAHF)
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label),
			       pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
  emit_jump_insn (temp);
}
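
/* Background (not an original comment): both branches test the C2 bit
   of the FP status word, which fnstsw leaves in bits 8-15 of REG with
   C0, C2 and C3 at bits 8, 10 and 14.  With TARGET_USE_SAHF the high
   byte is copied into EFLAGS and the jump tests the resulting
   unordered condition; otherwise a testb of the high byte against
   0x04 (status-word bit 10, i.e. C2) feeds an ordinary NE jump.  */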
/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  emit_insn (gen_absxf2 (tmp, op1));
  emit_insn (gen_cmpxf (tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode)));
  emit_jump_insn (gen_bge (label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));

  emit_label (label2);
}
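
/* Derivation (not an original comment): fyl2xp1 computes
   Y * log2 (X + 1) but is only specified for |X| < 1 - sqrt(2)/2,
   approximately 0.2928932, which is the constant compared against
   above.  Within that range, loading Y = ln (2) via fldln2 yields
   ln (2) * log2 (1 + x) = log1p (x) without the cancellation error of
   forming 1 + x; outside it, 1 + x is computed explicitly and fyl2x
   finishes the job the direct way.  */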
/* Solaris named-section hook.  Parameters are as for
   named_section_real.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }
  default_elf_asm_named_section (name, flags, decl);
}
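
/* Illustration (derived from the fprintf above): for a read-only
   .eh_frame section this emits

       .section	.eh_frame,"a",@unwind

   whereas default_elf_asm_named_section would omit the @unwind marker
   that the Binutils versions described above require.  */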
#include "gt-i386.h"