/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"
#include "tree-gimple.h"
#include "dwarf2.h"
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
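/* Worked example, grounded in the tables below: MODE_INDEX (QImode) == 0,
   MODE_INDEX (SImode) == 2, and any mode not listed (e.g. TImode) falls
   through to index 4, so an SImode divide cost is read from a five-entry
   table such as {18, 26, 42, 74, 74} as table[MODE_INDEX (SImode)] == 42.  */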
/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {    /* costs for tuning for size */
  2,                    /* cost of an add instruction */
  3,                    /* cost of a lea instruction */
  2,                    /* variable shift costs */
  3,                    /* constant shift costs */
  {3, 3, 3, 3, 5},      /* cost of starting a multiply */
  0,                    /* cost of multiply per each bit set */
  {3, 3, 3, 3, 5},      /* cost of a divide/mod */
  3,                    /* cost of movsx */
  3,                    /* cost of movzx */
  2,                    /* cost for loading QImode using movzbl */
  {2, 2, 2},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 2, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {2, 2, 2},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {2, 2, 2},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  3,                    /* cost of moving MMX register */
  {3, 3},               /* cost of loading MMX registers
                           in SImode and DImode */
  {3, 3},               /* cost of storing MMX registers
                           in SImode and DImode */
  3,                    /* cost of moving SSE register */
  {3, 3, 3},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {3, 3, 3},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  0,                    /* size of prefetch block */
  0,                    /* number of parallel prefetches */
  2,                    /* cost of FADD and FSUB insns.  */
  2,                    /* cost of FMUL instruction.  */
  2,                    /* cost of FDIV instruction.  */
  2,                    /* cost of FABS instruction.  */
  2,                    /* cost of FCHS instruction.  */
  2,                    /* cost of FSQRT instruction.  */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {    /* 386 specific costs */
  1,                    /* cost of an add instruction */
  1,                    /* cost of a lea instruction */
  3,                    /* variable shift costs */
  2,                    /* constant shift costs */
  {6, 6, 6, 6, 6},      /* cost of starting a multiply */
  1,                    /* cost of multiply per each bit set */
  {23, 23, 23, 23, 23}, /* cost of a divide/mod */
  3,                    /* cost of movsx */
  2,                    /* cost of movzx */
  15,                   /* "large" insn */
  4,                    /* cost for loading QImode using movzbl */
  {2, 4, 2},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 4, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {8, 8, 8},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {8, 8, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {4, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 8, 16},           /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 8, 16},           /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  0,                    /* size of prefetch block */
  0,                    /* number of parallel prefetches */
  23,                   /* cost of FADD and FSUB insns.  */
  27,                   /* cost of FMUL instruction.  */
  88,                   /* cost of FDIV instruction.  */
  22,                   /* cost of FABS instruction.  */
  24,                   /* cost of FCHS instruction.  */
  122,                  /* cost of FSQRT instruction.  */
};
static const
struct processor_costs i486_cost = {    /* 486 specific costs */
  1,                    /* cost of an add instruction */
  1,                    /* cost of a lea instruction */
  3,                    /* variable shift costs */
  2,                    /* constant shift costs */
  {12, 12, 12, 12, 12}, /* cost of starting a multiply */
  1,                    /* cost of multiply per each bit set */
  {40, 40, 40, 40, 40}, /* cost of a divide/mod */
  3,                    /* cost of movsx */
  2,                    /* cost of movzx */
  15,                   /* "large" insn */
  4,                    /* cost for loading QImode using movzbl */
  {2, 4, 2},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 4, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {8, 8, 8},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {8, 8, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {4, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 8, 16},           /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 8, 16},           /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  0,                    /* size of prefetch block */
  0,                    /* number of parallel prefetches */
  8,                    /* cost of FADD and FSUB insns.  */
  16,                   /* cost of FMUL instruction.  */
  73,                   /* cost of FDIV instruction.  */
  3,                    /* cost of FABS instruction.  */
  3,                    /* cost of FCHS instruction.  */
  83,                   /* cost of FSQRT instruction.  */
};
static const
struct processor_costs pentium_cost = {
  1,                    /* cost of an add instruction */
  1,                    /* cost of a lea instruction */
  4,                    /* variable shift costs */
  1,                    /* constant shift costs */
  {11, 11, 11, 11, 11}, /* cost of starting a multiply */
  0,                    /* cost of multiply per each bit set */
  {25, 25, 25, 25, 25}, /* cost of a divide/mod */
  3,                    /* cost of movsx */
  2,                    /* cost of movzx */
  8,                    /* "large" insn */
  6,                    /* cost for loading QImode using movzbl */
  {2, 4, 2},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 4, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {2, 2, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 6},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  8,                    /* cost of moving MMX register */
  {8, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {8, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 8, 16},           /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 8, 16},           /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  0,                    /* size of prefetch block */
  0,                    /* number of parallel prefetches */
  3,                    /* cost of FADD and FSUB insns.  */
  3,                    /* cost of FMUL instruction.  */
  39,                   /* cost of FDIV instruction.  */
  1,                    /* cost of FABS instruction.  */
  1,                    /* cost of FCHS instruction.  */
  70,                   /* cost of FSQRT instruction.  */
};
static const
struct processor_costs pentiumpro_cost = {
  1,                    /* cost of an add instruction */
  1,                    /* cost of a lea instruction */
  1,                    /* variable shift costs */
  1,                    /* constant shift costs */
  {4, 4, 4, 4, 4},      /* cost of starting a multiply */
  0,                    /* cost of multiply per each bit set */
  {17, 17, 17, 17, 17}, /* cost of a divide/mod */
  1,                    /* cost of movsx */
  1,                    /* cost of movzx */
  8,                    /* "large" insn */
  2,                    /* cost for loading QImode using movzbl */
  {4, 4, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 2, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {2, 2, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 6},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {2, 2},               /* cost of loading MMX registers
                           in SImode and DImode */
  {2, 2},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {2, 2, 8},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {2, 2, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  32,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  3,                    /* cost of FADD and FSUB insns.  */
  5,                    /* cost of FMUL instruction.  */
  56,                   /* cost of FDIV instruction.  */
  2,                    /* cost of FABS instruction.  */
  2,                    /* cost of FCHS instruction.  */
  56,                   /* cost of FSQRT instruction.  */
};
static const
struct processor_costs k6_cost = {
  1,                    /* cost of an add instruction */
  2,                    /* cost of a lea instruction */
  1,                    /* variable shift costs */
  1,                    /* constant shift costs */
  {3, 3, 3, 3, 3},      /* cost of starting a multiply */
  0,                    /* cost of multiply per each bit set */
  {18, 18, 18, 18, 18}, /* cost of a divide/mod */
  2,                    /* cost of movsx */
  2,                    /* cost of movzx */
  8,                    /* "large" insn */
  3,                    /* cost for loading QImode using movzbl */
  {4, 5, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 3, 2},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {6, 6, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 4},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {2, 2},               /* cost of loading MMX registers
                           in SImode and DImode */
  {2, 2},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {2, 2, 8},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {2, 2, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  6,                    /* MMX or SSE register to integer */
  32,                   /* size of prefetch block */
  1,                    /* number of parallel prefetches */
  2,                    /* cost of FADD and FSUB insns.  */
  2,                    /* cost of FMUL instruction.  */
  56,                   /* cost of FDIV instruction.  */
  2,                    /* cost of FABS instruction.  */
  2,                    /* cost of FCHS instruction.  */
  56,                   /* cost of FSQRT instruction.  */
};
static const
struct processor_costs athlon_cost = {
  1,                    /* cost of an add instruction */
  2,                    /* cost of a lea instruction */
  1,                    /* variable shift costs */
  1,                    /* constant shift costs */
  {5, 5, 5, 5, 5},      /* cost of starting a multiply */
  0,                    /* cost of multiply per each bit set */
  {18, 26, 42, 74, 74}, /* cost of a divide/mod */
  1,                    /* cost of movsx */
  1,                    /* cost of movzx */
  8,                    /* "large" insn */
  4,                    /* cost for loading QImode using movzbl */
  {3, 4, 3},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {3, 4, 3},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {4, 4, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {4, 4},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 4, 6},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 5},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  5,                    /* MMX or SSE register to integer */
  64,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  4,                    /* cost of FADD and FSUB insns.  */
  4,                    /* cost of FMUL instruction.  */
  24,                   /* cost of FDIV instruction.  */
  2,                    /* cost of FABS instruction.  */
  2,                    /* cost of FCHS instruction.  */
  35,                   /* cost of FSQRT instruction.  */
};
static const
struct processor_costs k8_cost = {
  1,                    /* cost of an add instruction */
  2,                    /* cost of a lea instruction */
  1,                    /* variable shift costs */
  1,                    /* constant shift costs */
  {3, 4, 3, 4, 5},      /* cost of starting a multiply */
  0,                    /* cost of multiply per each bit set */
  {18, 26, 42, 74, 74}, /* cost of a divide/mod */
  1,                    /* cost of movsx */
  1,                    /* cost of movzx */
  8,                    /* "large" insn */
  4,                    /* cost for loading QImode using movzbl */
  {3, 4, 3},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {3, 4, 3},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {4, 4, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {3, 3},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 3, 6},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 5},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  5,                    /* MMX or SSE register to integer */
  64,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  4,                    /* cost of FADD and FSUB insns.  */
  4,                    /* cost of FMUL instruction.  */
  19,                   /* cost of FDIV instruction.  */
  2,                    /* cost of FABS instruction.  */
  2,                    /* cost of FCHS instruction.  */
  35,                   /* cost of FSQRT instruction.  */
};
static const
struct processor_costs pentium4_cost = {
  1,                    /* cost of an add instruction */
  3,                    /* cost of a lea instruction */
  4,                    /* variable shift costs */
  4,                    /* constant shift costs */
  {15, 15, 15, 15, 15}, /* cost of starting a multiply */
  0,                    /* cost of multiply per each bit set */
  {56, 56, 56, 56, 56}, /* cost of a divide/mod */
  1,                    /* cost of movsx */
  1,                    /* cost of movzx */
  16,                   /* "large" insn */
  2,                    /* cost for loading QImode using movzbl */
  {4, 5, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 3, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {2, 2, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 6},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {2, 2},               /* cost of loading MMX registers
                           in SImode and DImode */
  {2, 2},               /* cost of storing MMX registers
                           in SImode and DImode */
  12,                   /* cost of moving SSE register */
  {12, 12, 12},         /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {2, 2, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  10,                   /* MMX or SSE register to integer */
  64,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  5,                    /* cost of FADD and FSUB insns.  */
  7,                    /* cost of FMUL instruction.  */
  43,                   /* cost of FDIV instruction.  */
  2,                    /* cost of FABS instruction.  */
  2,                    /* cost of FCHS instruction.  */
  43,                   /* cost of FSQRT instruction.  */
};
static const
struct processor_costs nocona_cost = {
  1,                    /* cost of an add instruction */
  1,                    /* cost of a lea instruction */
  1,                    /* variable shift costs */
  1,                    /* constant shift costs */
  {10, 10, 10, 10, 10}, /* cost of starting a multiply */
  0,                    /* cost of multiply per each bit set */
  {66, 66, 66, 66, 66}, /* cost of a divide/mod */
  1,                    /* cost of movsx */
  1,                    /* cost of movzx */
  16,                   /* "large" insn */
  4,                    /* cost for loading QImode using movzbl */
  {4, 4, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {4, 4, 4},            /* cost of storing integer registers */
  3,                    /* cost of reg,reg fld/fst */
  {12, 12, 12},         /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 4},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  6,                    /* cost of moving MMX register */
  {12, 12},             /* cost of loading MMX registers
                           in SImode and DImode */
  {12, 12},             /* cost of storing MMX registers
                           in SImode and DImode */
  6,                    /* cost of moving SSE register */
  {12, 12, 12},         /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {12, 12, 12},         /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  8,                    /* MMX or SSE register to integer */
  128,                  /* size of prefetch block */
  8,                    /* number of parallel prefetches */
  6,                    /* cost of FADD and FSUB insns.  */
  8,                    /* cost of FMUL instruction.  */
  40,                   /* cost of FDIV instruction.  */
  3,                    /* cost of FABS instruction.  */
  3,                    /* cost of FCHS instruction.  */
  44,                   /* cost of FSQRT instruction.  */
};
const struct processor_costs *ix86_cost = &pentium_cost;
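/* Illustrative sketch (field names assumed from the comments above; the
   real declarations live in i386.h): cost queries elsewhere in the
   backend read through this pointer, roughly

     cost = ix86_cost->mult_init[MODE_INDEX (mode)]
            + nbits * ix86_cost->mult_bit;

   so retargeting -mtune just repoints ix86_cost at another table.  */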
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
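/* Illustrative sketch: each x86_* constant below is a bitmask over the
   processor enumeration above, so a tuning decision is a membership test
   against the selected -mtune processor, e.g.

     if (x86_use_leave & (1 << ix86_tune))
       ... emit "leave" rather than explicit %esp/%ebp restores ...

   (i386.h wraps such tests in TARGET_* convenience macros.)  */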
const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_fisttp = m_NOCONA;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
/* Branch hints were put in P4 based on simulation results.  But after P4
   was made, no performance benefit was observed with branch hints.  They
   also increase code size.  As a result, icc never generates branch
   hints.  */
const int x86_branch_hints = 0;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
const int x86_decompose_lea = m_PENT4 | m_NOCONA;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just the
   lower part of scalar values in proper format, leaving the upper part
   undefined.  */
const int x86_sse_split_regs = m_ATHLON_K8;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;

/* ??? Allowing interunit moves makes it all too easy for the compiler to put
   integer data in xmm registers, which results in pretty abysmal code.  */
const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;

const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
/* Some CPU cores are not able to predict more than 4 branch instructions in
   the 16 byte window.  */
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
const int x86_use_bt = m_ATHLON_K8;
/* Compare and exchange was added for 80486.  */
const int x86_cmpxchg = ~m_386;
/* Exchange and add was added for 80486.  */
const int x86_xadd = ~m_386;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
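/* Illustrative sketch of the intended use: a function whose body averages
   fewer insns than this per invocation is prologue-dominated, so the
   layout code picks the fast-but-longer form roughly as

     if (count < FAST_PROLOGUE_INSN_COUNT)
       ... prefer the move-based prologue (cf. save_regs_using_mov) ...  */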
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  /* extended integer registers */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* extended SSE registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
628 /* The "default" register map used in 32bit mode. */
630 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
632 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
633 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
634 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
635 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
636 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
637 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
638 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
652 /* The "default" register map used in 64bit mode. */
653 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
655 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
656 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
657 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
658 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
659 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
660 8,9,10,11,12,13,14,15, /* extended integer registers */
661 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.

   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to understand
   that it should say that a variable lives in %st(0) (when asked via an
   `=' command) if we said it was in DWARF regno 11, but SDB still
   prints garbage when asked for the value of the variable in question
   (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)

   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.

   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,               /* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,       /* fp regs */
  -1, 9, -1, -1, -1,                    /* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,       /* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,       /* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended SSE registers */
};
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
rtx ix86_compare_emitted = NULL_RTX;

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
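/* Worked example: on x86-64, REGPARM_MAX == 6 integer registers of
   UNITS_PER_WORD == 8 bytes plus SSE_REGPARM_MAX == 8 SSE registers of
   16 bytes each give a 6*8 + 8*16 == 176 byte register save area.  */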
/* Define the structure for the machine field in struct function.  */

struct stack_local_entry GTY(())
{
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
                                              <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
                                              <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]           \
                         )
   [va_arg registers]   (
                         > to_allocate        <- FRAME_POINTER
   [frame]              (
                         )
   [padding2]           /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
/* Code model option.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect.  */
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which CPU are we scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if the SSE prefetch instruction is not a NOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
static int ix86_regparm;

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
static bool ix86_handle_option (size_t, const char *, int);
static void output_pic_addr_const (FILE *, rtx, int);
static void put_condition_code (enum rtx_code, enum machine_mode,
				int, int, FILE *);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
						   rtx *);
static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
						   enum machine_mode);
static rtx get_thread_pointer (int);
static rtx legitimize_tls_address (rtx, enum tls_model, int);
static void get_pc_thunk_name (char [32], unsigned int);
static rtx gen_push (rtx);
static int ix86_flags_dependant (rtx, rtx, enum attr_type);
static int ix86_agi_dependant (rtx, rtx, enum attr_type);
static struct machine_function * ix86_init_machine_status (void);
static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
static int ix86_nsaved_regs (void);
static void ix86_emit_save_regs (void);
static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
static HOST_WIDE_INT ix86_GOT_alias_set (void);
static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
static rtx ix86_expand_aligntest (rtx, int);
static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
static int ix86_issue_rate (void);
static int ix86_adjust_cost (rtx, rtx, rtx, int);
static int ia32_multipass_dfa_lookahead (void);
static void ix86_init_mmx_sse_builtins (void);
static rtx x86_this_parameter (tree);
static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				 HOST_WIDE_INT, tree);
static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void x86_file_start (void);
static void ix86_reorg (void);
static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
static tree ix86_build_builtin_va_list (void);
static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					 tree, int *, int);
static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
static bool ix86_vector_mode_supported_p (enum machine_mode);

static int ix86_address_cost (rtx);
static bool ix86_cannot_force_const_mem (rtx);
static rtx ix86_delegitimize_address (rtx);

struct builtin_description;
static rtx ix86_expand_sse_comi (const struct builtin_description *,
				 tree, rtx);
static rtx ix86_expand_sse_compare (const struct builtin_description *,
				    tree, rtx);
static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_store_builtin (enum insn_code, tree);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
static int ix86_fp_comparison_cost (enum rtx_code code);
static unsigned int ix86_select_alt_pic_regnum (void);
static int ix86_save_reg (unsigned int, int);
static void ix86_compute_frame_layout (struct ix86_frame *);
static int ix86_comp_type_attributes (tree, tree);
static int ix86_function_regparm (tree, tree);
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall (tree, tree);
static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
static int ix86_value_regno (enum machine_mode, tree);
static bool contains_128bit_aligned_vector_p (tree);
static rtx ix86_struct_value_rtx (tree, int);
static bool ix86_ms_bitfield_layout_p (tree);
static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
static int extended_reg_mentioned_1 (rtx *, void *);
static bool ix86_rtx_costs (rtx, int, int, int *);
static int min_insn_size (rtx);
static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				    tree, bool);
static void ix86_init_builtins (void);
static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
/* This function is only used on Solaris.  */
static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
     ATTRIBUTE_UNUSED;
/* Register class used for passing a given 64-bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class:
   gcc will just use an SF or DFmode move instead of a DImode move to
   avoid reformatting penalties.

   Similarly we play games with the INTEGERSI_CLASS to use cheaper SImode
   moves whenever possible (the upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] = {
  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
  "sseup", "x87", "x87up", "cplx87", "no"
};

#define MAX_CLASSES 4
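/* Illustrative classification sketch (per the psABI rules these classes
   support): a structure such as

     struct { double d; int i; }

   occupies two eightbytes and classifies roughly as
   { X86_64_SSEDF_CLASS, X86_64_INTEGERSI_CLASS }: the double moves in
   DFmode through an SSE register, and the int uses the cheaper SImode
   integer move since the upper half of its eightbyte is padding.  */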
/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;
static void init_ext_80387_constants (void);
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS	\
  (TARGET_DEFAULT			\
   | TARGET_64BIT_DEFAULT		\
   | TARGET_SUBTARGET_DEFAULT		\
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)

#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

struct gcc_target targetm = TARGET_INITIALIZER;
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif
/* Implement TARGET_HANDLE_OPTION.  */

static bool
ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
{
  switch (code)
    {
    case OPT_m3dnow:
      if (!value)
        {
          /* Disabling 3DNow! also disables the 3DNow! Athlon extensions.  */
          target_flags &= ~MASK_3DNOW_A;
          target_flags_explicit |= MASK_3DNOW_A;
        }
      return true;

    case OPT_mmmx:
      if (!value)
        {
          /* Disabling MMX also disables 3DNow!.  */
          target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
          target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
        }
      return true;

    case OPT_msse:
      if (!value)
        {
          /* Disabling SSE also disables SSE2 and SSE3.  */
          target_flags &= ~(MASK_SSE2 | MASK_SSE3);
          target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
        }
      return true;

    case OPT_msse2:
      if (!value)
        {
          /* Disabling SSE2 also disables SSE3.  */
          target_flags &= ~MASK_SSE3;
          target_flags_explicit |= MASK_SSE3;
        }
      return true;

    default:
      return true;
    }
}
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options (void)
{
  int i;
  int ix86_tune_defaulted = 0;

  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16
  static struct ptt
    {
      const struct processor_costs *cost;   /* Processor costs */
      const int target_enable;              /* Target flags to enable.  */
      const int target_disable;             /* Target flags to disable.  */
      const int align_loop;                 /* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
      {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
      {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
      {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
    };
  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	{
	  PTA_SSE = 1,
	  PTA_SSE2 = 2,
	  PTA_SSE3 = 4,
	  PTA_MMX = 8,
	  PTA_PREFETCH_SSE = 16,
	  PTA_3DNOW = 32,
	  PTA_3DNOW_A = 64,
	  PTA_64BIT = 128
	} flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE
					  | PTA_SSE2},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
				       | PTA_MMX | PTA_PREFETCH_SSE},
      {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
					| PTA_MMX | PTA_PREFETCH_SSE},
      {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
				     | PTA_MMX | PTA_PREFETCH_SSE},
      {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
				   | PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				   | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
					 | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				     | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
			       | PTA_SSE | PTA_SSE2 },
      {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
			   | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				| PTA_64BIT | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				  | PTA_64BIT | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);
#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif
  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 1;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 0;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 0;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }
  if (!ix86_tune_string && ix86_arch_string)
    ix86_tune_string = ix86_arch_string;
  if (!ix86_tune_string)
    {
      ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
      ix86_tune_defaulted = 1;
    }
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (!strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model %qs not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model %<large%> not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);
1311 for (i = 0; i < pta_size; i++)
1312 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1314 ix86_arch = processor_alias_table[i].processor;
1315 /* Default cpu tuning to the architecture. */
1316 ix86_tune = ix86_arch;
1317 if (processor_alias_table[i].flags & PTA_MMX
1318 && !(target_flags_explicit & MASK_MMX))
1319 target_flags |= MASK_MMX;
1320 if (processor_alias_table[i].flags & PTA_3DNOW
1321 && !(target_flags_explicit & MASK_3DNOW))
1322 target_flags |= MASK_3DNOW;
1323 if (processor_alias_table[i].flags & PTA_3DNOW_A
1324 && !(target_flags_explicit & MASK_3DNOW_A))
1325 target_flags |= MASK_3DNOW_A;
1326 if (processor_alias_table[i].flags & PTA_SSE
1327 && !(target_flags_explicit & MASK_SSE))
1328 target_flags |= MASK_SSE;
1329 if (processor_alias_table[i].flags & PTA_SSE2
1330 && !(target_flags_explicit & MASK_SSE2))
1331 target_flags |= MASK_SSE2;
1332 if (processor_alias_table[i].flags & PTA_SSE3
1333 && !(target_flags_explicit & MASK_SSE3))
1334 target_flags |= MASK_SSE3;
1335 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1336 x86_prefetch_sse = true;
1337 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1338 error ("CPU you selected does not support x86-64 "
1344 error ("bad value (%s) for -march= switch", ix86_arch_string);
1346 for (i = 0; i < pta_size; i++)
1347 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1349 ix86_tune = processor_alias_table[i].processor;
1350 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1352 if (ix86_tune_defaulted)
1354 ix86_tune_string = "x86-64";
1355 for (i = 0; i < pta_size; i++)
1356 if (! strcmp (ix86_tune_string,
1357 processor_alias_table[i].name))
1359 ix86_tune = processor_alias_table[i].processor;
1362 error ("CPU you selected does not support x86-64 "
1365 /* Intel CPUs have always interpreted SSE prefetch instructions as
1366 NOPs; so, we can enable SSE prefetch instructions even when
1367 -mtune (rather than -march) points us to a processor that has them.
1368 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1369 higher processors. */
1370 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1371 x86_prefetch_sse = true;
1375 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1378 ix86_cost = &size_cost;
1380 ix86_cost = processor_target_table[ix86_tune].cost;
1381 target_flags |= processor_target_table[ix86_tune].target_enable;
1382 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1384 /* Arrange to set up i386_stack_locals for all functions. */
1385 init_machine_status = ix86_init_machine_status;
  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;
  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning (0, "-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning (0, "-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning (0, "-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }
  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_tune].align_func;
    }
  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
				   ? TARGET_64BIT ? 128 : 32
				   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
	       TARGET_64BIT ? 4 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }
  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }
  if (ix86_tls_dialect_string)
    {
      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
	ix86_tls_dialect = TLS_DIALECT_GNU;
      else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
	ix86_tls_dialect = TLS_DIALECT_SUN;
      else
	error ("bad value (%s) for -mtls-dialect= switch",
	       ix86_tls_dialect_string);
    }
  /* Keep nonleaf frame pointers.  */
  if (flag_omit_frame_pointer)
    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_unsafe_math_optimizations)
    target_flags &= ~MASK_IEEE_FP;
  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  /* Likewise, if the target doesn't have a 387, or we've specified
     software floating point, don't use 387 inline intrinsics.  */
  if (!TARGET_80387)
    target_flags |= MASK_NO_FANCY_MATH_387;
  /* Turn on SSE2 builtins for -msse3.  */
  if (TARGET_SSE3)
    target_flags |= MASK_SSE2;

  /* Turn on SSE builtins for -msse2.  */
  if (TARGET_SSE2)
    target_flags |= MASK_SSE;

  /* Turn on MMX builtins for -msse.  */
  if (TARGET_SSE)
    {
      target_flags |= MASK_MMX & ~target_flags_explicit;
      x86_prefetch_sse = true;
    }

  /* Turn on MMX builtins for 3Dnow.  */
  if (TARGET_3DNOW)
    target_flags |= MASK_MMX;
  if (TARGET_64BIT)
    {
      if (TARGET_ALIGN_DOUBLE)
	error ("-malign-double makes no sense in the 64bit mode");
      if (TARGET_RTD)
	error ("-mrtd calling convention not supported in the 64bit mode");

      /* Enable by default the SSE and MMX builtins.  Do allow the user to
	 explicitly disable any of these.  In particular, disabling SSE and
	 MMX for kernel code is extremely useful.  */
      target_flags
	|= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
	    & ~target_flags_explicit);
    }
  else
    {
      /* The i386 ABI does not specify a red zone.  It still makes sense
	 to use one when the programmer takes care to keep the stack from
	 being destroyed.  */
      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
	target_flags |= MASK_NO_RED_ZONE;
    }
  ix86_fpmath = TARGET_FPMATH_DEFAULT;

  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
	ix86_fpmath = FPMATH_387;
      else if (! strcmp (ix86_fpmath_string, "sse"))
	{
	  if (!TARGET_SSE)
	    {
	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE;
	}
      else if (! strcmp (ix86_fpmath_string, "387,sse")
	       || ! strcmp (ix86_fpmath_string, "sse,387"))
	{
	  if (!TARGET_SSE)
	    {
	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if (!TARGET_80387)
	    {
	      warning (0, "387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
	}
      else
	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
    }
  /* If the i387 is disabled, then do not return values in it.  */
  if (!TARGET_80387)
    target_flags &= ~MASK_FLOAT_RETURNS;

  if ((x86_accumulate_outgoing_args & TUNEMASK)
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }

  /* When scheduling description is not available, disable scheduler pass
     so it won't slow down the compilation and make x87 code slower.  */
  if (!TARGET_SCHEDULE)
    flag_schedule_insns_after_reload = flag_schedule_insns = 0;
}
void
optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif

  /* The default values of these switches depend on TARGET_64BIT,
     which is not known at this moment.  Mark these values with 2 and
     let the user override them.  In case there is no command line option
     specifying them, we will set the defaults in override_options.  */
  if (optimize >= 1)
    flag_omit_frame_pointer = 2;
  flag_pcc_struct_return = 2;
  flag_asynchronous_unwind_tables = 2;
#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
  SUBTARGET_OPTIMIZATION_OPTIONS;
#endif
}
1642 /* Table of valid machine attributes. */
1643 const struct attribute_spec ix86_attribute_table[] =
1645 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1646 /* Stdcall attribute says callee is responsible for popping arguments
1647 if they are not variable. */
1648 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1649 /* Fastcall attribute says callee is responsible for popping arguments
1650 if they are not variable. */
1651 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1652 /* Cdecl attribute says the callee is a normal C declaration */
1653 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1654 /* Regparm attribute specifies how many integer arguments are to be
1655 passed in registers. */
1656 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1657 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1658 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
1659 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
1660 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1662 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1663 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1664 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1665 SUBTARGET_ATTRIBUTE_TABLE,
1667 { NULL, 0, 0, false, false, false, NULL }
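/* Illustrative user-level declarations (a sketch, not part of this file)
   exercising the attributes registered above; function names are
   hypothetical.  */
#if 0
int __attribute__ ((stdcall)) s (int a, int b);       /* callee pops 8 bytes */
int __attribute__ ((fastcall)) f (int a, int b);      /* a in %ecx, b in %edx */
int __attribute__ ((cdecl)) c (int a, ...);           /* caller pops the args */
int __attribute__ ((regparm (3))) r (int a, int b, int c); /* regs, not stack */
#endif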
1670 /* Decide whether we can make a sibling call to a function. DECL is the
1671 declaration of the function being targeted by the call and EXP is the
1672 CALL_EXPR representing the call. */
1675 ix86_function_ok_for_sibcall (tree decl, tree exp)
1679 /* If we are generating position-independent code, we cannot sibcall
1680 optimize any indirect call, or a direct call to a global function,
1681 as the PLT requires %ebx be live. */
1682 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1690 /* If we are returning floats on the 80387 register stack, we cannot
1691 make a sibcall from a function that doesn't return a float to a
1692 function that does or, conversely, from a function that does return
1693 a float to a function that doesn't; the necessary stack adjustment
1694 would not be executed. */
1695 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp), func))
1696 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
1700 /* If this call is indirect, we'll need to be able to use a call-clobbered
1701 register for the address of the target function. Make sure that all
1702 such registers are not used for passing parameters. */
1703 if (!decl && !TARGET_64BIT)
1707 /* We're looking at the CALL_EXPR; we need the type of the function. */
1708 type = TREE_OPERAND (exp, 0); /* pointer expression */
1709 type = TREE_TYPE (type); /* pointer type */
1710 type = TREE_TYPE (type); /* function type */
1712 if (ix86_function_regparm (type, NULL) >= 3)
1714 /* ??? Need to count the actual number of registers to be used,
1715 not the possible number of registers. Fix later. */
1720 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1721 /* Dllimport'd functions are also called indirectly. */
1722 if (decl && lookup_attribute ("dllimport", DECL_ATTRIBUTES (decl))
1723 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
1727 /* Otherwise okay. That also includes certain types of indirect calls. */
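/* Example of the PIC restriction checked above (a sketch): when the
   following is compiled with -fpic on ia32, the call to the global
   function g cannot become a sibcall, because the PLT entry requires
   %ebx to hold the GOT pointer.  */
#if 0
extern int g (int);
int f (int x) { return g (x); }
#endif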
1731 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1732 arguments as in struct attribute_spec.handler. */
1734 ix86_handle_cdecl_attribute (tree *node, tree name,
1735 tree args ATTRIBUTE_UNUSED,
1736 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1738 if (TREE_CODE (*node) != FUNCTION_TYPE
1739 && TREE_CODE (*node) != METHOD_TYPE
1740 && TREE_CODE (*node) != FIELD_DECL
1741 && TREE_CODE (*node) != TYPE_DECL)
1743 warning (OPT_Wattributes, "%qs attribute only applies to functions",
1744 IDENTIFIER_POINTER (name));
1745 *no_add_attrs = true;
1749 if (is_attribute_p ("fastcall", name))
1751 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1753 error ("fastcall and stdcall attributes are not compatible");
1755 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1757 error ("fastcall and regparm attributes are not compatible");
1760 else if (is_attribute_p ("stdcall", name))
1762 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1764 error ("fastcall and stdcall attributes are not compatible");
1771 warning (OPT_Wattributes, "%qs attribute ignored",
1772 IDENTIFIER_POINTER (name));
1773 *no_add_attrs = true;
1779 /* Handle a "regparm" attribute;
1780 arguments as in struct attribute_spec.handler. */
1782 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1783 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1785 if (TREE_CODE (*node) != FUNCTION_TYPE
1786 && TREE_CODE (*node) != METHOD_TYPE
1787 && TREE_CODE (*node) != FIELD_DECL
1788 && TREE_CODE (*node) != TYPE_DECL)
1790 warning (OPT_Wattributes, "%qs attribute only applies to functions",
1791 IDENTIFIER_POINTER (name));
1792 *no_add_attrs = true;
1798 cst = TREE_VALUE (args);
1799 if (TREE_CODE (cst) != INTEGER_CST)
1801 warning (OPT_Wattributes,
1802 "%qs attribute requires an integer constant argument",
1803 IDENTIFIER_POINTER (name));
1804 *no_add_attrs = true;
1806 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1808 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
1809 IDENTIFIER_POINTER (name), REGPARM_MAX);
1810 *no_add_attrs = true;
1813 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1815 error ("fastcall and regparm attributes are not compatible");
1822 /* Return 0 if the attributes for two types are incompatible, 1 if they
1823 are compatible, and 2 if they are nearly compatible (which causes a
1824 warning to be generated). */
1827 ix86_comp_type_attributes (tree type1, tree type2)
1829 /* Check for mismatch of non-default calling convention. */
1830 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1832 if (TREE_CODE (type1) != FUNCTION_TYPE)
1835 /* Check for mismatched fastcall types. */
1836 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1837 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1840 /* Check for mismatched return types (cdecl vs stdcall). */
1841 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1842 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1844 if (ix86_function_regparm (type1, NULL)
1845 != ix86_function_regparm (type2, NULL))
1850 /* Return the regparm value for a function with the indicated TYPE and DECL.
1851 DECL may be NULL when calling a function indirectly
1852 or considering a libcall. */
1855 ix86_function_regparm (tree type, tree decl)
1858 int regparm = ix86_regparm;
1859 bool user_convention = false;
1863 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1866 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1867 user_convention = true;
1870 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1873 user_convention = true;
1876 /* Use register calling convention for local functions when possible. */
1877 if (!TARGET_64BIT && !user_convention && decl
1878 && flag_unit_at_a_time && !profile_flag)
1880 struct cgraph_local_info *i = cgraph_local_info (decl);
1883 /* We can't use regparm(3) for nested functions as these use the
1884 static chain pointer in the third argument. */
1885 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1895 /* Return true if EAX is live at the start of the function. Used by
1896 ix86_expand_prologue to determine if we need special help before
1897 calling allocate_stack_worker. */
1900 ix86_eax_live_at_start_p (void)
1902 /* Cheat. Don't bother working forward from ix86_function_regparm
1903 to the function type to whether an actual argument is located in
1904 eax. Instead just look at cfg info, which is still close enough
1905 to correct at this point. This gives false positives for broken
1906 functions that might use uninitialized data that happens to be
1907 allocated in eax, but who cares? */
1908 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1911 /* Value is the number of bytes of arguments automatically
1912 popped when returning from a subroutine call.
1913 FUNDECL is the declaration node of the function (as a tree),
1914 FUNTYPE is the data type of the function (as a tree),
1915 or for a library call it is an identifier node for the subroutine name.
1916 SIZE is the number of bytes of arguments passed on the stack.
1918 On the 80386, the RTD insn may be used to pop them if the number
1919 of args is fixed, but if the number is variable then the caller
1920 must pop them all. RTD can't be used for library calls now
1921 because the library is compiled with the Unix compiler.
1922 Use of RTD is a selectable option, since it is incompatible with
1923 standard Unix calling sequences. If the option is not selected,
1924 the caller must always pop the args.
1926 The attribute stdcall is equivalent to RTD on a per module basis. */
1929 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1931 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1933 /* Cdecl functions override -mrtd, and never pop the stack. */
1934 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1936 /* Stdcall and fastcall functions will pop the stack if not
1937 variable args. */
1938 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1939 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1943 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1944 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1945 == void_type_node)))
1949 /* Lose any fake structure return argument if it is passed on the stack. */
1950 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1952 && !KEEP_AGGREGATE_RETURN_POINTER)
1954 int nregs = ix86_function_regparm (funtype, fundecl);
1957 return GET_MODE_SIZE (Pmode);
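/* Sketch of who pops the argument bytes under the rules above (assuming
   4-byte ints on ia32); the declarations are hypothetical.  */
#if 0
int __attribute__ ((stdcall)) s (int a, int b); /* s returns with "ret $8" */
int g (int a, int b);  /* default ABI: caller pops; with -mrtd: "ret $8" */
int h (int a, ...);    /* variable args: the caller always pops */
#endif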
1963 /* Argument support functions. */
1965 /* Return true when the register may be used to pass function parameters. */
1967 ix86_function_arg_regno_p (int regno)
1971 return (regno < REGPARM_MAX
1972 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1973 if (SSE_REGNO_P (regno) && TARGET_SSE)
1975 /* RAX is used as hidden argument to va_arg functions. */
1978 for (i = 0; i < REGPARM_MAX; i++)
1979 if (regno == x86_64_int_parameter_registers[i])
1984 /* Return true if we do not know how to pass TYPE solely in registers. */
1987 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
1989 if (must_pass_in_stack_var_size_or_pad (mode, type))
1992 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1993 The layout_type routine is crafty and tries to trick us into passing
1994 currently unsupported vector types on the stack by using TImode. */
1995 return (!TARGET_64BIT && mode == TImode
1996 && type && TREE_CODE (type) != VECTOR_TYPE);
1999 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2000 for a call to a function whose data type is FNTYPE.
2001 For a library call, FNTYPE is 0. */
2004 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2005 tree fntype, /* tree ptr for function decl */
2006 rtx libname, /* SYMBOL_REF of library name or 0 */
2009 static CUMULATIVE_ARGS zero_cum;
2010 tree param, next_param;
2012 if (TARGET_DEBUG_ARG)
2014 fprintf (stderr, "\ninit_cumulative_args (");
2016 fprintf (stderr, "fntype code = %s, ret code = %s",
2017 tree_code_name[(int) TREE_CODE (fntype)],
2018 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2020 fprintf (stderr, "no fntype");
2023 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2028 /* Set up the number of registers to use for passing arguments. */
2030 cum->nregs = ix86_function_regparm (fntype, fndecl);
2032 cum->nregs = ix86_regparm;
2034 cum->sse_nregs = SSE_REGPARM_MAX;
2036 cum->mmx_nregs = MMX_REGPARM_MAX;
2037 cum->warn_sse = true;
2038 cum->warn_mmx = true;
2039 cum->maybe_vaarg = false;
2041 /* Use the ecx and edx registers if the function has the fastcall attribute. */
2042 if (fntype && !TARGET_64BIT)
2044 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2051 /* Determine if this function has variable arguments. This is
2052 indicated by the last argument being 'void_type_node' if there
2053 are no variable arguments. If there are variable arguments, then
2054 we won't pass anything in registers in 32-bit mode. */
2056 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2058 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2059 param != 0; param = next_param)
2061 next_param = TREE_CHAIN (param);
2062 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2073 cum->maybe_vaarg = true;
2077 if ((!fntype && !libname)
2078 || (fntype && !TYPE_ARG_TYPES (fntype)))
2079 cum->maybe_vaarg = true;
2081 /* For local functions, pass SFmode (and DFmode for SSE2) arguments in
2082 SSE registers even in 32-bit mode, and allow not just 3 but up to
2083 8 SSE arguments in registers. */
2084 if (!TARGET_64BIT && !cum->maybe_vaarg && !cum->fastcall
2085 && cum->sse_nregs == SSE_REGPARM_MAX && fndecl
2086 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2088 struct cgraph_local_info *i = cgraph_local_info (fndecl);
2092 cum->float_in_sse = true;
2096 if (TARGET_DEBUG_ARG)
2097 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2102 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2103 But in the case of vector types, it is some vector mode.
2105 When we have only some of our vector ISA extensions enabled, there
2106 are some modes for which vector_mode_supported_p is false. For these
2107 modes, the generic vector support in GCC will choose some non-vector mode
2108 in order to implement the type. By computing the natural mode, we'll
2109 select the proper ABI location for the operand and not depend on whatever
2110 the middle-end decides to do with these vector types. */
2112 static enum machine_mode
2113 type_natural_mode (tree type)
2115 enum machine_mode mode = TYPE_MODE (type);
2117 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2119 HOST_WIDE_INT size = int_size_in_bytes (type);
2120 if ((size == 8 || size == 16)
2121 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2122 && TYPE_VECTOR_SUBPARTS (type) > 1)
2124 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2126 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2127 mode = MIN_MODE_VECTOR_FLOAT;
2129 mode = MIN_MODE_VECTOR_INT;
2131 /* Get the mode which has this inner mode and number of units. */
2132 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2133 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2134 && GET_MODE_INNER (mode) == innermode)
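/* Example (a sketch): with SSE disabled, a 16-byte vector type such as
     typedef float v4sf __attribute__ ((vector_size (16)));
   may be given a non-vector TYPE_MODE by the middle end, but its natural
   mode found by the loop above is V4SFmode, so the ABI decision for the
   operand does not depend on how the middle end lowers the type.  */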
2144 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2145 this may not agree with the mode that the type system has chosen for the
2146 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2147 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2150 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2155 if (orig_mode != BLKmode)
2156 tmp = gen_rtx_REG (orig_mode, regno);
2159 tmp = gen_rtx_REG (mode, regno);
2160 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2161 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2167 /* x86-64 register passing implementation. See the x86-64 PS ABI for
2168 details. The goal of this code is to classify each 8-byte chunk of the
2169 incoming argument by register class and assign registers accordingly. */
2171 /* Return the union class of CLASS1 and CLASS2.
2172 See the x86-64 PS ABI for details. */
2174 static enum x86_64_reg_class
2175 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2177 /* Rule #1: If both classes are equal, this is the resulting class. */
2178 if (class1 == class2)
2181 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2182 the other class. */
2183 if (class1 == X86_64_NO_CLASS)
2185 if (class2 == X86_64_NO_CLASS)
2188 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2189 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2190 return X86_64_MEMORY_CLASS;
2192 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2193 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2194 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2195 return X86_64_INTEGERSI_CLASS;
2196 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2197 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2198 return X86_64_INTEGER_CLASS;
2200 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2201 MEMORY is used. */
2202 if (class1 == X86_64_X87_CLASS
2203 || class1 == X86_64_X87UP_CLASS
2204 || class1 == X86_64_COMPLEX_X87_CLASS
2205 || class2 == X86_64_X87_CLASS
2206 || class2 == X86_64_X87UP_CLASS
2207 || class2 == X86_64_COMPLEX_X87_CLASS)
2208 return X86_64_MEMORY_CLASS;
2210 /* Rule #6: Otherwise class SSE is used. */
2211 return X86_64_SSE_CLASS;
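/* Worked example of the merge rules (a sketch): for
     struct s { double d; int a, b; };
   the first 8-byte word holds only 'd' and classifies into the SSE
   family, while the second word holds 'a' and 'b', whose INTEGER-family
   classes merge to INTEGER. The struct therefore travels in one SSE
   register and one general-purpose register.  */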
2214 /* Classify the argument of type TYPE and mode MODE.
2215 CLASSES will be filled by the register class used to pass each word
2216 of the operand. The number of words is returned. In case the parameter
2217 should be passed in memory, 0 is returned. As a special case for zero
2218 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2220 BIT_OFFSET is used internally for handling records and specifies the
2221 offset of the argument, in bits modulo 256, to avoid overflow cases.
2223 See the x86-64 PS ABI for details.
2227 classify_argument (enum machine_mode mode, tree type,
2228 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2230 HOST_WIDE_INT bytes =
2231 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2232 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2234 /* Variable sized entities are always passed/returned in memory. */
2238 if (mode != VOIDmode
2239 && targetm.calls.must_pass_in_stack (mode, type))
2242 if (type && AGGREGATE_TYPE_P (type))
2246 enum x86_64_reg_class subclasses[MAX_CLASSES];
2248 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2252 for (i = 0; i < words; i++)
2253 classes[i] = X86_64_NO_CLASS;
2255 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2256 signal the memory class, so handle them as a special case. */
2259 classes[0] = X86_64_NO_CLASS;
2263 /* Classify each field of the record and merge the classes. */
2264 switch (TREE_CODE (type))
2267 /* For classes, first merge in the fields of the base classes. */
2268 if (TYPE_BINFO (type))
2270 tree binfo, base_binfo;
2273 for (binfo = TYPE_BINFO (type), basenum = 0;
2274 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2277 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2278 tree type = BINFO_TYPE (base_binfo);
2280 num = classify_argument (TYPE_MODE (type),
2282 (offset + bit_offset) % 256);
2285 for (i = 0; i < num; i++)
2287 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2289 merge_classes (subclasses[i], classes[i + pos]);
2293 /* And now merge the fields of the structure. */
2294 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2296 if (TREE_CODE (field) == FIELD_DECL)
2300 /* Bitfields are always classified as integer. Handle them
2301 early, since later code would consider them to be
2302 misaligned integers. */
2303 if (DECL_BIT_FIELD (field))
2305 for (i = int_bit_position (field) / 8 / 8;
2306 i < (int_bit_position (field)
2307 + tree_low_cst (DECL_SIZE (field), 0)
2310 merge_classes (X86_64_INTEGER_CLASS,
2315 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2316 TREE_TYPE (field), subclasses,
2317 (int_bit_position (field)
2318 + bit_offset) % 256);
2321 for (i = 0; i < num; i++)
2324 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2326 merge_classes (subclasses[i], classes[i + pos]);
2334 /* Arrays are handled as small records. */
2337 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2338 TREE_TYPE (type), subclasses, bit_offset);
2342 /* The partial classes are now full classes. */
2343 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2344 subclasses[0] = X86_64_SSE_CLASS;
2345 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2346 subclasses[0] = X86_64_INTEGER_CLASS;
2348 for (i = 0; i < words; i++)
2349 classes[i] = subclasses[i % num];
2354 case QUAL_UNION_TYPE:
2355 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
2358 /* Unions are not derived. */
2359 gcc_assert (!TYPE_BINFO (type)
2360 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
2361 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2363 if (TREE_CODE (field) == FIELD_DECL)
2366 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2367 TREE_TYPE (field), subclasses,
2371 for (i = 0; i < num; i++)
2372 classes[i] = merge_classes (subclasses[i], classes[i]);
2381 /* Final merger cleanup. */
2382 for (i = 0; i < words; i++)
2384 /* If one class is MEMORY, everything should be passed in
2385 memory. */
2386 if (classes[i] == X86_64_MEMORY_CLASS)
2389 /* The X86_64_SSEUP_CLASS should always be preceded by
2390 X86_64_SSE_CLASS. */
2391 if (classes[i] == X86_64_SSEUP_CLASS
2392 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2393 classes[i] = X86_64_SSE_CLASS;
2395 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2396 if (classes[i] == X86_64_X87UP_CLASS
2397 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2398 classes[i] = X86_64_SSE_CLASS;
2403 /* Compute the alignment needed. We align all types to their natural
2404 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
2405 if (mode != VOIDmode && mode != BLKmode)
2407 int mode_alignment = GET_MODE_BITSIZE (mode);
2410 mode_alignment = 128;
2411 else if (mode == XCmode)
2412 mode_alignment = 256;
2413 if (COMPLEX_MODE_P (mode))
2414 mode_alignment /= 2;
2415 /* Misaligned fields are always returned in memory. */
2416 if (bit_offset % mode_alignment)
2420 /* For V1xx modes, just use the base mode. */
2421 if (VECTOR_MODE_P (mode)
2422 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2423 mode = GET_MODE_INNER (mode);
2425 /* Classification of atomic types. */
2435 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2436 classes[0] = X86_64_INTEGERSI_CLASS;
2438 classes[0] = X86_64_INTEGER_CLASS;
2442 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2447 if (!(bit_offset % 64))
2448 classes[0] = X86_64_SSESF_CLASS;
2450 classes[0] = X86_64_SSE_CLASS;
2453 classes[0] = X86_64_SSEDF_CLASS;
2456 classes[0] = X86_64_X87_CLASS;
2457 classes[1] = X86_64_X87UP_CLASS;
2460 classes[0] = X86_64_SSE_CLASS;
2461 classes[1] = X86_64_SSEUP_CLASS;
2464 classes[0] = X86_64_SSE_CLASS;
2467 classes[0] = X86_64_SSEDF_CLASS;
2468 classes[1] = X86_64_SSEDF_CLASS;
2471 classes[0] = X86_64_COMPLEX_X87_CLASS;
2474 /* These modes are larger than 16 bytes. */
2482 classes[0] = X86_64_SSE_CLASS;
2483 classes[1] = X86_64_SSEUP_CLASS;
2489 classes[0] = X86_64_SSE_CLASS;
2495 gcc_assert (VECTOR_MODE_P (mode));
2500 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
2502 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2503 classes[0] = X86_64_INTEGERSI_CLASS;
2505 classes[0] = X86_64_INTEGER_CLASS;
2506 classes[1] = X86_64_INTEGER_CLASS;
2507 return 1 + (bytes > 8);
2511 /* Examine the argument and return the number of registers required in
2512 each class. Return 0 iff the parameter should be passed in memory. */
2514 examine_argument (enum machine_mode mode, tree type, int in_return,
2515 int *int_nregs, int *sse_nregs)
2517 enum x86_64_reg_class class[MAX_CLASSES];
2518 int n = classify_argument (mode, type, class, 0);
2524 for (n--; n >= 0; n--)
2527 case X86_64_INTEGER_CLASS:
2528 case X86_64_INTEGERSI_CLASS:
2531 case X86_64_SSE_CLASS:
2532 case X86_64_SSESF_CLASS:
2533 case X86_64_SSEDF_CLASS:
2536 case X86_64_NO_CLASS:
2537 case X86_64_SSEUP_CLASS:
2539 case X86_64_X87_CLASS:
2540 case X86_64_X87UP_CLASS:
2544 case X86_64_COMPLEX_X87_CLASS:
2545 return in_return ? 2 : 0;
2546 case X86_64_MEMORY_CLASS:
2552 /* Construct the container for the argument as used by the GCC interface.
2553 See FUNCTION_ARG for the detailed description. */
2556 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
2557 tree type, int in_return, int nintregs, int nsseregs,
2558 const int *intreg, int sse_regno)
2560 enum machine_mode tmpmode;
2562 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2563 enum x86_64_reg_class class[MAX_CLASSES];
2567 int needed_sseregs, needed_intregs;
2568 rtx exp[MAX_CLASSES];
2571 n = classify_argument (mode, type, class, 0);
2572 if (TARGET_DEBUG_ARG)
2575 fprintf (stderr, "Memory class\n");
2578 fprintf (stderr, "Classes:");
2579 for (i = 0; i < n; i++)
2581 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2583 fprintf (stderr, "\n");
2588 if (!examine_argument (mode, type, in_return, &needed_intregs,
2591 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2594 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2595 some less clueful developer tries to use floating-point anyway. */
2596 if (needed_sseregs && !TARGET_SSE)
2598 static bool issued_error;
2601 issued_error = true;
2603 error ("SSE register return with SSE disabled");
2605 error ("SSE register argument with SSE disabled");
2610 /* First construct the simple cases. Avoid SCmode, since we want to use
2611 a single register to pass this type. */
2612 if (n == 1 && mode != SCmode)
2615 case X86_64_INTEGER_CLASS:
2616 case X86_64_INTEGERSI_CLASS:
2617 return gen_rtx_REG (mode, intreg[0]);
2618 case X86_64_SSE_CLASS:
2619 case X86_64_SSESF_CLASS:
2620 case X86_64_SSEDF_CLASS:
2621 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
2622 case X86_64_X87_CLASS:
2623 case X86_64_COMPLEX_X87_CLASS:
2624 return gen_rtx_REG (mode, FIRST_STACK_REG);
2625 case X86_64_NO_CLASS:
2626 /* Zero sized array, struct or class. */
2631 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2633 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2635 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2636 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2637 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2638 && class[1] == X86_64_INTEGER_CLASS
2639 && (mode == CDImode || mode == TImode || mode == TFmode)
2640 && intreg[0] + 1 == intreg[1])
2641 return gen_rtx_REG (mode, intreg[0]);
2643 /* Otherwise figure out the entries of the PARALLEL. */
2644 for (i = 0; i < n; i++)
2648 case X86_64_NO_CLASS:
2650 case X86_64_INTEGER_CLASS:
2651 case X86_64_INTEGERSI_CLASS:
2652 /* Merge TImodes on aligned occasions here too. */
2653 if (i * 8 + 8 > bytes)
2654 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2655 else if (class[i] == X86_64_INTEGERSI_CLASS)
2659 /* We've requested a size (e.g. 24 bytes) for which no integer mode exists. Use DImode. */
2660 if (tmpmode == BLKmode)
2662 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2663 gen_rtx_REG (tmpmode, *intreg),
2667 case X86_64_SSESF_CLASS:
2668 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2669 gen_rtx_REG (SFmode,
2670 SSE_REGNO (sse_regno)),
2674 case X86_64_SSEDF_CLASS:
2675 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2676 gen_rtx_REG (DFmode,
2677 SSE_REGNO (sse_regno)),
2681 case X86_64_SSE_CLASS:
2682 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2686 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2687 gen_rtx_REG (tmpmode,
2688 SSE_REGNO (sse_regno)),
2690 if (tmpmode == TImode)
2699 /* Empty aligned struct, union or class. */
2703 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2704 for (i = 0; i < nexps; i++)
2705 XVECEXP (ret, 0, i) = exp [i];
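/* For the two-word struct classified SSE + INTEGER in the example above,
   the PARALLEL built here looks roughly like (a sketch):
     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di)   (const_int 8))])
   i.e. byte 0 of the argument travels in %xmm0 and byte 8 in %rdi.  */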
2709 /* Update the data in CUM to advance over an argument
2710 of mode MODE and data type TYPE.
2711 (TYPE is null for libcalls where that information may not be available.) */
2714 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2715 tree type, int named)
2718 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2719 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2722 mode = type_natural_mode (type);
2724 if (TARGET_DEBUG_ARG)
2725 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
2726 "mode=%s, named=%d)\n\n",
2727 words, cum->words, cum->nregs, cum->sse_nregs,
2728 GET_MODE_NAME (mode), named);
2732 int int_nregs, sse_nregs;
2733 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2734 cum->words += words;
2735 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2737 cum->nregs -= int_nregs;
2738 cum->sse_nregs -= sse_nregs;
2739 cum->regno += int_nregs;
2740 cum->sse_regno += sse_nregs;
2743 cum->words += words;
2761 cum->words += words;
2762 cum->nregs -= words;
2763 cum->regno += words;
2765 if (cum->nregs <= 0)
2776 if (!cum->float_in_sse)
2787 if (!type || !AGGREGATE_TYPE_P (type))
2789 cum->sse_words += words;
2790 cum->sse_nregs -= 1;
2791 cum->sse_regno += 1;
2792 if (cum->sse_nregs <= 0)
2804 if (!type || !AGGREGATE_TYPE_P (type))
2806 cum->mmx_words += words;
2807 cum->mmx_nregs -= 1;
2808 cum->mmx_regno += 1;
2809 if (cum->mmx_nregs <= 0)
2820 /* Define where to put the arguments to a function.
2821 Value is zero to push the argument on the stack,
2822 or a hard register in which to store the argument.
2824 MODE is the argument's machine mode.
2825 TYPE is the data type of the argument (as a tree).
2826 This is null for libcalls where that information may
2827 not be available.
2828 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2829 the preceding args and about the function being called.
2830 NAMED is nonzero if this argument is a named parameter
2831 (otherwise it is an extra parameter matching an ellipsis). */
2834 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
2835 tree type, int named)
2837 enum machine_mode mode = orig_mode;
2840 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2841 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2842 static bool warnedsse, warnedmmx;
2844 /* To simplify the code below, represent vector types with a vector mode
2845 even if MMX/SSE are not active. */
2846 if (type && TREE_CODE (type) == VECTOR_TYPE)
2847 mode = type_natural_mode (type);
2849 /* Handle a hidden AL argument containing number of registers for varargs
2850 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2851 any AL settings. */
2852 if (mode == VOIDmode)
2855 return GEN_INT (cum->maybe_vaarg
2856 ? (cum->sse_nregs < 0
2864 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
2866 &x86_64_int_parameter_registers [cum->regno],
2871 /* For now, pass fp/complex values on the stack. */
2883 if (words <= cum->nregs)
2885 int regno = cum->regno;
2887 /* Fastcall allocates the first two DWORD (SImode) or
2888 smaller arguments to ECX and EDX. */
2891 if (mode == BLKmode || mode == DImode)
2894 /* ECX, not EAX, is the first allocated register. */
2898 ret = gen_rtx_REG (mode, regno);
2905 if (!cum->float_in_sse)
2915 if (!type || !AGGREGATE_TYPE_P (type))
2917 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2920 warning (0, "SSE vector argument without SSE enabled "
2924 ret = gen_reg_or_parallel (mode, orig_mode,
2925 cum->sse_regno + FIRST_SSE_REG);
2932 if (!type || !AGGREGATE_TYPE_P (type))
2934 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2937 warning (0, "MMX vector argument without MMX enabled "
2941 ret = gen_reg_or_parallel (mode, orig_mode,
2942 cum->mmx_regno + FIRST_MMX_REG);
2947 if (TARGET_DEBUG_ARG)
2950 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2951 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2954 print_simple_rtl (stderr, ret);
2956 fprintf (stderr, ", stack");
2958 fprintf (stderr, " )\n");
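/* Sketch of the hidden AL argument mentioned above: for a varargs call
   such as printf ("%f", x) on x86-64, the caller loads the number of
   SSE registers carrying arguments into %al before the call, e.g.
       movl  $1, %eax
       call  printf
   so the callee's prologue knows how many SSE registers to save.  */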
2964 /* A C expression that indicates when an argument must be passed by
2965 reference. If nonzero for an argument, a copy of that argument is
2966 made in memory and a pointer to the argument is passed instead of
2967 the argument itself. The pointer is passed in whatever way is
2968 appropriate for passing a pointer to that type. */
2971 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2972 enum machine_mode mode ATTRIBUTE_UNUSED,
2973 tree type, bool named ATTRIBUTE_UNUSED)
2978 if (type && int_size_in_bytes (type) == -1)
2980 if (TARGET_DEBUG_ARG)
2981 fprintf (stderr, "function_arg_pass_by_reference\n");
2988 /* Return true when TYPE should be 128-bit aligned for the 32-bit
2989 argument passing ABI. Only called if TARGET_SSE. */
2991 contains_128bit_aligned_vector_p (tree type)
2993 enum machine_mode mode = TYPE_MODE (type);
2994 if (SSE_REG_MODE_P (mode)
2995 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2996 return true;
2997 if (TYPE_ALIGN (type) < 128)
2998 return false;
3000 if (AGGREGATE_TYPE_P (type))
3002 /* Walk the aggregates recursively. */
3003 switch (TREE_CODE (type))
3007 case QUAL_UNION_TYPE:
3011 if (TYPE_BINFO (type))
3013 tree binfo, base_binfo;
3016 for (binfo = TYPE_BINFO (type), i = 0;
3017 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
3018 if (contains_128bit_aligned_vector_p
3019 (BINFO_TYPE (base_binfo)))
3022 /* And now check the fields of the structure. */
3023 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3025 if (TREE_CODE (field) == FIELD_DECL
3026 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3033 /* Just for use if some languages pass arrays by value. */
3034 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3044 /* Gives the alignment boundary, in bits, of an argument with the
3045 specified mode and type. */
3048 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3052 align = TYPE_ALIGN (type);
3054 align = GET_MODE_ALIGNMENT (mode);
3055 if (align < PARM_BOUNDARY)
3056 align = PARM_BOUNDARY;
3059 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3060 make an exception for SSE modes since these require 128bit
3061 alignment.
3063 The handling here differs from field_alignment. ICC aligns MMX
3064 arguments to 4 byte boundaries, while structure fields are aligned
3065 to 8 byte boundaries. */
3067 align = PARM_BOUNDARY;
3070 if (!SSE_REG_MODE_P (mode))
3071 align = PARM_BOUNDARY;
3075 if (!contains_128bit_aligned_vector_p (type))
3076 align = PARM_BOUNDARY;
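/* Example (a sketch; __m128 from <xmmintrin.h> is assumed): on ia32 a
   scalar argument uses the 4-byte PARM_BOUNDARY, but an SSE vector needs
   a 128-bit slot:
     void f (int a, __m128 v);   -- 'v' gets a 16-byte aligned stack slot  */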
3084 /* Return true if N is a possible register number for a function value. */
3086 ix86_function_value_regno_p (int regno)
3090 return ((regno) == 0
3091 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3092 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
3094 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
3095 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
3096 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
3099 /* Define how to find the value returned by a function.
3100 VALTYPE is the data type of the value (as a tree).
3101 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3102 otherwise, FUNC is 0. */
3104 ix86_function_value (tree valtype, tree func)
3106 enum machine_mode natmode = type_natural_mode (valtype);
3110 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3111 1, REGPARM_MAX, SSE_REGPARM_MAX,
3112 x86_64_int_return_registers, 0);
3113 /* For zero sized structures, construct_container returns NULL, but we
3114 need to keep the rest of the compiler happy by returning a meaningful value. */
3116 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3120 return gen_rtx_REG (TYPE_MODE (valtype), ix86_value_regno (natmode, func));
3123 /* Return true iff type is returned in memory. */
3125 ix86_return_in_memory (tree type)
3127 int needed_intregs, needed_sseregs, size;
3128 enum machine_mode mode = type_natural_mode (type);
3131 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3133 if (mode == BLKmode)
3136 size = int_size_in_bytes (type);
3138 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3141 if (VECTOR_MODE_P (mode) || mode == TImode)
3143 /* User-created vectors small enough to fit in EAX. */
3147 /* MMX/3dNow values are returned on the stack, since we've
3148 got to EMMS/FEMMS before returning. */
3152 /* SSE values are returned in XMM0, except when it doesn't exist. */
3154 return (TARGET_SSE ? 0 : 1);
3165 /* When returning SSE vector types, we have a choice of either
3166 (1) being ABI incompatible with a -march switch, or
3167 (2) generating an error.
3168 Given no good solution, I think the safest thing is one warning.
3169 The user won't be able to use -Werror, but....
3171 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3172 called in response to actually generating a caller or callee that
3173 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3174 via aggregate_value_p for general type probing from tree-ssa. */
3177 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3181 if (!TARGET_SSE && type && !warned)
3183 /* Look at the return type of the function, not the function type. */
3184 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3187 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3190 warning (0, "SSE vector return without SSE enabled changes the ABI");
3197 /* Define how to find the value returned by a library function
3198 assuming the value has mode MODE. */
3200 ix86_libcall_value (enum machine_mode mode)
3211 return gen_rtx_REG (mode, FIRST_SSE_REG);
3214 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3218 return gen_rtx_REG (mode, 0);
3222 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL));
3225 /* Given a mode, return the register to use for a return value. */
3228 ix86_value_regno (enum machine_mode mode, tree func)
3230 gcc_assert (!TARGET_64BIT);
3232 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3233 we prevent this case when SSE is not available. */
3234 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3235 return FIRST_SSE_REG;
3237 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
3238 if (GET_MODE_CLASS (mode) != MODE_FLOAT || !TARGET_FLOAT_RETURNS_IN_80387)
3241 /* Floating point return values in %st(0), except for local functions when
3242 SSE math is enabled. */
3243 if (func && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH
3244 && flag_unit_at_a_time)
3246 struct cgraph_local_info *i = cgraph_local_info (func);
3248 return FIRST_SSE_REG;
3251 return FIRST_FLOAT_REG;
3254 /* Create the va_list data type. */
3257 ix86_build_builtin_va_list (void)
3259 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3261 /* For i386 we use a plain pointer to the argument area. */
3263 return build_pointer_type (char_type_node);
3265 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3266 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3268 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3269 unsigned_type_node);
3270 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3271 unsigned_type_node);
3272 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3274 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3277 va_list_gpr_counter_field = f_gpr;
3278 va_list_fpr_counter_field = f_fpr;
3280 DECL_FIELD_CONTEXT (f_gpr) = record;
3281 DECL_FIELD_CONTEXT (f_fpr) = record;
3282 DECL_FIELD_CONTEXT (f_ovf) = record;
3283 DECL_FIELD_CONTEXT (f_sav) = record;
3285 TREE_CHAIN (record) = type_decl;
3286 TYPE_NAME (record) = type_decl;
3287 TYPE_FIELDS (record) = f_gpr;
3288 TREE_CHAIN (f_gpr) = f_fpr;
3289 TREE_CHAIN (f_fpr) = f_ovf;
3290 TREE_CHAIN (f_ovf) = f_sav;
3292 layout_type (record);
3294 /* The correct type is an array type of one element. */
3295 return build_array_type (record, build_index_type (size_zero_node));
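/* The record built above corresponds to the psABI va_list declaration
   (a sketch; field semantics per the x86-64 psABI):  */
#if 0
typedef struct {
  unsigned int gp_offset;     /* byte offset into reg_save_area for GPRs */
  unsigned int fp_offset;     /* byte offset into reg_save_area for SSE regs */
  void *overflow_arg_area;    /* arguments passed on the stack */
  void *reg_save_area;        /* register save block from the prologue */
} __va_list_tag;
typedef __va_list_tag va_list[1];  /* the one-element array type built above */
#endif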
3298 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3301 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3302 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3305 CUMULATIVE_ARGS next_cum;
3306 rtx save_area = NULL_RTX, mem;
3319 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
3322 /* Indicate that we need to allocate stack space for the varargs save area. */
3323 ix86_save_varrargs_registers = 1;
3325 cfun->stack_alignment_needed = 128;
3327 fntype = TREE_TYPE (current_function_decl);
3328 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3329 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3330 != void_type_node));
3332 /* For varargs, we do not want to skip the dummy va_dcl argument.
3333 For stdargs, we do want to skip the last named argument. */
3336 function_arg_advance (&next_cum, mode, type, 1);
3339 save_area = frame_pointer_rtx;
3341 set = get_varargs_alias_set ();
3343 for (i = next_cum.regno;
3345 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
3348 mem = gen_rtx_MEM (Pmode,
3349 plus_constant (save_area, i * UNITS_PER_WORD));
3350 set_mem_alias_set (mem, set);
3351 emit_move_insn (mem, gen_rtx_REG (Pmode,
3352 x86_64_int_parameter_registers[i]));
3355 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
3357 /* Now emit code to save SSE registers. The AX parameter contains the
3358 number of SSE parameter registers used to call this function. We use
3359 the sse_prologue_save insn template, which produces a computed jump
3360 across the SSE saves. We need some preparation work to get this working. */
3362 label = gen_label_rtx ();
3363 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3365 /* Compute the address to jump to:
3366 label - 5*eax + nnamed_sse_arguments*5 */
3367 tmp_reg = gen_reg_rtx (Pmode);
3368 nsse_reg = gen_reg_rtx (Pmode);
3369 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3370 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3371 gen_rtx_MULT (Pmode, nsse_reg,
3373 if (next_cum.sse_regno)
3376 gen_rtx_CONST (DImode,
3377 gen_rtx_PLUS (DImode,
3379 GEN_INT (next_cum.sse_regno * 4))));
3381 emit_move_insn (nsse_reg, label_ref);
3382 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3384 /* Compute the address of the memory block we save into. We always use a
3385 pointer pointing 127 bytes after the first byte to store; this is
3386 needed to keep the instruction size limited to 4 bytes. */
3387 tmp_reg = gen_reg_rtx (Pmode);
3388 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3389 plus_constant (save_area,
3390 8 * REGPARM_MAX + 127)));
3391 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3392 set_mem_alias_set (mem, set);
3393 set_mem_align (mem, BITS_PER_WORD);
3395 /* And finally do the dirty job! */
3396 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3397 GEN_INT (next_cum.sse_regno), label));
3402 /* Implement va_start. */
3405 ix86_va_start (tree valist, rtx nextarg)
3407 HOST_WIDE_INT words, n_gpr, n_fpr;
3408 tree f_gpr, f_fpr, f_ovf, f_sav;
3409 tree gpr, fpr, ovf, sav, t;
3411 /* Only the 64-bit target needs something special. */
3414 std_expand_builtin_va_start (valist, nextarg);
3418 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3419 f_fpr = TREE_CHAIN (f_gpr);
3420 f_ovf = TREE_CHAIN (f_fpr);
3421 f_sav = TREE_CHAIN (f_ovf);
3423 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3424 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3425 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3426 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3427 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3429 /* Count number of gp and fp argument registers used. */
3430 words = current_function_args_info.words;
3431 n_gpr = current_function_args_info.regno;
3432 n_fpr = current_function_args_info.sse_regno;
3434 if (TARGET_DEBUG_ARG)
3435 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3436 (int) words, (int) n_gpr, (int) n_fpr);
3438 if (cfun->va_list_gpr_size)
3440 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3441 build_int_cst (NULL_TREE, n_gpr * 8));
3442 TREE_SIDE_EFFECTS (t) = 1;
3443 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3446 if (cfun->va_list_fpr_size)
3448 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3449 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
3450 TREE_SIDE_EFFECTS (t) = 1;
3451 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3454 /* Find the overflow area. */
3455 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3457 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3458 build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
3459 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3460 TREE_SIDE_EFFECTS (t) = 1;
3461 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3463 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
3465 /* Find the register save area.
3466 The function prologue saves it right above the stack frame. */
3467 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3468 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3469 TREE_SIDE_EFFECTS (t) = 1;
3470 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
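/* Worked example (a sketch): for  void f (int a, double d, ...)  the
   code above initializes the va_list roughly as
     gp_offset = 1 * 8;        -- one GP register (a) already consumed
     fp_offset = 48 + 1 * 16;  -- 8*REGPARM_MAX plus one SSE register (d)
     overflow_arg_area = <first stack-passed argument>;
     reg_save_area = <block saved by the prologue>;  */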
3474 /* Implement va_arg. */
3477 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3479 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3480 tree f_gpr, f_fpr, f_ovf, f_sav;
3481 tree gpr, fpr, ovf, sav, t;
3483 tree lab_false, lab_over = NULL_TREE;
3488 enum machine_mode nat_mode;
3490 /* Only the 64-bit target needs something special. */
3492 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3494 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3495 f_fpr = TREE_CHAIN (f_gpr);
3496 f_ovf = TREE_CHAIN (f_fpr);
3497 f_sav = TREE_CHAIN (f_ovf);
3499 valist = build_va_arg_indirect_ref (valist);
3500 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3501 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3502 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3503 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3505 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3507 type = build_pointer_type (type);
3508 size = int_size_in_bytes (type);
3509 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3511 nat_mode = type_natural_mode (type);
3512 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
3513 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3515 /* Pull the value out of the saved registers. */
3517 addr = create_tmp_var (ptr_type_node, "addr");
3518 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3522 int needed_intregs, needed_sseregs;
3524 tree int_addr, sse_addr;
3526 lab_false = create_artificial_label ();
3527 lab_over = create_artificial_label ();
3529 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
3531 need_temp = (!REG_P (container)
3532 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3533 || TYPE_ALIGN (type) > 128));
3535 /* If we are passing a structure, verify that it is a consecutive block
3536 in the register save area. If not, we need to do moves. */
3537 if (!need_temp && !REG_P (container))
3539 /* Verify that all registers are strictly consecutive. */
3540 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3544 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3546 rtx slot = XVECEXP (container, 0, i);
3547 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3548 || INTVAL (XEXP (slot, 1)) != i * 16)
3556 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3558 rtx slot = XVECEXP (container, 0, i);
3559 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3560 || INTVAL (XEXP (slot, 1)) != i * 8)
3572 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3573 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3574 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3575 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3578 /* First ensure that we fit completely in registers. */
3581 t = build_int_cst (TREE_TYPE (gpr),
3582 (REGPARM_MAX - needed_intregs + 1) * 8);
3583 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3584 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3585 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3586 gimplify_and_add (t, pre_p);
3590 t = build_int_cst (TREE_TYPE (fpr),
3591 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3593 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3594 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3595 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3596 gimplify_and_add (t, pre_p);
3599 /* Compute index to start of area used for integer regs. */
3602 /* int_addr = gpr + sav; */
3603 t = fold_convert (ptr_type_node, gpr);
3604 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3605 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3606 gimplify_and_add (t, pre_p);
3610 /* sse_addr = fpr + sav; */
3611 t = fold_convert (ptr_type_node, fpr);
3612 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3613 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3614 gimplify_and_add (t, pre_p);
3619 tree temp = create_tmp_var (type, "va_arg_tmp");
3622 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3623 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3624 gimplify_and_add (t, pre_p);
3626 for (i = 0; i < XVECLEN (container, 0); i++)
3628 rtx slot = XVECEXP (container, 0, i);
3629 rtx reg = XEXP (slot, 0);
3630 enum machine_mode mode = GET_MODE (reg);
3631 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3632 tree addr_type = build_pointer_type (piece_type);
3635 tree dest_addr, dest;
3637 if (SSE_REGNO_P (REGNO (reg)))
3639 src_addr = sse_addr;
3640 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3644 src_addr = int_addr;
3645 src_offset = REGNO (reg) * 8;
3647 src_addr = fold_convert (addr_type, src_addr);
3648 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3649 size_int (src_offset)));
3650 src = build_va_arg_indirect_ref (src_addr);
3652 dest_addr = fold_convert (addr_type, addr);
3653 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3654 size_int (INTVAL (XEXP (slot, 1)))));
3655 dest = build_va_arg_indirect_ref (dest_addr);
3657 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3658 gimplify_and_add (t, pre_p);
3664 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3665 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
3666 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3667 gimplify_and_add (t, pre_p);
3671 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3672 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
3673 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3674 gimplify_and_add (t, pre_p);
3677 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3678 gimplify_and_add (t, pre_p);
3680 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3681 append_to_statement_list (t, pre_p);
3684 /* ... otherwise out of the overflow area. */
3686 /* Care for on-stack alignment if needed. */
3687 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3691 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3692 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
3693 build_int_cst (TREE_TYPE (ovf), align - 1));
3694 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3695 build_int_cst (TREE_TYPE (t), -align));
3697 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3699 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3700 gimplify_and_add (t2, pre_p);
3702 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3703 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
3704 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3705 gimplify_and_add (t, pre_p);
3709 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3710 append_to_statement_list (t, pre_p);
3713 ptrtype = build_pointer_type (type);
3714 addr = fold_convert (ptrtype, addr);
3717 addr = build_va_arg_indirect_ref (addr);
3718 return build_va_arg_indirect_ref (addr);
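/* Shape of the code gimplified above for va_arg (ap, int), written out
   as a C sketch (register counts per the x86-64 psABI):  */
#if 0
if (ap->gp_offset >= 6 * 8)            /* all six GP registers used up */
  {
    addr = ap->overflow_arg_area;      /* take the value from the stack */
    ap->overflow_arg_area = (char *) ap->overflow_arg_area + 8;
  }
else
  {
    addr = (char *) ap->reg_save_area + ap->gp_offset;
    ap->gp_offset += 8;
  }
result = *(int *) addr;
#endif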
3721 /* Return nonzero if OPNUM's MEM should be matched
3722 in movabs* patterns. */
3725 ix86_check_movabs (rtx insn, int opnum)
3729 set = PATTERN (insn);
3730 if (GET_CODE (set) == PARALLEL)
3731 set = XVECEXP (set, 0, 0);
3732 gcc_assert (GET_CODE (set) == SET);
3733 mem = XEXP (set, opnum);
3734 while (GET_CODE (mem) == SUBREG)
3735 mem = SUBREG_REG (mem);
3736 gcc_assert (GET_CODE (mem) == MEM);
3737 return (volatile_ok || !MEM_VOLATILE_P (mem));
3740 /* Initialize the table of extra 80387 mathematical constants. */
3743 init_ext_80387_constants (void)
3745 static const char * cst[5] =
3747 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
3748 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
3749 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
3750 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
3751 "3.1415926535897932385128089594061862044", /* 4: fldpi */
3755 for (i = 0; i < 5; i++)
3757 real_from_string (&ext_80387_constants_table[i], cst[i]);
3758 /* Ensure each constant is rounded to XFmode precision. */
3759 real_convert (&ext_80387_constants_table[i],
3760 XFmode, &ext_80387_constants_table[i]);
3763 ext_80387_constants_init = 1;
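/* Example (a sketch): with this table initialized, a function returning
   pi, e.g.
     long double pi (void) { return 3.1415926535897932385L; }
   can be compiled to a single "fldpi" instead of a constant-pool load,
   when optimizing for size or on CPUs where the sequence wins.  */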
3766 /* Return true if the constant is something that can be loaded with
3767 a special instruction. */
3770 standard_80387_constant_p (rtx x)
3772 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3775 if (x == CONST0_RTX (GET_MODE (x)))
3777 if (x == CONST1_RTX (GET_MODE (x)))
3780 /* For XFmode constants, try to find a special 80387 instruction when
3781 optimizing for size or on those CPUs that benefit from them. */
3782 if (GET_MODE (x) == XFmode
3783 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
3788 if (! ext_80387_constants_init)
3789 init_ext_80387_constants ();
3791 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3792 for (i = 0; i < 5; i++)
3793 if (real_identical (&r, &ext_80387_constants_table[i]))
3800 /* Return the opcode of the special instruction to be used to load
3801 the constant X. */
3804 standard_80387_constant_opcode (rtx x)
3806 switch (standard_80387_constant_p (x))
3827 /* Return the CONST_DOUBLE representing the 80387 constant that is
3828 loaded by the specified special instruction. The argument IDX
3829 matches the return value from standard_80387_constant_p. */
3832 standard_80387_constant_rtx (int idx)
3836 if (! ext_80387_constants_init)
3837 init_ext_80387_constants ();
3853 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
3857 /* Return 1 if X is an FP constant we can load into an SSE register without using memory. */
3860 standard_sse_constant_p (rtx x)
3862 if (x == const0_rtx)
3864 return (x == CONST0_RTX (GET_MODE (x)));
3867 /* Returns 1 if OP contains a symbol reference. */
3870 symbolic_reference_mentioned_p (rtx op)
3875 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3878 fmt = GET_RTX_FORMAT (GET_CODE (op));
3879 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3885 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3886 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3890 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3897 /* Return 1 if it is appropriate to emit `ret' instructions in the
3898 body of a function. Do this only if the epilogue is simple, needing a
3899 couple of insns. Prior to reloading, we can't tell how many registers
3900 must be saved, so return 0 then. Return 0 if there is no frame
3901 marker to de-allocate. */
3904 ix86_can_use_return_insn_p (void)
3906 struct ix86_frame frame;
3908 if (! reload_completed || frame_pointer_needed)
3911 /* Don't allow popping more than 32k bytes of arguments, since that's
3912 all we can do with one instruction. */
3913 if (current_function_pops_args
3914 && current_function_args_size >= 32768)
3917 ix86_compute_frame_layout (&frame);
3918 return frame.to_allocate == 0 && frame.nregs == 0;
3921 /* Value should be nonzero if functions must have frame pointers.
3922 Zero means the frame pointer need not be set up (and parms may
3923 be accessed via the stack pointer) in functions that seem suitable. */
3926 ix86_frame_pointer_required (void)
3928 /* If we accessed previous frames, then the generated code expects
3929 to be able to access the saved ebp value in our frame. */
3930 if (cfun->machine->accesses_prev_frame)
3933 /* Several x86 OSes need a frame pointer for other reasons,
3934 usually pertaining to setjmp. */
3935 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3938 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3939 the frame pointer by default. Turn it back on now if we've not
3940 got a leaf function. */
3941 if (TARGET_OMIT_LEAF_FRAME_POINTER
3942 && (!current_function_is_leaf))
3945 if (current_function_profile)
3951 /* Record that the current function accesses previous call frames. */
3954 ix86_setup_frame_addresses (void)
3956 cfun->machine->accesses_prev_frame = 1;
3959 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3960 # define USE_HIDDEN_LINKONCE 1
3962 # define USE_HIDDEN_LINKONCE 0
3965 static int pic_labels_used;
3967 /* Fills in the label name that should be used for a pc thunk for
3968 the given register. */
3971 get_pc_thunk_name (char name[32], unsigned int regno)
3973 if (USE_HIDDEN_LINKONCE)
3974 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3976 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
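/* E.g. (assuming the usual i386 reg_names[] contents), regno 3 (%ebx)
   yields "__i686.get_pc_thunk.bx" when USE_HIDDEN_LINKONCE, and an
   internal label of the form "LPR3" otherwise.  */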
3980 /* This function generates code for -fpic that loads %ebx with
3981 the return address of the caller and then returns. */
3984 ix86_file_end (void)
3989 for (regno = 0; regno < 8; ++regno)
3993 if (! ((pic_labels_used >> regno) & 1))
3996 get_pc_thunk_name (name, regno);
3998 if (USE_HIDDEN_LINKONCE)
4002 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4004 TREE_PUBLIC (decl) = 1;
4005 TREE_STATIC (decl) = 1;
4006 DECL_ONE_ONLY (decl) = 1;
4008 (*targetm.asm_out.unique_section) (decl, 0);
4009 named_section (decl, NULL, 0);
4011 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4012 fputs ("\t.hidden\t", asm_out_file);
4013 assemble_name (asm_out_file, name);
4014 fputc ('\n', asm_out_file);
4015 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4020 ASM_OUTPUT_LABEL (asm_out_file, name);
4023 xops[0] = gen_rtx_REG (SImode, regno);
4024 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4025 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4026 output_asm_insn ("ret", xops);
4029 if (NEED_INDICATE_EXEC_STACK)
4030 file_end_indicate_exec_stack ();
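/* The thunk body emitted above is, schematically (AT&T syntax):

	__i686.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret

   i.e. it copies its own return address into the destination register,
   handing the caller its program counter.  */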
4033 /* Emit code for the SET_GOT patterns. */
4036 output_set_got (rtx dest)
4041 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4043 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4045 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4048 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4050 output_asm_insn ("call\t%a2", xops);
4053 /* Output the "canonical" label name ("Lxx$pb") here too. This
4054 is what will be referred to by the Mach-O PIC subsystem. */
4055 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4057 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4058 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4061 output_asm_insn ("pop{l}\t%0", xops);
4066 get_pc_thunk_name (name, REGNO (dest));
4067 pic_labels_used |= 1 << REGNO (dest);
4069 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4070 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4071 output_asm_insn ("call\t%X2", xops);
4074 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4075 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4076 else if (!TARGET_MACHO)
4077 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4082 /* Generate a "push" pattern for input ARG. */
4087 return gen_rtx_SET (VOIDmode,
4089 gen_rtx_PRE_DEC (Pmode,
4090 stack_pointer_rtx)),
4094 /* Return >= 0 if there is an unused call-clobbered register available
4095 for the entire function. */
4098 ix86_select_alt_pic_regnum (void)
4100 if (current_function_is_leaf && !current_function_profile)
4103 for (i = 2; i >= 0; --i)
4104 if (!regs_ever_live[i])
4108 return INVALID_REGNUM;
4111 /* Return 1 if we need to save REGNO. */
4113 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4115 if (pic_offset_table_rtx
4116 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4117 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4118 || current_function_profile
4119 || current_function_calls_eh_return
4120 || current_function_uses_const_pool))
4122 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4127 if (current_function_calls_eh_return && maybe_eh_return)
4132 unsigned test = EH_RETURN_DATA_REGNO (i);
4133 if (test == INVALID_REGNUM)
4140 return (regs_ever_live[regno]
4141 && !call_used_regs[regno]
4142 && !fixed_regs[regno]
4143 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4146 /* Return number of registers to be saved on the stack. */
4149 ix86_nsaved_regs (void)
4154 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4155 if (ix86_save_reg (regno, true))
4160 /* Return the offset between two registers, one to be eliminated, and the other
4161 its replacement, at the start of a routine. */
4164 ix86_initial_elimination_offset (int from, int to)
4166 struct ix86_frame frame;
4167 ix86_compute_frame_layout (&frame);
4169 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4170 return frame.hard_frame_pointer_offset;
4171 else if (from == FRAME_POINTER_REGNUM
4172 && to == HARD_FRAME_POINTER_REGNUM)
4173 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4176 gcc_assert (to == STACK_POINTER_REGNUM);
4178 if (from == ARG_POINTER_REGNUM)
4179 return frame.stack_pointer_offset;
4181 gcc_assert (from == FRAME_POINTER_REGNUM);
4182 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4186 /* Fill the ix86_frame structure with information about the current function's frame. */
4189 ix86_compute_frame_layout (struct ix86_frame *frame)
4191 HOST_WIDE_INT total_size;
4192 unsigned int stack_alignment_needed;
4193 HOST_WIDE_INT offset;
4194 unsigned int preferred_alignment;
4195 HOST_WIDE_INT size = get_frame_size ();
4197 frame->nregs = ix86_nsaved_regs ();
4200 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4201 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4203 /* During reload iteration the number of registers saved can change.
4204 Recompute the value as needed. Do not recompute it when the number of
4205 registers didn't change, as reload does multiple calls to the function
4206 and does not expect the decision to change within a single iteration. */
4208 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4210 int count = frame->nregs;
4212 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4213 /* The fast prologue uses move instead of push to save registers. This
4214 is significantly longer, but also executes faster, as modern hardware
4215 can execute the moves in parallel but cannot do so for push/pop.
4217 Be careful about choosing which prologue to emit: when the function
4218 takes many instructions to execute, we may as well use the slow version,
4219 and likewise when the function is known to be outside a hot spot (this
4220 is known from profile feedback only). Weight the size of the function
4221 by the number of registers to save, as it is cheap to use one or two
4222 push instructions but very slow to use many of them. */
4224 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4225 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4226 || (flag_branch_probabilities
4227 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4228 cfun->machine->use_fast_prologue_epilogue = false;
4230 cfun->machine->use_fast_prologue_epilogue
4231 = !expensive_function_p (count);
4233 if (TARGET_PROLOGUE_USING_MOVE
4234 && cfun->machine->use_fast_prologue_epilogue)
4235 frame->save_regs_using_mov = true;
4237 frame->save_regs_using_mov = false;
4240 /* Skip return address and saved base pointer. */
4241 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4243 frame->hard_frame_pointer_offset = offset;
4245 /* Do some sanity checking of stack_alignment_needed and
4246 preferred_alignment, since the i386 port is the only one using these
4247 features, and they may break easily. */
4249 gcc_assert (!size || stack_alignment_needed);
4250 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
4251 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
4252 gcc_assert (stack_alignment_needed
4253 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
4255 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4256 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4258 /* Register save area */
4259 offset += frame->nregs * UNITS_PER_WORD;
4262 if (ix86_save_varrargs_registers)
4264 offset += X86_64_VARARGS_SIZE;
4265 frame->va_arg_size = X86_64_VARARGS_SIZE;
4268 frame->va_arg_size = 0;
4270 /* Align start of frame for local function. */
4271 frame->padding1 = ((offset + stack_alignment_needed - 1)
4272 & -stack_alignment_needed) - offset;
4274 offset += frame->padding1;
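/* A worked example of the rounding above: with offset == 12 and
   stack_alignment_needed == 16,

	((12 + 16 - 1) & -16) - 12 = (27 & ~15) - 12 = 16 - 12 = 4

   bytes of padding, bringing the frame offset up to 16.  */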
4276 /* Frame pointer points here. */
4277 frame->frame_pointer_offset = offset;
4281 /* Add outgoing arguments area. Can be skipped if we eliminated
4282 all the function calls as dead code.
4283 Skipping is, however, impossible when the function calls alloca. The
4284 alloca expander assumes that the last current_function_outgoing_args_size
4285 bytes of the stack frame are unused. */
4286 if (ACCUMULATE_OUTGOING_ARGS
4287 && (!current_function_is_leaf || current_function_calls_alloca))
4289 offset += current_function_outgoing_args_size;
4290 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4293 frame->outgoing_arguments_size = 0;
4295 /* Align stack boundary. Only needed if we're calling another function
4296 or using alloca. */
4297 if (!current_function_is_leaf || current_function_calls_alloca)
4298 frame->padding2 = ((offset + preferred_alignment - 1)
4299 & -preferred_alignment) - offset;
4301 frame->padding2 = 0;
4303 offset += frame->padding2;
4305 /* We've reached end of stack frame. */
4306 frame->stack_pointer_offset = offset;
4308 /* Size prologue needs to allocate. */
4309 frame->to_allocate =
4310 (size + frame->padding1 + frame->padding2
4311 + frame->outgoing_arguments_size + frame->va_arg_size);
4313 if ((!frame->to_allocate && frame->nregs <= 1)
4314 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
4315 frame->save_regs_using_mov = false;
4317 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
4318 && current_function_is_leaf)
4320 frame->red_zone_size = frame->to_allocate;
4321 if (frame->save_regs_using_mov)
4322 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
4323 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4324 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4327 frame->red_zone_size = 0;
4328 frame->to_allocate -= frame->red_zone_size;
4329 frame->stack_pointer_offset -= frame->red_zone_size;
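/* A sketch of the red-zone effect (assuming the usual x86-64 ABI values
   RED_ZONE_SIZE == 128 and RED_ZONE_RESERVE == 8): for a leaf function
   with 40 bytes of locals and 2 registers saved via moves,
   red_zone_size = 40 + 2*8 = 56 <= 120, so the whole frame lives in
   the 128 bytes below %rsp and the prologue needs no explicit stack
   adjustment at all.  */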
4331 fprintf (stderr, "nregs: %i\n", frame->nregs);
4332 fprintf (stderr, "size: %i\n", size);
4333 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4334 fprintf (stderr, "padding1: %i\n", frame->padding1);
4335 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4336 fprintf (stderr, "padding2: %i\n", frame->padding2);
4337 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4338 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4339 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4340 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4341 frame->hard_frame_pointer_offset);
4342 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4346 /* Emit code to save registers in the prologue. */
4349 ix86_emit_save_regs (void)
4354 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4355 if (ix86_save_reg (regno, true))
4357 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4358 RTX_FRAME_RELATED_P (insn) = 1;
4362 /* Emit code to save registers using MOV insns. The first register
4363 is stored at POINTER + OFFSET. */
4365 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
4370 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4371 if (ix86_save_reg (regno, true))
4373 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4375 gen_rtx_REG (Pmode, regno));
4376 RTX_FRAME_RELATED_P (insn) = 1;
4377 offset += UNITS_PER_WORD;
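/* For example (a sketch), with %ebx and %esi to be saved and
   OFFSET == 0 relative to %esp, the ascending-regno loop above emits:

	movl	%ebx, (%esp)
	movl	%esi, 4(%esp)  */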
4381 /* Expand prologue or epilogue stack adjustment.
4382 The pattern exists to put a dependency on all ebp-based memory accesses.
4383 STYLE should be negative if instructions should be marked as frame
4384 related, zero if the %r11 register is live and cannot be freely used,
and positive otherwise. */
4388 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
4393 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
4394 else if (x86_64_immediate_operand (offset, DImode))
4395 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
4399 /* r11 is used by indirect sibcall return as well, set before the
4400 epilogue and used after the epilogue. ATM indirect sibcall
4401 shouldn't be used together with huge frame sizes in one
4402 function because of the frame_size check in sibcall.c. */
4404 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4405 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
4407 RTX_FRAME_RELATED_P (insn) = 1;
4408 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
4412 RTX_FRAME_RELATED_P (insn) = 1;
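/* A sketch of the fallback above: when the adjustment does not fit a
   32-bit immediate in 64-bit mode, the offset is first loaded into
   %r11 and then added to the stack pointer, e.g.

	movabsq	$-4294967296, %r11
	addq	%r11, %rsp

   (approximate; the exact insn comes from the
   pro_epilogue_adjust_stack_rex64_2 pattern).  */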
4415 /* Expand the prologue into a bunch of separate insns. */
4418 ix86_expand_prologue (void)
4422 struct ix86_frame frame;
4423 HOST_WIDE_INT allocate;
4425 ix86_compute_frame_layout (&frame);
4427 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4428 slower on all targets. Also sdb doesn't like it. */
4430 if (frame_pointer_needed)
4432 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4433 RTX_FRAME_RELATED_P (insn) = 1;
4435 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4436 RTX_FRAME_RELATED_P (insn) = 1;
4439 allocate = frame.to_allocate;
4441 if (!frame.save_regs_using_mov)
4442 ix86_emit_save_regs ();
4444 allocate += frame.nregs * UNITS_PER_WORD;
4446 /* When using the red zone we may start register saving before allocating
4447 the stack frame, saving one cycle of the prologue. */
4448 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
4449 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
4450 : stack_pointer_rtx,
4451 -frame.nregs * UNITS_PER_WORD);
4455 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4456 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4457 GEN_INT (-allocate), -1);
4460 /* Only valid for Win32. */
4461 rtx eax = gen_rtx_REG (SImode, 0);
4462 bool eax_live = ix86_eax_live_at_start_p ();
4465 gcc_assert (!TARGET_64BIT);
4469 emit_insn (gen_push (eax));
4473 emit_move_insn (eax, GEN_INT (allocate));
4475 insn = emit_insn (gen_allocate_stack_worker (eax));
4476 RTX_FRAME_RELATED_P (insn) = 1;
4477 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
4478 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
4479 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
4480 t, REG_NOTES (insn));
4484 if (frame_pointer_needed)
4485 t = plus_constant (hard_frame_pointer_rtx,
4488 - frame.nregs * UNITS_PER_WORD);
4490 t = plus_constant (stack_pointer_rtx, allocate);
4491 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
4495 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
4497 if (!frame_pointer_needed || !frame.to_allocate)
4498 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4500 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4501 -frame.nregs * UNITS_PER_WORD);
4504 pic_reg_used = false;
4505 if (pic_offset_table_rtx
4506 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4507 || current_function_profile))
4509 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4511 if (alt_pic_reg_used != INVALID_REGNUM)
4512 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4514 pic_reg_used = true;
4519 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4521 /* Even with accurate pre-reload life analysis, we can wind up
4522 deleting all references to the pic register after reload.
4523 Consider if cross-jumping unifies two sides of a branch
4524 controlled by a comparison vs the only read from a global.
4525 In which case, allow the set_got to be deleted, though we're
4526 too late to do anything about the ebx save in the prologue. */
4527 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4530 /* Prevent function calls from being scheduled before the call to mcount.
4531 In the pic_reg_used case, make sure that the GOT load isn't deleted. */
4532 if (current_function_profile)
4533 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
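/* Putting the pieces together, a typical -m32 prologue produced by this
   function for a frame-pointered, push-saving function looks roughly
   like (registers pushed in decreasing regno order):

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%esi
	pushl	%ebx
	subl	$N, %esp

   with N == frame.to_allocate; the mov-saving variant instead
   subtracts the full size first and stores the registers with movl.  */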
4536 /* Emit code to restore saved registers using MOV insns. First register
4537 is restored from POINTER + OFFSET. */
4539 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
4540 int maybe_eh_return)
4543 rtx base_address = gen_rtx_MEM (Pmode, pointer);
4545 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4546 if (ix86_save_reg (regno, maybe_eh_return))
4548 /* Ensure that adjust_address won't be forced to produce pointer
4549 out of range allowed by x86-64 instruction set. */
4550 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
4554 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4555 emit_move_insn (r11, GEN_INT (offset));
4556 emit_insn (gen_adddi3 (r11, r11, pointer));
4557 base_address = gen_rtx_MEM (Pmode, r11);
4560 emit_move_insn (gen_rtx_REG (Pmode, regno),
4561 adjust_address (base_address, Pmode, offset));
4562 offset += UNITS_PER_WORD;
4566 /* Restore function stack, frame, and registers. */
4569 ix86_expand_epilogue (int style)
4572 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4573 struct ix86_frame frame;
4574 HOST_WIDE_INT offset;
4576 ix86_compute_frame_layout (&frame);
4578 /* Calculate start of saved registers relative to ebp. Special care
4579 must be taken for the normal return case of a function using
4580 eh_return: the eax and edx registers are marked as saved, but not
4581 restored along this path. */
4582 offset = frame.nregs;
4583 if (current_function_calls_eh_return && style != 2)
4585 offset *= -UNITS_PER_WORD;
4587 /* If we're only restoring one register and sp is not valid, then
4588 using a move instruction to restore the register is less work
4589 than reloading sp and popping the register.
4591 The default code results in a stack adjustment using an add/lea
4592 instruction, while this code results in a LEAVE instruction (or a
4593 discrete equivalent), so it is profitable in some other cases as well,
4594 especially when there are no registers to restore. We also use this
4595 code when TARGET_USE_LEAVE and there is exactly one register to pop.
4596 This heuristic may need some tuning in the future. */
4597 if ((!sp_valid && frame.nregs <= 1)
4598 || (TARGET_EPILOGUE_USING_MOVE
4599 && cfun->machine->use_fast_prologue_epilogue
4600 && (frame.nregs > 1 || frame.to_allocate))
4601 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4602 || (frame_pointer_needed && TARGET_USE_LEAVE
4603 && cfun->machine->use_fast_prologue_epilogue
4604 && frame.nregs == 1)
4605 || current_function_calls_eh_return)
4607 /* Restore registers. We can use ebp or esp to address the memory
4608 locations. If both are available, default to ebp, since offsets
4609 are known to be small. The only exception is esp pointing directly
4610 to the end of the block of saved registers, where we may simplify
the addressing mode. */
4613 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4614 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4615 frame.to_allocate, style == 2);
4617 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4618 offset, style == 2);
4620 /* eh_return epilogues need %ecx added to the stack pointer. */
4623 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4625 if (frame_pointer_needed)
4627 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4628 tmp = plus_constant (tmp, UNITS_PER_WORD);
4629 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4631 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4632 emit_move_insn (hard_frame_pointer_rtx, tmp);
4634 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
4639 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4640 tmp = plus_constant (tmp, (frame.to_allocate
4641 + frame.nregs * UNITS_PER_WORD));
4642 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4645 else if (!frame_pointer_needed)
4646 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4647 GEN_INT (frame.to_allocate
4648 + frame.nregs * UNITS_PER_WORD),
4650 /* If not an i386, mov & pop is faster than "leave". */
4651 else if (TARGET_USE_LEAVE || optimize_size
4652 || !cfun->machine->use_fast_prologue_epilogue)
4653 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4656 pro_epilogue_adjust_stack (stack_pointer_rtx,
4657 hard_frame_pointer_rtx,
4660 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4662 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4667 /* First step is to deallocate the stack frame so that we can
4668 pop the registers. */
4671 gcc_assert (frame_pointer_needed);
4672 pro_epilogue_adjust_stack (stack_pointer_rtx,
4673 hard_frame_pointer_rtx,
4674 GEN_INT (offset), style);
4676 else if (frame.to_allocate)
4677 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4678 GEN_INT (frame.to_allocate), style);
4680 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4681 if (ix86_save_reg (regno, false))
4684 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4686 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4688 if (frame_pointer_needed)
4690 /* Leave results in shorter dependency chains on CPUs that are
4691 able to grok it fast. */
4692 if (TARGET_USE_LEAVE)
4693 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4694 else if (TARGET_64BIT)
4695 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4697 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4701 /* Sibcall epilogues don't want a return instruction. */
4705 if (current_function_pops_args && current_function_args_size)
4707 rtx popc = GEN_INT (current_function_pops_args);
4709 /* i386 can only pop 64K bytes. If asked to pop more, pop the
4710 return address, do an explicit add, and jump indirectly to the
caller. */
4713 if (current_function_pops_args >= 65536)
4715 rtx ecx = gen_rtx_REG (SImode, 2);
4717 /* There is no "pascal" calling convention in 64bit ABI. */
4718 gcc_assert (!TARGET_64BIT);
4720 emit_insn (gen_popsi1 (ecx));
4721 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4722 emit_jump_insn (gen_return_indirect_internal (ecx));
4725 emit_jump_insn (gen_return_pop_internal (popc));
4728 emit_jump_insn (gen_return_internal ());
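/* The three return shapes emitted above, schematically (N stands for
   current_function_pops_args; illustrative only):

	ret			# no caller arguments popped
	ret	$8		# e.g. a stdcall function with 8 arg bytes
	popl	%ecx		# N >= 64K: pop the return address,
	addl	$N, %esp	# drop the arguments explicitly,
	jmp	*%ecx		# and return through %ecx  */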
4731 /* Reset from the function's potential modifications. */
4734 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4735 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4737 if (pic_offset_table_rtx)
4738 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4741 /* Extract the parts of an RTL expression that is a valid memory address
4742 for an instruction. Return 0 if the structure of the address is
4743 grossly off. Return -1 if the address contains ASHIFT, so it is not
4744 strictly valid, but still used for computing length of lea instruction. */
4747 ix86_decompose_address (rtx addr, struct ix86_address *out)
4749 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
4750 rtx base_reg, index_reg;
4751 HOST_WIDE_INT scale = 1;
4752 rtx scale_rtx = NULL_RTX;
4754 enum ix86_address_seg seg = SEG_DEFAULT;
4756 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4758 else if (GET_CODE (addr) == PLUS)
4768 addends[n++] = XEXP (op, 1);
4771 while (GET_CODE (op) == PLUS);
4776 for (i = n; i >= 0; --i)
4779 switch (GET_CODE (op))
4784 index = XEXP (op, 0);
4785 scale_rtx = XEXP (op, 1);
4789 if (XINT (op, 1) == UNSPEC_TP
4790 && TARGET_TLS_DIRECT_SEG_REFS
4791 && seg == SEG_DEFAULT)
4792 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
4821 else if (GET_CODE (addr) == MULT)
4823 index = XEXP (addr, 0); /* index*scale */
4824 scale_rtx = XEXP (addr, 1);
4826 else if (GET_CODE (addr) == ASHIFT)
4830 /* We're called for lea too, which implements ashift on occasion. */
4831 index = XEXP (addr, 0);
4832 tmp = XEXP (addr, 1);
4833 if (GET_CODE (tmp) != CONST_INT)
4835 scale = INTVAL (tmp);
4836 if ((unsigned HOST_WIDE_INT) scale > 3)
4842 disp = addr; /* displacement */
4844 /* Extract the integral value of scale. */
4847 if (GET_CODE (scale_rtx) != CONST_INT)
4849 scale = INTVAL (scale_rtx);
4852 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
4853 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
4855 /* Allow the arg pointer and stack pointer as an index if there is no scaling. */
4856 if (base_reg && index_reg && scale == 1
4857 && (index_reg == arg_pointer_rtx
4858 || index_reg == frame_pointer_rtx
4859 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
4862 tmp = base, base = index, index = tmp;
4863 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
4866 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4867 if ((base_reg == hard_frame_pointer_rtx
4868 || base_reg == frame_pointer_rtx
4869 || base_reg == arg_pointer_rtx) && !disp)
4872 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
4873 Avoid this by transforming to [%esi+0]. */
4874 if (ix86_tune == PROCESSOR_K6 && !optimize_size
4875 && base_reg && !index_reg && !disp
4877 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
4880 /* Special case: encode reg+reg instead of reg*2. */
4881 if (!base && index && scale && scale == 2)
4882 base = index, base_reg = index_reg, scale = 1;
4884 /* Special case: scaling cannot be encoded without base or displacement. */
4885 if (!base && !disp && index && scale != 1)
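/* A worked example of the decomposition above: for the address

	(plus (mult (reg %eax) (const_int 4))
	      (plus (reg %ebx) (const_int 12)))

   we end with base = %ebx, index = %eax, scale = 4, disp = 12, i.e.
   the operand of a "movl 12(%ebx,%eax,4), ..." style access; and per
   the special case above, (mult (reg) (const_int 2)) with no base is
   re-encoded as base + index with scale 1.  */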
4897 /* Return the cost of the memory address X.
4898 For i386, it is better to use a complex address than let gcc copy
4899 the address into a register and make a new pseudo. But not if the
4900 address requires two registers - that would mean more pseudos with
longer lifetimes. */
4903 ix86_address_cost (rtx x)
4905 struct ix86_address parts;
4907 int ok = ix86_decompose_address (x, &parts);
4911 if (parts.base && GET_CODE (parts.base) == SUBREG)
4912 parts.base = SUBREG_REG (parts.base);
4913 if (parts.index && GET_CODE (parts.index) == SUBREG)
4914 parts.index = SUBREG_REG (parts.index);
4916 /* More complex memory references are better. */
4917 if (parts.disp && parts.disp != const0_rtx)
4919 if (parts.seg != SEG_DEFAULT)
4922 /* Attempt to minimize number of registers in the address. */
4924 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4926 && (!REG_P (parts.index)
4927 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4931 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4933 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4934 && parts.base != parts.index)
4937 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4938 since its predecode logic can't detect the length of instructions
4939 and it degenerates to vector decoding. Increase the cost of such
4940 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4941 to split such addresses or even refuse them entirely.
4943 The following addressing modes are affected:
[base+scale*index]
[scale*index+disp]
[base+index]
4948 The first and last cases may be avoidable by explicitly coding the zero
4949 into the memory address, but I don't have an AMD-K6 machine handy to
check this. */
4953 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4954 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4955 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4961 /* If X is a machine specific address (i.e. a symbol or label being
4962 referenced as a displacement from the GOT implemented using an
4963 UNSPEC), then return the base term. Otherwise return X. */
4966 ix86_find_base_term (rtx x)
4972 if (GET_CODE (x) != CONST)
4975 if (GET_CODE (term) == PLUS
4976 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4977 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4978 term = XEXP (term, 0);
4979 if (GET_CODE (term) != UNSPEC
4980 || XINT (term, 1) != UNSPEC_GOTPCREL)
4983 term = XVECEXP (term, 0, 0);
4985 if (GET_CODE (term) != SYMBOL_REF
4986 && GET_CODE (term) != LABEL_REF)
4992 term = ix86_delegitimize_address (x);
4994 if (GET_CODE (term) != SYMBOL_REF
4995 && GET_CODE (term) != LABEL_REF)
5001 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
5002 this is used to form addresses to local data when -fPIC is in
effect. */
5006 darwin_local_data_pic (rtx disp)
5008 if (GET_CODE (disp) == MINUS)
5010 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5011 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5012 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5014 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5015 if (! strcmp (sym_name, "<pic base>"))
5023 /* Determine if a given RTX is a valid constant. We already know this
5024 satisfies CONSTANT_P. */
5027 legitimate_constant_p (rtx x)
5029 switch (GET_CODE (x))
5034 if (GET_CODE (x) == PLUS)
5036 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5041 if (TARGET_MACHO && darwin_local_data_pic (x))
5044 /* Only some unspecs are valid as "constants". */
5045 if (GET_CODE (x) == UNSPEC)
5046 switch (XINT (x, 1))
5050 return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
5052 return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
5057 /* We must have drilled down to a symbol. */
5058 if (!symbolic_operand (x, Pmode))
5063 /* TLS symbols are never valid. */
5064 if (tls_symbolic_operand (x, Pmode))
5072 /* Otherwise we handle everything else in the move patterns. */
5076 /* Determine if it's legal to put X into the constant pool. This
5077 is not possible for the address of thread-local symbols, which
5078 is checked above. */
5081 ix86_cannot_force_const_mem (rtx x)
5083 return !legitimate_constant_p (x);
5086 /* Determine if a given RTX is a valid constant address. */
5089 constant_address_p (rtx x)
5091 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5094 /* Nonzero if the constant value X is a legitimate general operand
5095 when generating PIC code. It is given that flag_pic is on and
5096 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5099 legitimate_pic_operand_p (rtx x)
5103 switch (GET_CODE (x))
5106 inner = XEXP (x, 0);
5108 /* Only some unspecs are valid as "constants". */
5109 if (GET_CODE (inner) == UNSPEC)
5110 switch (XINT (inner, 1))
5113 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5121 return legitimate_pic_address_disp_p (x);
5128 /* Determine if a given CONST RTX is a valid memory displacement
in PIC mode. */
5132 legitimate_pic_address_disp_p (rtx disp)
5136 /* In 64bit mode we can allow direct addresses of symbols and labels
5137 when they are not dynamic symbols. */
5140 /* TLS references should always be enclosed in UNSPEC. */
5141 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5143 if (GET_CODE (disp) == SYMBOL_REF
5144 && ix86_cmodel == CM_SMALL_PIC
5145 && SYMBOL_REF_LOCAL_P (disp))
5147 if (GET_CODE (disp) == LABEL_REF)
5149 if (GET_CODE (disp) == CONST
5150 && GET_CODE (XEXP (disp, 0)) == PLUS)
5152 rtx op0 = XEXP (XEXP (disp, 0), 0);
5153 rtx op1 = XEXP (XEXP (disp, 0), 1);
5155 /* TLS references should always be enclosed in UNSPEC. */
5156 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5158 if (((GET_CODE (op0) == SYMBOL_REF
5159 && ix86_cmodel == CM_SMALL_PIC
5160 && SYMBOL_REF_LOCAL_P (op0))
5161 || GET_CODE (op0) == LABEL_REF)
5162 && GET_CODE (op1) == CONST_INT
5163 && INTVAL (op1) < 16*1024*1024
5164 && INTVAL (op1) >= -16*1024*1024)
5168 if (GET_CODE (disp) != CONST)
5170 disp = XEXP (disp, 0);
5174 /* It is unsafe to allow PLUS expressions here; refusing them limits the
5175 allowed distance of GOT table references. We should not need these
anyway. */
5176 if (GET_CODE (disp) != UNSPEC
5177 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5180 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5181 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5187 if (GET_CODE (disp) == PLUS)
5189 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5191 disp = XEXP (disp, 0);
5195 if (TARGET_MACHO && darwin_local_data_pic (disp))
5198 if (GET_CODE (disp) != UNSPEC)
5201 switch (XINT (disp, 1))
5206 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5208 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5209 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5210 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5212 case UNSPEC_GOTTPOFF:
5213 case UNSPEC_GOTNTPOFF:
5214 case UNSPEC_INDNTPOFF:
5217 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5219 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5221 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5227 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5228 memory address for an instruction. The MODE argument is the machine mode
5229 for the MEM expression that wants to use this address.
5231 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5232 convert common non-canonical forms to canonical form so that they will
be recognized. */
5236 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5238 struct ix86_address parts;
5239 rtx base, index, disp;
5240 HOST_WIDE_INT scale;
5241 const char *reason = NULL;
5242 rtx reason_rtx = NULL_RTX;
5244 if (TARGET_DEBUG_ADDR)
5247 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5248 GET_MODE_NAME (mode), strict);
5252 if (ix86_decompose_address (addr, &parts) <= 0)
5254 reason = "decomposition failed";
5259 index = parts.index;
5261 scale = parts.scale;
5263 /* Validate base register.
5265 Don't allow SUBREG's that span more than a word here. It can lead to spill
5266 failures when the base is one word out of a two word structure, which is
5267 represented internally as a DImode int. */
5276 else if (GET_CODE (base) == SUBREG
5277 && REG_P (SUBREG_REG (base))
5278 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
5280 reg = SUBREG_REG (base);
5283 reason = "base is not a register";
5287 if (GET_MODE (base) != Pmode)
5289 reason = "base is not in Pmode";
5293 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5294 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5296 reason = "base is not valid";
5301 /* Validate index register.
5303 Don't allow SUBREG's that span more than a word here -- same as above. */
5312 else if (GET_CODE (index) == SUBREG
5313 && REG_P (SUBREG_REG (index))
5314 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
5316 reg = SUBREG_REG (index);
5319 reason = "index is not a register";
5323 if (GET_MODE (index) != Pmode)
5325 reason = "index is not in Pmode";
5329 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5330 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5332 reason = "index is not valid";
5337 /* Validate scale factor. */
5340 reason_rtx = GEN_INT (scale);
5343 reason = "scale without index";
5347 if (scale != 2 && scale != 4 && scale != 8)
5349 reason = "scale is not a valid multiplier";
5354 /* Validate displacement. */
5359 if (GET_CODE (disp) == CONST
5360 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5361 switch (XINT (XEXP (disp, 0), 1))
5365 case UNSPEC_GOTPCREL:
5366 gcc_assert (flag_pic);
5367 goto is_legitimate_pic;
5369 case UNSPEC_GOTTPOFF:
5370 case UNSPEC_GOTNTPOFF:
5371 case UNSPEC_INDNTPOFF:
5377 reason = "invalid address unspec";
5381 else if (flag_pic && (SYMBOLIC_CONST (disp)
5383 && !machopic_operand_p (disp)
5388 if (TARGET_64BIT && (index || base))
5390 /* foo@dtpoff(%rX) is ok. */
5391 if (GET_CODE (disp) != CONST
5392 || GET_CODE (XEXP (disp, 0)) != PLUS
5393 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5394 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5395 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5396 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5398 reason = "non-constant pic memory reference";
5402 else if (! legitimate_pic_address_disp_p (disp))
5404 reason = "displacement is an invalid pic construct";
5408 /* This code used to verify that a symbolic pic displacement
5409 includes the pic_offset_table_rtx register.
5411 While this is a good idea, unfortunately these constructs may
5412 be created by the "adds using lea" optimization for incorrect
code like:

	int a;
	int foo (int i)
	  {
	    return *(&a + i);
	  }

5421 This code is nonsensical, but results in addressing the
5422 GOT table with a pic_offset_table_rtx base. We can't
5423 just refuse it easily, since it gets matched by the
5424 "addsi3" pattern, which later gets split to lea in the
5425 case the output register differs from the input. While this
5426 could be handled by a separate addsi pattern for this case
5427 that never results in lea, disabling this test seems to be the
5428 easier and correct fix for the crash. */
5430 else if (GET_CODE (disp) != LABEL_REF
5431 && GET_CODE (disp) != CONST_INT
5432 && (GET_CODE (disp) != CONST
5433 || !legitimate_constant_p (disp))
5434 && (GET_CODE (disp) != SYMBOL_REF
5435 || !legitimate_constant_p (disp)))
5437 reason = "displacement is not constant";
5440 else if (TARGET_64BIT
5441 && !x86_64_immediate_operand (disp, VOIDmode))
5443 reason = "displacement is out of range";
5448 /* Everything looks valid. */
5449 if (TARGET_DEBUG_ADDR)
5450 fprintf (stderr, "Success.\n");
5454 if (TARGET_DEBUG_ADDR)
5456 fprintf (stderr, "Error: %s\n", reason);
5457 debug_rtx (reason_rtx);
5462 /* Return a unique alias set for the GOT. */
5464 static HOST_WIDE_INT
5465 ix86_GOT_alias_set (void)
5467 static HOST_WIDE_INT set = -1;
5469 set = new_alias_set ();
5473 /* Return a legitimate reference for ORIG (an address) using the
5474 register REG. If REG is 0, a new pseudo is generated.
5476 There are two types of references that must be handled:
5478 1. Global data references must load the address from the GOT, via
5479 the PIC reg. An insn is emitted to do this load, and the reg is
5482 2. Static data references, constant pool addresses, and code labels
5483 compute the address as an offset from the GOT, whose base is in
5484 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5485 differentiate them from global data objects. The returned
5486 address is the PIC reg + an unspec constant.
5488 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5489 reg also appears in the address. */
5492 legitimize_pic_address (rtx orig, rtx reg)
5500 reg = gen_reg_rtx (Pmode);
5501 /* Use the generic Mach-O PIC machinery. */
5502 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5505 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5507 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5509 /* This symbol may be referenced via a displacement from the PIC
5510 base address (@GOTOFF). */
5512 if (reload_in_progress)
5513 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5514 if (GET_CODE (addr) == CONST)
5515 addr = XEXP (addr, 0);
5516 if (GET_CODE (addr) == PLUS)
5518 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
5519 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
5522 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5523 new = gen_rtx_CONST (Pmode, new);
5524 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5528 emit_move_insn (reg, new);
5532 else if (GET_CODE (addr) == SYMBOL_REF)
5536 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5537 new = gen_rtx_CONST (Pmode, new);
5538 new = gen_const_mem (Pmode, new);
5539 set_mem_alias_set (new, ix86_GOT_alias_set ());
5542 reg = gen_reg_rtx (Pmode);
5543 /* Use gen_movsi directly, otherwise the address is loaded
5544 into a register for CSE. We don't want to CSE these addresses;
5545 instead we CSE addresses from the GOT table, so skip this. */
5546 emit_insn (gen_movsi (reg, new));
5551 /* This symbol must be referenced via a load from the
5552 Global Offset Table (@GOT). */
5554 if (reload_in_progress)
5555 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5556 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5557 new = gen_rtx_CONST (Pmode, new);
5558 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5559 new = gen_const_mem (Pmode, new);
5560 set_mem_alias_set (new, ix86_GOT_alias_set ());
5563 reg = gen_reg_rtx (Pmode);
5564 emit_move_insn (reg, new);
5570 if (GET_CODE (addr) == CONST)
5572 addr = XEXP (addr, 0);
5574 /* We must match stuff we have generated before. Assume the only
5575 unspecs that can get here are ours. Not that we could do
5576 anything with them anyway.... */
5577 if (GET_CODE (addr) == UNSPEC
5578 || (GET_CODE (addr) == PLUS
5579 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5581 gcc_assert (GET_CODE (addr) == PLUS);
5583 if (GET_CODE (addr) == PLUS)
5585 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5587 /* Check first to see if this is a constant offset from a @GOTOFF
5588 symbol reference. */
5589 if (local_symbolic_operand (op0, Pmode)
5590 && GET_CODE (op1) == CONST_INT)
5594 if (reload_in_progress)
5595 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5596 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5598 new = gen_rtx_PLUS (Pmode, new, op1);
5599 new = gen_rtx_CONST (Pmode, new);
5600 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5604 emit_move_insn (reg, new);
5610 if (INTVAL (op1) < -16*1024*1024
5611 || INTVAL (op1) >= 16*1024*1024)
5612 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
5617 base = legitimize_pic_address (XEXP (addr, 0), reg);
5618 new = legitimize_pic_address (XEXP (addr, 1),
5619 base == reg ? NULL_RTX : reg);
5621 if (GET_CODE (new) == CONST_INT)
5622 new = plus_constant (base, INTVAL (new));
5625 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5627 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5628 new = XEXP (new, 1);
5630 new = gen_rtx_PLUS (Pmode, base, new);
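/* A sketch of the central transformation above for a global "x" at
   -fpic on ia32: the address of "x" becomes

	(mem (plus (reg pic_offset_table)
		   (const (unspec [(symbol_ref "x")] UNSPEC_GOT))))

   which assembles to something like "movl x@GOT(%ebx), %reg" -- the
   address is loaded from its GOT slot rather than computed directly.  */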
5638 /* Load the thread pointer. If TO_REG is true, force it into a register. */
5641 get_thread_pointer (int to_reg)
5645 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5649 reg = gen_reg_rtx (Pmode);
5650 insn = gen_rtx_SET (VOIDmode, reg, tp);
5651 insn = emit_insn (insn);
5656 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
5657 false if we expect this to be used for a memory address and true if
5658 we expect to load the address into a register. */
5661 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
5663 rtx dest, base, off, pic;
5668 case TLS_MODEL_GLOBAL_DYNAMIC:
5669 dest = gen_reg_rtx (Pmode);
5672 rtx rax = gen_rtx_REG (Pmode, 0), insns;
5675 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5676 insns = get_insns ();
5679 emit_libcall_block (insns, dest, rax, x);
5682 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5685 case TLS_MODEL_LOCAL_DYNAMIC:
5686 base = gen_reg_rtx (Pmode);
5689 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5692 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5693 insns = get_insns ();
5696 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5697 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5698 emit_libcall_block (insns, base, rax, note);
5701 emit_insn (gen_tls_local_dynamic_base_32 (base));
5703 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5704 off = gen_rtx_CONST (Pmode, off);
5706 return gen_rtx_PLUS (Pmode, base, off);
5708 case TLS_MODEL_INITIAL_EXEC:
5712 type = UNSPEC_GOTNTPOFF;
5716 if (reload_in_progress)
5717 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5718 pic = pic_offset_table_rtx;
5719 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
5721 else if (!TARGET_GNU_TLS)
5723 pic = gen_reg_rtx (Pmode);
5724 emit_insn (gen_set_got (pic));
5725 type = UNSPEC_GOTTPOFF;
5730 type = UNSPEC_INDNTPOFF;
5733 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
5734 off = gen_rtx_CONST (Pmode, off);
5736 off = gen_rtx_PLUS (Pmode, pic, off);
5737 off = gen_const_mem (Pmode, off);
5738 set_mem_alias_set (off, ix86_GOT_alias_set ());
5740 if (TARGET_64BIT || TARGET_GNU_TLS)
5742 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5743 off = force_reg (Pmode, off);
5744 return gen_rtx_PLUS (Pmode, base, off);
5748 base = get_thread_pointer (true);
5749 dest = gen_reg_rtx (Pmode);
5750 emit_insn (gen_subsi3 (dest, base, off));
5754 case TLS_MODEL_LOCAL_EXEC:
5755 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5756 (TARGET_64BIT || TARGET_GNU_TLS)
5757 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5758 off = gen_rtx_CONST (Pmode, off);
5760 if (TARGET_64BIT || TARGET_GNU_TLS)
5762 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5763 return gen_rtx_PLUS (Pmode, base, off);
5767 base = get_thread_pointer (true);
5768 dest = gen_reg_rtx (Pmode);
5769 emit_insn (gen_subsi3 (dest, base, off));
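/* For illustration (assuming the conventional GNU TLS sequences), the
   local-exec model on ia32 with TARGET_GNU_TLS yields addresses of the
   form thread-pointer + x@NTPOFF, i.e. roughly:

	movl	%gs:0, %reg
	leal	x@ntpoff(%reg), %reg

   while the non-GNU (Sun) variant instead computes thread-pointer
   minus x@TPOFF using the subsi3 above.  */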
5780 /* Try machine-dependent ways of modifying an illegitimate address
5781 to be legitimate. If we find one, return the new, valid address.
5782 This macro is used in only one place: `memory_address' in explow.c.
5784 OLDX is the address as it was before break_out_memory_refs was called.
5785 In some cases it is useful to look at this to decide what needs to be done.
5787 MODE and WIN are passed so that this macro can use
5788 GO_IF_LEGITIMATE_ADDRESS.
5790 It is always safe for this macro to do nothing. It exists to recognize
5791 opportunities to optimize the output.
5793 For the 80386, we handle X+REG by loading X into a register R and
5794 using R+REG. R will go in a general reg and indexing will be used.
5795 However, if REG is a broken-out memory address or multiplication,
5796 nothing needs to be done because REG can certainly go in a general reg.
5798 When -fpic is used, special handling is needed for symbolic references.
5799 See comments by legitimize_pic_address in i386.c for details. */
5802 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
5807 if (TARGET_DEBUG_ADDR)
5809 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5810 GET_MODE_NAME (mode));
5814 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
5816 return legitimize_tls_address (x, log, false);
5817 if (GET_CODE (x) == CONST
5818 && GET_CODE (XEXP (x, 0)) == PLUS
5819 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
5820 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
5822 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
5823 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
5826 if (flag_pic && SYMBOLIC_CONST (x))
5827 return legitimize_pic_address (x, 0);
5829 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5830 if (GET_CODE (x) == ASHIFT
5831 && GET_CODE (XEXP (x, 1)) == CONST_INT
5832 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
5835 log = INTVAL (XEXP (x, 1));
5836 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5837 GEN_INT (1 << log));
5840 if (GET_CODE (x) == PLUS)
5842 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5844 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5845 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5846 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
5849 log = INTVAL (XEXP (XEXP (x, 0), 1));
5850 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5851 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5852 GEN_INT (1 << log));
5855 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5856 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5857 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
5860 log = INTVAL (XEXP (XEXP (x, 1), 1));
5861 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5862 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5863 GEN_INT (1 << log));
5866 /* Put multiply first if it isn't already. */
5867 if (GET_CODE (XEXP (x, 1)) == MULT)
5869 rtx tmp = XEXP (x, 0);
5870 XEXP (x, 0) = XEXP (x, 1);
5875 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5876 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5877 created by virtual register instantiation, register elimination, and
5878 similar optimizations. */
5879 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5882 x = gen_rtx_PLUS (Pmode,
5883 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5884 XEXP (XEXP (x, 1), 0)),
5885 XEXP (XEXP (x, 1), 1));
5889 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5890 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5891 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5892 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5893 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5894 && CONSTANT_P (XEXP (x, 1)))
5897 rtx other = NULL_RTX;
5899 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5901 constant = XEXP (x, 1);
5902 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5904 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5906 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5907 other = XEXP (x, 1);
5915 x = gen_rtx_PLUS (Pmode,
5916 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5917 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5918 plus_constant (other, INTVAL (constant)));
5922 if (changed && legitimate_address_p (mode, x, FALSE))
5925 if (GET_CODE (XEXP (x, 0)) == MULT)
5928 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5931 if (GET_CODE (XEXP (x, 1)) == MULT)
5934 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5938 && GET_CODE (XEXP (x, 1)) == REG
5939 && GET_CODE (XEXP (x, 0)) == REG)
5942 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5945 x = legitimize_pic_address (x, 0);
5948 if (changed && legitimate_address_p (mode, x, FALSE))
5951 if (GET_CODE (XEXP (x, 0)) == REG)
5953 rtx temp = gen_reg_rtx (Pmode);
5954 rtx val = force_operand (XEXP (x, 1), temp);
5956 emit_move_insn (temp, val);
5962 else if (GET_CODE (XEXP (x, 1)) == REG)
5964 rtx temp = gen_reg_rtx (Pmode);
5965 rtx val = force_operand (XEXP (x, 0), temp);
5967 emit_move_insn (temp, val);
5977 /* Print an integer constant expression in assembler syntax. Addition
5978 and subtraction are the only arithmetic that may appear in these
5979 expressions. FILE is the stdio stream to write to, X is the rtx, and
5980 CODE is the operand print code from the output string. */
5983 output_pic_addr_const (FILE *file, rtx x, int code)
5987 switch (GET_CODE (x))
5990 gcc_assert (flag_pic);
5995 assemble_name (file, XSTR (x, 0));
5996 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
5997 fputs ("@PLT", file);
6004 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6005 assemble_name (asm_out_file, buf);
6009 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6013 /* This used to output parentheses around the expression,
6014 but that does not work on the 386 (either ATT or BSD assembler). */
6015 output_pic_addr_const (file, XEXP (x, 0), code);
6019 if (GET_MODE (x) == VOIDmode)
6021 /* We can use %d if the number is <32 bits and positive. */
6022 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6023 fprintf (file, "0x%lx%08lx",
6024 (unsigned long) CONST_DOUBLE_HIGH (x),
6025 (unsigned long) CONST_DOUBLE_LOW (x));
6027 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6030 /* We can't handle floating point constants;
6031 PRINT_OPERAND must handle them. */
6032 output_operand_lossage ("floating constant misused");
6036 /* Some assemblers need integer constants to appear first. */
6037 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6039 output_pic_addr_const (file, XEXP (x, 0), code);
6041 output_pic_addr_const (file, XEXP (x, 1), code);
6045 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
6046 output_pic_addr_const (file, XEXP (x, 1), code);
6048 output_pic_addr_const (file, XEXP (x, 0), code);
6054 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6055 output_pic_addr_const (file, XEXP (x, 0), code);
6057 output_pic_addr_const (file, XEXP (x, 1), code);
6059 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6063 gcc_assert (XVECLEN (x, 0) == 1);
6064 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6065 switch (XINT (x, 1))
6068 fputs ("@GOT", file);
6071 fputs ("@GOTOFF", file);
6073 case UNSPEC_GOTPCREL:
6074 fputs ("@GOTPCREL(%rip)", file);
6076 case UNSPEC_GOTTPOFF:
6077 /* FIXME: This might be @TPOFF in Sun ld too. */
6078 fputs ("@GOTTPOFF", file);
6081 fputs ("@TPOFF", file);
6085 fputs ("@TPOFF", file);
6087 fputs ("@NTPOFF", file);
6090 fputs ("@DTPOFF", file);
6092 case UNSPEC_GOTNTPOFF:
6094 fputs ("@GOTTPOFF(%rip)", file);
6096 fputs ("@GOTNTPOFF", file);
6098 case UNSPEC_INDNTPOFF:
6099 fputs ("@INDNTPOFF", file);
6102 output_operand_lossage ("invalid UNSPEC as operand");
6108 output_operand_lossage ("invalid expression as operand");
6112 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6113 We need to emit DTP-relative relocations. */
6116 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6118 fputs (ASM_LONG, file);
6119 output_addr_const (file, x);
6120 fputs ("@DTPOFF", file);
6126 fputs (", 0", file);
6133 /* In the name of slightly smaller debug output, and to cater to
6134 general assembler lossage, recognize PIC+GOTOFF and turn it back
6135 into a direct symbol reference. */
6138 ix86_delegitimize_address (rtx orig_x)
6142 if (GET_CODE (x) == MEM)
6147 if (GET_CODE (x) != CONST
6148 || GET_CODE (XEXP (x, 0)) != UNSPEC
6149 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6150 || GET_CODE (orig_x) != MEM)
6152 return XVECEXP (XEXP (x, 0), 0, 0);
6155 if (GET_CODE (x) != PLUS
6156 || GET_CODE (XEXP (x, 1)) != CONST)
6159 if (GET_CODE (XEXP (x, 0)) == REG
6160 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6161 /* %ebx + GOT/GOTOFF */
6163 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6165 /* %ebx + %reg * scale + GOT/GOTOFF */
6167 if (GET_CODE (XEXP (y, 0)) == REG
6168 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6170 else if (GET_CODE (XEXP (y, 1)) == REG
6171 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6175 if (GET_CODE (y) != REG
6176 && GET_CODE (y) != MULT
6177 && GET_CODE (y) != ASHIFT)
6183 x = XEXP (XEXP (x, 1), 0);
6184 if (GET_CODE (x) == UNSPEC
6185 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6186 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6189 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6190 return XVECEXP (x, 0, 0);
6193 if (GET_CODE (x) == PLUS
6194 && GET_CODE (XEXP (x, 0)) == UNSPEC
6195 && GET_CODE (XEXP (x, 1)) == CONST_INT
6196 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6197 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6198 && GET_CODE (orig_x) != MEM)))
6200 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6202 return gen_rtx_PLUS (Pmode, y, x);
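/* For example, the GOTOFF form created by legitimize_pic_address,

	(plus (reg %ebx) (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)))

   is folded back to plain (symbol_ref "x") here, so debug output and
   alias analysis see the symbol rather than the PIC arithmetic.  */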
6210 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6215 if (mode == CCFPmode || mode == CCFPUmode)
6217 enum rtx_code second_code, bypass_code;
6218 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6219 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
6220 code = ix86_fp_compare_code_to_integer (code);
6224 code = reverse_condition (code);
6235 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
6239 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6240 Those same assemblers have the same but opposite lossage on cmov. */
6241 gcc_assert (mode == CCmode);
6242 suffix = fp ? "nbe" : "a";
6262 gcc_assert (mode == CCmode);
6284 gcc_assert (mode == CCmode);
6285 suffix = fp ? "nb" : "ae";
6288 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
6292 gcc_assert (mode == CCmode);
6296 suffix = fp ? "u" : "p";
6299 suffix = fp ? "nu" : "np";
6304 fputs (suffix, file);
6307 /* Print the name of register X to FILE based on its machine mode and number.
6308 If CODE is 'w', pretend the mode is HImode.
6309 If CODE is 'b', pretend the mode is QImode.
6310 If CODE is 'k', pretend the mode is SImode.
6311 If CODE is 'q', pretend the mode is DImode.
6312 If CODE is 'h', pretend the reg is the 'high' byte register.
6313 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
6316 print_reg (rtx x, int code, FILE *file)
6318 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
6319 && REGNO (x) != FRAME_POINTER_REGNUM
6320 && REGNO (x) != FLAGS_REG
6321 && REGNO (x) != FPSR_REG);
6323 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6326 if (code == 'w' || MMX_REG_P (x))
6328 else if (code == 'b')
6330 else if (code == 'k')
6332 else if (code == 'q')
6334 else if (code == 'y')
6336 else if (code == 'h')
6339 code = GET_MODE_SIZE (GET_MODE (x));
6341 /* Irritatingly, AMD extended registers use a different naming convention
6342 from the normal registers. */
6343 if (REX_INT_REG_P (x))
6345 gcc_assert (TARGET_64BIT);
6349 error ("extended registers have no high halves");
6352 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6355 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6358 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6361 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6364 error ("unsupported operand size for extended register");
6372 if (STACK_TOP_P (x))
6374 fputs ("st(0)", file);
6381 if (! ANY_FP_REG_P (x))
6382 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6387 fputs (hi_reg_name[REGNO (x)], file);
6390 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
6392 fputs (qi_reg_name[REGNO (x)], file);
6395 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
6397 fputs (qi_high_reg_name[REGNO (x)], file);
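/* For example, for hard register 0 the codes select: 'b' -> "al",
   'w' -> "ax", 'k' -> "eax", 'q' -> "rax" (64-bit only), and
   'h' -> "ah"; with no code the operand's mode size decides, and an
   SSE or MMX register simply prints its hi_reg_name entry.  */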
6404 /* Locate some local-dynamic symbol still in use by this function
6405 so that we can print its name in some tls_local_dynamic_base
pattern. */
6413 if (cfun->machine->some_ld_name)
6414 return cfun->machine->some_ld_name;
6416 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6418 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6419 return cfun->machine->some_ld_name;
6425 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
6429 if (GET_CODE (x) == SYMBOL_REF
6430 && local_dynamic_symbolic_operand (x, Pmode))
6432 cfun->machine->some_ld_name = XSTR (x, 0);
6440 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6441 C -- print opcode suffix for set/cmov insn.
6442 c -- like C, but print reversed condition
6443 F,f -- likewise, but for floating-point.
6444 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6446 R -- print the prefix for register names.
6447 z -- print the opcode suffix for the size of the current operand.
6448 * -- print a star (in certain assembler syntax)
6449 A -- print an absolute memory reference.
6450 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6451 s -- print a shift double count, followed by the assembler's argument
6453 b -- print the QImode name of the register for the indicated operand.
6454 %b0 would print %al if operands[0] is reg 0.
6455 w -- likewise, print the HImode name of the register.
6456 k -- likewise, print the SImode name of the register.
6457 q -- likewise, print the DImode name of the register.
6458 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6459 y -- print "st(0)" instead of "st" as a register.
6460 D -- print condition for SSE cmp instruction.
6461 P -- if PIC, print an @PLT suffix.
6462 X -- don't print any sort of PIC '@' suffix for a symbol.
6463 & -- print some in-use local-dynamic symbol name.
6464 H -- print a memory address offset by 8; used for sse high-parts
6468 print_operand (FILE *file, rtx x, int code)
6475 if (ASSEMBLER_DIALECT == ASM_ATT)
6480 assemble_name (file, get_some_local_dynamic_name ());
6484 switch (ASSEMBLER_DIALECT)
6491 /* Intel syntax. For absolute addresses, registers should not
6492 be surrounded by braces. */
6493 if (GET_CODE (x) != REG)
6496 PRINT_OPERAND (file, x, 0);
6506 PRINT_OPERAND (file, x, 0);
6511 if (ASSEMBLER_DIALECT == ASM_ATT)
6516 if (ASSEMBLER_DIALECT == ASM_ATT)
6521 if (ASSEMBLER_DIALECT == ASM_ATT)
6526 if (ASSEMBLER_DIALECT == ASM_ATT)
6531 if (ASSEMBLER_DIALECT == ASM_ATT)
6536 if (ASSEMBLER_DIALECT == ASM_ATT)
6541 /* 387 opcodes don't get size suffixes if the operands are registers.  */
6543 if (STACK_REG_P (x))
6546 /* Likewise if using Intel opcodes. */
6547 if (ASSEMBLER_DIALECT == ASM_INTEL)
6550 /* Derive the opcode suffix from the size of the operand.  */
6551 switch (GET_MODE_SIZE (GET_MODE (x)))
6554 #ifdef HAVE_GAS_FILDS_FISTS
6560 if (GET_MODE (x) == SFmode)
6575 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6577 #ifdef GAS_MNEMONICS
6603 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6605 PRINT_OPERAND (file, x, 0);
6611 /* A little bit of brain damage here: the SSE compare instructions
6612 use completely different names for the comparisons than the fp
6613 conditional moves do.  */
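/* For reference (illustrative): the names emitted here -- "unord",
   "neq", "nlt", "nle", "ord" and friends -- are the condition
   suffixes of the SSE cmp{ps,ss,pd,sd} family, e.g. "cmpneqps",
   quite unlike the "e"/"ne"/"a"/"b" suffixes used by fcmov, set
   and cmov.  */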
6614 switch (GET_CODE (x))
6629 fputs ("unord", file);
6633 fputs ("neq", file);
6637 fputs ("nlt", file);
6641 fputs ("nle", file);
6644 fputs ("ord", file);
6651 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6652 if (ASSEMBLER_DIALECT == ASM_ATT)
6654 switch (GET_MODE (x))
6656 case HImode: putc ('w', file); break;
6658 case SFmode: putc ('l', file); break;
6660 case DFmode: putc ('q', file); break;
6661 default: gcc_unreachable ();
6668 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6671 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6672 if (ASSEMBLER_DIALECT == ASM_ATT)
6675 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6678 /* Like above, but reverse condition */
6680 /* Check to see if argument to %c is really a constant
6681 and not a condition code which needs to be reversed. */
6682 if (!COMPARISON_P (x))
6684 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6687 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6690 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6691 if (ASSEMBLER_DIALECT == ASM_ATT)
6694 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6698 /* It doesn't actually matter what mode we use here, as we're
6699 only going to use this for printing. */
6700 x = adjust_address_nv (x, DImode, 8);
6707 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6710 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6713 int pred_val = INTVAL (XEXP (x, 0));
6715 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6716 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6718 int taken = pred_val > REG_BR_PROB_BASE / 2;
6719 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6721 /* Emit hints only where the default branch prediction
6722 heuristics would fail.  */
6723 if (taken != cputaken)
6725 /* We use 3e (DS) prefix for taken branches and
6726 2e (CS) prefix for not taken branches. */
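/* Illustrative output (not from the original sources): a branch we
   predict as taken, but which the CPU's static rule would predict
   as not taken, comes out as e.g. "ds ; jne .L2"; the opposite
   disagreement gets the "cs ; " prefix instead.  */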
6728 fputs ("ds ; ", file);
6730 fputs ("cs ; ", file);
6737 output_operand_lossage ("invalid operand code '%c'", code);
6741 if (GET_CODE (x) == REG)
6742 print_reg (x, code, file);
6744 else if (GET_CODE (x) == MEM)
6746 /* No `byte ptr' prefix for call instructions. */
6747 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6750 switch (GET_MODE_SIZE (GET_MODE (x)))
6752 case 1: size = "BYTE"; break;
6753 case 2: size = "WORD"; break;
6754 case 4: size = "DWORD"; break;
6755 case 8: size = "QWORD"; break;
6756 case 12: size = "XWORD"; break;
6757 case 16: size = "XMMWORD"; break;
6762 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6765 else if (code == 'w')
6767 else if (code == 'k')
6771 fputs (" PTR ", file);
6775 /* Avoid (%rip) for call operands. */
6776 if (CONSTANT_ADDRESS_P (x) && code == 'P'
6777 && GET_CODE (x) != CONST_INT)
6778 output_addr_const (file, x);
6779 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6780 output_operand_lossage ("invalid constraints for operand");
6785 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6790 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6791 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6793 if (ASSEMBLER_DIALECT == ASM_ATT)
6795 fprintf (file, "0x%08lx", l);
6798 /* These float cases don't actually occur as immediate operands. */
6799 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6803 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6804 fprintf (file, "%s", dstr);
6807 else if (GET_CODE (x) == CONST_DOUBLE
6808 && GET_MODE (x) == XFmode)
6812 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6813 fprintf (file, "%s", dstr);
6818 /* We have patterns that allow zero sets of memory, for instance.
6819 In 64-bit mode, we should probably support all 8-byte vectors,
6820 since we can in fact encode that into an immediate. */
6821 if (GET_CODE (x) == CONST_VECTOR)
6823 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
6829 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6831 if (ASSEMBLER_DIALECT == ASM_ATT)
6834 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6835 || GET_CODE (x) == LABEL_REF)
6837 if (ASSEMBLER_DIALECT == ASM_ATT)
6840 fputs ("OFFSET FLAT:", file);
6843 if (GET_CODE (x) == CONST_INT)
6844 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6846 output_pic_addr_const (file, x, code);
6848 output_addr_const (file, x);
6852 /* Print a memory operand whose address is ADDR. */
6855 print_operand_address (FILE *file, rtx addr)
6857 struct ix86_address parts;
6858 rtx base, index, disp;
6860 int ok = ix86_decompose_address (addr, &parts);
6865 index = parts.index;
6867 scale = parts.scale;
6875 if (USER_LABEL_PREFIX[0] == 0)
6877 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
6883 if (!base && !index)
6885 /* A displacement-only address requires special attention.  */
6887 if (GET_CODE (disp) == CONST_INT)
6889 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
6891 if (USER_LABEL_PREFIX[0] == 0)
6893 fputs ("ds:", file);
6895 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
6898 output_pic_addr_const (file, disp, 0);
6900 output_addr_const (file, disp);
6902 /* In 64-bit mode, use the one-byte-shorter RIP-relative addressing.  */
6904 && ((GET_CODE (disp) == SYMBOL_REF
6905 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
6906 || GET_CODE (disp) == LABEL_REF
6907 || (GET_CODE (disp) == CONST
6908 && GET_CODE (XEXP (disp, 0)) == PLUS
6909 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
6910 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
6911 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
6912 fputs ("(%rip)", file);
6916 if (ASSEMBLER_DIALECT == ASM_ATT)
6921 output_pic_addr_const (file, disp, 0);
6922 else if (GET_CODE (disp) == LABEL_REF)
6923 output_asm_label (disp);
6925 output_addr_const (file, disp);
6930 print_reg (base, 0, file);
6934 print_reg (index, 0, file);
6936 fprintf (file, ",%d", scale);
6942 rtx offset = NULL_RTX;
6946 /* Pull out the offset of a symbol; print any symbol itself. */
6947 if (GET_CODE (disp) == CONST
6948 && GET_CODE (XEXP (disp, 0)) == PLUS
6949 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6951 offset = XEXP (XEXP (disp, 0), 1);
6952 disp = gen_rtx_CONST (VOIDmode,
6953 XEXP (XEXP (disp, 0), 0));
6957 output_pic_addr_const (file, disp, 0);
6958 else if (GET_CODE (disp) == LABEL_REF)
6959 output_asm_label (disp);
6960 else if (GET_CODE (disp) == CONST_INT)
6963 output_addr_const (file, disp);
6969 print_reg (base, 0, file);
6972 if (INTVAL (offset) >= 0)
6974 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6978 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6985 print_reg (index, 0, file);
6987 fprintf (file, "*%d", scale);
6995 output_addr_const_extra (FILE *file, rtx x)
6999 if (GET_CODE (x) != UNSPEC)
7002 op = XVECEXP (x, 0, 0);
7003 switch (XINT (x, 1))
7005 case UNSPEC_GOTTPOFF:
7006 output_addr_const (file, op);
7007 /* FIXME: This might be @TPOFF in Sun ld. */
7008 fputs ("@GOTTPOFF", file);
7011 output_addr_const (file, op);
7012 fputs ("@TPOFF", file);
7015 output_addr_const (file, op);
7017 fputs ("@TPOFF", file);
7019 fputs ("@NTPOFF", file);
7022 output_addr_const (file, op);
7023 fputs ("@DTPOFF", file);
7025 case UNSPEC_GOTNTPOFF:
7026 output_addr_const (file, op);
7028 fputs ("@GOTTPOFF(%rip)", file);
7030 fputs ("@GOTNTPOFF", file);
7032 case UNSPEC_INDNTPOFF:
7033 output_addr_const (file, op);
7034 fputs ("@INDNTPOFF", file);
7044 /* Split one or more DImode RTL references into pairs of SImode
7045 references. The RTL can be REG, offsettable MEM, integer constant, or
7046 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7047 split and "num" is its length. lo_half and hi_half are output arrays
7048 that parallel "operands". */
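/* A minimal illustration (little-endian layout, as hard-coded in the
   offsets below): a DImode memory operand (mem:DI addr) splits into
   lo = (mem:SI addr) and hi = (mem:SI (addr + 4)); a DImode register
   or constant splits into its low and high 32-bit words via
   simplify_gen_subreg.  */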
7051 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7055 rtx op = operands[num];
7057 /* simplify_subreg refuses to split volatile memory references,
7058 but we still have to handle them.  */
7059 if (GET_CODE (op) == MEM)
7061 lo_half[num] = adjust_address (op, SImode, 0);
7062 hi_half[num] = adjust_address (op, SImode, 4);
7066 lo_half[num] = simplify_gen_subreg (SImode, op,
7067 GET_MODE (op) == VOIDmode
7068 ? DImode : GET_MODE (op), 0);
7069 hi_half[num] = simplify_gen_subreg (SImode, op,
7070 GET_MODE (op) == VOIDmode
7071 ? DImode : GET_MODE (op), 4);
7075 /* Split one or more TImode RTL references into pairs of SImode
7076 references. The RTL can be REG, offsettable MEM, integer constant, or
7077 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7078 split and "num" is its length. lo_half and hi_half are output arrays
7079 that parallel "operands". */
7082 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7086 rtx op = operands[num];
7088 /* simplify_subreg refuses to split volatile memory references, but we
7089 still have to handle them.  */
7090 if (GET_CODE (op) == MEM)
7092 lo_half[num] = adjust_address (op, DImode, 0);
7093 hi_half[num] = adjust_address (op, DImode, 8);
7097 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7098 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7103 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7104 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7105 is the expression of the binary operation. The output may either be
7106 emitted here, or returned to the caller, like all output_* functions.
7108 There is no guarantee that the operands are the same mode, as they
7109 might be within FLOAT or FLOAT_EXTEND expressions. */
7111 #ifndef SYSV386_COMPAT
7112 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7113 wants to fix the assemblers because that causes incompatibility
7114 with gcc. No-one wants to fix gcc because that causes
7115 incompatibility with assemblers... You can use the option of
7116 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7117 #define SYSV386_COMPAT 1
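/* A sketch of the lossage (assumed from the fsub/fdiv comment further
   below): when the destination is not %st(0), AT&T-derived assemblers
   swap the sense of fsub/fsubr and fdiv/fdivr, so the very same
   hardware operation must be spelled e.g. "fsubp" for one assembler
   and "fsubrp" for another; the {att|intel} template alternatives
   selected under SYSV386_COMPAT below encode exactly this
   difference.  */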
7121 output_387_binary_op (rtx insn, rtx *operands)
7123 static char buf[30];
7126 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
7128 #ifdef ENABLE_CHECKING
7129 /* Even if we do not want to check the inputs, this documents the
7130 input constraints, which helps in understanding the following code.  */
7131 if (STACK_REG_P (operands[0])
7132 && ((REG_P (operands[1])
7133 && REGNO (operands[0]) == REGNO (operands[1])
7134 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7135 || (REG_P (operands[2])
7136 && REGNO (operands[0]) == REGNO (operands[2])
7137 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7138 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7141 gcc_assert (is_sse);
7144 switch (GET_CODE (operands[3]))
7147 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7148 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7156 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7157 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7165 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7166 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7174 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7175 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7189 if (GET_MODE (operands[0]) == SFmode)
7190 strcat (buf, "ss\t{%2, %0|%0, %2}");
7192 strcat (buf, "sd\t{%2, %0|%0, %2}");
7197 switch (GET_CODE (operands[3]))
7201 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7203 rtx temp = operands[2];
7204 operands[2] = operands[1];
7208 /* We know operands[0] == operands[1].  */
7210 if (GET_CODE (operands[2]) == MEM)
7216 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7218 if (STACK_TOP_P (operands[0]))
7219 /* How is it that we are storing to a dead operand[2]?
7220 Well, presumably operands[1] is dead too. We can't
7221 store the result to st(0) as st(0) gets popped on this
7222 instruction. Instead store to operands[2] (which I
7223 think has to be st(1)). st(1) will be popped later.
7224 gcc <= 2.8.1 didn't have this check and generated
7225 assembly code that the Unixware assembler rejected. */
7226 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7228 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7232 if (STACK_TOP_P (operands[0]))
7233 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7235 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7240 if (GET_CODE (operands[1]) == MEM)
7246 if (GET_CODE (operands[2]) == MEM)
7252 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7255 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7256 derived assemblers, confusingly reverse the direction of
7257 the operation for fsub{r} and fdiv{r} when the
7258 destination register is not st(0). The Intel assembler
7259 doesn't have this brain damage. Read !SYSV386_COMPAT to
7260 figure out what the hardware really does. */
7261 if (STACK_TOP_P (operands[0]))
7262 p = "{p\t%0, %2|rp\t%2, %0}";
7264 p = "{rp\t%2, %0|p\t%0, %2}";
7266 if (STACK_TOP_P (operands[0]))
7267 /* As above for fmul/fadd, we can't store to st(0). */
7268 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7270 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7275 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7278 if (STACK_TOP_P (operands[0]))
7279 p = "{rp\t%0, %1|p\t%1, %0}";
7281 p = "{p\t%1, %0|rp\t%0, %1}";
7283 if (STACK_TOP_P (operands[0]))
7284 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7286 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7291 if (STACK_TOP_P (operands[0]))
7293 if (STACK_TOP_P (operands[1]))
7294 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7296 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7299 else if (STACK_TOP_P (operands[1]))
7302 p = "{\t%1, %0|r\t%0, %1}";
7304 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7310 p = "{r\t%2, %0|\t%0, %2}";
7312 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7325 /* Output code to initialize control word copies used by trunc?f?i and
7326 rounding patterns. CURRENT_MODE is set to the current control word,
7327 while NEW_MODE is set to the new control word.  */
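/* Background for the magic constants used below: in the 80387 control
   word, the rounding-control field is bits 10-11 (mask 0x0c00), with
   00 = round to nearest, 01 = round down, 10 = round up and
   11 = round toward zero (truncate); bit 5 (0x0020) masks the
   precision exception.  The 0x4/0x8/0xc values on the insv path set
   the same two bits through the control word's high byte.  */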
7330 emit_i387_cw_initialization (rtx current_mode, rtx new_mode, int mode)
7332 rtx reg = gen_reg_rtx (HImode);
7334 emit_insn (gen_x86_fnstcw_1 (current_mode));
7335 emit_move_insn (reg, current_mode);
7337 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7343 /* round down toward -oo */
7344 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
7348 /* round up toward +oo */
7349 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
7353 /* round toward zero (truncate) */
7354 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7357 case I387_CW_MASK_PM:
7358 /* mask precision exception for nearbyint() */
7359 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7371 /* round down toward -oo */
7372 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7373 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
7377 /* round up toward +oo */
7378 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7379 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
7383 /* round toward zero (truncate) */
7384 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
7387 case I387_CW_MASK_PM:
7388 /* mask precision exception for nearbyint() */
7389 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7397 emit_move_insn (new_mode, reg);
7400 /* Output code for INSN to convert a float to a signed int. OPERANDS
7401 are the insn operands. The output may be [HSD]Imode and the input
7402 operand may be [SDX]Fmode. */
7405 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
7407 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7408 int dimode_p = GET_MODE (operands[0]) == DImode;
7409 int round_mode = get_attr_i387_cw (insn);
7411 /* Jump through a hoop or two for DImode, since the hardware has no
7412 non-popping instruction. We used to do this a different way, but
7413 that was somewhat fragile and broke with post-reload splitters. */
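/* A sketch of a typical emitted sequence (illustrative only; DImode,
   stack top still live, rounding mode switched):
       fld     %st(0)     # duplicate st(0), since fistp will pop
       fldcw   %3         # install the truncating control word
       fistpll %0         # popping 64-bit store
       fldcw   %2         # restore the original control word  */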
7414 if ((dimode_p || fisttp) && !stack_top_dies)
7415 output_asm_insn ("fld\t%y1", operands);
7417 gcc_assert (STACK_TOP_P (operands[1]));
7418 gcc_assert (GET_CODE (operands[0]) == MEM);
7421 output_asm_insn ("fisttp%z0\t%0", operands);
7424 if (round_mode != I387_CW_ANY)
7425 output_asm_insn ("fldcw\t%3", operands);
7426 if (stack_top_dies || dimode_p)
7427 output_asm_insn ("fistp%z0\t%0", operands);
7429 output_asm_insn ("fist%z0\t%0", operands);
7430 if (round_mode != I387_CW_ANY)
7431 output_asm_insn ("fldcw\t%2", operands);
7437 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7438 should be used. UNORDERED_P is true when fucom should be used. */
7441 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
7444 rtx cmp_op0, cmp_op1;
7445 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
7449 cmp_op0 = operands[0];
7450 cmp_op1 = operands[1];
7454 cmp_op0 = operands[1];
7455 cmp_op1 = operands[2];
7460 if (GET_MODE (operands[0]) == SFmode)
7462 return "ucomiss\t{%1, %0|%0, %1}";
7464 return "comiss\t{%1, %0|%0, %1}";
7467 return "ucomisd\t{%1, %0|%0, %1}";
7469 return "comisd\t{%1, %0|%0, %1}";
7472 gcc_assert (STACK_TOP_P (cmp_op0));
7474 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7476 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
7480 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
7481 return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
7484 return "ftst\n\tfnstsw\t%0";
7487 if (STACK_REG_P (cmp_op1)
7489 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7490 && REGNO (cmp_op1) != FIRST_STACK_REG)
7492 /* If the top of the 387 stack dies, and the other operand is
7493 also a stack register that dies, then this must be a
7494 `fcompp' float compare.  */
7498 /* There is no double popping fcomi variant. Fortunately,
7499 eflags is immune from the fstp's cc clobbering. */
7501 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7503 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7504 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
7509 return "fucompp\n\tfnstsw\t%0";
7511 return "fcompp\n\tfnstsw\t%0";
7516 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
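/* Worked example (for illustration): an eflags-based unordered
   compare where the stack top dies gives
   mask = (1 << 3) | (0 << 2) | (1 << 1) | 1 = 11, selecting
   "fucomip\t{%y1, %0|%0, %y1}" from the table below.  */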
7518 static const char * const alt[16] =
7520 "fcom%z2\t%y2\n\tfnstsw\t%0",
7521 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7522 "fucom%z2\t%y2\n\tfnstsw\t%0",
7523 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7525 "ficom%z2\t%y2\n\tfnstsw\t%0",
7526 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7530 "fcomi\t{%y1, %0|%0, %y1}",
7531 "fcomip\t{%y1, %0|%0, %y1}",
7532 "fucomi\t{%y1, %0|%0, %y1}",
7533 "fucomip\t{%y1, %0|%0, %y1}",
7544 mask = eflags_p << 3;
7545 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
7546 mask |= unordered_p << 1;
7547 mask |= stack_top_dies;
7549 gcc_assert (mask < 16);
7558 ix86_output_addr_vec_elt (FILE *file, int value)
7560 const char *directive = ASM_LONG;
7564 directive = ASM_QUAD;
7566 gcc_assert (!TARGET_64BIT);
7569 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7573 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
7576 fprintf (file, "%s%s%d-%s%d\n",
7577 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7578 else if (HAVE_AS_GOTOFF_IN_DATA)
7579 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7581 else if (TARGET_MACHO)
7583 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
7584 machopic_output_function_base_name (file);
7585 fprintf(file, "\n");
7589 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7590 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7593 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7597 ix86_expand_clear (rtx dest)
7601 /* We play register width games, which are only valid after reload. */
7602 gcc_assert (reload_completed);
7604 /* Avoid HImode and its attendant prefix byte. */
7605 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7606 dest = gen_rtx_REG (SImode, REGNO (dest));
7608 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7610 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7611 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7613 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7614 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7620 /* X is an unchanging MEM. If it is a constant pool reference, return
7621 the constant pool rtx, else NULL. */
7624 maybe_get_pool_constant (rtx x)
7626 x = ix86_delegitimize_address (XEXP (x, 0));
7628 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7629 return get_pool_constant (x);
7635 ix86_expand_move (enum machine_mode mode, rtx operands[])
7637 int strict = (reload_in_progress || reload_completed);
7639 enum tls_model model;
7644 if (GET_CODE (op1) == SYMBOL_REF)
7646 model = SYMBOL_REF_TLS_MODEL (op1);
7649 op1 = legitimize_tls_address (op1, model, true);
7650 op1 = force_operand (op1, op0);
7655 else if (GET_CODE (op1) == CONST
7656 && GET_CODE (XEXP (op1, 0)) == PLUS
7657 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
7659 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
7662 rtx addend = XEXP (XEXP (op1, 0), 1);
7663 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
7664 op1 = force_operand (op1, NULL);
7665 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
7666 op0, 1, OPTAB_DIRECT);
7672 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7677 rtx temp = ((reload_in_progress
7678 || ((op0 && GET_CODE (op0) == REG)
7680 ? op0 : gen_reg_rtx (Pmode));
7681 op1 = machopic_indirect_data_reference (op1, temp);
7682 op1 = machopic_legitimize_pic_address (op1, mode,
7683 temp == op1 ? 0 : temp);
7685 else if (MACHOPIC_INDIRECT)
7686 op1 = machopic_indirect_data_reference (op1, 0);
7690 if (GET_CODE (op0) == MEM)
7691 op1 = force_reg (Pmode, op1);
7693 op1 = legitimize_address (op1, op1, Pmode);
7694 #endif /* TARGET_MACHO */
7698 if (GET_CODE (op0) == MEM
7699 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7700 || !push_operand (op0, mode))
7701 && GET_CODE (op1) == MEM)
7702 op1 = force_reg (mode, op1);
7704 if (push_operand (op0, mode)
7705 && ! general_no_elim_operand (op1, mode))
7706 op1 = copy_to_mode_reg (mode, op1);
7708 /* Force large constants in 64-bit compilation into registers
7709 to get them CSEd.  */
7710 if (TARGET_64BIT && mode == DImode
7711 && immediate_operand (op1, mode)
7712 && !x86_64_zext_immediate_operand (op1, VOIDmode)
7713 && !register_operand (op0, mode)
7714 && optimize && !reload_completed && !reload_in_progress)
7715 op1 = copy_to_mode_reg (mode, op1);
7717 if (FLOAT_MODE_P (mode))
7719 /* If we are loading a floating point constant to a register,
7720 force the value to memory now, since we'll get better code
7721 out of the back end.  */
7725 else if (GET_CODE (op1) == CONST_DOUBLE)
7727 op1 = validize_mem (force_const_mem (mode, op1));
7728 if (!register_operand (op0, mode))
7730 rtx temp = gen_reg_rtx (mode);
7731 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
7732 emit_move_insn (op0, temp);
7739 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7743 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
7745 rtx op0 = operands[0], op1 = operands[1];
7747 /* Force constants other than zero into memory. We do not know how
7748 the instructions used to build constants modify the upper 64 bits
7749 of the register; once we have that information, we may be able
7750 to handle some of them more efficiently. */
7751 if ((reload_in_progress | reload_completed) == 0
7752 && register_operand (op0, mode)
7753 && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
7754 op1 = validize_mem (force_const_mem (mode, op1));
7756 /* Make operand1 a register if it isn't already. */
7758 && !register_operand (op0, mode)
7759 && !register_operand (op1, mode))
7761 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
7765 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7768 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
7769 straight to ix86_expand_vector_move. */
7772 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
7781 /* If we're optimizing for size, movups is the smallest. */
7784 op0 = gen_lowpart (V4SFmode, op0);
7785 op1 = gen_lowpart (V4SFmode, op1);
7786 emit_insn (gen_sse_movups (op0, op1));
7790 /* ??? If we have typed data, then it would appear that using
7791 movdqu is the only way to get unaligned data loaded with
7793 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
7795 op0 = gen_lowpart (V16QImode, op0);
7796 op1 = gen_lowpart (V16QImode, op1);
7797 emit_insn (gen_sse2_movdqu (op0, op1));
7801 if (TARGET_SSE2 && mode == V2DFmode)
7805 /* When SSE registers are split into halves, we can avoid
7806 writing to the top half twice. */
7807 if (TARGET_SSE_SPLIT_REGS)
7809 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
7814 /* ??? Not sure about the best option for the Intel chips.
7815 The following would seem to satisfy; the register is
7816 entirely cleared, breaking the dependency chain. We
7817 then store to the upper half, with a dependency depth
7818 of one. A rumor has it that Intel recommends two movsd
7819 followed by an unpacklpd, but this is unconfirmed. And
7820 given that the dependency depth of the unpacklpd would
7821 still be one, I'm not sure why this would be better. */
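/* Roughly, the sequence built below (the exact insns depend on how
   the loadlpd/loadhpd patterns are matched):
       xorpd  %xmm0, %xmm0    # clear, breaking the dependency chain
       movlpd (mem), %xmm0    # low half; may fold with the clear
       movhpd 8(mem), %xmm0   # high half, dependency depth of one  */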
7822 zero = CONST0_RTX (V2DFmode);
7825 m = adjust_address (op1, DFmode, 0);
7826 emit_insn (gen_sse2_loadlpd (op0, zero, m));
7827 m = adjust_address (op1, DFmode, 8);
7828 emit_insn (gen_sse2_loadhpd (op0, op0, m));
7832 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
7833 emit_move_insn (op0, CONST0_RTX (mode));
7835 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
7837 if (mode != V4SFmode)
7838 op0 = gen_lowpart (V4SFmode, op0);
7839 m = adjust_address (op1, V2SFmode, 0);
7840 emit_insn (gen_sse_loadlps (op0, op0, m));
7841 m = adjust_address (op1, V2SFmode, 8);
7842 emit_insn (gen_sse_loadhps (op0, op0, m));
7845 else if (MEM_P (op0))
7847 /* If we're optimizing for size, movups is the smallest. */
7850 op0 = gen_lowpart (V4SFmode, op0);
7851 op1 = gen_lowpart (V4SFmode, op1);
7852 emit_insn (gen_sse_movups (op0, op1));
7856 /* ??? Similar to above, only less clear because of the
7857 "typeless stores" issue.  */
7858 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
7859 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
7861 op0 = gen_lowpart (V16QImode, op0);
7862 op1 = gen_lowpart (V16QImode, op1);
7863 emit_insn (gen_sse2_movdqu (op0, op1));
7867 if (TARGET_SSE2 && mode == V2DFmode)
7869 m = adjust_address (op0, DFmode, 0);
7870 emit_insn (gen_sse2_storelpd (m, op1));
7871 m = adjust_address (op0, DFmode, 8);
7872 emit_insn (gen_sse2_storehpd (m, op1));
7876 if (mode != V4SFmode)
7877 op1 = gen_lowpart (V4SFmode, op1);
7878 m = adjust_address (op0, V2SFmode, 0);
7879 emit_insn (gen_sse_storelps (m, op1));
7880 m = adjust_address (op0, V2SFmode, 8);
7881 emit_insn (gen_sse_storehps (m, op1));
7888 /* Expand a push in MODE. This is some mode for which we do not support
7889 proper push instructions, at least from the registers that we expect
7890 the value to live in. */
7893 ix86_expand_push (enum machine_mode mode, rtx x)
7897 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
7898 GEN_INT (-GET_MODE_SIZE (mode)),
7899 stack_pointer_rtx, 1, OPTAB_DIRECT);
7900 if (tmp != stack_pointer_rtx)
7901 emit_move_insn (stack_pointer_rtx, tmp);
7903 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
7904 emit_move_insn (tmp, x);
7907 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
7908 destination to use for the operation. If different from the true
7909 destination in operands[0], a copy operation will be required. */
7912 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
7915 int matching_memory;
7916 rtx src1, src2, dst;
7922 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7923 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7924 && (rtx_equal_p (dst, src2)
7925 || immediate_operand (src1, mode)))
7932 /* If the destination is memory, and we do not have matching source
7933 operands, do things in registers. */
7934 matching_memory = 0;
7935 if (GET_CODE (dst) == MEM)
7937 if (rtx_equal_p (dst, src1))
7938 matching_memory = 1;
7939 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7940 && rtx_equal_p (dst, src2))
7941 matching_memory = 2;
7943 dst = gen_reg_rtx (mode);
7946 /* Both source operands cannot be in memory. */
7947 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7949 if (matching_memory != 2)
7950 src2 = force_reg (mode, src2);
7952 src1 = force_reg (mode, src1);
7955 /* If the operation is not commutative, source 1 cannot be a constant
7956 or non-matching memory.  */
7957 if ((CONSTANT_P (src1)
7958 || (!matching_memory && GET_CODE (src1) == MEM))
7959 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7960 src1 = force_reg (mode, src1);
7962 /* If optimizing, copy to regs to improve CSE */
7963 if (optimize && ! no_new_pseudos)
7965 if (GET_CODE (dst) == MEM)
7966 dst = gen_reg_rtx (mode);
7967 if (GET_CODE (src1) == MEM)
7968 src1 = force_reg (mode, src1);
7969 if (GET_CODE (src2) == MEM)
7970 src2 = force_reg (mode, src2);
7973 operands[1] = src1;
7974 operands[2] = src2;
7978 /* Similarly, but assume that the destination has already been
7982 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
7983 enum machine_mode mode, rtx operands[])
7985 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
7986 gcc_assert (dst == operands[0]);
7989 /* Attempt to expand a binary operator. Make the expansion closer to the
7990 actual machine than just general_operand, which will allow 3 separate
7991 memory references (one output, two input) in a single insn. */
7994 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
7997 rtx src1, src2, dst, op, clob;
7999 dst = ix86_fixup_binary_operands (code, mode, operands);
8003 /* Emit the instruction. */
8005 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8006 if (reload_in_progress)
8008 /* Reload doesn't know about the flags register, and doesn't know that
8009 it doesn't want to clobber it. We can only do this with PLUS. */
8010 gcc_assert (code == PLUS);
8015 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8016 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8019 /* Fix up the destination if needed. */
8020 if (dst != operands[0])
8021 emit_move_insn (operands[0], dst);
8024 /* Return TRUE or FALSE depending on whether the binary operator meets the
8025 appropriate constraints. */
8028 ix86_binary_operator_ok (enum rtx_code code,
8029 enum machine_mode mode ATTRIBUTE_UNUSED,
8032 /* Both source operands cannot be in memory. */
8033 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8035 /* If the operation is not commutative, source 1 cannot be a constant.  */
8036 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8038 /* If the destination is memory, we must have a matching source operand. */
8039 if (GET_CODE (operands[0]) == MEM
8040 && ! (rtx_equal_p (operands[0], operands[1])
8041 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8042 && rtx_equal_p (operands[0], operands[2]))))
8044 /* If the operation is not commutative and source 1 is memory, we must
8045 have a matching destination.  */
8046 if (GET_CODE (operands[1]) == MEM
8047 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
8048 && ! rtx_equal_p (operands[0], operands[1]))
8053 /* Attempt to expand a unary operator. Make the expansion closer to the
8054 actual machine than just general_operand, which will allow 2 separate
8055 memory references (one output, one input) in a single insn. */
8058 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8061 int matching_memory;
8062 rtx src, dst, op, clob;
8067 /* If the destination is memory, and we do not have matching source
8068 operands, do things in registers. */
8069 matching_memory = 0;
8072 if (rtx_equal_p (dst, src))
8073 matching_memory = 1;
8075 dst = gen_reg_rtx (mode);
8078 /* When source operand is memory, destination must match. */
8079 if (MEM_P (src) && !matching_memory)
8080 src = force_reg (mode, src);
8082 /* If optimizing, copy to regs to improve CSE. */
8083 if (optimize && ! no_new_pseudos)
8085 if (GET_CODE (dst) == MEM)
8086 dst = gen_reg_rtx (mode);
8087 if (GET_CODE (src) == MEM)
8088 src = force_reg (mode, src);
8091 /* Emit the instruction. */
8093 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8094 if (reload_in_progress || code == NOT)
8096 /* Reload doesn't know about the flags register, and doesn't know that
8097 it doesn't want to clobber it. */
8098 gcc_assert (code == NOT);
8103 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8104 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8107 /* Fix up the destination if needed. */
8108 if (dst != operands[0])
8109 emit_move_insn (operands[0], dst);
8112 /* Return TRUE or FALSE depending on whether the unary operator meets the
8113 appropriate constraints. */
8116 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8117 enum machine_mode mode ATTRIBUTE_UNUSED,
8118 rtx operands[2] ATTRIBUTE_UNUSED)
8120 /* If one of operands is memory, source and destination must match. */
8121 if ((GET_CODE (operands[0]) == MEM
8122 || GET_CODE (operands[1]) == MEM)
8123 && ! rtx_equal_p (operands[0], operands[1]))
8128 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
8129 Create a mask for the sign bit in MODE for an SSE register. If VECT is
8130 true, then replicate the mask for all elements of the vector register.
8131 If INVERT is true, then create a mask excluding the sign bit. */
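/* For example (illustrative): for SFmode the mask element is
   0x80000000 (just the sign bit), or 0x7fffffff when INVERT is true;
   with VECT set it is replicated into all four V4SF lanes, otherwise
   the remaining lanes are zero, matching the gen_rtvec calls below.  */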
8134 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
8136 enum machine_mode vec_mode;
8137 HOST_WIDE_INT hi, lo;
8142 /* Find the sign bit, sign extended to 2*HWI. */
8144 lo = 0x80000000, hi = lo < 0;
8145 else if (HOST_BITS_PER_WIDE_INT >= 64)
8146 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
8148 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
8153 /* Force this value into the low part of a fp vector constant. */
8154 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
8155 mask = gen_lowpart (mode, mask);
8160 v = gen_rtvec (4, mask, mask, mask, mask);
8162 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
8163 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
8164 vec_mode = V4SFmode;
8169 v = gen_rtvec (2, mask, mask);
8171 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
8172 vec_mode = V2DFmode;
8175 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
8178 /* Generate code for floating point ABS or NEG. */
8181 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
8184 rtx mask, set, use, clob, dst, src;
8185 bool matching_memory;
8186 bool use_sse = false;
8187 bool vector_mode = VECTOR_MODE_P (mode);
8188 enum machine_mode elt_mode = mode;
8192 elt_mode = GET_MODE_INNER (mode);
8195 else if (TARGET_SSE_MATH)
8196 use_sse = SSE_FLOAT_MODE_P (mode);
8198 /* NEG and ABS performed with SSE use bitwise mask operations.
8199 Create the appropriate mask now. */
8201 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
8204 /* When not using SSE, we don't use the mask, but prefer to keep the
8205 same general form of the insn pattern to reduce duplication when
8206 it comes time to split. */
8213 /* If the destination is memory, and we don't have matching source
8214 operands, do things in registers. */
8215 matching_memory = false;
8218 if (rtx_equal_p (dst, src) && (!optimize || no_new_pseudos))
8219 matching_memory = true;
8221 dst = gen_reg_rtx (mode);
8223 if (MEM_P (src) && !matching_memory)
8224 src = force_reg (mode, src);
8228 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
8229 set = gen_rtx_SET (VOIDmode, dst, set);
8234 set = gen_rtx_fmt_e (code, mode, src);
8235 set = gen_rtx_SET (VOIDmode, dst, set);
8236 use = gen_rtx_USE (VOIDmode, mask);
8237 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8238 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
8241 if (dst != operands[0])
8242 emit_move_insn (operands[0], dst);
8245 /* Expand a copysign operation. Special case operand 0 being a constant. */
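/* The underlying identity (for reference): copysign (x, y) computes
   (x & ~signmask) | (y & signmask) on the bit patterns.  The const
   and var splitters below implement this with AND, NOT and IOR on
   the vector forms of the operands; in the constant case,
   x & ~signmask is precomputed by taking the absolute value here.  */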
8248 ix86_expand_copysign (rtx operands[])
8250 enum machine_mode mode, vmode;
8251 rtx dest, op0, op1, mask, nmask;
8257 mode = GET_MODE (dest);
8258 vmode = mode == SFmode ? V4SFmode : V2DFmode;
8260 if (GET_CODE (op0) == CONST_DOUBLE)
8264 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
8265 op0 = simplify_unary_operation (ABS, mode, op0, mode);
8267 if (op0 == CONST0_RTX (mode))
8268 op0 = CONST0_RTX (vmode);
8272 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
8273 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
8275 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
8276 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
8279 mask = ix86_build_signbit_mask (mode, 0, 0);
8282 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
8284 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
8288 nmask = ix86_build_signbit_mask (mode, 0, 1);
8289 mask = ix86_build_signbit_mask (mode, 0, 0);
8292 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
8294 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
8298 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
8299 be a constant, and so has already been expanded into a vector constant. */
8302 ix86_split_copysign_const (rtx operands[])
8304 enum machine_mode mode, vmode;
8305 rtx dest, op0, op1, mask, x;
8312 mode = GET_MODE (dest);
8313 vmode = GET_MODE (mask);
8315 dest = simplify_gen_subreg (vmode, dest, mode, 0);
8316 x = gen_rtx_AND (vmode, dest, mask);
8317 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8319 if (op0 != CONST0_RTX (vmode))
8321 x = gen_rtx_IOR (vmode, dest, op0);
8322 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8326 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
8327 so we have to do two masks. */
8330 ix86_split_copysign_var (rtx operands[])
8332 enum machine_mode mode, vmode;
8333 rtx dest, scratch, op0, op1, mask, nmask, x;
8336 scratch = operands[1];
8339 nmask = operands[4];
8342 mode = GET_MODE (dest);
8343 vmode = GET_MODE (mask);
8345 if (rtx_equal_p (op0, op1))
8347 /* Shouldn't happen often (it's useless, obviously), but when it does
8348 we'd generate incorrect code if we continue below. */
8349 emit_move_insn (dest, op0);
8353 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
8355 gcc_assert (REGNO (op1) == REGNO (scratch));
8357 x = gen_rtx_AND (vmode, scratch, mask);
8358 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
8361 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
8362 x = gen_rtx_NOT (vmode, dest);
8363 x = gen_rtx_AND (vmode, x, op0);
8364 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8368 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
8370 x = gen_rtx_AND (vmode, scratch, mask);
8372 else /* alternative 2,4 */
8374 gcc_assert (REGNO (mask) == REGNO (scratch));
8375 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
8376 x = gen_rtx_AND (vmode, scratch, op1);
8378 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
8380 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
8382 dest = simplify_gen_subreg (vmode, op0, mode, 0);
8383 x = gen_rtx_AND (vmode, dest, nmask);
8385 else /* alternative 3,4 */
8387 gcc_assert (REGNO (nmask) == REGNO (dest));
8389 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
8390 x = gen_rtx_AND (vmode, dest, op0);
8392 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8395 x = gen_rtx_IOR (vmode, dest, scratch);
8396 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8399 /* Return TRUE or FALSE depending on whether the first SET in INSN
8400 has source and destination with matching CC modes, and that the
8401 CC mode is at least as constrained as REQ_MODE. */
8404 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8407 enum machine_mode set_mode;
8409 set = PATTERN (insn);
8410 if (GET_CODE (set) == PARALLEL)
8411 set = XVECEXP (set, 0, 0);
8412 gcc_assert (GET_CODE (set) == SET);
8413 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
8415 set_mode = GET_MODE (SET_DEST (set));
8419 if (req_mode != CCNOmode
8420 && (req_mode != CCmode
8421 || XEXP (SET_SRC (set), 1) != const0_rtx))
8425 if (req_mode == CCGCmode)
8429 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8433 if (req_mode == CCZmode)
8443 return (GET_MODE (SET_SRC (set)) == set_mode);
8446 /* Generate insn patterns to do an integer compare of OPERANDS. */
8449 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8451 enum machine_mode cmpmode;
8454 cmpmode = SELECT_CC_MODE (code, op0, op1);
8455 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8457 /* This is very simple, but making the interface the same as in the
8458 FP case makes the rest of the code easier. */
8459 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8460 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8462 /* Return the test that should be put into the flags user, i.e.
8463 the bcc, scc, or cmov instruction. */
8464 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8467 /* Figure out whether to use ordered or unordered fp comparisons.
8468 Return the appropriate mode to use. */
8471 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8473 /* ??? In order to make all comparisons reversible, we do all comparisons
8474 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8475 all forms of trapping and nontrapping comparisons, we can make inequality
8476 comparisons trapping again, since it results in better code when using
8477 FCOM based compares. */
8478 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8482 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8484 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8485 return ix86_fp_compare_mode (code);
8488 /* Only zero flag is needed. */
8490 case NE: /* ZF!=0 */
8492 /* Codes needing carry flag. */
8493 case GEU: /* CF=0 */
8494 case GTU: /* CF=0 & ZF=0 */
8495 case LTU: /* CF=1 */
8496 case LEU: /* CF=1 | ZF=1 */
8498 /* Codes possibly doable only with sign flag when
8499 comparing against zero. */
8500 case GE: /* SF=OF or SF=0 */
8501 case LT: /* SF<>OF or SF=1 */
8502 if (op1 == const0_rtx)
8505 /* For other cases Carry flag is not required. */
8507 /* Codes doable only with the sign flag when comparing
8508 against zero, but for which we miss the jump instruction,
8509 so we need to use relational tests against overflow,
8510 which thus needs to be zero.  */
8511 case GT: /* ZF=0 & SF=OF */
8512 case LE: /* ZF=1 | SF<>OF */
8513 if (op1 == const0_rtx)
8517 /* strcmp patterns do (use flags), and combine may ask us for a proper mode.  */
8526 /* Return the fixed registers used for condition codes. */
8529 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8536 /* If two condition code modes are compatible, return a condition code
8537 mode which is compatible with both. Otherwise, return
8540 static enum machine_mode
8541 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8546 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8549 if ((m1 == CCGCmode && m2 == CCGOCmode)
8550 || (m1 == CCGOCmode && m2 == CCGCmode))
8578 /* These are only compatible with themselves, which we already
8584 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8587 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8589 enum rtx_code swapped_code = swap_condition (code);
8590 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8591 || (ix86_fp_comparison_cost (swapped_code)
8592 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8595 /* Swap, force into registers, or otherwise massage the two operands
8596 to a fp comparison. The operands are updated in place; the new
8597 comparison code is returned. */
8599 static enum rtx_code
8600 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8602 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8603 rtx op0 = *pop0, op1 = *pop1;
8604 enum machine_mode op_mode = GET_MODE (op0);
8605 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
8607 /* All of the unordered compare instructions only work on registers.
8608 The same is true of the fcomi compare instructions. The same is
8609 true of the XFmode compare instructions if not comparing with
8610 zero (ftst insn is used in this case). */
8613 && (fpcmp_mode == CCFPUmode
8614 || (op_mode == XFmode
8615 && ! (standard_80387_constant_p (op0) == 1
8616 || standard_80387_constant_p (op1) == 1))
8617 || ix86_use_fcomi_compare (code)))
8619 op0 = force_reg (op_mode, op0);
8620 op1 = force_reg (op_mode, op1);
8624 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8625 things around if they appear profitable, otherwise force op0
8628 if (standard_80387_constant_p (op0) == 0
8629 || (GET_CODE (op0) == MEM
8630 && ! (standard_80387_constant_p (op1) == 0
8631 || GET_CODE (op1) == MEM)))
8634 tmp = op0, op0 = op1, op1 = tmp;
8635 code = swap_condition (code);
8638 if (GET_CODE (op0) != REG)
8639 op0 = force_reg (op_mode, op0);
8641 if (CONSTANT_P (op1))
8643 int tmp = standard_80387_constant_p (op1);
8645 op1 = validize_mem (force_const_mem (op_mode, op1));
8649 op1 = force_reg (op_mode, op1);
8652 op1 = force_reg (op_mode, op1);
8656 /* Try to rearrange the comparison to make it cheaper. */
8657 if (ix86_fp_comparison_cost (code)
8658 > ix86_fp_comparison_cost (swap_condition (code))
8659 && (GET_CODE (op1) == REG || !no_new_pseudos))
8662 tmp = op0, op0 = op1, op1 = tmp;
8663 code = swap_condition (code);
8664 if (GET_CODE (op0) != REG)
8665 op0 = force_reg (op_mode, op0);
8673 /* Convert comparison codes we use to represent FP comparison to integer
8674 code that will result in proper branch. Return UNKNOWN if no such code
8678 ix86_fp_compare_code_to_integer (enum rtx_code code)
8707 /* Split comparison code CODE into comparisons we can do using branch
8708 instructions. BYPASS_CODE is comparison code for branch that will
8709 branch around FIRST_CODE and SECOND_CODE. If one of the branches
8710 is not required, its value is set to UNKNOWN.
8711 We never require more than two branches. */
8714 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8715 enum rtx_code *first_code,
8716 enum rtx_code *second_code)
8719 *bypass_code = UNKNOWN;
8720 *second_code = UNKNOWN;
8722 /* The fcomi comparison sets flags as follows:
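   (flag table reconstructed from the standard fcomi semantics:)
      cmp    ZF PF CF
      >      0  0  0
      <      0  0  1
      =      1  0  0
      un     1  1  1  */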
8732 case GT: /* GTU - CF=0 & ZF=0 */
8733 case GE: /* GEU - CF=0 */
8734 case ORDERED: /* PF=0 */
8735 case UNORDERED: /* PF=1 */
8736 case UNEQ: /* EQ - ZF=1 */
8737 case UNLT: /* LTU - CF=1 */
8738 case UNLE: /* LEU - CF=1 | ZF=1 */
8739 case LTGT: /* EQ - ZF=0 */
8741 case LT: /* LTU - CF=1 - fails on unordered */
8743 *bypass_code = UNORDERED;
8745 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8747 *bypass_code = UNORDERED;
8749 case EQ: /* EQ - ZF=1 - fails on unordered */
8751 *bypass_code = UNORDERED;
8753 case NE: /* NE - ZF=0 - fails on unordered */
8755 *second_code = UNORDERED;
8757 case UNGE: /* GEU - CF=0 - fails on unordered */
8759 *second_code = UNORDERED;
8761 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8763 *second_code = UNORDERED;
8768 if (!TARGET_IEEE_FP)
8770 *second_code = UNKNOWN;
8771 *bypass_code = UNKNOWN;
8775 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8776 All following functions use the number of instructions as a cost metric.
8777 In the future this should be tweaked to compute bytes for optimize_size and
8778 take into account the performance of various instructions on various CPUs.  */
8780 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8782 if (!TARGET_IEEE_FP)
8784 /* The cost of code output by ix86_expand_fp_compare. */
8812 /* Return cost of comparison done using fcomi operation.
8813 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8815 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
8817 enum rtx_code bypass_code, first_code, second_code;
8818 /* Return an arbitrarily high cost when the instruction is not supported -
8819 this prevents gcc from using it.  */
8822 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8823 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
8826 /* Return cost of comparison done using sahf operation.
8827 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8829 ix86_fp_comparison_sahf_cost (enum rtx_code code)
8831 enum rtx_code bypass_code, first_code, second_code;
8832 /* Return an arbitrarily high cost when the instruction is not preferred -
8833 this prevents gcc from using it.  */
8834 if (!TARGET_USE_SAHF && !optimize_size)
8836 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8837 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
8840 /* Compute cost of the comparison done using any method.
8841 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8843 ix86_fp_comparison_cost (enum rtx_code code)
8845 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8848 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8849 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8851 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8852 if (min > sahf_cost)
8854 if (min > fcomi_cost)
8859 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8862 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8863 rtx *second_test, rtx *bypass_test)
8865 enum machine_mode fpcmp_mode, intcmp_mode;
8867 int cost = ix86_fp_comparison_cost (code);
8868 enum rtx_code bypass_code, first_code, second_code;
8870 fpcmp_mode = ix86_fp_compare_mode (code);
8871 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8874 *second_test = NULL_RTX;
8876 *bypass_test = NULL_RTX;
8878 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8880 /* Do fcomi/sahf based test when profitable. */
8881 if ((bypass_code == UNKNOWN || bypass_test)
8882 && (second_code == UNKNOWN || second_test)
8883 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8887 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8888 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8894 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8895 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8897 scratch = gen_reg_rtx (HImode);
8898 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8899 emit_insn (gen_x86_sahf_1 (scratch));
8902 /* The FP codes work out to act like unsigned. */
8903 intcmp_mode = fpcmp_mode;
8905 if (bypass_code != UNKNOWN)
8906 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8907 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8909 if (second_code != UNKNOWN)
8910 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8911 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8916 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8917 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8918 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8920 scratch = gen_reg_rtx (HImode);
8921 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8923 /* In the unordered case, we have to check C2 for NaN's, which
8924 doesn't happen to work out to anything nice combination-wise.
8925 So do some bit twiddling on the value we've got in AH to come
8926 up with an appropriate set of condition codes. */
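/* Key to the constants used below (fnstsw leaves the FP status word
   in %ax, so C0, C2 and C3 land in bits 0, 2 and 6 of %ah):
   0x01 = C0 ("below"), 0x04 = C2 (unordered), 0x40 = C3 (equal),
   0x45 = C3|C2|C0, 0x44 = C3|C2, 0x05 = C2|C0.  */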
8928 intcmp_mode = CCNOmode;
8933 if (code == GT || !TARGET_IEEE_FP)
8935 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8940 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8941 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8942 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8943 intcmp_mode = CCmode;
8949 if (code == LT && TARGET_IEEE_FP)
8951 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8952 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8953 intcmp_mode = CCmode;
8958 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8964 if (code == GE || !TARGET_IEEE_FP)
8966 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8971 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8972 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8979 if (code == LE && TARGET_IEEE_FP)
8981 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8982 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8983 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8984 intcmp_mode = CCmode;
8989 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8995 if (code == EQ && TARGET_IEEE_FP)
8997 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8998 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8999 intcmp_mode = CCmode;
9004 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9011 if (code == NE && TARGET_IEEE_FP)
9013 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9014 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9020 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9026 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9030 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9039 /* Return the test that should be put into the flags user, i.e.
9040 the bcc, scc, or cmov instruction. */
9041 return gen_rtx_fmt_ee (code, VOIDmode,
9042 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9047 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9050 op0 = ix86_compare_op0;
9051 op1 = ix86_compare_op1;
9054 *second_test = NULL_RTX;
9056 *bypass_test = NULL_RTX;
9058 if (ix86_compare_emitted)
9060 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
9061 ix86_compare_emitted = NULL_RTX;
9063 else if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9064 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9065 second_test, bypass_test);
9067 ret = ix86_expand_int_compare (code, op0, op1);
9072 /* Return true if the CODE will result in a nontrivial jump sequence. */
9074 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9076 enum rtx_code bypass_code, first_code, second_code;
9079 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9080 return bypass_code != UNKNOWN || second_code != UNKNOWN;
9084 ix86_expand_branch (enum rtx_code code, rtx label)
9088 switch (GET_MODE (ix86_compare_op0))
9094 tmp = ix86_expand_compare (code, NULL, NULL);
9095 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9096 gen_rtx_LABEL_REF (VOIDmode, label),
9098 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9107 enum rtx_code bypass_code, first_code, second_code;
9109 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9112 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9114 /* Check whether we will use the natural sequence with one jump. If
9115 so, we can expand the jump early. Otherwise delay expansion by
9116 creating a compound insn so as not to confuse the optimizers. */
9117 if (bypass_code == UNKNOWN && second_code == UNKNOWN
9120 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9121 gen_rtx_LABEL_REF (VOIDmode, label),
9122 pc_rtx, NULL_RTX, NULL_RTX);
9126 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9127 ix86_compare_op0, ix86_compare_op1);
9128 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9129 gen_rtx_LABEL_REF (VOIDmode, label),
9131 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9133 use_fcomi = ix86_use_fcomi_compare (code);
9134 vec = rtvec_alloc (3 + !use_fcomi);
9135 RTVEC_ELT (vec, 0) = tmp;
9137 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9139 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9142 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9144 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9152 /* Expand DImode branch into multiple compare+branch. */
9154 rtx lo[2], hi[2], label2;
9155 enum rtx_code code1, code2, code3;
9157 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9159 tmp = ix86_compare_op0;
9160 ix86_compare_op0 = ix86_compare_op1;
9161 ix86_compare_op1 = tmp;
9162 code = swap_condition (code);
9164 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9165 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9167 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9168 avoid two branches. This costs one extra insn, so disable when
9169 optimizing for size. */
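/* For illustration (register names are illustrative, not from the
   source): on a 32-bit target the expansion below amounts to roughly

	xorl  hi1, hi0
	xorl  lo1, lo0
	orl   lo0, hi0
	je/jne label

   trading one extra ALU insn for one fewer conditional branch.  */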
9171 if ((code == EQ || code == NE)
9173 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9178 if (hi[1] != const0_rtx)
9179 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9180 NULL_RTX, 0, OPTAB_WIDEN);
9183 if (lo[1] != const0_rtx)
9184 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9185 NULL_RTX, 0, OPTAB_WIDEN);
9187 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9188 NULL_RTX, 0, OPTAB_WIDEN);
9190 ix86_compare_op0 = tmp;
9191 ix86_compare_op1 = const0_rtx;
9192 ix86_expand_branch (code, label);
9196 /* Otherwise, if we are doing a less-than or greater-or-equal-than
9197 comparison, op1 is a constant and the low word is zero, then we can
9198 just examine the high word. */
9200 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9203 case LT: case LTU: case GE: case GEU:
9204 ix86_compare_op0 = hi[0];
9205 ix86_compare_op1 = hi[1];
9206 ix86_expand_branch (code, label);
9212 /* Otherwise, we need two or three jumps. */
9214 label2 = gen_label_rtx ();
9217 code2 = swap_condition (code);
9218 code3 = unsigned_condition (code);
9222 case LT: case GT: case LTU: case GTU:
9225 case LE: code1 = LT; code2 = GT; break;
9226 case GE: code1 = GT; code2 = LT; break;
9227 case LEU: code1 = LTU; code2 = GTU; break;
9228 case GEU: code1 = GTU; code2 = LTU; break;
9230 case EQ: code1 = UNKNOWN; code2 = NE; break;
9231 case NE: code2 = UNKNOWN; break;
9239 * if (hi(a) < hi(b)) goto true;
9240 * if (hi(a) > hi(b)) goto false;
9241 * if (lo(a) < lo(b)) goto true;
9245 ix86_compare_op0 = hi[0];
9246 ix86_compare_op1 = hi[1];
9248 if (code1 != UNKNOWN)
9249 ix86_expand_branch (code1, label);
9250 if (code2 != UNKNOWN)
9251 ix86_expand_branch (code2, label2);
9253 ix86_compare_op0 = lo[0];
9254 ix86_compare_op1 = lo[1];
9255 ix86_expand_branch (code3, label);
9257 if (code2 != UNKNOWN)
9258 emit_label (label2);
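/* Worked example (a sketch; register use and exact jump mnemonics are
   illustrative): for a signed DImode a < b the sequence above comes out as

	cmpl  hi(b), hi(a)
	jl    label		; code1: high words decide true
	jg    label2		; code2: definitely false
	cmpl  lo(b), lo(a)
	jb    label		; code3: low words compare unsigned
   label2:

   matching the hi/lo pseudo code in the comment above.  */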
9267 /* Split branch based on floating point condition. */
9269 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9270 rtx target1, rtx target2, rtx tmp, rtx pushed)
9273 rtx label = NULL_RTX;
9275 int bypass_probability = -1, second_probability = -1, probability = -1;
9278 if (target2 != pc_rtx)
9281 code = reverse_condition_maybe_unordered (code);
9286 condition = ix86_expand_fp_compare (code, op1, op2,
9287 tmp, &second, &bypass);
9289 /* Remove pushed operand from stack. */
9291 ix86_free_from_memory (GET_MODE (pushed));
9293 if (split_branch_probability >= 0)
9295 /* Distribute the probabilities across the jumps.
9296 Assume that BYPASS and SECOND always test for UNORDERED. */
9298 probability = split_branch_probability;
9300 /* A value of 1 is low enough that the probability does not need
9301 to be updated. Later we may run some experiments and see
9302 if unordered values are more frequent in practice. */
9304 bypass_probability = 1;
9306 second_probability = 1;
9308 if (bypass != NULL_RTX)
9310 label = gen_label_rtx ();
9311 i = emit_jump_insn (gen_rtx_SET
9313 gen_rtx_IF_THEN_ELSE (VOIDmode,
9315 gen_rtx_LABEL_REF (VOIDmode,
9318 if (bypass_probability >= 0)
9320 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9321 GEN_INT (bypass_probability),
9324 i = emit_jump_insn (gen_rtx_SET
9326 gen_rtx_IF_THEN_ELSE (VOIDmode,
9327 condition, target1, target2)));
9328 if (probability >= 0)
9330 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9331 GEN_INT (probability),
9333 if (second != NULL_RTX)
9335 i = emit_jump_insn (gen_rtx_SET
9337 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9339 if (second_probability >= 0)
9341 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9342 GEN_INT (second_probability),
9345 if (label != NULL_RTX)
9350 ix86_expand_setcc (enum rtx_code code, rtx dest)
9352 rtx ret, tmp, tmpreg, equiv;
9353 rtx second_test, bypass_test;
9355 if (GET_MODE (ix86_compare_op0) == DImode
9357 return 0; /* FAIL */
9359 gcc_assert (GET_MODE (dest) == QImode);
9361 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9362 PUT_MODE (ret, QImode);
9367 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9368 if (bypass_test || second_test)
9370 rtx test = second_test;
9372 rtx tmp2 = gen_reg_rtx (QImode);
9375 gcc_assert (!second_test);
9378 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9380 PUT_MODE (test, QImode);
9381 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9384 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9386 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9389 /* Attach a REG_EQUAL note describing the comparison result. */
9390 if (ix86_compare_op0 && ix86_compare_op1)
9392 equiv = simplify_gen_relational (code, QImode,
9393 GET_MODE (ix86_compare_op0),
9394 ix86_compare_op0, ix86_compare_op1);
9395 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9398 return 1; /* DONE */
9401 /* Expand a comparison setting or clearing the carry flag. Return true
9402 when successful, and set *pop to the resulting comparison. */
9404 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9406 enum machine_mode mode =
9407 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9409 /* Do not handle DImode compares that go through the special path. Also we
9410 can't deal with FP compares yet; it would be possible to add this. */
9411 if ((mode == DImode && !TARGET_64BIT))
9413 if (FLOAT_MODE_P (mode))
9415 rtx second_test = NULL, bypass_test = NULL;
9416 rtx compare_op, compare_seq;
9418 /* Shortcut: the following common codes never translate into carry flag compares. */
9419 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9420 || code == ORDERED || code == UNORDERED)
9423 /* These comparisons require the zero flag; swap the operands so they won't. */
9424 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9430 code = swap_condition (code);
9433 /* Try to expand the comparison and verify that we end up with a carry flag
9434 based comparison. This fails to be true only when we decide to expand the
9435 comparison using arithmetic, which is not a common scenario. */
9437 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9438 &second_test, &bypass_test);
9439 compare_seq = get_insns ();
9442 if (second_test || bypass_test)
9444 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9445 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9446 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9448 code = GET_CODE (compare_op);
9449 if (code != LTU && code != GEU)
9451 emit_insn (compare_seq);
9455 if (!INTEGRAL_MODE_P (mode))
9463 /* Convert a==0 into (unsigned)a<1. */
9466 if (op1 != const0_rtx)
9469 code = (code == EQ ? LTU : GEU);
9472 /* Convert a>b into b<a or a>=b+1. */
9475 if (GET_CODE (op1) == CONST_INT)
9477 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9478 /* Bail out on overflow. We could still swap the operands, but that
9479 would force the constant to be loaded into a register. */
9480 if (op1 == const0_rtx
9481 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9483 code = (code == GTU ? GEU : LTU);
9490 code = (code == GTU ? LTU : GEU);
9494 /* Convert a>=0 into (unsigned)a<0x80000000. */
9497 if (mode == DImode || op1 != const0_rtx)
9499 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9500 code = (code == LT ? GEU : LTU);
9504 if (mode == DImode || op1 != constm1_rtx)
9506 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9507 code = (code == LE ? GEU : LTU);
9513 /* Swapping the operands may cause a constant to appear as the first operand. */
9514 if (!nonimmediate_operand (op0, VOIDmode))
9518 op0 = force_reg (mode, op0);
9520 ix86_compare_op0 = op0;
9521 ix86_compare_op1 = op1;
9522 *pop = ix86_expand_compare (code, NULL, NULL);
9523 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
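/* To summarize the conversions above: common comparisons are reduced to
   plain carry-flag tests, e.g. a == 0 becomes (unsigned) a < 1, a > b
   becomes b < a or a >= b + 1, and a >= 0 becomes
   (unsigned) a < 0x80000000, so that callers such as
   ix86_expand_int_addcc below can consume the result with a single
   adc or sbb.  */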
9528 ix86_expand_int_movcc (rtx operands[])
9530 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9531 rtx compare_seq, compare_op;
9532 rtx second_test, bypass_test;
9533 enum machine_mode mode = GET_MODE (operands[0]);
9534 bool sign_bit_compare_p = false;
9537 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9538 compare_seq = get_insns ();
9541 compare_code = GET_CODE (compare_op);
9543 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9544 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9545 sign_bit_compare_p = true;
9547 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9548 HImode insns, we'd be swallowed in word prefix ops. */
9550 if ((mode != HImode || TARGET_FAST_PREFIX)
9551 && (mode != DImode || TARGET_64BIT)
9552 && GET_CODE (operands[2]) == CONST_INT
9553 && GET_CODE (operands[3]) == CONST_INT)
9555 rtx out = operands[0];
9556 HOST_WIDE_INT ct = INTVAL (operands[2]);
9557 HOST_WIDE_INT cf = INTVAL (operands[3]);
9561 /* Sign bit compares are better done using shifts than by using sbb. */
9563 if (sign_bit_compare_p
9564 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9565 ix86_compare_op1, &compare_op))
9567 /* Detect overlap between destination and compare sources. */
9570 if (!sign_bit_compare_p)
9574 compare_code = GET_CODE (compare_op);
9576 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9577 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9580 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9583 /* To simplify the rest of the code, restrict to the GEU case. */
9584 if (compare_code == LTU)
9586 HOST_WIDE_INT tmp = ct;
9589 compare_code = reverse_condition (compare_code);
9590 code = reverse_condition (code);
9595 PUT_CODE (compare_op,
9596 reverse_condition_maybe_unordered
9597 (GET_CODE (compare_op)));
9599 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9603 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9604 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9605 tmp = gen_reg_rtx (mode);
9608 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9610 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9614 if (code == GT || code == GE)
9615 code = reverse_condition (code);
9618 HOST_WIDE_INT tmp = ct;
9623 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9624 ix86_compare_op1, VOIDmode, 0, -1);
9637 tmp = expand_simple_binop (mode, PLUS,
9639 copy_rtx (tmp), 1, OPTAB_DIRECT);
9650 tmp = expand_simple_binop (mode, IOR,
9652 copy_rtx (tmp), 1, OPTAB_DIRECT);
9654 else if (diff == -1 && ct)
9664 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9666 tmp = expand_simple_binop (mode, PLUS,
9667 copy_rtx (tmp), GEN_INT (cf),
9668 copy_rtx (tmp), 1, OPTAB_DIRECT);
9676 * andl cf - ct, dest
9686 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9689 tmp = expand_simple_binop (mode, AND,
9691 gen_int_mode (cf - ct, mode),
9692 copy_rtx (tmp), 1, OPTAB_DIRECT);
9694 tmp = expand_simple_binop (mode, PLUS,
9695 copy_rtx (tmp), GEN_INT (ct),
9696 copy_rtx (tmp), 1, OPTAB_DIRECT);
9699 if (!rtx_equal_p (tmp, out))
9700 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9702 return 1; /* DONE */
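/* For illustration (a sketch; registers and operand order are
   illustrative): once the comparison sets the carry flag, the constant
   select built above is the classic branch-free idiom

	cmpl  op2, op1
	sbbl  %eax, %eax	; 0 or -1 from the carry
	andl  $(cf - ct), %eax
	addl  $ct, %eax		; yields cf or ct

   with the add/or/not shortcuts used instead when diff or ct allow.  */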
9708 tmp = ct, ct = cf, cf = tmp;
9710 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9712 /* We may be reversing an unordered compare to a normal compare, which
9713 is not valid in general (we may convert a non-trapping condition
9714 into a trapping one); however, on i386 we currently emit all
9715 comparisons unordered. */
9716 compare_code = reverse_condition_maybe_unordered (compare_code);
9717 code = reverse_condition_maybe_unordered (code);
9721 compare_code = reverse_condition (compare_code);
9722 code = reverse_condition (code);
9726 compare_code = UNKNOWN;
9727 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9728 && GET_CODE (ix86_compare_op1) == CONST_INT)
9730 if (ix86_compare_op1 == const0_rtx
9731 && (code == LT || code == GE))
9732 compare_code = code;
9733 else if (ix86_compare_op1 == constm1_rtx)
9737 else if (code == GT)
9742 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9743 if (compare_code != UNKNOWN
9744 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9745 && (cf == -1 || ct == -1))
9747 /* If the lea code below could be used, only optimize
9748 if it results in a 2-insn sequence. */
9750 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9751 || diff == 3 || diff == 5 || diff == 9)
9752 || (compare_code == LT && ct == -1)
9753 || (compare_code == GE && cf == -1))
9756 * notl op1 (if necessary)
9764 code = reverse_condition (code);
9767 out = emit_store_flag (out, code, ix86_compare_op0,
9768 ix86_compare_op1, VOIDmode, 0, -1);
9770 out = expand_simple_binop (mode, IOR,
9772 out, 1, OPTAB_DIRECT);
9773 if (out != operands[0])
9774 emit_move_insn (operands[0], out);
9776 return 1; /* DONE */
9781 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9782 || diff == 3 || diff == 5 || diff == 9)
9783 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9785 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
9791 * lea cf(dest*(ct-cf)),dest
9795 * This also catches the degenerate setcc-only case.
9801 out = emit_store_flag (out, code, ix86_compare_op0,
9802 ix86_compare_op1, VOIDmode, 0, 1);
9805 /* On x86_64 the lea instruction operates on Pmode, so we need
9806 to do the arithmetic in the proper mode to match. */
9808 tmp = copy_rtx (out);
9812 out1 = copy_rtx (out);
9813 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9817 tmp = gen_rtx_PLUS (mode, tmp, out1);
9823 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9826 if (!rtx_equal_p (tmp, out))
9829 out = force_operand (tmp, copy_rtx (out));
9831 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9833 if (!rtx_equal_p (out, operands[0]))
9834 emit_move_insn (operands[0], copy_rtx (out));
9836 return 1; /* DONE */
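/* Worked example (illustrative values): with ct = 5 and cf = 2, diff is
   3 and the sequence is roughly

	setcc %al			; 0 or 1
	movzbl %al, %eax
	leal  2(%eax,%eax,2), %eax	; 2 + 3*flag, i.e. 2 or 5

   one lea covering both the multiply by diff and the add of cf.  */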
9840 * General case:            Jumpful:
9841 * xorl dest,dest           cmpl op1, op2
9842 * cmpl op1, op2            movl ct, dest
9843 * setcc dest               jcc 1f
9844 * decl dest                movl cf, dest
9845 * andl (cf-ct),dest        1:
9846 * addl ct,dest
9848 * Size 20.                 Size 14.
9850 * This is reasonably steep, but branch mispredict costs are
9851 * high on modern cpus, so consider failing only if optimizing
9852 * for space.
9855 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9856 && BRANCH_COST >= 2)
9862 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9863 /* We may be reversing an unordered compare to a normal compare,
9864 which is not valid in general (we may convert a non-trapping
9865 condition into a trapping one); however, on i386 we currently
9866 emit all comparisons unordered. */
9867 code = reverse_condition_maybe_unordered (code);
9870 code = reverse_condition (code);
9871 if (compare_code != UNKNOWN)
9872 compare_code = reverse_condition (compare_code);
9876 if (compare_code != UNKNOWN)
9878 /* notl op1 (if needed)
9879 sarl $31, op1
9880 andl (cf-ct), op1
9881 addl ct, op1
9883 For x < 0 (resp. x <= -1) there will be no notl,
9884 so if possible swap the constants to get rid of the
9885 complement.
9886 True/false will be -1/0 while the code below (store flag
9887 followed by decrement) is 0/-1, so the constants need
9888 to be exchanged once more. */
9890 if (compare_code == GE || !cf)
9892 code = reverse_condition (code);
9897 HOST_WIDE_INT tmp = cf;
9902 out = emit_store_flag (out, code, ix86_compare_op0,
9903 ix86_compare_op1, VOIDmode, 0, -1);
9907 out = emit_store_flag (out, code, ix86_compare_op0,
9908 ix86_compare_op1, VOIDmode, 0, 1);
9910 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9911 copy_rtx (out), 1, OPTAB_DIRECT);
9914 out = expand_simple_binop (mode, AND, copy_rtx (out),
9915 gen_int_mode (cf - ct, mode),
9916 copy_rtx (out), 1, OPTAB_DIRECT);
9918 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9919 copy_rtx (out), 1, OPTAB_DIRECT);
9920 if (!rtx_equal_p (out, operands[0]))
9921 emit_move_insn (operands[0], copy_rtx (out));
9923 return 1; /* DONE */
9927 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9929 /* Try a few more things with specific constants and a variable. */
9932 rtx var, orig_out, out, tmp;
9934 if (BRANCH_COST <= 2)
9935 return 0; /* FAIL */
9937 /* If one of the two operands is an interesting constant, load a
9938 constant with the above and mask it in with a logical operation. */
9940 if (GET_CODE (operands[2]) == CONST_INT)
9943 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9944 operands[3] = constm1_rtx, op = and_optab;
9945 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9946 operands[3] = const0_rtx, op = ior_optab;
9948 return 0; /* FAIL */
9950 else if (GET_CODE (operands[3]) == CONST_INT)
9953 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9954 operands[2] = constm1_rtx, op = and_optab;
9955 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
9956 operands[2] = const0_rtx, op = ior_optab;
9958 return 0; /* FAIL */
9961 return 0; /* FAIL */
9963 orig_out = operands[0];
9964 tmp = gen_reg_rtx (mode);
9967 /* Recurse to get the constant loaded. */
9968 if (ix86_expand_int_movcc (operands) == 0)
9969 return 0; /* FAIL */
9971 /* Mask in the interesting variable. */
9972 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9974 if (!rtx_equal_p (out, orig_out))
9975 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9977 return 1; /* DONE */
9981 * For comparison with above,
9982 *
9983 * movl cf,dest
9984 * movl ct,tmp
9985 * cmpl op1,op2
9986 * cmovcc tmp,dest
9987 *
9988 * Size 15.
9989 */
9991 if (! nonimmediate_operand (operands[2], mode))
9992 operands[2] = force_reg (mode, operands[2]);
9993 if (! nonimmediate_operand (operands[3], mode))
9994 operands[3] = force_reg (mode, operands[3]);
9996 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9998 rtx tmp = gen_reg_rtx (mode);
9999 emit_move_insn (tmp, operands[3]);
10002 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10004 rtx tmp = gen_reg_rtx (mode);
10005 emit_move_insn (tmp, operands[2]);
10009 if (! register_operand (operands[2], VOIDmode)
10011 || ! register_operand (operands[3], VOIDmode)))
10012 operands[2] = force_reg (mode, operands[2]);
10015 && ! register_operand (operands[3], VOIDmode))
10016 operands[3] = force_reg (mode, operands[3]);
10018 emit_insn (compare_seq);
10019 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10020 gen_rtx_IF_THEN_ELSE (mode,
10021 compare_op, operands[2],
10024 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10025 gen_rtx_IF_THEN_ELSE (mode,
10027 copy_rtx (operands[3]),
10028 copy_rtx (operands[0]))));
10030 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10031 gen_rtx_IF_THEN_ELSE (mode,
10033 copy_rtx (operands[2]),
10034 copy_rtx (operands[0]))));
10036 return 1; /* DONE */
10039 /* Swap, force into registers, or otherwise massage the two operands
10040 to an sse comparison with a mask result. Thus we differ a bit from
10041 ix86_prepare_fp_compare_args which expects to produce a flags result.
10043 The DEST operand exists to help determine whether to commute commutative
10044 operators. The POP0/POP1 operands are updated in place. The new
10045 comparison code is returned, or UNKNOWN if not implementable. */
10047 static enum rtx_code
10048 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
10049 rtx *pop0, rtx *pop1)
10057 /* We have no LTGT as an operator. We could implement it with
10058 NE & ORDERED, but this requires an extra temporary. It's
10059 not clear that it's worth it. */
10066 /* These are supported directly. */
10073 /* For commutative operators, try to canonicalize the destination
10074 operand to be first in the comparison - this helps reload to
10075 avoid extra moves. */
10076 if (!dest || !rtx_equal_p (dest, *pop1))
10084 /* These are not supported directly. Swap the comparison operands
10085 to transform into something that is supported. */
10089 code = swap_condition (code);
10093 gcc_unreachable ();
10099 /* Detect conditional moves that exactly match min/max operational
10100 semantics. Note that this is IEEE safe, as long as we don't
10101 interchange the operands.
10103 Returns FALSE if this conditional move doesn't match a MIN/MAX,
10104 and TRUE if the operation is successful and instructions are emitted. */
10107 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
10108 rtx cmp_op1, rtx if_true, rtx if_false)
10110 enum machine_mode mode;
10116 else if (code == UNGE)
10119 if_true = if_false;
10125 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
10127 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
10132 mode = GET_MODE (dest);
10134 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
10135 but MODE may be a vector mode and thus not appropriate. */
10136 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
10138 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
10141 if_true = force_reg (mode, if_true);
10142 v = gen_rtvec (2, if_true, if_false);
10143 tmp = gen_rtx_UNSPEC (mode, v, u);
10147 code = is_min ? SMIN : SMAX;
10148 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
10151 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
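/* Background note on why the operand order matters: minss/maxss return
   their second source operand when the operands are unordered (and on a
   -0.0/+0.0 tie), so a < b ? a : b maps onto the min pattern only with
   the operand order checked above; interchanging if_true and if_false
   would change the NaN behavior.  */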
10155 /* Expand an sse vector comparison. Return the register with the result. */
10158 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
10159 rtx op_true, rtx op_false)
10161 enum machine_mode mode = GET_MODE (dest);
10164 cmp_op0 = force_reg (mode, cmp_op0);
10165 if (!nonimmediate_operand (cmp_op1, mode))
10166 cmp_op1 = force_reg (mode, cmp_op1);
10169 || reg_overlap_mentioned_p (dest, op_true)
10170 || reg_overlap_mentioned_p (dest, op_false))
10171 dest = gen_reg_rtx (mode);
10173 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
10174 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10179 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
10180 operations. This is used for both scalar and vector conditional moves. */
10183 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
10185 enum machine_mode mode = GET_MODE (dest);
10188 if (op_false == CONST0_RTX (mode))
10190 op_true = force_reg (mode, op_true);
10191 x = gen_rtx_AND (mode, cmp, op_true);
10192 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10194 else if (op_true == CONST0_RTX (mode))
10196 op_false = force_reg (mode, op_false);
10197 x = gen_rtx_NOT (mode, cmp);
10198 x = gen_rtx_AND (mode, x, op_false);
10199 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10203 op_true = force_reg (mode, op_true);
10204 op_false = force_reg (mode, op_false);
10206 t2 = gen_reg_rtx (mode);
10208 t3 = gen_reg_rtx (mode);
10212 x = gen_rtx_AND (mode, op_true, cmp);
10213 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
10215 x = gen_rtx_NOT (mode, cmp);
10216 x = gen_rtx_AND (mode, x, op_false);
10217 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
10219 x = gen_rtx_IOR (mode, t3, t2);
10220 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
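/* For illustration (a sketch; register allocation is illustrative): the
   general case above is the classic SSE select
   dest = (cmp & t) | (~cmp & f), e.g. for V4SF

	movaps	cmp, %xmm2
	andps	t, %xmm2	; t where the mask is all-ones
	andnps	f, cmp		; f where it is all-zeros
	orps	%xmm2, cmp

   relying on the comparison producing per-element 0 / -1 masks.  */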
10224 /* Expand a floating-point conditional move. Return true if successful. */
10227 ix86_expand_fp_movcc (rtx operands[])
10229 enum machine_mode mode = GET_MODE (operands[0]);
10230 enum rtx_code code = GET_CODE (operands[1]);
10231 rtx tmp, compare_op, second_test, bypass_test;
10233 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
10235 enum machine_mode cmode;
10237 /* Since we've no cmove for sse registers, don't force bad register
10238 allocation just to gain access to it. Deny movcc when the
10239 comparison mode doesn't match the move mode. */
10240 cmode = GET_MODE (ix86_compare_op0);
10241 if (cmode == VOIDmode)
10242 cmode = GET_MODE (ix86_compare_op1);
10246 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
10248 &ix86_compare_op1);
10249 if (code == UNKNOWN)
10252 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
10253 ix86_compare_op1, operands[2],
10257 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
10258 ix86_compare_op1, operands[2], operands[3]);
10259 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
10263 /* The floating point conditional move instructions don't directly
10264 support conditions resulting from a signed integer comparison. */
10266 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10271 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10273 gcc_assert (!second_test && !bypass_test);
10274 tmp = gen_reg_rtx (QImode);
10275 ix86_expand_setcc (code, tmp);
10277 ix86_compare_op0 = tmp;
10278 ix86_compare_op1 = const0_rtx;
10279 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10281 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10283 tmp = gen_reg_rtx (mode);
10284 emit_move_insn (tmp, operands[3]);
10287 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10289 tmp = gen_reg_rtx (mode);
10290 emit_move_insn (tmp, operands[2]);
10294 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10295 gen_rtx_IF_THEN_ELSE (mode, compare_op,
10296 operands[2], operands[3])));
10298 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10299 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
10300 operands[3], operands[0])));
10302 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10303 gen_rtx_IF_THEN_ELSE (mode, second_test,
10304 operands[2], operands[0])));
10309 /* Expand a floating-point vector conditional move; a vcond operation
10310 rather than a movcc operation. */
10313 ix86_expand_fp_vcond (rtx operands[])
10315 enum rtx_code code = GET_CODE (operands[3]);
10318 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
10319 &operands[4], &operands[5]);
10320 if (code == UNKNOWN)
10323 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
10324 operands[5], operands[1], operands[2]))
10327 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
10328 operands[1], operands[2]);
10329 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
10333 /* Expand an integral vector conditional move; UNSIGNEDP says whether the comparison is unsigned. */
10336 ix86_expand_int_vcond (rtx operands[], bool unsignedp)
10338 enum machine_mode mode = GET_MODE (operands[0]);
10339 enum rtx_code code = GET_CODE (operands[3]);
10343 code = signed_condition (code);
10344 if (code == NE || code == LE || code == GE)
10346 /* Inverse of a supported code. */
10348 operands[1] = operands[2];
10350 code = reverse_condition (code);
10354 /* Swap of a supported code. */
10356 operands[4] = operands[5];
10358 code = swap_condition (code);
10360 gcc_assert (code == EQ || code == GT);
10362 /* Unlike floating-point, we can rely on the optimizers to have already
10363 converted to MIN/MAX expressions, so we don't have to handle that. */
10365 /* Unsigned GT is not directly supported. We can zero-extend QI and
10366 HImode elements to the next wider element size, use a signed compare,
10367 then repack. For three extra instructions, this is definitely a win. */
10368 if (code == GT && unsignedp)
10370 rtx o0l, o0h, o1l, o1h, cl, ch, zero;
10371 enum machine_mode wider;
10372 rtx (*unpackl) (rtx, rtx, rtx);
10373 rtx (*unpackh) (rtx, rtx, rtx);
10374 rtx (*pack) (rtx, rtx, rtx);
10380 unpackl = gen_sse2_punpcklbw;
10381 unpackh = gen_sse2_punpckhbw;
10382 pack = gen_sse2_packsswb;
10386 unpackl = gen_sse2_punpcklwd;
10387 unpackh = gen_sse2_punpckhwd;
10388 pack = gen_sse2_packssdw;
10391 gcc_unreachable ();
10394 operands[4] = force_reg (mode, operands[4]);
10395 operands[5] = force_reg (mode, operands[5]);
10397 o0l = gen_reg_rtx (wider);
10398 o0h = gen_reg_rtx (wider);
10399 o1l = gen_reg_rtx (wider);
10400 o1h = gen_reg_rtx (wider);
10401 cl = gen_reg_rtx (wider);
10402 ch = gen_reg_rtx (wider);
10403 cmp = gen_reg_rtx (mode);
10404 zero = force_reg (mode, CONST0_RTX (mode));
10406 emit_insn (unpackl (gen_lowpart (mode, o0l), operands[4], zero));
10407 emit_insn (unpackh (gen_lowpart (mode, o0h), operands[4], zero));
10408 emit_insn (unpackl (gen_lowpart (mode, o1l), operands[5], zero));
10409 emit_insn (unpackh (gen_lowpart (mode, o1h), operands[5], zero));
10411 x = gen_rtx_GT (wider, o0l, o1l);
10412 emit_insn (gen_rtx_SET (VOIDmode, cl, x));
10414 x = gen_rtx_GT (wider, o0h, o1h);
10415 emit_insn (gen_rtx_SET (VOIDmode, ch, x));
10417 emit_insn (pack (cmp, cl, ch));
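/* Worked example (a sketch; exact mnemonics depend on the element mode):
   for V8HImode unsigned a > b this becomes roughly

	punpcklwd zero, a ; punpckhwd zero, a	; zero-extend each half
	punpcklwd zero, b ; punpckhwd zero, b
	pcmpgtd   ...				; signed compare is now safe
	packssdw  cl, ch			; repack the two mask halves

   since zero-extended values order the same signed or unsigned.  */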
10420 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
10421 operands[1], operands[2]);
10423 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
10427 /* Expand conditional increment or decrement using adc/sbb instructions.
10428 The default case using setcc followed by the conditional move can be
10429 done by generic code. */
10431 ix86_expand_int_addcc (rtx operands[])
10433 enum rtx_code code = GET_CODE (operands[1]);
10435 rtx val = const0_rtx;
10436 bool fpcmp = false;
10437 enum machine_mode mode = GET_MODE (operands[0]);
10439 if (operands[3] != const1_rtx
10440 && operands[3] != constm1_rtx)
10442 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10443 ix86_compare_op1, &compare_op))
10445 code = GET_CODE (compare_op);
10447 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10448 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10451 code = ix86_fp_compare_code_to_integer (code);
10458 PUT_CODE (compare_op,
10459 reverse_condition_maybe_unordered
10460 (GET_CODE (compare_op)));
10462 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10464 PUT_MODE (compare_op, mode);
10466 /* Construct either adc or sbb insn. */
10467 if ((code == LTU) == (operands[3] == constm1_rtx))
10469 switch (GET_MODE (operands[0]))
10472 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10475 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10478 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10481 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10484 gcc_unreachable ();
10489 switch (GET_MODE (operands[0]))
10492 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10495 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10498 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10501 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10504 gcc_unreachable ();
10507 return 1; /* DONE */
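/* For illustration (registers are illustrative): for an unsigned
   "if (a < b) x++;" the whole expansion is just

	cmpl  b, a	; CF = (a < b)
	adcl  $0, x	; x += CF

   and the sbb forms cover the decrement and reversed-carry cases.  */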
10511 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10512 works for floating point parameters and non-offsettable memories.
10513 For pushes, it returns just stack offsets; the values will be saved
10514 in the right order. At most three parts are generated. */
10517 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10522 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10524 size = (GET_MODE_SIZE (mode) + 4) / 8;
10526 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
10527 gcc_assert (size >= 2 && size <= 3);
10529 /* Optimize constant pool references to immediates. This is used by fp
10530 moves, which force all constants to memory to allow combining. */
10531 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
10533 rtx tmp = maybe_get_pool_constant (operand);
10538 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10540 /* The only non-offsettable memories we handle are pushes. */
10541 int ok = push_operand (operand, VOIDmode);
10545 operand = copy_rtx (operand);
10546 PUT_MODE (operand, Pmode);
10547 parts[0] = parts[1] = parts[2] = operand;
10551 if (GET_CODE (operand) == CONST_VECTOR)
10553 enum machine_mode imode = int_mode_for_mode (mode);
10554 /* Caution: if we looked through a constant pool memory above,
10555 the operand may actually have a different mode now. That's
10556 ok, since we want to pun this all the way back to an integer. */
10557 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
10558 gcc_assert (operand != NULL);
10564 if (mode == DImode)
10565 split_di (&operand, 1, &parts[0], &parts[1]);
10568 if (REG_P (operand))
10570 gcc_assert (reload_completed);
10571 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10572 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10574 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10576 else if (offsettable_memref_p (operand))
10578 operand = adjust_address (operand, SImode, 0);
10579 parts[0] = operand;
10580 parts[1] = adjust_address (operand, SImode, 4);
10582 parts[2] = adjust_address (operand, SImode, 8);
10584 else if (GET_CODE (operand) == CONST_DOUBLE)
10589 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10593 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10594 parts[2] = gen_int_mode (l[2], SImode);
10597 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10600 gcc_unreachable ();
10602 parts[1] = gen_int_mode (l[1], SImode);
10603 parts[0] = gen_int_mode (l[0], SImode);
10606 gcc_unreachable ();
10611 if (mode == TImode)
10612 split_ti (&operand, 1, &parts[0], &parts[1]);
10613 if (mode == XFmode || mode == TFmode)
10615 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
10616 if (REG_P (operand))
10618 gcc_assert (reload_completed);
10619 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10620 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10622 else if (offsettable_memref_p (operand))
10624 operand = adjust_address (operand, DImode, 0);
10625 parts[0] = operand;
10626 parts[1] = adjust_address (operand, upper_mode, 8);
10628 else if (GET_CODE (operand) == CONST_DOUBLE)
10633 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10634 real_to_target (l, &r, mode);
10636 /* Do not use a shift by 32, to avoid a warning on 32-bit systems. */
10637 if (HOST_BITS_PER_WIDE_INT >= 64)
10640 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10641 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10644 parts[0] = immed_double_const (l[0], l[1], DImode);
10646 if (upper_mode == SImode)
10647 parts[1] = gen_int_mode (l[2], SImode);
10648 else if (HOST_BITS_PER_WIDE_INT >= 64)
10651 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10652 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10655 parts[1] = immed_double_const (l[2], l[3], DImode);
10658 gcc_unreachable ();
10665 /* Emit insns to perform a move or push of DI, DF, and XF values.
10666 Return false when normal moves are needed; true when all required
10667 insns have been emitted. Operands 2-4 contain the input values
10668 in the correct order; operands 5-7 contain the output values. */
10671 ix86_split_long_move (rtx operands[])
10676 int collisions = 0;
10677 enum machine_mode mode = GET_MODE (operands[0]);
10679 /* The DFmode expanders may ask us to move a double.
10680 For a 64-bit target this is a single move. By hiding the fact
10681 here we simplify the i386.md splitters. */
10682 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10684 /* Optimize constant pool references to immediates. This is used by
10685 fp moves, which force all constants to memory to allow combining. */
10687 if (GET_CODE (operands[1]) == MEM
10688 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10689 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10690 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10691 if (push_operand (operands[0], VOIDmode))
10693 operands[0] = copy_rtx (operands[0]);
10694 PUT_MODE (operands[0], Pmode);
10697 operands[0] = gen_lowpart (DImode, operands[0]);
10698 operands[1] = gen_lowpart (DImode, operands[1]);
10699 emit_move_insn (operands[0], operands[1]);
10703 /* The only non-offsettable memory we handle is a push. */
10704 if (push_operand (operands[0], VOIDmode))
10707 gcc_assert (GET_CODE (operands[0]) != MEM
10708 || offsettable_memref_p (operands[0]));
10710 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10711 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10713 /* When emitting a push, take care of source operands on the stack. */
10714 if (push && GET_CODE (operands[1]) == MEM
10715 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10718 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10719 XEXP (part[1][2], 0));
10720 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10721 XEXP (part[1][1], 0));
10724 /* We need to do the copy in the right order in case an address register
10725 of the source overlaps the destination. */
10726 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10728 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10730 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10733 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10736 /* Collision in the middle part can be handled by reordering. */
10737 if (collisions == 1 && nparts == 3
10738 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10741 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10742 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10745 /* If there are more collisions, we can't handle them by reordering.
10746 Do an lea to the last part and use only one colliding move. */
10747 else if (collisions > 1)
10753 base = part[0][nparts - 1];
10755 /* Handle the case when the last part isn't valid for lea.
10756 This happens in 64-bit mode when storing the 12-byte XFmode value. */
10757 if (GET_MODE (base) != Pmode)
10758 base = gen_rtx_REG (Pmode, REGNO (base));
10760 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10761 part[1][0] = replace_equiv_address (part[1][0], base);
10762 part[1][1] = replace_equiv_address (part[1][1],
10763 plus_constant (base, UNITS_PER_WORD));
10765 part[1][2] = replace_equiv_address (part[1][2],
10766 plus_constant (base, 8));
10776 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10777 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10778 emit_move_insn (part[0][2], part[1][2]);
10783 /* In 64-bit mode we don't have a 32-bit push available. In case this is a
10784 register, it is OK - we will just use the larger counterpart. We also
10785 retype memories - these come from an attempt to avoid a REX prefix on
10786 moving the second half of a TFmode value. */
10787 if (GET_MODE (part[1][1]) == SImode)
10789 switch (GET_CODE (part[1][1]))
10792 part[1][1] = adjust_address (part[1][1], DImode, 0);
10796 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10800 gcc_unreachable ();
10803 if (GET_MODE (part[1][0]) == SImode)
10804 part[1][0] = part[1][1];
10807 emit_move_insn (part[0][1], part[1][1]);
10808 emit_move_insn (part[0][0], part[1][0]);
10812 /* Choose the correct order so as not to overwrite the source before it is copied. */
10813 if ((REG_P (part[0][0])
10814 && REG_P (part[1][1])
10815 && (REGNO (part[0][0]) == REGNO (part[1][1])
10817 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10819 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10823 operands[2] = part[0][2];
10824 operands[3] = part[0][1];
10825 operands[4] = part[0][0];
10826 operands[5] = part[1][2];
10827 operands[6] = part[1][1];
10828 operands[7] = part[1][0];
10832 operands[2] = part[0][1];
10833 operands[3] = part[0][0];
10834 operands[5] = part[1][1];
10835 operands[6] = part[1][0];
10842 operands[2] = part[0][0];
10843 operands[3] = part[0][1];
10844 operands[4] = part[0][2];
10845 operands[5] = part[1][0];
10846 operands[6] = part[1][1];
10847 operands[7] = part[1][2];
10851 operands[2] = part[0][0];
10852 operands[3] = part[0][1];
10853 operands[5] = part[1][0];
10854 operands[6] = part[1][1];
10858 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
10861 if (GET_CODE (operands[5]) == CONST_INT
10862 && operands[5] != const0_rtx
10863 && REG_P (operands[2]))
10865 if (GET_CODE (operands[6]) == CONST_INT
10866 && INTVAL (operands[6]) == INTVAL (operands[5]))
10867 operands[6] = operands[2];
10870 && GET_CODE (operands[7]) == CONST_INT
10871 && INTVAL (operands[7]) == INTVAL (operands[5]))
10872 operands[7] = operands[2];
10876 && GET_CODE (operands[6]) == CONST_INT
10877 && operands[6] != const0_rtx
10878 && REG_P (operands[3])
10879 && GET_CODE (operands[7]) == CONST_INT
10880 && INTVAL (operands[7]) == INTVAL (operands[6]))
10881 operands[7] = operands[3];
10884 emit_move_insn (operands[2], operands[5]);
10885 emit_move_insn (operands[3], operands[6]);
10887 emit_move_insn (operands[4], operands[7]);
10892 /* Helper function of ix86_split_ashldi used to generate an SImode
10893 left shift by a constant, either using a single shift or
10894 a sequence of add instructions. */
10897 ix86_expand_ashlsi3_const (rtx operand, int count)
10900 emit_insn (gen_addsi3 (operand, operand, operand));
10901 else if (!optimize_size
10902 && count * ix86_cost->add <= ix86_cost->shift_const)
10905 for (i = 0; i < count; i++)
10906 emit_insn (gen_addsi3 (operand, operand, operand));
10909 emit_insn (gen_ashlsi3 (operand, operand, GEN_INT (count)));
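/* In other words: a left shift by N is replaced by N self-adds
   (addl reg,reg doubles the value) whenever we are not optimizing for
   size and N add insns are no more costly than one constant shift on
   the target; a shift by 1 always becomes a single add.  */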
10913 ix86_split_ashldi (rtx *operands, rtx scratch)
10915 rtx low[2], high[2];
10918 if (GET_CODE (operands[2]) == CONST_INT)
10920 split_di (operands, 2, low, high);
10921 count = INTVAL (operands[2]) & 63;
10925 emit_move_insn (high[0], low[1]);
10926 emit_move_insn (low[0], const0_rtx);
10929 ix86_expand_ashlsi3_const (high[0], count - 32);
10933 if (!rtx_equal_p (operands[0], operands[1]))
10934 emit_move_insn (operands[0], operands[1]);
10935 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10936 ix86_expand_ashlsi3_const (low[0], count);
10941 split_di (operands, 1, low, high);
10943 if (operands[1] == const1_rtx)
10945 /* Assuming we've chosen QImode-capable registers, 1LL << N
10946 can be done with two 32-bit shifts, no branches, no cmoves. */
10947 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
10949 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
10951 ix86_expand_clear (low[0]);
10952 ix86_expand_clear (high[0]);
10953 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
10955 d = gen_lowpart (QImode, low[0]);
10956 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10957 s = gen_rtx_EQ (QImode, flags, const0_rtx);
10958 emit_insn (gen_rtx_SET (VOIDmode, d, s));
10960 d = gen_lowpart (QImode, high[0]);
10961 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10962 s = gen_rtx_NE (QImode, flags, const0_rtx);
10963 emit_insn (gen_rtx_SET (VOIDmode, d, s));
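/* For illustration (registers are illustrative): for 1LL << n this
   branch emits roughly

	xorl  %eax, %eax	; low
	xorl  %edx, %edx	; high
	testb $32, %cl
	sete  %al		; low  = (n & 32) == 0
	setne %dl		; high = (n & 32) != 0
	shll  %cl, %eax		; the hardware masks the count to 5 bits
	shll  %cl, %edx

   no branches, no cmov, as promised above.  */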
10966 /* Otherwise, we can get the same results by manually performing
10967 a bit extract operation on bit 5, and then performing the two
10968 shifts. The two methods of getting 0/1 into low/high are exactly
10969 the same size. Avoiding the shift in the bit extract case helps
10970 pentium4 a bit; no one else seems to care much either way. */
10975 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
10976 x = gen_rtx_ZERO_EXTEND (SImode, operands[2]);
10978 x = gen_lowpart (SImode, operands[2]);
10979 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
10981 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (5)));
10982 emit_insn (gen_andsi3 (high[0], high[0], GEN_INT (1)));
10983 emit_move_insn (low[0], high[0]);
10984 emit_insn (gen_xorsi3 (low[0], low[0], GEN_INT (1)));
10987 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10988 emit_insn (gen_ashlsi3 (high[0], high[0], operands[2]));
10992 if (operands[1] == constm1_rtx)
10994 /* For -1LL << N, we can avoid the shld instruction, because we
10995 know that we're shifting 0...31 ones into a -1. */
10996 emit_move_insn (low[0], constm1_rtx);
10998 emit_move_insn (high[0], low[0]);
11000 emit_move_insn (high[0], constm1_rtx);
11004 if (!rtx_equal_p (operands[0], operands[1]))
11005 emit_move_insn (operands[0], operands[1]);
11007 split_di (operands, 1, low, high);
11008 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
11011 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
11013 if (TARGET_CMOVE && scratch)
11015 ix86_expand_clear (scratch);
11016 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
11019 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
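/* Note on the adjustment patterns: shld only covers shift counts 0..31,
   so counts of 32..63 are fixed up afterwards -- with cmov by
   conditionally moving low into high and clearing low when bit 5 of the
   count is set, otherwise with a short compare-and-branch sequence.  */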
11023 ix86_split_ashrdi (rtx *operands, rtx scratch)
11025 rtx low[2], high[2];
11028 if (GET_CODE (operands[2]) == CONST_INT)
11030 split_di (operands, 2, low, high);
11031 count = INTVAL (operands[2]) & 63;
11035 emit_move_insn (high[0], high[1]);
11036 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
11037 emit_move_insn (low[0], high[0]);
11040 else if (count >= 32)
11042 emit_move_insn (low[0], high[1]);
11043 emit_move_insn (high[0], low[0]);
11044 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
11046 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
11050 if (!rtx_equal_p (operands[0], operands[1]))
11051 emit_move_insn (operands[0], operands[1]);
11052 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11053 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
11058 if (!rtx_equal_p (operands[0], operands[1]))
11059 emit_move_insn (operands[0], operands[1]);
11061 split_di (operands, 1, low, high);
11063 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11064 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
11066 if (TARGET_CMOVE && scratch)
11068 emit_move_insn (scratch, high[0]);
11069 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
11070 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11074 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
11079 ix86_split_lshrdi (rtx *operands, rtx scratch)
11081 rtx low[2], high[2];
11084 if (GET_CODE (operands[2]) == CONST_INT)
11086 split_di (operands, 2, low, high);
11087 count = INTVAL (operands[2]) & 63;
11091 emit_move_insn (low[0], high[1]);
11092 ix86_expand_clear (high[0]);
11095 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
11099 if (!rtx_equal_p (operands[0], operands[1]))
11100 emit_move_insn (operands[0], operands[1]);
11101 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11102 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
11107 if (!rtx_equal_p (operands[0], operands[1]))
11108 emit_move_insn (operands[0], operands[1]);
11110 split_di (operands, 1, low, high);
11112 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11113 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
11115 /* Heh. By reversing the arguments, we can reuse this pattern. */
11116 if (TARGET_CMOVE && scratch)
11118 ix86_expand_clear (scratch);
11119 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11123 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
11127 /* Helper function for the string operations below. Test VARIABLE for
11128 whether it is aligned to VALUE bytes. If true, jump to the label. */
11130 ix86_expand_aligntest (rtx variable, int value)
11132 rtx label = gen_label_rtx ();
11133 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
11134 if (GET_MODE (variable) == DImode)
11135 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
11137 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11138 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
11143 /* Decrease COUNTREG by VALUE. */
11145 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
11147 if (GET_MODE (countreg) == DImode)
11148 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11150 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11153 /* Zero-extend the possibly-SImode EXP into a Pmode register. */
11155 ix86_zero_extend_to_Pmode (rtx exp)
11158 if (GET_MODE (exp) == VOIDmode)
11159 return force_reg (Pmode, exp);
11160 if (GET_MODE (exp) == Pmode)
11161 return copy_to_mode_reg (Pmode, exp);
11162 r = gen_reg_rtx (Pmode);
11163 emit_insn (gen_zero_extendsidi2 (r, exp));
11167 /* Expand string move (memcpy) operation. Use i386 string operations when
11168 profitable. expand_clrmem contains similar code. */
11170 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11172 rtx srcreg, destreg, countreg, srcexp, destexp;
11173 enum machine_mode counter_mode;
11174 HOST_WIDE_INT align = 0;
11175 unsigned HOST_WIDE_INT count = 0;
11177 if (GET_CODE (align_exp) == CONST_INT)
11178 align = INTVAL (align_exp);
11180 /* Can't use any of this if the user has appropriated esi or edi. */
11181 if (global_regs[4] || global_regs[5])
11184 /* This simple hack avoids all inlining code and simplifies code below. */
11185 if (!TARGET_ALIGN_STRINGOPS)
11188 if (GET_CODE (count_exp) == CONST_INT)
11190 count = INTVAL (count_exp);
11191 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11195 /* Figure out the proper mode for the counter. For 32 bits it is always
11196 SImode; for 64 bits use SImode when possible, otherwise DImode.
11197 Set count to the number of bytes copied when known at compile time. */
11199 || GET_MODE (count_exp) == SImode
11200 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
11201 counter_mode = SImode;
11203 counter_mode = DImode;
11205 gcc_assert (counter_mode == SImode || counter_mode == DImode);
11207 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11208 if (destreg != XEXP (dst, 0))
11209 dst = replace_equiv_address_nv (dst, destreg);
11210 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11211 if (srcreg != XEXP (src, 0))
11212 src = replace_equiv_address_nv (src, srcreg);
11214 /* When optimizing for size emit a simple rep ; movsb instruction for
11215 counts not divisible by 4, except when the (movsl;)*(movsw;)?(movsb;)?
11216 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
11217 The size of the (movsl;)*(movsw;)?(movsb;)? sequence is
11218 count / 4 + (count & 3); the other sequence is either 4 or 7 bytes,
11219 but we don't know whether the upper 24 (resp. 56) bits of %ecx will be
11220 known to be zero or not. The rep; movsb sequence causes higher
11221 register pressure though, so take that into account. */
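/* Worked example of the size arithmetic (encodings: movsl, movsw and
   movsb take 1, 2 and 1 bytes): count = 9 costs 9/4 + (9 & 3) = 3 bytes
   (movsl; movsl; movsb) and beats the 4-byte movb $9, %cl; rep; movsb,
   while count = 23 costs 5 + 3 = 8 bytes and loses to the 7-byte movl
   form.  */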
11223 if ((!optimize || optimize_size)
11228 || (count & 3) + count / 4 > 6))))
11230 emit_insn (gen_cld ());
11231 countreg = ix86_zero_extend_to_Pmode (count_exp);
11232 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11233 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11234 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11238 /* For constant aligned (or small unaligned) copies use rep movsl
11239 followed by code copying the rest. For PentiumPro ensure 8 byte
11240 alignment to allow rep movsl acceleration. */
11242 else if (count != 0
11244 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11245 || optimize_size || count < (unsigned int) 64))
11247 unsigned HOST_WIDE_INT offset = 0;
11248 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11249 rtx srcmem, dstmem;
11251 emit_insn (gen_cld ());
11252 if (count & ~(size - 1))
11254 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
11256 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
11258 while (offset < (count & ~(size - 1)))
11260 srcmem = adjust_automodify_address_nv (src, movs_mode,
11262 dstmem = adjust_automodify_address_nv (dst, movs_mode,
11264 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11270 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
11271 & (TARGET_64BIT ? -1 : 0x3fffffff));
11272 countreg = copy_to_mode_reg (counter_mode, countreg);
11273 countreg = ix86_zero_extend_to_Pmode (countreg);
11275 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11276 GEN_INT (size == 4 ? 2 : 3));
11277 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11278 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11280 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11281 countreg, destexp, srcexp));
11282 offset = count & ~(size - 1);
11285 if (size == 8 && (count & 0x04))
11287 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11289 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11291 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11296 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11298 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11300 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11305 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11307 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11309 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11312 /* The generic code based on the glibc implementation:
11313 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11314 allowing accelerated copying there)
11315 - copy the data using rep movsl
11316 - copy the rest. */
11321 rtx srcmem, dstmem;
11322 int desired_alignment = (TARGET_PENTIUMPRO
11323 && (count == 0 || count >= (unsigned int) 260)
11324 ? 8 : UNITS_PER_WORD);
11325 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11326 dst = change_address (dst, BLKmode, destreg);
11327 src = change_address (src, BLKmode, srcreg);
11329 /* In case we don't know anything about the alignment, default to the
11330 library version, since it is usually equally fast and results in
11331 shorter code.
11333 Also emit a call when we know that the count is large and the call
11334 overhead will not be important. */
11335 if (!TARGET_INLINE_ALL_STRINGOPS
11336 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11339 if (TARGET_SINGLE_STRINGOP)
11340 emit_insn (gen_cld ());
11342 countreg2 = gen_reg_rtx (Pmode);
11343 countreg = copy_to_mode_reg (counter_mode, count_exp);
11345 /* We don't use loops to align destination and to copy parts smaller
11346 than 4 bytes, because gcc is able to optimize such code better (in
11347 the case the destination or the count really is aligned, gcc is often
11348 able to predict the branches) and also it is friendlier to the
11349 hardware branch prediction.
11351 Using loops is beneficial for the generic case, because we can
11352 handle small counts using the loops. Many CPUs (such as Athlon)
11353 have large REP prefix setup costs.
11355 This is quite costly. Maybe we can revisit this decision later or
11356 add some customizability to this code. */
11358 if (count == 0 && align < desired_alignment)
11360 label = gen_label_rtx ();
11361 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11362 LEU, 0, counter_mode, 1, label);
11366 rtx label = ix86_expand_aligntest (destreg, 1);
11367 srcmem = change_address (src, QImode, srcreg);
11368 dstmem = change_address (dst, QImode, destreg);
11369 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11370 ix86_adjust_counter (countreg, 1);
11371 emit_label (label);
11372 LABEL_NUSES (label) = 1;
11376 rtx label = ix86_expand_aligntest (destreg, 2);
11377 srcmem = change_address (src, HImode, srcreg);
11378 dstmem = change_address (dst, HImode, destreg);
11379 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11380 ix86_adjust_counter (countreg, 2);
11381 emit_label (label);
11382 LABEL_NUSES (label) = 1;
11384 if (align <= 4 && desired_alignment > 4)
11386 rtx label = ix86_expand_aligntest (destreg, 4);
11387 srcmem = change_address (src, SImode, srcreg);
11388 dstmem = change_address (dst, SImode, destreg);
11389 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11390 ix86_adjust_counter (countreg, 4);
11391 emit_label (label);
11392 LABEL_NUSES (label) = 1;
11395 if (label && desired_alignment > 4 && !TARGET_64BIT)
11397 emit_label (label);
11398 LABEL_NUSES (label) = 1;
11401 if (!TARGET_SINGLE_STRINGOP)
11402 emit_insn (gen_cld ());
11405 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11407 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11411 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11412 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11414 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11415 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11416 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11417 countreg2, destexp, srcexp));
11421 emit_label (label);
11422 LABEL_NUSES (label) = 1;
11424 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11426 srcmem = change_address (src, SImode, srcreg);
11427 dstmem = change_address (dst, SImode, destreg);
11428 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11430 if ((align <= 4 || count == 0) && TARGET_64BIT)
11432 rtx label = ix86_expand_aligntest (countreg, 4);
11433 srcmem = change_address (src, SImode, srcreg);
11434 dstmem = change_address (dst, SImode, destreg);
11435 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11436 emit_label (label);
11437 LABEL_NUSES (label) = 1;
11439 if (align > 2 && count != 0 && (count & 2))
11441 srcmem = change_address (src, HImode, srcreg);
11442 dstmem = change_address (dst, HImode, destreg);
11443 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11445 if (align <= 2 || count == 0)
11447 rtx label = ix86_expand_aligntest (countreg, 2);
11448 srcmem = change_address (src, HImode, srcreg);
11449 dstmem = change_address (dst, HImode, destreg);
11450 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11451 emit_label (label);
11452 LABEL_NUSES (label) = 1;
11454 if (align > 1 && count != 0 && (count & 1))
11456 srcmem = change_address (src, QImode, srcreg);
11457 dstmem = change_address (dst, QImode, destreg);
11458 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11460 if (align <= 1 || count == 0)
11462 rtx label = ix86_expand_aligntest (countreg, 1);
11463 srcmem = change_address (src, QImode, srcreg);
11464 dstmem = change_address (dst, QImode, destreg);
11465 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11466 emit_label (label);
11467 LABEL_NUSES (label) = 1;
11474 /* Expand string clear operation (bzero). Use i386 string operations when
11475 profitable. expand_movmem contains similar code. */
11477 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
11479 rtx destreg, zeroreg, countreg, destexp;
11480 enum machine_mode counter_mode;
11481 HOST_WIDE_INT align = 0;
11482 unsigned HOST_WIDE_INT count = 0;
11484 if (GET_CODE (align_exp) == CONST_INT)
11485 align = INTVAL (align_exp);
/* Can't use any of this if the user has appropriated edi,
   the destination register of the stos family.  */
if (global_regs[5])
11491 /* This simple hack avoids all inlining code and simplifies code below. */
11492 if (!TARGET_ALIGN_STRINGOPS)
11495 if (GET_CODE (count_exp) == CONST_INT)
11497 count = INTVAL (count_exp);
11498 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
/* Figure out proper mode for counter.  For 32-bit targets it is always
   SImode; for 64-bit targets use SImode when possible, otherwise DImode.
   Set count to the number of bytes to be cleared when known at compile
   time.  */
if (!TARGET_64BIT
    || GET_MODE (count_exp) == SImode
    || x86_64_zext_immediate_operand (count_exp, VOIDmode))
  counter_mode = SImode;
else
  counter_mode = DImode;
11511 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11512 if (destreg != XEXP (dst, 0))
11513 dst = replace_equiv_address_nv (dst, destreg);
/* When optimizing for size emit simple rep ; stosb instruction for
   counts not divisible by 4.  The movl $N, %ecx; rep; stosb
   sequence is 7 bytes long, so if optimizing for size and count is
   small enough that some stosl, stosw and stosb instructions without
   rep are shorter, fall back into the next if.  */
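/* E.g. count == 11 would take (11 >> 2) + (11 & 3) == 5 plain stos
   insns (roughly a byte each), beating the 7-byte rep form, so with
   -Os we fall through; count == 32 would take 8 such insns and the
   rep form wins.  */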
11522 if ((!optimize || optimize_size)
11525 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
11527 emit_insn (gen_cld ());
11529 countreg = ix86_zero_extend_to_Pmode (count_exp);
11530 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11531 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11532 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
else if (count != 0
	 && (align >= 8
	     || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	     || optimize_size || count < (unsigned int) 64))
11539 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11540 unsigned HOST_WIDE_INT offset = 0;
11542 emit_insn (gen_cld ());
11544 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11545 if (count & ~(size - 1))
11547 unsigned HOST_WIDE_INT repcount;
11548 unsigned int max_nonrep;
11550 repcount = count >> (size == 4 ? 2 : 3);
11552 repcount &= 0x3fffffff;
/* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
   movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
   bytes.  In both cases the latter seems to be faster for small
   values of N.  */
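/* E.g. clearing 12 bytes with size == 4 gives repcount == 3: three
   inline stosl insns cost 3 bytes against 7 for the rep form, so the
   non-rep sequence below is used.  */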
11558 max_nonrep = size == 4 ? 7 : 4;
11559 if (!optimize_size)
11562 case PROCESSOR_PENTIUM4:
11563 case PROCESSOR_NOCONA:
11570 if (repcount <= max_nonrep)
11571 while (repcount-- > 0)
11573 rtx mem = adjust_automodify_address_nv (dst,
11574 GET_MODE (zeroreg),
11576 emit_insn (gen_strset (destreg, mem, zeroreg));
11581 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
11582 countreg = ix86_zero_extend_to_Pmode (countreg);
11583 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11584 GEN_INT (size == 4 ? 2 : 3));
11585 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11586 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
11588 offset = count & ~(size - 1);
11591 if (size == 8 && (count & 0x04))
11593 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11595 emit_insn (gen_strset (destreg, mem,
11596 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11601 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11603 emit_insn (gen_strset (destreg, mem,
11604 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11609 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11611 emit_insn (gen_strset (destreg, mem,
11612 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11619 /* Compute desired alignment of the string operation. */
11620 int desired_alignment = (TARGET_PENTIUMPRO
11621 && (count == 0 || count >= (unsigned int) 260)
11622 ? 8 : UNITS_PER_WORD);
/* In case we don't know anything about the alignment, default to
   library version, since it is usually equally fast and results in
   shorter code.

   Also emit call when we know that the count is large and call overhead
   will not be important.  */
11630 if (!TARGET_INLINE_ALL_STRINGOPS
11631 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11634 if (TARGET_SINGLE_STRINGOP)
11635 emit_insn (gen_cld ());
11637 countreg2 = gen_reg_rtx (Pmode);
11638 countreg = copy_to_mode_reg (counter_mode, count_exp);
11639 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11640 /* Get rid of MEM_OFFSET, it won't be accurate. */
11641 dst = change_address (dst, BLKmode, destreg);
11643 if (count == 0 && align < desired_alignment)
11645 label = gen_label_rtx ();
11646 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11647 LEU, 0, counter_mode, 1, label);
11651 rtx label = ix86_expand_aligntest (destreg, 1);
11652 emit_insn (gen_strset (destreg, dst,
11653 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11654 ix86_adjust_counter (countreg, 1);
11655 emit_label (label);
11656 LABEL_NUSES (label) = 1;
11660 rtx label = ix86_expand_aligntest (destreg, 2);
11661 emit_insn (gen_strset (destreg, dst,
11662 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11663 ix86_adjust_counter (countreg, 2);
11664 emit_label (label);
11665 LABEL_NUSES (label) = 1;
11667 if (align <= 4 && desired_alignment > 4)
11669 rtx label = ix86_expand_aligntest (destreg, 4);
11670 emit_insn (gen_strset (destreg, dst,
11672 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11674 ix86_adjust_counter (countreg, 4);
11675 emit_label (label);
11676 LABEL_NUSES (label) = 1;
11679 if (label && desired_alignment > 4 && !TARGET_64BIT)
11681 emit_label (label);
11682 LABEL_NUSES (label) = 1;
11686 if (!TARGET_SINGLE_STRINGOP)
11687 emit_insn (gen_cld ());
11690 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11692 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11696 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11697 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11699 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11700 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11704 emit_label (label);
11705 LABEL_NUSES (label) = 1;
11708 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11709 emit_insn (gen_strset (destreg, dst,
11710 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11711 if (TARGET_64BIT && (align <= 4 || count == 0))
11713 rtx label = ix86_expand_aligntest (countreg, 4);
11714 emit_insn (gen_strset (destreg, dst,
11715 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11716 emit_label (label);
11717 LABEL_NUSES (label) = 1;
11719 if (align > 2 && count != 0 && (count & 2))
11720 emit_insn (gen_strset (destreg, dst,
11721 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11722 if (align <= 2 || count == 0)
11724 rtx label = ix86_expand_aligntest (countreg, 2);
11725 emit_insn (gen_strset (destreg, dst,
11726 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11727 emit_label (label);
11728 LABEL_NUSES (label) = 1;
11730 if (align > 1 && count != 0 && (count & 1))
11731 emit_insn (gen_strset (destreg, dst,
11732 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11733 if (align <= 1 || count == 0)
11735 rtx label = ix86_expand_aligntest (countreg, 1);
11736 emit_insn (gen_strset (destreg, dst,
11737 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11738 emit_label (label);
11739 LABEL_NUSES (label) = 1;
11745 /* Expand strlen. */
11747 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11749 rtx addr, scratch1, scratch2, scratch3, scratch4;
/* The generic case of strlen expander is long.  Avoid expanding it
   unless TARGET_INLINE_ALL_STRINGOPS.  */
11754 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11755 && !TARGET_INLINE_ALL_STRINGOPS
11757 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11760 addr = force_reg (Pmode, XEXP (src, 0));
11761 scratch1 = gen_reg_rtx (Pmode);
11763 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
/* Well it seems that some optimizer does not combine a call like
   foo(strlen(bar), strlen(bar));
   when the move and the subtraction are done here.  It does calculate
   the length just once when these instructions are done inside of
   output_strlen_unroll().  But I think since &bar[strlen(bar)] is
   often used and I use one fewer register for the lifetime of
   output_strlen_unroll() this is better.  */
11774 emit_move_insn (out, addr);
11776 ix86_expand_strlensi_unroll_1 (out, src, align);
11778 /* strlensi_unroll_1 returns the address of the zero at the end of
11779 the string, like memchr(), so compute the length by subtracting
11780 the start address. */
11782 emit_insn (gen_subdi3 (out, out, addr));
11784 emit_insn (gen_subsi3 (out, out, addr));
11789 scratch2 = gen_reg_rtx (Pmode);
11790 scratch3 = gen_reg_rtx (Pmode);
11791 scratch4 = force_reg (Pmode, constm1_rtx);
11793 emit_move_insn (scratch3, addr);
11794 eoschar = force_reg (QImode, eoschar);
11796 emit_insn (gen_cld ());
11797 src = replace_equiv_address_nv (src, scratch3);
11799 /* If .md starts supporting :P, this can be done in .md. */
11800 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11801 scratch4), UNSPEC_SCAS);
11802 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
11805 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11806 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11810 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11811 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
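/* A sketch of the arithmetic, nothing emitted: the counter that
   strlenqi_1 leaves in scratch1 starts at -1 and is decremented once
   per byte scanned, including the terminating character, so on exit
   scratch1 == -(len + 2).  The complement then gives ~scratch1 ==
   len + 1, and the add of -1 above yields out == len.  */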
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
	not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */
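/* A rough C equivalent of the expansion below (an illustrative
   sketch only; the real code branches on the statically known
   alignment, and the magic constants match the formula emitted
   further down):

	unsigned char *p = start;		start = initial OUT
	while ((unsigned long) p & 3)
	  {
	    if (*p == 0)
	      goto done;
	    p++;
	  }
	do
	  {
	    v = *(unsigned int *) p;
	    p += 4;
	  }
	while (((v - 0x01010101) & ~v & 0x80808080) == 0);
	locate the zero byte within the word just loaded
      done:
	the result is the address of the terminating zero  */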
11829 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11833 rtx align_2_label = NULL_RTX;
11834 rtx align_3_label = NULL_RTX;
11835 rtx align_4_label = gen_label_rtx ();
11836 rtx end_0_label = gen_label_rtx ();
11838 rtx tmpreg = gen_reg_rtx (SImode);
11839 rtx scratch = gen_reg_rtx (SImode);
11843 if (GET_CODE (align_rtx) == CONST_INT)
11844 align = INTVAL (align_rtx);
11846 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11848 /* Is there a known alignment and is it less than 4? */
11851 rtx scratch1 = gen_reg_rtx (Pmode);
11852 emit_move_insn (scratch1, out);
11853 /* Is there a known alignment and is it not 2? */
align_3_label = gen_label_rtx (); /* Label when aligned to a 3-byte boundary */
align_2_label = gen_label_rtx (); /* Label when aligned to a 2-byte boundary */
11859 /* Leave just the 3 lower bits. */
11860 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11861 NULL_RTX, 0, OPTAB_WIDEN);
11863 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11864 Pmode, 1, align_4_label);
11865 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
11866 Pmode, 1, align_2_label);
11867 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
11868 Pmode, 1, align_3_label);
/* Since the alignment is 2, we have to check 2 or 0 bytes;
   check if it is aligned to a 4-byte boundary.  */
11875 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
11876 NULL_RTX, 0, OPTAB_WIDEN);
11878 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11879 Pmode, 1, align_4_label);
11882 mem = change_address (src, QImode, out);
11884 /* Now compare the bytes. */
/* Compare the first n unaligned bytes on a byte-by-byte basis.  */
11887 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11888 QImode, 1, end_0_label);
11890 /* Increment the address. */
11892 emit_insn (gen_adddi3 (out, out, const1_rtx));
11894 emit_insn (gen_addsi3 (out, out, const1_rtx));
11896 /* Not needed with an alignment of 2 */
11899 emit_label (align_2_label);
11901 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11905 emit_insn (gen_adddi3 (out, out, const1_rtx));
11907 emit_insn (gen_addsi3 (out, out, const1_rtx));
11909 emit_label (align_3_label);
11912 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11916 emit_insn (gen_adddi3 (out, out, const1_rtx));
11918 emit_insn (gen_addsi3 (out, out, const1_rtx));
/* Generate loop to check 4 bytes at a time.  It is not a good idea to
   align this loop.  It gives only huge programs, but does not help to
   converge.  */
11924 emit_label (align_4_label);
11926 mem = change_address (src, SImode, out);
11927 emit_move_insn (scratch, mem);
11929 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11931 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
/* This formula yields a nonzero result iff one of the bytes is zero.
   This saves three branches inside the loop and many cycles.  */
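/* Worked example, assuming a little-endian load of the bytes 'A',
   '1', 0, '@', i.e. scratch == 0x40003141: adding -0x01010101 gives
   0x3eff3040 (the borrow runs through the zero byte), ANDing with
   ~scratch keeps 0xff in that byte's lane, and the final mask with
   0x80808080 leaves 0x00800000 != 0.  A word with no zero byte always
   produces 0, so the single compare below suffices.  */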
11936 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11937 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11938 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11939 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11940 gen_int_mode (0x80808080, SImode)));
11941 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11946 rtx reg = gen_reg_rtx (SImode);
11947 rtx reg2 = gen_reg_rtx (Pmode);
11948 emit_move_insn (reg, tmpreg);
11949 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11951 /* If zero is not in the first two bytes, move two bytes forward. */
11952 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11953 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11954 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11955 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11956 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11959 /* Emit lea manually to avoid clobbering of flags. */
11960 emit_insn (gen_rtx_SET (SImode, reg2,
11961 gen_rtx_PLUS (Pmode, out, const2_rtx)));
11963 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11964 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11965 emit_insn (gen_rtx_SET (VOIDmode, out,
11966 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11973 rtx end_2_label = gen_label_rtx ();
11974 /* Is zero in the first two bytes? */
11976 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11977 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11978 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11979 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11980 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11982 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11983 JUMP_LABEL (tmp) = end_2_label;
11985 /* Not in the first two. Move two bytes forward. */
11986 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11988 emit_insn (gen_adddi3 (out, out, const2_rtx));
11990 emit_insn (gen_addsi3 (out, out, const2_rtx));
11992 emit_label (end_2_label);
11996 /* Avoid branch in fixing the byte. */
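/* How the branchless fixup works: at this point bit 7 of tmpreg's low
   byte is set exactly when the zero byte was the first of the two
   remaining candidates, so doubling that byte with addqi3_cc copies
   the bit into the carry flag, and the subtract-with-borrow below
   takes either 3 or 4 off OUT, landing on the zero byte either way.  */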
11997 tmpreg = gen_lowpart (QImode, tmpreg);
11998 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
12001 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
12003 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
12005 emit_label (end_0_label);
12009 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
12010 rtx callarg2 ATTRIBUTE_UNUSED,
12011 rtx pop, int sibcall)
12013 rtx use = NULL, call;
12015 if (pop == const0_rtx)
12017 gcc_assert (!TARGET_64BIT || !pop);
#if TARGET_MACHO
  if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
    fnaddr = machopic_indirect_call_target (fnaddr);
#else
  /* Static functions and indirect calls don't need the pic register.  */
  if (! TARGET_64BIT && flag_pic
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
    use_reg (&use, pic_offset_table_rtx);
12029 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
12031 rtx al = gen_rtx_REG (QImode, 0);
12032 emit_move_insn (al, callarg2);
12033 use_reg (&use, al);
12035 #endif /* TARGET_MACHO */
12037 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
12039 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
12040 fnaddr = gen_rtx_MEM (QImode, fnaddr);
12042 if (sibcall && TARGET_64BIT
12043 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
12046 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
12047 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
12048 emit_move_insn (fnaddr, addr);
12049 fnaddr = gen_rtx_MEM (QImode, fnaddr);
12052 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
12054 call = gen_rtx_SET (VOIDmode, retval, call);
12057 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
12058 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
12059 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
12062 call = emit_call_insn (call);
12064 CALL_INSN_FUNCTION_USAGE (call) = use;
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */
12072 static struct machine_function *
12073 ix86_init_machine_status (void)
12075 struct machine_function *f;
12077 f = ggc_alloc_cleared (sizeof (struct machine_function));
12078 f->use_fast_prologue_epilogue_nregs = -1;
12083 /* Return a MEM corresponding to a stack slot with mode MODE.
12084 Allocate a new slot if necessary.
12086 The RTL for a function can have several slots available: N is
12087 which slot to use. */
12090 assign_386_stack_local (enum machine_mode mode, int n)
12092 struct stack_local_entry *s;
12094 gcc_assert (n >= 0 && n < MAX_386_STACK_LOCALS);
12096 for (s = ix86_stack_locals; s; s = s->next)
12097 if (s->mode == mode && s->n == n)
12100 s = (struct stack_local_entry *)
12101 ggc_alloc (sizeof (struct stack_local_entry));
12104 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
12106 s->next = ix86_stack_locals;
12107 ix86_stack_locals = s;
12111 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12113 static GTY(()) rtx ix86_tls_symbol;
12115 ix86_tls_get_addr (void)
12118 if (!ix86_tls_symbol)
12120 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
12121 (TARGET_GNU_TLS && !TARGET_64BIT)
12122 ? "___tls_get_addr"
12123 : "__tls_get_addr");
12126 return ix86_tls_symbol;
12129 /* Calculate the length of the memory address in the instruction
12130 encoding. Does not include the one-byte modrm, opcode, or prefix. */
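/* A few concrete cases, counting only the bytes that follow the modrm
   byte: (%eax) adds 0; (%ebp) adds 1, because ebp-based forms always
   carry at least a disp8; (%esp) adds 1 for the mandatory SIB byte;
   4(%eax,%ebx,2) adds 2 (SIB + disp8); and a bare symbolic address
   adds 4 for the disp32.  */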
12133 memory_address_length (rtx addr)
12135 struct ix86_address parts;
12136 rtx base, index, disp;
12140 if (GET_CODE (addr) == PRE_DEC
12141 || GET_CODE (addr) == POST_INC
12142 || GET_CODE (addr) == PRE_MODIFY
12143 || GET_CODE (addr) == POST_MODIFY)
12146 ok = ix86_decompose_address (addr, &parts);
12149 if (parts.base && GET_CODE (parts.base) == SUBREG)
12150 parts.base = SUBREG_REG (parts.base);
12151 if (parts.index && GET_CODE (parts.index) == SUBREG)
12152 parts.index = SUBREG_REG (parts.index);
12155 index = parts.index;
/* Rule of thumb:
     - esp as the base always wants an index,
     - ebp as the base always wants a displacement.  */
12163 /* Register Indirect. */
12164 if (base && !index && !disp)
12166 /* esp (for its index) and ebp (for its displacement) need
12167 the two-byte modrm form. */
12168 if (addr == stack_pointer_rtx
12169 || addr == arg_pointer_rtx
12170 || addr == frame_pointer_rtx
12171 || addr == hard_frame_pointer_rtx)
12175 /* Direct Addressing. */
12176 else if (disp && !base && !index)
12181 /* Find the length of the displacement constant. */
12184 if (GET_CODE (disp) == CONST_INT
12185 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
12191 /* ebp always wants a displacement. */
12192 else if (base == hard_frame_pointer_rtx)
/* An index requires the two-byte modrm form....  */
if (index
    /* ...like esp, which always wants an index.  */
12198 || base == stack_pointer_rtx
12199 || base == arg_pointer_rtx
12200 || base == frame_pointer_rtx)
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that the insn has an 8-bit immediate alternative.  */
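/* For instance, with SHORTFORM set, "addl $3, %eax" can use the
   sign-extended 8-bit immediate form (opcode 0x83) and is charged a
   single immediate byte, while "addl $300, %eax" needs the 0x81 form
   and four bytes; the 'K' constraint check below (signed 8-bit range)
   is what separates the two.  */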
12210 ix86_attr_length_immediate_default (rtx insn, int shortform)
12214 extract_insn_cached (insn);
12215 for (i = recog_data.n_operands - 1; i >= 0; --i)
12216 if (CONSTANT_P (recog_data.operand[i]))
12220 && GET_CODE (recog_data.operand[i]) == CONST_INT
12221 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
12225 switch (get_attr_mode (insn))
/* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
12241 fatal_insn ("unknown insn mode", insn);
12247 /* Compute default value for "length_address" attribute. */
12249 ix86_attr_length_address_default (rtx insn)
12253 if (get_attr_type (insn) == TYPE_LEA)
12255 rtx set = PATTERN (insn);
12257 if (GET_CODE (set) == PARALLEL)
12258 set = XVECEXP (set, 0, 0);
12260 gcc_assert (GET_CODE (set) == SET);
12262 return memory_address_length (SET_SRC (set));
12265 extract_insn_cached (insn);
12266 for (i = recog_data.n_operands - 1; i >= 0; --i)
12267 if (GET_CODE (recog_data.operand[i]) == MEM)
12269 return memory_address_length (XEXP (recog_data.operand[i], 0));
12275 /* Return the maximum number of instructions a cpu can issue. */
12278 ix86_issue_rate (void)
12282 case PROCESSOR_PENTIUM:
12286 case PROCESSOR_PENTIUMPRO:
12287 case PROCESSOR_PENTIUM4:
12288 case PROCESSOR_ATHLON:
12290 case PROCESSOR_NOCONA:
12298 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12299 by DEP_INSN and nothing set by DEP_INSN. */
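/* For example, "cmpl %eax, %ebx" followed by "je .L1" satisfies this
   test: the jump reads nothing but the flags the compare produced.
   The Pentium case in ix86_adjust_cost relies on this when modeling
   how compares pair with the following jump or setcc.  */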
12302 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12306 /* Simplify the test for uninteresting insns. */
12307 if (insn_type != TYPE_SETCC
12308 && insn_type != TYPE_ICMOV
12309 && insn_type != TYPE_FCMOV
12310 && insn_type != TYPE_IBR)
12313 if ((set = single_set (dep_insn)) != 0)
12315 set = SET_DEST (set);
12318 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12319 && XVECLEN (PATTERN (dep_insn), 0) == 2
12320 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12321 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
12329 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12332 /* This test is true if the dependent insn reads the flags but
12333 not any other potentially set register. */
12334 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12337 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12343 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12344 address with operands set by DEP_INSN. */
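/* A classic Pentium example of such an address-generation interlock:

	movl 4(%esp), %eax
	movl (%eax), %edx

   The second insn needs %eax to form its address in the cycle right
   after %eax is written, which costs an extra cycle, so the cost
   adjustment keeps dependent pairs like this apart.  */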
12347 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12351 if (insn_type == TYPE_LEA
12354 addr = PATTERN (insn);
12356 if (GET_CODE (addr) == PARALLEL)
12357 addr = XVECEXP (addr, 0, 0);
12359 gcc_assert (GET_CODE (addr) == SET);
12361 addr = SET_SRC (addr);
12366 extract_insn_cached (insn);
12367 for (i = recog_data.n_operands - 1; i >= 0; --i)
12368 if (GET_CODE (recog_data.operand[i]) == MEM)
12370 addr = XEXP (recog_data.operand[i], 0);
12377 return modified_in_p (addr, dep_insn);
12381 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12383 enum attr_type insn_type, dep_insn_type;
12384 enum attr_memory memory;
12386 int dep_insn_code_number;
12388 /* Anti and output dependencies have zero cost on all CPUs. */
12389 if (REG_NOTE_KIND (link) != 0)
12392 dep_insn_code_number = recog_memoized (dep_insn);
12394 /* If we can't recognize the insns, we can't really do anything. */
12395 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12398 insn_type = get_attr_type (insn);
12399 dep_insn_type = get_attr_type (dep_insn);
12403 case PROCESSOR_PENTIUM:
12404 /* Address Generation Interlock adds a cycle of latency. */
12405 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12408 /* ??? Compares pair with jump/setcc. */
12409 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12412 /* Floating point stores require value to be ready one cycle earlier. */
12413 if (insn_type == TYPE_FMOV
12414 && get_attr_memory (insn) == MEMORY_STORE
12415 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12419 case PROCESSOR_PENTIUMPRO:
12420 memory = get_attr_memory (insn);
12422 /* INT->FP conversion is expensive. */
12423 if (get_attr_fp_int_src (dep_insn))
12426 /* There is one cycle extra latency between an FP op and a store. */
12427 if (insn_type == TYPE_FMOV
12428 && (set = single_set (dep_insn)) != NULL_RTX
12429 && (set2 = single_set (insn)) != NULL_RTX
12430 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12431 && GET_CODE (SET_DEST (set2)) == MEM)
/* Show the ability of the reorder buffer to hide the latency of a load
   by executing it in parallel with the previous instruction when that
   instruction is not needed to compute the address.  */
12437 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12438 && !ix86_agi_dependant (insn, dep_insn, insn_type))
/* Claim moves to take one cycle, as the core can issue one load
   at a time and the next load can start a cycle later.  */
12442 if (dep_insn_type == TYPE_IMOV
12443 || dep_insn_type == TYPE_FMOV)
12451 memory = get_attr_memory (insn);
/* The esp dependency is resolved before the instruction is really
   finished.  */
12455 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12456 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12459 /* INT->FP conversion is expensive. */
12460 if (get_attr_fp_int_src (dep_insn))
/* Show the ability of the reorder buffer to hide the latency of a load
   by executing it in parallel with the previous instruction when that
   instruction is not needed to compute the address.  */
12466 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12467 && !ix86_agi_dependant (insn, dep_insn, insn_type))
/* Claim moves to take one cycle, as the core can issue one load
   at a time and the next load can start a cycle later.  */
12471 if (dep_insn_type == TYPE_IMOV
12472 || dep_insn_type == TYPE_FMOV)
12481 case PROCESSOR_ATHLON:
12483 memory = get_attr_memory (insn);
/* Show the ability of the reorder buffer to hide the latency of a load
   by executing it in parallel with the previous instruction when that
   instruction is not needed to compute the address.  */
12488 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12489 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12491 enum attr_unit unit = get_attr_unit (insn);
/* Because of the difference between the length of integer and
   floating unit pipeline preparation stages, the memory operands
   for floating point are cheaper.

   ??? For Athlon the difference is most probably 2.  */
12499 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12502 loadcost = TARGET_ATHLON ? 2 : 0;
12504 if (cost >= loadcost)
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */
12522 ia32_multipass_dfa_lookahead (void)
12524 if (ix86_tune == PROCESSOR_PENTIUM)
12527 if (ix86_tune == PROCESSOR_PENTIUMPRO
12528 || ix86_tune == PROCESSOR_K6)
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */
12543 ix86_constant_alignment (tree exp, int align)
12545 if (TREE_CODE (exp) == REAL_CST)
12547 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12549 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12552 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12553 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12554 return BITS_PER_WORD;
12559 /* Compute the alignment for a static variable.
12560 TYPE is the data type, and ALIGN is the alignment that
12561 the object would ordinarily have. The value of this function is used
12562 instead of that alignment to align the object. */
12565 ix86_data_alignment (tree type, int align)
12567 if (AGGREGATE_TYPE_P (type)
12568 && TYPE_SIZE (type)
12569 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12570 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12571 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
/* x86-64 ABI requires arrays greater than 16 bytes to be aligned
   to a 16-byte boundary.  */
12578 if (AGGREGATE_TYPE_P (type)
12579 && TYPE_SIZE (type)
12580 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12581 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12582 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12586 if (TREE_CODE (type) == ARRAY_TYPE)
12588 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12590 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12593 else if (TREE_CODE (type) == COMPLEX_TYPE)
12596 if (TYPE_MODE (type) == DCmode && align < 64)
12598 if (TYPE_MODE (type) == XCmode && align < 128)
12601 else if ((TREE_CODE (type) == RECORD_TYPE
12602 || TREE_CODE (type) == UNION_TYPE
12603 || TREE_CODE (type) == QUAL_UNION_TYPE)
12604 && TYPE_FIELDS (type))
12606 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12608 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12611 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12612 || TREE_CODE (type) == INTEGER_TYPE)
12614 if (TYPE_MODE (type) == DFmode && align < 64)
12616 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12623 /* Compute the alignment for a local variable.
12624 TYPE is the data type, and ALIGN is the alignment that
12625 the object would ordinarily have. The value of this macro is used
12626 instead of that alignment to align the object. */
12629 ix86_local_alignment (tree type, int align)
/* x86-64 ABI requires arrays greater than 16 bytes to be aligned
   to a 16-byte boundary.  */
12635 if (AGGREGATE_TYPE_P (type)
12636 && TYPE_SIZE (type)
12637 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12638 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12639 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12642 if (TREE_CODE (type) == ARRAY_TYPE)
12644 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12646 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12649 else if (TREE_CODE (type) == COMPLEX_TYPE)
12651 if (TYPE_MODE (type) == DCmode && align < 64)
12653 if (TYPE_MODE (type) == XCmode && align < 128)
12656 else if ((TREE_CODE (type) == RECORD_TYPE
12657 || TREE_CODE (type) == UNION_TYPE
12658 || TREE_CODE (type) == QUAL_UNION_TYPE)
12659 && TYPE_FIELDS (type))
12661 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12663 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12666 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12667 || TREE_CODE (type) == INTEGER_TYPE)
12670 if (TYPE_MODE (type) == DFmode && align < 64)
12672 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12678 /* Emit RTL insns to initialize the variable parts of a trampoline.
12679 FNADDR is an RTX for the address of the function's pure code.
12680 CXT is an RTX for the static chain value for the function. */
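/* The 10-byte !TARGET_64BIT trampoline emitted below is laid out as

	offset 0:  b9 <cxt:4>    movl $CXT, %ecx
	offset 5:  e9 <disp:4>   jmp  FNADDR

   where the jmp displacement is relative to the end of the trampoline
   (tramp + 10), hence the subtraction computing DISP, and %ecx is the
   IA-32 static chain register.  */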
12682 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12686 /* Compute offset from the end of the jmp to the target function. */
12687 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12688 plus_constant (tramp, 10),
12689 NULL_RTX, 1, OPTAB_DIRECT);
12690 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12691 gen_int_mode (0xb9, QImode));
12692 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12693 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12694 gen_int_mode (0xe9, QImode));
12695 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
/* Try to load address using shorter movl instead of movabs.
   We may want to support movq for kernel mode, but the kernel does not
   use trampolines at the moment.  */
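/* Byte layout of the 64-bit sequence stored below (shorter movl form
   shown first):

	41 bb <fn:4>    movl   $FNADDR, %r11d    (or 49 bb <fn:8>, movabs)
	49 ba <cxt:8>   movabs $CXT, %r10
	49 ff e3        jmp    *%r11

   The HImode stores write the two opcode bytes little-endian, which
   is why 0xbb41 in the code below means the byte sequence 41 bb.  */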
12703 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
12705 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12706 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12707 gen_int_mode (0xbb41, HImode));
12708 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12709 gen_lowpart (SImode, fnaddr));
12714 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12715 gen_int_mode (0xbb49, HImode));
12716 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12720 /* Load static chain using movabs to r10. */
12721 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12722 gen_int_mode (0xba49, HImode));
12723 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
/* Jump to r11.  */
12727 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12728 gen_int_mode (0xff49, HImode));
12729 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12730 gen_int_mode (0xe3, QImode));
12732 gcc_assert (offset <= TRAMPOLINE_SIZE);
12735 #ifdef ENABLE_EXECUTE_STACK
12736 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
12737 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12741 /* Codes for all the SSE/MMX builtins. */
12744 IX86_BUILTIN_ADDPS,
12745 IX86_BUILTIN_ADDSS,
12746 IX86_BUILTIN_DIVPS,
12747 IX86_BUILTIN_DIVSS,
12748 IX86_BUILTIN_MULPS,
12749 IX86_BUILTIN_MULSS,
12750 IX86_BUILTIN_SUBPS,
12751 IX86_BUILTIN_SUBSS,
12753 IX86_BUILTIN_CMPEQPS,
12754 IX86_BUILTIN_CMPLTPS,
12755 IX86_BUILTIN_CMPLEPS,
12756 IX86_BUILTIN_CMPGTPS,
12757 IX86_BUILTIN_CMPGEPS,
12758 IX86_BUILTIN_CMPNEQPS,
12759 IX86_BUILTIN_CMPNLTPS,
12760 IX86_BUILTIN_CMPNLEPS,
12761 IX86_BUILTIN_CMPNGTPS,
12762 IX86_BUILTIN_CMPNGEPS,
12763 IX86_BUILTIN_CMPORDPS,
12764 IX86_BUILTIN_CMPUNORDPS,
12765 IX86_BUILTIN_CMPNEPS,
12766 IX86_BUILTIN_CMPEQSS,
12767 IX86_BUILTIN_CMPLTSS,
12768 IX86_BUILTIN_CMPLESS,
12769 IX86_BUILTIN_CMPNEQSS,
12770 IX86_BUILTIN_CMPNLTSS,
12771 IX86_BUILTIN_CMPNLESS,
12772 IX86_BUILTIN_CMPNGTSS,
12773 IX86_BUILTIN_CMPNGESS,
12774 IX86_BUILTIN_CMPORDSS,
12775 IX86_BUILTIN_CMPUNORDSS,
12776 IX86_BUILTIN_CMPNESS,
12778 IX86_BUILTIN_COMIEQSS,
12779 IX86_BUILTIN_COMILTSS,
12780 IX86_BUILTIN_COMILESS,
12781 IX86_BUILTIN_COMIGTSS,
12782 IX86_BUILTIN_COMIGESS,
12783 IX86_BUILTIN_COMINEQSS,
12784 IX86_BUILTIN_UCOMIEQSS,
12785 IX86_BUILTIN_UCOMILTSS,
12786 IX86_BUILTIN_UCOMILESS,
12787 IX86_BUILTIN_UCOMIGTSS,
12788 IX86_BUILTIN_UCOMIGESS,
12789 IX86_BUILTIN_UCOMINEQSS,
12791 IX86_BUILTIN_CVTPI2PS,
12792 IX86_BUILTIN_CVTPS2PI,
12793 IX86_BUILTIN_CVTSI2SS,
12794 IX86_BUILTIN_CVTSI642SS,
12795 IX86_BUILTIN_CVTSS2SI,
12796 IX86_BUILTIN_CVTSS2SI64,
12797 IX86_BUILTIN_CVTTPS2PI,
12798 IX86_BUILTIN_CVTTSS2SI,
12799 IX86_BUILTIN_CVTTSS2SI64,
12801 IX86_BUILTIN_MAXPS,
12802 IX86_BUILTIN_MAXSS,
12803 IX86_BUILTIN_MINPS,
12804 IX86_BUILTIN_MINSS,
12806 IX86_BUILTIN_LOADUPS,
12807 IX86_BUILTIN_STOREUPS,
12808 IX86_BUILTIN_MOVSS,
12810 IX86_BUILTIN_MOVHLPS,
12811 IX86_BUILTIN_MOVLHPS,
12812 IX86_BUILTIN_LOADHPS,
12813 IX86_BUILTIN_LOADLPS,
12814 IX86_BUILTIN_STOREHPS,
12815 IX86_BUILTIN_STORELPS,
12817 IX86_BUILTIN_MASKMOVQ,
12818 IX86_BUILTIN_MOVMSKPS,
12819 IX86_BUILTIN_PMOVMSKB,
12821 IX86_BUILTIN_MOVNTPS,
12822 IX86_BUILTIN_MOVNTQ,
12824 IX86_BUILTIN_LOADDQU,
12825 IX86_BUILTIN_STOREDQU,
12827 IX86_BUILTIN_PACKSSWB,
12828 IX86_BUILTIN_PACKSSDW,
12829 IX86_BUILTIN_PACKUSWB,
12831 IX86_BUILTIN_PADDB,
12832 IX86_BUILTIN_PADDW,
12833 IX86_BUILTIN_PADDD,
12834 IX86_BUILTIN_PADDQ,
12835 IX86_BUILTIN_PADDSB,
12836 IX86_BUILTIN_PADDSW,
12837 IX86_BUILTIN_PADDUSB,
12838 IX86_BUILTIN_PADDUSW,
12839 IX86_BUILTIN_PSUBB,
12840 IX86_BUILTIN_PSUBW,
12841 IX86_BUILTIN_PSUBD,
12842 IX86_BUILTIN_PSUBQ,
12843 IX86_BUILTIN_PSUBSB,
12844 IX86_BUILTIN_PSUBSW,
12845 IX86_BUILTIN_PSUBUSB,
12846 IX86_BUILTIN_PSUBUSW,
12849 IX86_BUILTIN_PANDN,
12853 IX86_BUILTIN_PAVGB,
12854 IX86_BUILTIN_PAVGW,
12856 IX86_BUILTIN_PCMPEQB,
12857 IX86_BUILTIN_PCMPEQW,
12858 IX86_BUILTIN_PCMPEQD,
12859 IX86_BUILTIN_PCMPGTB,
12860 IX86_BUILTIN_PCMPGTW,
12861 IX86_BUILTIN_PCMPGTD,
12863 IX86_BUILTIN_PMADDWD,
12865 IX86_BUILTIN_PMAXSW,
12866 IX86_BUILTIN_PMAXUB,
12867 IX86_BUILTIN_PMINSW,
12868 IX86_BUILTIN_PMINUB,
12870 IX86_BUILTIN_PMULHUW,
12871 IX86_BUILTIN_PMULHW,
12872 IX86_BUILTIN_PMULLW,
12874 IX86_BUILTIN_PSADBW,
12875 IX86_BUILTIN_PSHUFW,
12877 IX86_BUILTIN_PSLLW,
12878 IX86_BUILTIN_PSLLD,
12879 IX86_BUILTIN_PSLLQ,
12880 IX86_BUILTIN_PSRAW,
12881 IX86_BUILTIN_PSRAD,
12882 IX86_BUILTIN_PSRLW,
12883 IX86_BUILTIN_PSRLD,
12884 IX86_BUILTIN_PSRLQ,
12885 IX86_BUILTIN_PSLLWI,
12886 IX86_BUILTIN_PSLLDI,
12887 IX86_BUILTIN_PSLLQI,
12888 IX86_BUILTIN_PSRAWI,
12889 IX86_BUILTIN_PSRADI,
12890 IX86_BUILTIN_PSRLWI,
12891 IX86_BUILTIN_PSRLDI,
12892 IX86_BUILTIN_PSRLQI,
12894 IX86_BUILTIN_PUNPCKHBW,
12895 IX86_BUILTIN_PUNPCKHWD,
12896 IX86_BUILTIN_PUNPCKHDQ,
12897 IX86_BUILTIN_PUNPCKLBW,
12898 IX86_BUILTIN_PUNPCKLWD,
12899 IX86_BUILTIN_PUNPCKLDQ,
12901 IX86_BUILTIN_SHUFPS,
12903 IX86_BUILTIN_RCPPS,
12904 IX86_BUILTIN_RCPSS,
12905 IX86_BUILTIN_RSQRTPS,
12906 IX86_BUILTIN_RSQRTSS,
12907 IX86_BUILTIN_SQRTPS,
12908 IX86_BUILTIN_SQRTSS,
12910 IX86_BUILTIN_UNPCKHPS,
12911 IX86_BUILTIN_UNPCKLPS,
12913 IX86_BUILTIN_ANDPS,
12914 IX86_BUILTIN_ANDNPS,
12916 IX86_BUILTIN_XORPS,
12919 IX86_BUILTIN_LDMXCSR,
12920 IX86_BUILTIN_STMXCSR,
12921 IX86_BUILTIN_SFENCE,
12923 /* 3DNow! Original */
12924 IX86_BUILTIN_FEMMS,
12925 IX86_BUILTIN_PAVGUSB,
12926 IX86_BUILTIN_PF2ID,
12927 IX86_BUILTIN_PFACC,
12928 IX86_BUILTIN_PFADD,
12929 IX86_BUILTIN_PFCMPEQ,
12930 IX86_BUILTIN_PFCMPGE,
12931 IX86_BUILTIN_PFCMPGT,
12932 IX86_BUILTIN_PFMAX,
12933 IX86_BUILTIN_PFMIN,
12934 IX86_BUILTIN_PFMUL,
12935 IX86_BUILTIN_PFRCP,
12936 IX86_BUILTIN_PFRCPIT1,
12937 IX86_BUILTIN_PFRCPIT2,
12938 IX86_BUILTIN_PFRSQIT1,
12939 IX86_BUILTIN_PFRSQRT,
12940 IX86_BUILTIN_PFSUB,
12941 IX86_BUILTIN_PFSUBR,
12942 IX86_BUILTIN_PI2FD,
12943 IX86_BUILTIN_PMULHRW,
12945 /* 3DNow! Athlon Extensions */
12946 IX86_BUILTIN_PF2IW,
12947 IX86_BUILTIN_PFNACC,
12948 IX86_BUILTIN_PFPNACC,
12949 IX86_BUILTIN_PI2FW,
12950 IX86_BUILTIN_PSWAPDSI,
12951 IX86_BUILTIN_PSWAPDSF,
12954 IX86_BUILTIN_ADDPD,
12955 IX86_BUILTIN_ADDSD,
12956 IX86_BUILTIN_DIVPD,
12957 IX86_BUILTIN_DIVSD,
12958 IX86_BUILTIN_MULPD,
12959 IX86_BUILTIN_MULSD,
12960 IX86_BUILTIN_SUBPD,
12961 IX86_BUILTIN_SUBSD,
12963 IX86_BUILTIN_CMPEQPD,
12964 IX86_BUILTIN_CMPLTPD,
12965 IX86_BUILTIN_CMPLEPD,
12966 IX86_BUILTIN_CMPGTPD,
12967 IX86_BUILTIN_CMPGEPD,
12968 IX86_BUILTIN_CMPNEQPD,
12969 IX86_BUILTIN_CMPNLTPD,
12970 IX86_BUILTIN_CMPNLEPD,
12971 IX86_BUILTIN_CMPNGTPD,
12972 IX86_BUILTIN_CMPNGEPD,
12973 IX86_BUILTIN_CMPORDPD,
12974 IX86_BUILTIN_CMPUNORDPD,
12975 IX86_BUILTIN_CMPNEPD,
12976 IX86_BUILTIN_CMPEQSD,
12977 IX86_BUILTIN_CMPLTSD,
12978 IX86_BUILTIN_CMPLESD,
12979 IX86_BUILTIN_CMPNEQSD,
12980 IX86_BUILTIN_CMPNLTSD,
12981 IX86_BUILTIN_CMPNLESD,
12982 IX86_BUILTIN_CMPORDSD,
12983 IX86_BUILTIN_CMPUNORDSD,
12984 IX86_BUILTIN_CMPNESD,
12986 IX86_BUILTIN_COMIEQSD,
12987 IX86_BUILTIN_COMILTSD,
12988 IX86_BUILTIN_COMILESD,
12989 IX86_BUILTIN_COMIGTSD,
12990 IX86_BUILTIN_COMIGESD,
12991 IX86_BUILTIN_COMINEQSD,
12992 IX86_BUILTIN_UCOMIEQSD,
12993 IX86_BUILTIN_UCOMILTSD,
12994 IX86_BUILTIN_UCOMILESD,
12995 IX86_BUILTIN_UCOMIGTSD,
12996 IX86_BUILTIN_UCOMIGESD,
12997 IX86_BUILTIN_UCOMINEQSD,
12999 IX86_BUILTIN_MAXPD,
13000 IX86_BUILTIN_MAXSD,
13001 IX86_BUILTIN_MINPD,
13002 IX86_BUILTIN_MINSD,
13004 IX86_BUILTIN_ANDPD,
13005 IX86_BUILTIN_ANDNPD,
13007 IX86_BUILTIN_XORPD,
13009 IX86_BUILTIN_SQRTPD,
13010 IX86_BUILTIN_SQRTSD,
13012 IX86_BUILTIN_UNPCKHPD,
13013 IX86_BUILTIN_UNPCKLPD,
13015 IX86_BUILTIN_SHUFPD,
13017 IX86_BUILTIN_LOADUPD,
13018 IX86_BUILTIN_STOREUPD,
13019 IX86_BUILTIN_MOVSD,
13021 IX86_BUILTIN_LOADHPD,
13022 IX86_BUILTIN_LOADLPD,
13024 IX86_BUILTIN_CVTDQ2PD,
13025 IX86_BUILTIN_CVTDQ2PS,
13027 IX86_BUILTIN_CVTPD2DQ,
13028 IX86_BUILTIN_CVTPD2PI,
13029 IX86_BUILTIN_CVTPD2PS,
13030 IX86_BUILTIN_CVTTPD2DQ,
13031 IX86_BUILTIN_CVTTPD2PI,
13033 IX86_BUILTIN_CVTPI2PD,
13034 IX86_BUILTIN_CVTSI2SD,
13035 IX86_BUILTIN_CVTSI642SD,
13037 IX86_BUILTIN_CVTSD2SI,
13038 IX86_BUILTIN_CVTSD2SI64,
13039 IX86_BUILTIN_CVTSD2SS,
13040 IX86_BUILTIN_CVTSS2SD,
13041 IX86_BUILTIN_CVTTSD2SI,
13042 IX86_BUILTIN_CVTTSD2SI64,
13044 IX86_BUILTIN_CVTPS2DQ,
13045 IX86_BUILTIN_CVTPS2PD,
13046 IX86_BUILTIN_CVTTPS2DQ,
13048 IX86_BUILTIN_MOVNTI,
13049 IX86_BUILTIN_MOVNTPD,
13050 IX86_BUILTIN_MOVNTDQ,
13053 IX86_BUILTIN_MASKMOVDQU,
13054 IX86_BUILTIN_MOVMSKPD,
13055 IX86_BUILTIN_PMOVMSKB128,
13057 IX86_BUILTIN_PACKSSWB128,
13058 IX86_BUILTIN_PACKSSDW128,
13059 IX86_BUILTIN_PACKUSWB128,
13061 IX86_BUILTIN_PADDB128,
13062 IX86_BUILTIN_PADDW128,
13063 IX86_BUILTIN_PADDD128,
13064 IX86_BUILTIN_PADDQ128,
13065 IX86_BUILTIN_PADDSB128,
13066 IX86_BUILTIN_PADDSW128,
13067 IX86_BUILTIN_PADDUSB128,
13068 IX86_BUILTIN_PADDUSW128,
13069 IX86_BUILTIN_PSUBB128,
13070 IX86_BUILTIN_PSUBW128,
13071 IX86_BUILTIN_PSUBD128,
13072 IX86_BUILTIN_PSUBQ128,
13073 IX86_BUILTIN_PSUBSB128,
13074 IX86_BUILTIN_PSUBSW128,
13075 IX86_BUILTIN_PSUBUSB128,
13076 IX86_BUILTIN_PSUBUSW128,
13078 IX86_BUILTIN_PAND128,
13079 IX86_BUILTIN_PANDN128,
13080 IX86_BUILTIN_POR128,
13081 IX86_BUILTIN_PXOR128,
13083 IX86_BUILTIN_PAVGB128,
13084 IX86_BUILTIN_PAVGW128,
13086 IX86_BUILTIN_PCMPEQB128,
13087 IX86_BUILTIN_PCMPEQW128,
13088 IX86_BUILTIN_PCMPEQD128,
13089 IX86_BUILTIN_PCMPGTB128,
13090 IX86_BUILTIN_PCMPGTW128,
13091 IX86_BUILTIN_PCMPGTD128,
13093 IX86_BUILTIN_PMADDWD128,
13095 IX86_BUILTIN_PMAXSW128,
13096 IX86_BUILTIN_PMAXUB128,
13097 IX86_BUILTIN_PMINSW128,
13098 IX86_BUILTIN_PMINUB128,
13100 IX86_BUILTIN_PMULUDQ,
13101 IX86_BUILTIN_PMULUDQ128,
13102 IX86_BUILTIN_PMULHUW128,
13103 IX86_BUILTIN_PMULHW128,
13104 IX86_BUILTIN_PMULLW128,
13106 IX86_BUILTIN_PSADBW128,
13107 IX86_BUILTIN_PSHUFHW,
13108 IX86_BUILTIN_PSHUFLW,
13109 IX86_BUILTIN_PSHUFD,
13111 IX86_BUILTIN_PSLLW128,
13112 IX86_BUILTIN_PSLLD128,
13113 IX86_BUILTIN_PSLLQ128,
13114 IX86_BUILTIN_PSRAW128,
13115 IX86_BUILTIN_PSRAD128,
13116 IX86_BUILTIN_PSRLW128,
13117 IX86_BUILTIN_PSRLD128,
13118 IX86_BUILTIN_PSRLQ128,
13119 IX86_BUILTIN_PSLLDQI128,
13120 IX86_BUILTIN_PSLLWI128,
13121 IX86_BUILTIN_PSLLDI128,
13122 IX86_BUILTIN_PSLLQI128,
13123 IX86_BUILTIN_PSRAWI128,
13124 IX86_BUILTIN_PSRADI128,
13125 IX86_BUILTIN_PSRLDQI128,
13126 IX86_BUILTIN_PSRLWI128,
13127 IX86_BUILTIN_PSRLDI128,
13128 IX86_BUILTIN_PSRLQI128,
13130 IX86_BUILTIN_PUNPCKHBW128,
13131 IX86_BUILTIN_PUNPCKHWD128,
13132 IX86_BUILTIN_PUNPCKHDQ128,
13133 IX86_BUILTIN_PUNPCKHQDQ128,
13134 IX86_BUILTIN_PUNPCKLBW128,
13135 IX86_BUILTIN_PUNPCKLWD128,
13136 IX86_BUILTIN_PUNPCKLDQ128,
13137 IX86_BUILTIN_PUNPCKLQDQ128,
13139 IX86_BUILTIN_CLFLUSH,
13140 IX86_BUILTIN_MFENCE,
13141 IX86_BUILTIN_LFENCE,
13143 /* Prescott New Instructions. */
13144 IX86_BUILTIN_ADDSUBPS,
13145 IX86_BUILTIN_HADDPS,
13146 IX86_BUILTIN_HSUBPS,
13147 IX86_BUILTIN_MOVSHDUP,
13148 IX86_BUILTIN_MOVSLDUP,
13149 IX86_BUILTIN_ADDSUBPD,
13150 IX86_BUILTIN_HADDPD,
13151 IX86_BUILTIN_HSUBPD,
13152 IX86_BUILTIN_LDDQU,
13154 IX86_BUILTIN_MONITOR,
13155 IX86_BUILTIN_MWAIT,
13157 IX86_BUILTIN_VEC_INIT_V2SI,
13158 IX86_BUILTIN_VEC_INIT_V4HI,
13159 IX86_BUILTIN_VEC_INIT_V8QI,
13160 IX86_BUILTIN_VEC_EXT_V2DF,
13161 IX86_BUILTIN_VEC_EXT_V2DI,
13162 IX86_BUILTIN_VEC_EXT_V4SF,
13163 IX86_BUILTIN_VEC_EXT_V4SI,
13164 IX86_BUILTIN_VEC_EXT_V8HI,
13165 IX86_BUILTIN_VEC_EXT_V2SI,
13166 IX86_BUILTIN_VEC_EXT_V4HI,
13167 IX86_BUILTIN_VEC_SET_V8HI,
13168 IX86_BUILTIN_VEC_SET_V4HI,
#define def_builtin(MASK, NAME, TYPE, CODE)				\
do {									\
  if ((MASK) & target_flags						\
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))			\
    lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
				 NULL, NULL_TREE);			\
} while (0)
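/* Typical use (the type tree variable named here is illustrative):

     def_builtin (MASK_SSE, "__builtin_ia32_addps",
		  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   registers the builtin only when -msse is reflected in target_flags,
   and skips 64-bit-only builtins on 32-bit targets.  */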
13181 /* Bits for builtin_description.flag. */
13183 /* Set when we don't support the comparison natively, and should
13184 swap_comparison in order to support it. */
13185 #define BUILTIN_DESC_SWAP_OPERANDS 1
struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};
13197 static const struct builtin_description bdesc_comi[] =
13199 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
13200 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
13201 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
13202 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
13203 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
13204 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
13205 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
13206 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
13207 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
13208 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
13209 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
13210 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
13211 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
13212 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
13213 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
13214 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
13215 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
13216 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
13217 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
13218 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
13219 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
13220 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
13221 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
13222 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
13225 static const struct builtin_description bdesc_2arg[] =
13228 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
13229 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
13230 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
13231 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
13232 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
13233 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
13234 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
13235 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
13237 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
13238 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
13239 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
13240 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
13241 BUILTIN_DESC_SWAP_OPERANDS },
13242 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
13243 BUILTIN_DESC_SWAP_OPERANDS },
13244 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
13245 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
13246 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
13247 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
13248 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
13249 BUILTIN_DESC_SWAP_OPERANDS },
13250 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
13251 BUILTIN_DESC_SWAP_OPERANDS },
13252 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
13253 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
13254 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
13255 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
13256 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
13257 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
13258 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
13259 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
13260 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
13261 BUILTIN_DESC_SWAP_OPERANDS },
13262 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
13263 BUILTIN_DESC_SWAP_OPERANDS },
13264 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
13266 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
13267 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
13268 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
13269 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
13271 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
13272 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
13273 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
13274 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
13276 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
13277 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
13278 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
13279 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
13280 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
13283 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
13284 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
13285 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
13286 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
13287 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
13288 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
13289 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
13290 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
13292 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
13293 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
13294 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
13295 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
13296 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
13297 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
13298 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
13299 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
13301 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
13302 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
13303 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
13305 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
13306 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
13307 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
13308 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
13310 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
13311 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
13313 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
13314 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
13315 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
13316 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
13317 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
13318 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
13320 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
13321 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
13322 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
13323 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
13325 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
13326 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
13327 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
13328 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
13329 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
13330 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
13333 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
13334 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
13335 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
13337 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
13338 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
13339 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
13341 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
13342 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
13343 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
13344 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
13345 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
13346 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
13348 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
13349 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
13350 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
13351 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
13352 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
13353 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
13355 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
13356 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
13357 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
13358 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
13360 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
13361 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
13364 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
13365 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
13366 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
13367 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
13368 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
13369 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
13370 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
13371 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
13373 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
13374 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
13375 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
13376 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
13377 BUILTIN_DESC_SWAP_OPERANDS },
13378 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
13379 BUILTIN_DESC_SWAP_OPERANDS },
13380 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
13381 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
13382 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
13383 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
13384 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
13385 BUILTIN_DESC_SWAP_OPERANDS },
13386 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
13387 BUILTIN_DESC_SWAP_OPERANDS },
13388 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
13389 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
13390 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
13391 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
13392 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
13393 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
13394 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
13395 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
13396 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
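/* Illustrative note (added commentary, not in the original table): SSE2 only
   encodes EQ/LT/LE/UNORD and their negations, so the GT/GE entries above
   reuse the LT/LE icodes with BUILTIN_DESC_SWAP_OPERANDS set.  A hedged
   user-level sketch, assuming the usual <emmintrin.h> wrappers:  */
#if 0
#include <emmintrin.h>
static __m128d
cmpgt_demo (__m128d a, __m128d b)
{
  /* _mm_cmpgt_pd (a, b) yields the same mask as _mm_cmplt_pd (b, a);
     ix86_expand_sse_compare performs exactly that operand swap.  */
  return _mm_cmpgt_pd (a, b);
}
#endif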
13398 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
13399 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
13400 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
13401 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
13403 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
13404 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
13405 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
13406 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
13408 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
13409 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
13410 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
13413 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
13414 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
13415 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
13416 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
13417 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
13418 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
13419 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
13420 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
13422 { MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
13423 { MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
13424 { MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
13425 { MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
13426 { MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
13427 { MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
13428 { MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
13429 { MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
13431 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
13432 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
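/* Illustrative note (added commentary): pmulhw keeps only the high 16 bits
   of each 32-bit product, so a full widening multiply needs both the pmullw
   and pmulhw entries above.  A hedged sketch via the <emmintrin.h> wrappers:  */
#if 0
#include <emmintrin.h>
static void
widen_mul_demo (__m128i a, __m128i b, __m128i *lo, __m128i *hi)
{
  *lo = _mm_mullo_epi16 (a, b);	/* IX86_BUILTIN_PMULLW128 */
  *hi = _mm_mulhi_epi16 (a, b);	/* IX86_BUILTIN_PMULHW128 */
}
#endif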
13434 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
13435 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
13436 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
13437 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
13439 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
13440 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
13442 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
13443 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
13444 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
13445 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
13446 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
13447 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
13449 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
13450 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
13451 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
13452 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
13454 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
13455 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
13456 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
13457 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
13458 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
13459 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
13460 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
13461 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
13463 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
13464 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
13465 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
13467 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
13468 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
13470 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
13471 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
13473 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13474 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13475 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13477 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13478 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13479 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13481 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13482 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13484 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13486 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
13487 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
13488 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13489 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
13492 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
13493 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
13494 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
13495 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
13496 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
13497 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
13498 };
13500 static const struct builtin_description bdesc_1arg[] =
13501 {
13502 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13503 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13505 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13506 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13507 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13509 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13510 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13511 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13512 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13513 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13514 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13516 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13517 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13519 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13521 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13522 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13524 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13525 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13526 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13527 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13528 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13530 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13532 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13533 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13534 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13535 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13537 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13538 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13539 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13542 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13543 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 }
13544 };
13546 static void
13547 ix86_init_builtins (void)
13548 {
13549 if (TARGET_MMX)
13550 ix86_init_mmx_sse_builtins ();
13551 }
13553 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13554 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13555 portion of the machinery. */
13556 static void
13557 ix86_init_mmx_sse_builtins (void)
13558 {
13559 const struct builtin_description * d;
13560 size_t i;
13562 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
13563 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
13564 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
13565 tree V2DI_type_node
13566 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
13567 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
13568 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
13569 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
13570 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
13571 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
13572 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
13574 tree pchar_type_node = build_pointer_type (char_type_node);
13575 tree pcchar_type_node = build_pointer_type (
13576 build_type_variant (char_type_node, 1, 0));
13577 tree pfloat_type_node = build_pointer_type (float_type_node);
13578 tree pcfloat_type_node = build_pointer_type (
13579 build_type_variant (float_type_node, 1, 0));
13580 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13581 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13582 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13585 tree int_ftype_v4sf_v4sf
13586 = build_function_type_list (integer_type_node,
13587 V4SF_type_node, V4SF_type_node, NULL_TREE);
13588 tree v4si_ftype_v4sf_v4sf
13589 = build_function_type_list (V4SI_type_node,
13590 V4SF_type_node, V4SF_type_node, NULL_TREE);
13591 /* MMX/SSE/integer conversions. */
13592 tree int_ftype_v4sf
13593 = build_function_type_list (integer_type_node,
13594 V4SF_type_node, NULL_TREE);
13595 tree int64_ftype_v4sf
13596 = build_function_type_list (long_long_integer_type_node,
13597 V4SF_type_node, NULL_TREE);
13598 tree int_ftype_v8qi
13599 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13600 tree v4sf_ftype_v4sf_int
13601 = build_function_type_list (V4SF_type_node,
13602 V4SF_type_node, integer_type_node, NULL_TREE);
13603 tree v4sf_ftype_v4sf_int64
13604 = build_function_type_list (V4SF_type_node,
13605 V4SF_type_node, long_long_integer_type_node,
13606 NULL_TREE);
13607 tree v4sf_ftype_v4sf_v2si
13608 = build_function_type_list (V4SF_type_node,
13609 V4SF_type_node, V2SI_type_node, NULL_TREE);
13611 /* Miscellaneous. */
13612 tree v8qi_ftype_v4hi_v4hi
13613 = build_function_type_list (V8QI_type_node,
13614 V4HI_type_node, V4HI_type_node, NULL_TREE);
13615 tree v4hi_ftype_v2si_v2si
13616 = build_function_type_list (V4HI_type_node,
13617 V2SI_type_node, V2SI_type_node, NULL_TREE);
13618 tree v4sf_ftype_v4sf_v4sf_int
13619 = build_function_type_list (V4SF_type_node,
13620 V4SF_type_node, V4SF_type_node,
13621 integer_type_node, NULL_TREE);
13622 tree v2si_ftype_v4hi_v4hi
13623 = build_function_type_list (V2SI_type_node,
13624 V4HI_type_node, V4HI_type_node, NULL_TREE);
13625 tree v4hi_ftype_v4hi_int
13626 = build_function_type_list (V4HI_type_node,
13627 V4HI_type_node, integer_type_node, NULL_TREE);
13628 tree v4hi_ftype_v4hi_di
13629 = build_function_type_list (V4HI_type_node,
13630 V4HI_type_node, long_long_unsigned_type_node,
13631 NULL_TREE);
13632 tree v2si_ftype_v2si_di
13633 = build_function_type_list (V2SI_type_node,
13634 V2SI_type_node, long_long_unsigned_type_node,
13635 NULL_TREE);
13636 tree void_ftype_void
13637 = build_function_type (void_type_node, void_list_node);
13638 tree void_ftype_unsigned
13639 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13640 tree void_ftype_unsigned_unsigned
13641 = build_function_type_list (void_type_node, unsigned_type_node,
13642 unsigned_type_node, NULL_TREE);
13643 tree void_ftype_pcvoid_unsigned_unsigned
13644 = build_function_type_list (void_type_node, const_ptr_type_node,
13645 unsigned_type_node, unsigned_type_node,
13646 NULL_TREE);
13647 tree unsigned_ftype_void
13648 = build_function_type (unsigned_type_node, void_list_node);
13649 tree v2si_ftype_v4sf
13650 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13651 /* Loads/stores. */
13652 tree void_ftype_v8qi_v8qi_pchar
13653 = build_function_type_list (void_type_node,
13654 V8QI_type_node, V8QI_type_node,
13655 pchar_type_node, NULL_TREE);
13656 tree v4sf_ftype_pcfloat
13657 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13658 /* @@@ the type is bogus */
13659 tree v4sf_ftype_v4sf_pv2si
13660 = build_function_type_list (V4SF_type_node,
13661 V4SF_type_node, pv2si_type_node, NULL_TREE);
13662 tree void_ftype_pv2si_v4sf
13663 = build_function_type_list (void_type_node,
13664 pv2si_type_node, V4SF_type_node, NULL_TREE);
13665 tree void_ftype_pfloat_v4sf
13666 = build_function_type_list (void_type_node,
13667 pfloat_type_node, V4SF_type_node, NULL_TREE);
13668 tree void_ftype_pdi_di
13669 = build_function_type_list (void_type_node,
13670 pdi_type_node, long_long_unsigned_type_node,
13671 NULL_TREE);
13672 tree void_ftype_pv2di_v2di
13673 = build_function_type_list (void_type_node,
13674 pv2di_type_node, V2DI_type_node, NULL_TREE);
13675 /* Normal vector unops. */
13676 tree v4sf_ftype_v4sf
13677 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13679 /* Normal vector binops. */
13680 tree v4sf_ftype_v4sf_v4sf
13681 = build_function_type_list (V4SF_type_node,
13682 V4SF_type_node, V4SF_type_node, NULL_TREE);
13683 tree v8qi_ftype_v8qi_v8qi
13684 = build_function_type_list (V8QI_type_node,
13685 V8QI_type_node, V8QI_type_node, NULL_TREE);
13686 tree v4hi_ftype_v4hi_v4hi
13687 = build_function_type_list (V4HI_type_node,
13688 V4HI_type_node, V4HI_type_node, NULL_TREE);
13689 tree v2si_ftype_v2si_v2si
13690 = build_function_type_list (V2SI_type_node,
13691 V2SI_type_node, V2SI_type_node, NULL_TREE);
13692 tree di_ftype_di_di
13693 = build_function_type_list (long_long_unsigned_type_node,
13694 long_long_unsigned_type_node,
13695 long_long_unsigned_type_node, NULL_TREE);
13697 tree v2si_ftype_v2sf
13698 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13699 tree v2sf_ftype_v2si
13700 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13701 tree v2si_ftype_v2si
13702 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13703 tree v2sf_ftype_v2sf
13704 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13705 tree v2sf_ftype_v2sf_v2sf
13706 = build_function_type_list (V2SF_type_node,
13707 V2SF_type_node, V2SF_type_node, NULL_TREE);
13708 tree v2si_ftype_v2sf_v2sf
13709 = build_function_type_list (V2SI_type_node,
13710 V2SF_type_node, V2SF_type_node, NULL_TREE);
13711 tree pint_type_node = build_pointer_type (integer_type_node);
13712 tree pdouble_type_node = build_pointer_type (double_type_node);
13713 tree pcdouble_type_node = build_pointer_type (
13714 build_type_variant (double_type_node, 1, 0));
13715 tree int_ftype_v2df_v2df
13716 = build_function_type_list (integer_type_node,
13717 V2DF_type_node, V2DF_type_node, NULL_TREE);
13719 tree ti_ftype_ti_ti
13720 = build_function_type_list (intTI_type_node,
13721 intTI_type_node, intTI_type_node, NULL_TREE);
13722 tree void_ftype_pcvoid
13723 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13724 tree v4sf_ftype_v4si
13725 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13726 tree v4si_ftype_v4sf
13727 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13728 tree v2df_ftype_v4si
13729 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13730 tree v4si_ftype_v2df
13731 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13732 tree v2si_ftype_v2df
13733 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13734 tree v4sf_ftype_v2df
13735 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13736 tree v2df_ftype_v2si
13737 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13738 tree v2df_ftype_v4sf
13739 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13740 tree int_ftype_v2df
13741 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13742 tree int64_ftype_v2df
13743 = build_function_type_list (long_long_integer_type_node,
13744 V2DF_type_node, NULL_TREE);
13745 tree v2df_ftype_v2df_int
13746 = build_function_type_list (V2DF_type_node,
13747 V2DF_type_node, integer_type_node, NULL_TREE);
13748 tree v2df_ftype_v2df_int64
13749 = build_function_type_list (V2DF_type_node,
13750 V2DF_type_node, long_long_integer_type_node,
13751 NULL_TREE);
13752 tree v4sf_ftype_v4sf_v2df
13753 = build_function_type_list (V4SF_type_node,
13754 V4SF_type_node, V2DF_type_node, NULL_TREE);
13755 tree v2df_ftype_v2df_v4sf
13756 = build_function_type_list (V2DF_type_node,
13757 V2DF_type_node, V4SF_type_node, NULL_TREE);
13758 tree v2df_ftype_v2df_v2df_int
13759 = build_function_type_list (V2DF_type_node,
13760 V2DF_type_node, V2DF_type_node,
13761 integer_type_node,
13762 NULL_TREE);
13763 tree v2df_ftype_v2df_pcdouble
13764 = build_function_type_list (V2DF_type_node,
13765 V2DF_type_node, pcdouble_type_node, NULL_TREE);
13766 tree void_ftype_pdouble_v2df
13767 = build_function_type_list (void_type_node,
13768 pdouble_type_node, V2DF_type_node, NULL_TREE);
13769 tree void_ftype_pint_int
13770 = build_function_type_list (void_type_node,
13771 pint_type_node, integer_type_node, NULL_TREE);
13772 tree void_ftype_v16qi_v16qi_pchar
13773 = build_function_type_list (void_type_node,
13774 V16QI_type_node, V16QI_type_node,
13775 pchar_type_node, NULL_TREE);
13776 tree v2df_ftype_pcdouble
13777 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13778 tree v2df_ftype_v2df_v2df
13779 = build_function_type_list (V2DF_type_node,
13780 V2DF_type_node, V2DF_type_node, NULL_TREE);
13781 tree v16qi_ftype_v16qi_v16qi
13782 = build_function_type_list (V16QI_type_node,
13783 V16QI_type_node, V16QI_type_node, NULL_TREE);
13784 tree v8hi_ftype_v8hi_v8hi
13785 = build_function_type_list (V8HI_type_node,
13786 V8HI_type_node, V8HI_type_node, NULL_TREE);
13787 tree v4si_ftype_v4si_v4si
13788 = build_function_type_list (V4SI_type_node,
13789 V4SI_type_node, V4SI_type_node, NULL_TREE);
13790 tree v2di_ftype_v2di_v2di
13791 = build_function_type_list (V2DI_type_node,
13792 V2DI_type_node, V2DI_type_node, NULL_TREE);
13793 tree v2di_ftype_v2df_v2df
13794 = build_function_type_list (V2DI_type_node,
13795 V2DF_type_node, V2DF_type_node, NULL_TREE);
13796 tree v2df_ftype_v2df
13797 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13798 tree v2di_ftype_v2di_int
13799 = build_function_type_list (V2DI_type_node,
13800 V2DI_type_node, integer_type_node, NULL_TREE);
13801 tree v4si_ftype_v4si_int
13802 = build_function_type_list (V4SI_type_node,
13803 V4SI_type_node, integer_type_node, NULL_TREE);
13804 tree v8hi_ftype_v8hi_int
13805 = build_function_type_list (V8HI_type_node,
13806 V8HI_type_node, integer_type_node, NULL_TREE);
13807 tree v8hi_ftype_v8hi_v2di
13808 = build_function_type_list (V8HI_type_node,
13809 V8HI_type_node, V2DI_type_node, NULL_TREE);
13810 tree v4si_ftype_v4si_v2di
13811 = build_function_type_list (V4SI_type_node,
13812 V4SI_type_node, V2DI_type_node, NULL_TREE);
13813 tree v4si_ftype_v8hi_v8hi
13814 = build_function_type_list (V4SI_type_node,
13815 V8HI_type_node, V8HI_type_node, NULL_TREE);
13816 tree di_ftype_v8qi_v8qi
13817 = build_function_type_list (long_long_unsigned_type_node,
13818 V8QI_type_node, V8QI_type_node, NULL_TREE);
13819 tree di_ftype_v2si_v2si
13820 = build_function_type_list (long_long_unsigned_type_node,
13821 V2SI_type_node, V2SI_type_node, NULL_TREE);
13822 tree v2di_ftype_v16qi_v16qi
13823 = build_function_type_list (V2DI_type_node,
13824 V16QI_type_node, V16QI_type_node, NULL_TREE);
13825 tree v2di_ftype_v4si_v4si
13826 = build_function_type_list (V2DI_type_node,
13827 V4SI_type_node, V4SI_type_node, NULL_TREE);
13828 tree int_ftype_v16qi
13829 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13830 tree v16qi_ftype_pcchar
13831 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13832 tree void_ftype_pchar_v16qi
13833 = build_function_type_list (void_type_node,
13834 pchar_type_node, V16QI_type_node, NULL_TREE);
13836 tree float80_type;
13837 tree float128_type;
13838 tree ftype;
13840 /* The __float80 type. */
13841 if (TYPE_MODE (long_double_type_node) == XFmode)
13842 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13843 "__float80");
13844 else
13845 {
13846 /* The __float80 type. */
13847 float80_type = make_node (REAL_TYPE);
13848 TYPE_PRECISION (float80_type) = 80;
13849 layout_type (float80_type);
13850 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13851 }
13853 float128_type = make_node (REAL_TYPE);
13854 TYPE_PRECISION (float128_type) = 128;
13855 layout_type (float128_type);
13856 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13858 /* Add all builtins that are more or less simple operations on two
13859 operands. */
13860 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13861 {
13862 /* Use one of the operands; the target can have a different mode for
13863 mask-generating compares. */
13864 enum machine_mode mode;
13865 tree type;
13867 if (d->name == 0)
13868 continue;
13869 mode = insn_data[d->icode].operand[1].mode;
13871 switch (mode)
13872 {
13873 case V16QImode:
13874 type = v16qi_ftype_v16qi_v16qi;
13875 break;
13876 case V8HImode:
13877 type = v8hi_ftype_v8hi_v8hi;
13878 break;
13879 case V4SImode:
13880 type = v4si_ftype_v4si_v4si;
13881 break;
13882 case V2DImode:
13883 type = v2di_ftype_v2di_v2di;
13884 break;
13885 case V2DFmode:
13886 type = v2df_ftype_v2df_v2df;
13887 break;
13888 case TImode:
13889 type = ti_ftype_ti_ti;
13890 break;
13891 case V4SFmode:
13892 type = v4sf_ftype_v4sf_v4sf;
13893 break;
13894 case V8QImode:
13895 type = v8qi_ftype_v8qi_v8qi;
13896 break;
13897 case V4HImode:
13898 type = v4hi_ftype_v4hi_v4hi;
13899 break;
13900 case V2SImode:
13901 type = v2si_ftype_v2si_v2si;
13902 break;
13903 case DImode:
13904 type = di_ftype_di_di;
13905 break;
13907 default:
13908 gcc_unreachable ();
13909 }
13911 /* Override for comparisons. */
13912 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
13913 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
13914 type = v4si_ftype_v4sf_v4sf;
13916 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
13917 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
13918 type = v2di_ftype_v2df_v2df;
13920 def_builtin (d->mask, d->name, type, d->code);
13921 }
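/* Worked example (added commentary): for the bdesc_2arg entry
     { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 }
   the loop above reads V4SFmode from operand 1 of xorv4sf3, selects
   v4sf_ftype_v4sf_v4sf in the switch, and ends up doing the equivalent of
     def_builtin (MASK_SSE, "__builtin_ia32_xorps",
		  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
   The maskcmp icodes instead take the v4si/v2di mask-result overrides.  */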
13923 /* Add the remaining MMX insns with somewhat more complicated types. */
13924 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13925 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13926 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13927 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13929 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13930 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13931 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13933 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13934 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13936 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13937 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13939 /* comi/ucomi insns. */
13940 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13941 if (d->mask == MASK_SSE2)
13942 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13943 else
13944 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
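/* Hedged usage sketch (added commentary): unlike the mask-producing
   compares, the comi/ucomi builtins return a plain int flag, e.g. via the
   <xmmintrin.h> wrapper:  */
#if 0
#include <xmmintrin.h>
static int
comi_demo (__m128 a, __m128 b)
{
  /* Expands through ix86_expand_sse_comi; the result is read out of the
     flags into a QImode subreg of an SImode pseudo.  */
  return _mm_comilt_ss (a, b);
}
#endif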
13946 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13947 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13948 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13950 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13951 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13952 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13953 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13954 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13955 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13956 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13957 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13958 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13959 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13960 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13962 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13964 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13965 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13967 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13968 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13969 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13970 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13972 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13973 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13974 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13975 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13977 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13979 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13981 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13982 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13983 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13984 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13985 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13986 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13988 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13990 /* Original 3DNow! */
13991 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13992 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13993 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13994 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13995 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13996 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13997 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13998 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13999 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
14000 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
14001 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
14002 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
14003 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
14004 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
14005 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
14006 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
14007 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
14008 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
14009 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
14010 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
14012 /* 3DNow! extension as used in the Athlon CPU. */
14013 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
14014 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
14015 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
14016 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
14017 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
14018 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
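/* Hedged usage sketch (added commentary): the 3DNow! builtins above are
   normally reached through <mm3dnow.h>; the _m_pfadd wrapper is assumed
   here.  Compile with -m3dnow:  */
#if 0
#include <mm3dnow.h>
static __m64
pfadd_demo (__m64 a, __m64 b)
{
  return _m_pfadd (a, b);	/* IX86_BUILTIN_PFADD */
}
#endif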
14021 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
14023 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
14024 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
14026 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
14027 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
14029 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
14030 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
14031 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
14032 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
14033 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
14035 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
14036 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
14037 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
14038 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
14040 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
14041 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
14043 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
14045 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
14046 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
14048 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
14049 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
14050 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
14051 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
14052 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
14054 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
14056 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
14057 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
14058 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
14059 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
14061 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
14062 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
14063 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
14065 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
14066 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
14067 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
14068 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
14070 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
14071 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
14072 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
14074 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
14075 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
14077 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
14078 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
14080 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
14081 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
14082 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
14084 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
14085 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
14086 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
14088 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
14089 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
14091 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
14092 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
14093 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
14094 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
14096 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
14097 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
14098 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
14099 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
14101 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
14102 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
14104 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
14106 /* Prescott New Instructions. */
14107 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
14108 void_ftype_pcvoid_unsigned_unsigned,
14109 IX86_BUILTIN_MONITOR);
14110 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
14111 void_ftype_unsigned_unsigned,
14112 IX86_BUILTIN_MWAIT);
14113 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
14114 v4sf_ftype_v4sf,
14115 IX86_BUILTIN_MOVSHDUP);
14116 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
14117 v4sf_ftype_v4sf,
14118 IX86_BUILTIN_MOVSLDUP);
14119 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
14120 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
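/* Hedged usage sketch (added commentary): lddqu is the SSE3 unaligned load
   tuned for loads that straddle a cache line; the <pmmintrin.h> wrapper is
   assumed.  Compile with -msse3:  */
#if 0
#include <pmmintrin.h>
static __m128i
lddqu_demo (void const *p)
{
  return _mm_lddqu_si128 ((__m128i const *) p);	/* IX86_BUILTIN_LDDQU */
}
#endif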
14122 /* Access to the vec_init patterns. */
14123 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
14124 integer_type_node, NULL_TREE);
14125 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
14126 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
14128 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
14129 short_integer_type_node,
14130 short_integer_type_node,
14131 short_integer_type_node, NULL_TREE);
14132 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
14133 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
14135 ftype = build_function_type_list (V8QI_type_node, char_type_node,
14136 char_type_node, char_type_node,
14137 char_type_node, char_type_node,
14138 char_type_node, char_type_node,
14139 char_type_node, NULL_TREE);
14140 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
14141 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
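/* Hedged usage sketch (added commentary): as explained below for the
   expanders, MMX vector construction goes through these builtins rather
   than through vec_init patterns.  Assuming the usual <mmintrin.h>
   _mm_set_pi16 wrapper:  */
#if 0
#include <mmintrin.h>
static __m64
set_demo (void)
{
  /* _mm_set_pi16 is a wrapper around __builtin_ia32_vec_init_v4hi.  */
  return _mm_set_pi16 (3, 2, 1, 0);
}
#endif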
14143 /* Access to the vec_extract patterns. */
14144 ftype = build_function_type_list (double_type_node, V2DF_type_node,
14145 integer_type_node, NULL_TREE);
14146 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
14147 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
14149 ftype = build_function_type_list (long_long_integer_type_node,
14150 V2DI_type_node, integer_type_node,
14152 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
14153 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
14155 ftype = build_function_type_list (float_type_node, V4SF_type_node,
14156 integer_type_node, NULL_TREE);
14157 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
14158 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
14160 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
14161 integer_type_node, NULL_TREE);
14162 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
14163 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
14165 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
14166 integer_type_node, NULL_TREE);
14167 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
14168 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
14170 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
14171 integer_type_node, NULL_TREE);
14172 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
14173 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
14175 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
14176 integer_type_node, NULL_TREE);
14177 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
14178 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
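/* Hedged usage sketch (added commentary): element reads likewise go
   through builtins.  Assuming the <emmintrin.h> _mm_extract_epi16 wrapper,
   which is built on __builtin_ia32_vec_ext_v8hi:  */
#if 0
#include <emmintrin.h>
static int
extract_demo (__m128i x)
{
  return _mm_extract_epi16 (x, 3);	/* selector must be constant */
}
#endif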
14180 /* Access to the vec_set patterns. */
14181 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
14182 intHI_type_node,
14183 integer_type_node, NULL_TREE);
14184 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
14185 ftype, IX86_BUILTIN_VEC_SET_V8HI);
14187 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
14188 intHI_type_node,
14189 integer_type_node, NULL_TREE);
14190 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
14191 ftype, IX86_BUILTIN_VEC_SET_V4HI);
14192 }
14194 /* Errors in the source file can cause expand_expr to return const0_rtx
14195 where we expect a vector. To avoid crashing, use one of the vector
14196 clear instructions. */
14197 static rtx
14198 safe_vector_operand (rtx x, enum machine_mode mode)
14199 {
14200 if (x == const0_rtx)
14201 x = CONST0_RTX (mode);
14202 return x;
14203 }
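/* Added note: e.g. a builtin call whose vector argument was already
   diagnosed (error_mark_node) expands to const0_rtx, a scalar zero;
   substituting CONST0_RTX (mode) hands the insn patterns a zero of the
   correct vector mode instead of crashing on it.  */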
14205 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
14207 static rtx
14208 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
14209 {
14210 rtx pat, xops[3];
14211 tree arg0 = TREE_VALUE (arglist);
14212 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14213 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14214 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14215 enum machine_mode tmode = insn_data[icode].operand[0].mode;
14216 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
14217 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
14219 if (VECTOR_MODE_P (mode0))
14220 op0 = safe_vector_operand (op0, mode0);
14221 if (VECTOR_MODE_P (mode1))
14222 op1 = safe_vector_operand (op1, mode1);
14224 if (optimize || !target
14225 || GET_MODE (target) != tmode
14226 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14227 target = gen_reg_rtx (tmode);
14229 if (GET_MODE (op1) == SImode && mode1 == TImode)
14230 {
14231 rtx x = gen_reg_rtx (V4SImode);
14232 emit_insn (gen_sse2_loadd (x, op1));
14233 op1 = gen_lowpart (TImode, x);
14234 }
14236 /* The insn must want input operands in the same modes as the
14237 result. */
14238 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
14239 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
14241 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
14242 op0 = copy_to_mode_reg (mode0, op0);
14243 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
14244 op1 = copy_to_mode_reg (mode1, op1);
14246 /* ??? Using ix86_fixup_binary_operands is problematic when
14247 we've got mismatched modes. Fake it. */
14249 xops[0] = target;
14250 xops[1] = op0;
14251 xops[2] = op1;
14253 if (tmode == mode0 && tmode == mode1)
14254 {
14255 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
14256 op0 = xops[1];
14257 op1 = xops[2];
14258 }
14259 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
14260 {
14261 op0 = force_reg (mode0, op0);
14262 op1 = force_reg (mode1, op1);
14263 target = gen_reg_rtx (tmode);
14264 }
14266 pat = GEN_FCN (icode) (target, op0, op1);
14267 if (! pat)
14268 return 0;
14269 emit_insn (pat);
14270 return target;
14271 }
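/* Added commentary (summary of the expansion above): both operands are
   expanded, an SImode shift count is widened through sse2_loadd when the
   pattern wants TImode, operands are forced to satisfy the insn
   predicates, and only then is the pattern emitted.  The xops[] detour
   exists because ix86_fixup_binary_operands assumes one mode for all
   three operands, which does not hold for the mixed-mode builtins
   handled here.  */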
14273 /* Subroutine of ix86_expand_builtin to take care of stores. */
14275 static rtx
14276 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
14277 {
14278 rtx pat;
14279 tree arg0 = TREE_VALUE (arglist);
14280 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14281 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14282 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14283 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
14284 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
14286 if (VECTOR_MODE_P (mode1))
14287 op1 = safe_vector_operand (op1, mode1);
14289 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14290 op1 = copy_to_mode_reg (mode1, op1);
14292 pat = GEN_FCN (icode) (op0, op1);
14293 if (pat)
14294 emit_insn (pat);
14295 return 0;
14296 }
14298 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
14300 static rtx
14301 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
14302 rtx target, int do_load)
14303 {
14304 rtx pat;
14305 tree arg0 = TREE_VALUE (arglist);
14306 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14307 enum machine_mode tmode = insn_data[icode].operand[0].mode;
14308 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
14310 if (optimize || !target
14311 || GET_MODE (target) != tmode
14312 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14313 target = gen_reg_rtx (tmode);
14314 if (do_load)
14315 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14316 else
14317 {
14318 if (VECTOR_MODE_P (mode0))
14319 op0 = safe_vector_operand (op0, mode0);
14321 if ((optimize && !register_operand (op0, mode0))
14322 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14323 op0 = copy_to_mode_reg (mode0, op0);
14324 }
14326 pat = GEN_FCN (icode) (target, op0);
14327 if (! pat)
14328 return 0;
14329 emit_insn (pat);
14330 return target;
14331 }
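/* Added note: DO_LOAD selects the pointer-argument form used by builtins
   such as __builtin_ia32_loadups, whose operand is wrapped in a MEM, as
   opposed to plain value unops such as __builtin_ia32_sqrtps.  */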
14333 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
14334 sqrtss, rsqrtss, rcpss. */
14336 static rtx
14337 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
14338 {
14339 rtx pat;
14340 tree arg0 = TREE_VALUE (arglist);
14341 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14342 enum machine_mode tmode = insn_data[icode].operand[0].mode;
14343 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
14345 if (optimize || !target
14346 || GET_MODE (target) != tmode
14347 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14348 target = gen_reg_rtx (tmode);
14350 if (VECTOR_MODE_P (mode0))
14351 op0 = safe_vector_operand (op0, mode0);
14353 if ((optimize && !register_operand (op0, mode0))
14354 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14355 op0 = copy_to_mode_reg (mode0, op0);
14357 op1 = op0;
14358 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
14359 op1 = copy_to_mode_reg (mode0, op1);
14361 pat = GEN_FCN (icode) (target, op0, op1);
14362 if (! pat)
14363 return 0;
14364 emit_insn (pat);
14365 return target;
14366 }
14368 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
14370 static rtx
14371 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
14372 rtx target)
14373 {
14374 rtx pat;
14375 tree arg0 = TREE_VALUE (arglist);
14376 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14377 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14378 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14379 rtx op2;
14380 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
14381 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
14382 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
14383 enum rtx_code comparison = d->comparison;
14385 if (VECTOR_MODE_P (mode0))
14386 op0 = safe_vector_operand (op0, mode0);
14387 if (VECTOR_MODE_P (mode1))
14388 op1 = safe_vector_operand (op1, mode1);
14390 /* Swap operands if we have a comparison that isn't available in
14391 hardware. */
14392 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
14393 {
14394 rtx tmp = gen_reg_rtx (mode1);
14395 emit_move_insn (tmp, op1);
14396 op1 = op0;
14397 op0 = tmp;
14398 }
14400 if (optimize || !target
14401 || GET_MODE (target) != tmode
14402 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
14403 target = gen_reg_rtx (tmode);
14405 if ((optimize && !register_operand (op0, mode0))
14406 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
14407 op0 = copy_to_mode_reg (mode0, op0);
14408 if ((optimize && !register_operand (op1, mode1))
14409 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
14410 op1 = copy_to_mode_reg (mode1, op1);
14412 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14413 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
14414 if (! pat)
14415 return 0;
14416 emit_insn (pat);
14417 return target;
14418 }
14420 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
14422 static rtx
14423 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
14424 rtx target)
14425 {
14426 rtx pat;
14427 tree arg0 = TREE_VALUE (arglist);
14428 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14429 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14430 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14431 rtx op2;
14432 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
14433 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
14434 enum rtx_code comparison = d->comparison;
14436 if (VECTOR_MODE_P (mode0))
14437 op0 = safe_vector_operand (op0, mode0);
14438 if (VECTOR_MODE_P (mode1))
14439 op1 = safe_vector_operand (op1, mode1);
14441 /* Swap operands if we have a comparison that isn't available in
14442 hardware. */
14443 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
14444 {
14445 rtx tmp = op1;
14446 op1 = op0;
14447 op0 = tmp;
14448 }
14450 target = gen_reg_rtx (SImode);
14451 emit_move_insn (target, const0_rtx);
14452 target = gen_rtx_SUBREG (QImode, target, 0);
14454 if ((optimize && !register_operand (op0, mode0))
14455 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14456 op0 = copy_to_mode_reg (mode0, op0);
14457 if ((optimize && !register_operand (op1, mode1))
14458 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14459 op1 = copy_to_mode_reg (mode1, op1);
14461 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14462 pat = GEN_FCN (d->icode) (op0, op1);
14463 if (! pat)
14464 return 0;
14465 emit_insn (pat);
14466 emit_insn (gen_rtx_SET (VOIDmode,
14467 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
14468 gen_rtx_fmt_ee (comparison, QImode,
14469 SET_DEST (pat),
14470 const0_rtx)));
14472 return SUBREG_REG (target);
14473 }
14475 /* Return the integer constant in ARG. Constrain it to be in the range
14476 of the subparts of VEC_TYPE; issue an error if not. */
14478 static unsigned HOST_WIDE_INT
14479 get_element_number (tree vec_type, tree arg)
14480 {
14481 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
14483 if (!host_integerp (arg, 1)
14484 || (elt = tree_low_cst (arg, 1), elt > max))
14485 {
14486 error ("selector must be an integer constant in the range 0..%i", max);
14487 return 0;
14488 }
14490 return elt;
14491 }
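/* Added note: this is the check that makes e.g.
     __builtin_ia32_vec_ext_v4sf (x, i)
   with a non-constant or out-of-range I a compile-time error; the
   *intrin.h wrappers pass the selector through unchanged.  */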
14493 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
14494 ix86_expand_vector_init. We DO have language-level syntax for this, in
14495 the form of (type){ init-list }. Except that since we can't place emms
14496 instructions from inside the compiler, we can't allow the use of MMX
14497 registers unless the user explicitly asks for it. So we do *not* define
14498 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
14499 we have builtins invoked by mmintrin.h that give us license to emit
14500 these sorts of instructions. */
14502 static rtx
14503 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
14504 {
14505 enum machine_mode tmode = TYPE_MODE (type);
14506 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
14507 int i, n_elt = GET_MODE_NUNITS (tmode);
14508 rtvec v = rtvec_alloc (n_elt);
14510 gcc_assert (VECTOR_MODE_P (tmode));
14512 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
14514 rtx x = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14515 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
14518 gcc_assert (arglist == NULL);
14520 if (!target || !register_operand (target, tmode))
14521 target = gen_reg_rtx (tmode);
14523 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
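/* As an illustration, mmintrin.h wraps these builtins: a call such as
_mm_set_pi16 (3, 2, 1, 0) expands to __builtin_ia32_vec_init_v4hi and
arrives here with a four-element arglist in V4HImode, to be lowered
through ix86_expand_vector_init. */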
14527 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
14528 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
14529 had a language-level syntax for referencing vector elements. */
14532 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
14534 enum machine_mode tmode, mode0;
14539 arg0 = TREE_VALUE (arglist);
14540 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14542 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14543 elt = get_element_number (TREE_TYPE (arg0), arg1);
14545 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14546 mode0 = TYPE_MODE (TREE_TYPE (arg0));
14547 gcc_assert (VECTOR_MODE_P (mode0));
14549 op0 = force_reg (mode0, op0);
14551 if (optimize || !target || !register_operand (target, tmode))
14552 target = gen_reg_rtx (tmode);
14554 ix86_expand_vector_extract (true, target, op0, elt);
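/* Illustrative use: __builtin_ia32_vec_ext_v4sf (v, 2) extracts
element 2 of a V4SFmode value; get_element_number has already
rejected any selector outside 0..3 for this vector type. */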
14559 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
14560 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
14561 a language-level syntax for referencing vector elements. */
14564 ix86_expand_vec_set_builtin (tree arglist)
14566 enum machine_mode tmode, mode1;
14567 tree arg0, arg1, arg2;
14571 arg0 = TREE_VALUE (arglist);
14572 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14573 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14575 tmode = TYPE_MODE (TREE_TYPE (arg0));
14576 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14577 gcc_assert (VECTOR_MODE_P (tmode));
14579 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
14580 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
14581 elt = get_element_number (TREE_TYPE (arg0), arg2);
14583 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
14584 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
14586 op0 = force_reg (tmode, op0);
14587 op1 = force_reg (mode1, op1);
14589 ix86_expand_vector_set (true, op0, op1, elt);
14594 /* Expand an expression EXP that calls a built-in function,
14595 with result going to TARGET if that's convenient
14596 (and in mode MODE if that's convenient).
14597 SUBTARGET may be used as the target for computing one of EXP's operands.
14598 IGNORE is nonzero if the value is to be ignored. */
14601 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14602 enum machine_mode mode ATTRIBUTE_UNUSED,
14603 int ignore ATTRIBUTE_UNUSED)
14605 const struct builtin_description *d;
14607 enum insn_code icode;
14608 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14609 tree arglist = TREE_OPERAND (exp, 1);
14610 tree arg0, arg1, arg2;
14611 rtx op0, op1, op2, pat;
14612 enum machine_mode tmode, mode0, mode1, mode2;
14613 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14617 case IX86_BUILTIN_EMMS:
14618 emit_insn (gen_mmx_emms ());
14621 case IX86_BUILTIN_SFENCE:
14622 emit_insn (gen_sse_sfence ());
14625 case IX86_BUILTIN_MASKMOVQ:
14626 case IX86_BUILTIN_MASKMOVDQU:
14627 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14628 ? CODE_FOR_mmx_maskmovq
14629 : CODE_FOR_sse2_maskmovdqu);
14630 /* Note the arg order is different from the operand order. */
14631 arg1 = TREE_VALUE (arglist);
14632 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14633 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14634 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14635 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14636 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14637 mode0 = insn_data[icode].operand[0].mode;
14638 mode1 = insn_data[icode].operand[1].mode;
14639 mode2 = insn_data[icode].operand[2].mode;
14641 op0 = force_reg (Pmode, op0);
14642 op0 = gen_rtx_MEM (mode1, op0);
14644 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14645 op0 = copy_to_mode_reg (mode0, op0);
14646 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14647 op1 = copy_to_mode_reg (mode1, op1);
14648 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14649 op2 = copy_to_mode_reg (mode2, op2);
14650 pat = GEN_FCN (icode) (op0, op1, op2);
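/* Illustratively, mmintrin.h's _mm_maskmove_si64 (data, mask, addr)
funnels into this builtin, so the insn operands built above come out
as (mem-at-addr, data, mask) -- hence the arg0/arg1/arg2 shuffle
noted in the comment at the top of this case. */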
14656 case IX86_BUILTIN_SQRTSS:
14657 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
14658 case IX86_BUILTIN_RSQRTSS:
14659 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
14660 case IX86_BUILTIN_RCPSS:
14661 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
14663 case IX86_BUILTIN_LOADUPS:
14664 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14666 case IX86_BUILTIN_STOREUPS:
14667 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14669 case IX86_BUILTIN_LOADHPS:
14670 case IX86_BUILTIN_LOADLPS:
14671 case IX86_BUILTIN_LOADHPD:
14672 case IX86_BUILTIN_LOADLPD:
14673 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
14674 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
14675 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
14676 : CODE_FOR_sse2_loadlpd);
14677 arg0 = TREE_VALUE (arglist);
14678 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14679 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14680 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14681 tmode = insn_data[icode].operand[0].mode;
14682 mode0 = insn_data[icode].operand[1].mode;
14683 mode1 = insn_data[icode].operand[2].mode;
14685 op0 = force_reg (mode0, op0);
14686 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14687 if (optimize || target == 0
14688 || GET_MODE (target) != tmode
14689 || !register_operand (target, tmode))
14690 target = gen_reg_rtx (tmode);
14691 pat = GEN_FCN (icode) (target, op0, op1);
14697 case IX86_BUILTIN_STOREHPS:
14698 case IX86_BUILTIN_STORELPS:
14699 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
14700 : CODE_FOR_sse_storelps);
14701 arg0 = TREE_VALUE (arglist);
14702 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14703 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14704 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14705 mode0 = insn_data[icode].operand[0].mode;
14706 mode1 = insn_data[icode].operand[1].mode;
14708 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14709 op1 = force_reg (mode1, op1);
14711 pat = GEN_FCN (icode) (op0, op1);
14717 case IX86_BUILTIN_MOVNTPS:
14718 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14719 case IX86_BUILTIN_MOVNTQ:
14720 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14722 case IX86_BUILTIN_LDMXCSR:
14723 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14724 target = assign_386_stack_local (SImode, 0);
14725 emit_move_insn (target, op0);
14726 emit_insn (gen_sse_ldmxcsr (target));
14729 case IX86_BUILTIN_STMXCSR:
14730 target = assign_386_stack_local (SImode, 0);
14731 emit_insn (gen_sse_stmxcsr (target));
14732 return copy_to_mode_reg (SImode, target);
14734 case IX86_BUILTIN_SHUFPS:
14735 case IX86_BUILTIN_SHUFPD:
14736 icode = (fcode == IX86_BUILTIN_SHUFPS
14737 ? CODE_FOR_sse_shufps
14738 : CODE_FOR_sse2_shufpd);
14739 arg0 = TREE_VALUE (arglist);
14740 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14741 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14742 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14743 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14744 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14745 tmode = insn_data[icode].operand[0].mode;
14746 mode0 = insn_data[icode].operand[1].mode;
14747 mode1 = insn_data[icode].operand[2].mode;
14748 mode2 = insn_data[icode].operand[3].mode;
14750 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14751 op0 = copy_to_mode_reg (mode0, op0);
14752 if ((optimize && !register_operand (op1, mode1))
14753 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
14754 op1 = copy_to_mode_reg (mode1, op1);
14755 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14757 /* @@@ better error message */
14758 error ("mask must be an immediate");
14759 return gen_reg_rtx (tmode);
14761 if (optimize || target == 0
14762 || GET_MODE (target) != tmode
14763 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14764 target = gen_reg_rtx (tmode);
14765 pat = GEN_FCN (icode) (target, op0, op1, op2);
14771 case IX86_BUILTIN_PSHUFW:
14772 case IX86_BUILTIN_PSHUFD:
14773 case IX86_BUILTIN_PSHUFHW:
14774 case IX86_BUILTIN_PSHUFLW:
14775 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14776 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14777 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14778 : CODE_FOR_mmx_pshufw);
14779 arg0 = TREE_VALUE (arglist);
14780 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14781 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14782 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14783 tmode = insn_data[icode].operand[0].mode;
14784 mode1 = insn_data[icode].operand[1].mode;
14785 mode2 = insn_data[icode].operand[2].mode;
14787 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14788 op0 = copy_to_mode_reg (mode1, op0);
14789 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14791 /* @@@ better error message */
14792 error ("mask must be an immediate");
14796 || GET_MODE (target) != tmode
14797 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14798 target = gen_reg_rtx (tmode);
14799 pat = GEN_FCN (icode) (target, op0, op1);
14805 case IX86_BUILTIN_PSLLDQI128:
14806 case IX86_BUILTIN_PSRLDQI128:
14807 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14808 : CODE_FOR_sse2_lshrti3);
14809 arg0 = TREE_VALUE (arglist);
14810 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14811 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14812 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14813 tmode = insn_data[icode].operand[0].mode;
14814 mode1 = insn_data[icode].operand[1].mode;
14815 mode2 = insn_data[icode].operand[2].mode;
14817 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14819 op0 = copy_to_reg (op0);
14820 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14822 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14824 error ("shift must be an immediate");
14827 target = gen_reg_rtx (V2DImode);
14828 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14834 case IX86_BUILTIN_FEMMS:
14835 emit_insn (gen_mmx_femms ());
14838 case IX86_BUILTIN_PAVGUSB:
14839 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
14841 case IX86_BUILTIN_PF2ID:
14842 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
14844 case IX86_BUILTIN_PFACC:
14845 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
14847 case IX86_BUILTIN_PFADD:
14848 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
14850 case IX86_BUILTIN_PFCMPEQ:
14851 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
14853 case IX86_BUILTIN_PFCMPGE:
14854 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
14856 case IX86_BUILTIN_PFCMPGT:
14857 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
14859 case IX86_BUILTIN_PFMAX:
14860 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
14862 case IX86_BUILTIN_PFMIN:
14863 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
14865 case IX86_BUILTIN_PFMUL:
14866 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
14868 case IX86_BUILTIN_PFRCP:
14869 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
14871 case IX86_BUILTIN_PFRCPIT1:
14872 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
14874 case IX86_BUILTIN_PFRCPIT2:
14875 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
14877 case IX86_BUILTIN_PFRSQIT1:
14878 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
14880 case IX86_BUILTIN_PFRSQRT:
14881 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
14883 case IX86_BUILTIN_PFSUB:
14884 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
14886 case IX86_BUILTIN_PFSUBR:
14887 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
14889 case IX86_BUILTIN_PI2FD:
14890 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
14892 case IX86_BUILTIN_PMULHRW:
14893 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
14895 case IX86_BUILTIN_PF2IW:
14896 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
14898 case IX86_BUILTIN_PFNACC:
14899 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
14901 case IX86_BUILTIN_PFPNACC:
14902 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
14904 case IX86_BUILTIN_PI2FW:
14905 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
14907 case IX86_BUILTIN_PSWAPDSI:
14908 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
14910 case IX86_BUILTIN_PSWAPDSF:
14911 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
14913 case IX86_BUILTIN_SQRTSD:
14914 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
14915 case IX86_BUILTIN_LOADUPD:
14916 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14917 case IX86_BUILTIN_STOREUPD:
14918 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14920 case IX86_BUILTIN_MFENCE:
14921 emit_insn (gen_sse2_mfence ());
14923 case IX86_BUILTIN_LFENCE:
14924 emit_insn (gen_sse2_lfence ());
14927 case IX86_BUILTIN_CLFLUSH:
14928 arg0 = TREE_VALUE (arglist);
14929 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14930 icode = CODE_FOR_sse2_clflush;
14931 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14932 op0 = copy_to_mode_reg (Pmode, op0);
14934 emit_insn (gen_sse2_clflush (op0));
14937 case IX86_BUILTIN_MOVNTPD:
14938 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14939 case IX86_BUILTIN_MOVNTDQ:
14940 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14941 case IX86_BUILTIN_MOVNTI:
14942 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14944 case IX86_BUILTIN_LOADDQU:
14945 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14946 case IX86_BUILTIN_STOREDQU:
14947 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14949 case IX86_BUILTIN_MONITOR:
14950 arg0 = TREE_VALUE (arglist);
14951 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14952 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14953 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14954 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14955 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14957 op0 = copy_to_mode_reg (SImode, op0);
14959 op1 = copy_to_mode_reg (SImode, op1);
14961 op2 = copy_to_mode_reg (SImode, op2);
14962 emit_insn (gen_sse3_monitor (op0, op1, op2));
14965 case IX86_BUILTIN_MWAIT:
14966 arg0 = TREE_VALUE (arglist);
14967 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14968 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14969 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14971 op0 = copy_to_mode_reg (SImode, op0);
14973 op1 = copy_to_mode_reg (SImode, op1);
14974 emit_insn (gen_sse3_mwait (op0, op1));
14977 case IX86_BUILTIN_LDDQU:
14978 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
14981 case IX86_BUILTIN_VEC_INIT_V2SI:
14982 case IX86_BUILTIN_VEC_INIT_V4HI:
14983 case IX86_BUILTIN_VEC_INIT_V8QI:
14984 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
14986 case IX86_BUILTIN_VEC_EXT_V2DF:
14987 case IX86_BUILTIN_VEC_EXT_V2DI:
14988 case IX86_BUILTIN_VEC_EXT_V4SF:
14989 case IX86_BUILTIN_VEC_EXT_V4SI:
14990 case IX86_BUILTIN_VEC_EXT_V8HI:
14991 case IX86_BUILTIN_VEC_EXT_V2SI:
14992 case IX86_BUILTIN_VEC_EXT_V4HI:
14993 return ix86_expand_vec_ext_builtin (arglist, target);
14995 case IX86_BUILTIN_VEC_SET_V8HI:
14996 case IX86_BUILTIN_VEC_SET_V4HI:
14997 return ix86_expand_vec_set_builtin (arglist);
15003 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15004 if (d->code == fcode)
15006 /* Compares are treated specially. */
15007 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15008 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
15009 || d->icode == CODE_FOR_sse2_maskcmpv2df3
15010 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15011 return ix86_expand_sse_compare (d, arglist, target);
15013 return ix86_expand_binop_builtin (d->icode, arglist, target);
15016 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15017 if (d->code == fcode)
15018 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
15020 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15021 if (d->code == fcode)
15022 return ix86_expand_sse_comi (d, arglist, target);
15024 gcc_unreachable ();
15027 /* Store OPERAND to the memory after reload is completed. This means
15028 that we can't easily use assign_stack_local. */
15030 ix86_force_to_memory (enum machine_mode mode, rtx operand)
15034 gcc_assert (reload_completed);
15035 if (TARGET_RED_ZONE)
15037 result = gen_rtx_MEM (mode,
15038 gen_rtx_PLUS (Pmode,
15040 GEN_INT (-RED_ZONE_SIZE)));
15041 emit_move_insn (result, operand);
15043 else if (!TARGET_RED_ZONE && TARGET_64BIT)
15049 operand = gen_lowpart (DImode, operand);
15053 gen_rtx_SET (VOIDmode,
15054 gen_rtx_MEM (DImode,
15055 gen_rtx_PRE_DEC (DImode,
15056 stack_pointer_rtx)),
15060 gcc_unreachable ();
15062 result = gen_rtx_MEM (mode, stack_pointer_rtx);
15071 split_di (&operand, 1, operands, operands + 1);
15073 gen_rtx_SET (VOIDmode,
15074 gen_rtx_MEM (SImode,
15075 gen_rtx_PRE_DEC (Pmode,
15076 stack_pointer_rtx)),
15079 gen_rtx_SET (VOIDmode,
15080 gen_rtx_MEM (SImode,
15081 gen_rtx_PRE_DEC (Pmode,
15082 stack_pointer_rtx)),
15087 /* It is better to store HImodes as SImodes. */
15088 if (!TARGET_PARTIAL_REG_STALL)
15089 operand = gen_lowpart (SImode, operand);
15093 gen_rtx_SET (VOIDmode,
15094 gen_rtx_MEM (GET_MODE (operand),
15095 gen_rtx_PRE_DEC (SImode,
15096 stack_pointer_rtx)),
15100 gcc_unreachable ();
15102 result = gen_rtx_MEM (mode, stack_pointer_rtx);
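/* Illustrative asm for the branches above (AT&T syntax, a DImode
operand): with a red zone, the store needs no stack adjustment,

movq %rax, -128(%rsp)

while the push-based paths amount to

pushq %rax

(or a pair of pushl's for DImode on 32-bit), after which RESULT
addresses the slot through the stack pointer. */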
15107 /* Free the operand from memory. */
15109 ix86_free_from_memory (enum machine_mode mode)
15111 if (!TARGET_RED_ZONE)
15115 if (mode == DImode || TARGET_64BIT)
15117 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
15121 /* Use LEA to deallocate stack space. In peephole2 it will be converted
15122 to a pop or add instruction if registers are available. */
15123 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
15124 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
15129 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
15130 QImode must go into class Q_REGS.
15131 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
15132 movdf to do mem-to-mem moves through integer regs. */
15134 ix86_preferred_reload_class (rtx x, enum reg_class class)
15136 /* We're only allowed to return a subclass of CLASS. Many of the
15137 following checks fail for NO_REGS, so eliminate that early. */
15138 if (class == NO_REGS)
15141 /* All classes can load zeros. */
15142 if (x == CONST0_RTX (GET_MODE (x)))
15145 /* Floating-point constants need more complex checks. */
15146 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
15148 /* General regs can load everything. */
15149 if (reg_class_subset_p (class, GENERAL_REGS))
15152 /* Floats can load 0 and 1 plus some others. Note that we eliminated
15153 zero above. We only want to wind up preferring 80387 registers if
15154 we plan on doing computation with them. */
15156 && (TARGET_MIX_SSE_I387
15157 || !(TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (x))))
15158 && standard_80387_constant_p (x))
15160 /* Limit class to non-sse. */
15161 if (class == FLOAT_SSE_REGS)
15163 if (class == FP_TOP_SSE_REGS)
15165 if (class == FP_SECOND_SSE_REGS)
15166 return FP_SECOND_REG;
15167 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
15173 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
15175 if (MAYBE_SSE_CLASS_P (class) && CONSTANT_P (x))
15178 /* Generally when we see PLUS here, it's the function invariant
15179 (plus soft-fp const_int), which can only be computed into general regs. */
15181 if (GET_CODE (x) == PLUS)
15182 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
15184 /* QImode constants are easy to load, but non-constant QImode data
15185 must go into Q_REGS. */
15186 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
15188 if (reg_class_subset_p (class, Q_REGS))
15190 if (reg_class_subset_p (Q_REGS, class))
15198 /* If we are copying between general and FP registers, we need a memory
15199 location. The same is true for SSE and MMX registers.
15201 The macro can't work reliably when one of the CLASSES is a class containing
15202 registers from multiple units (SSE, MMX, integer). We avoid this by never
15203 combining those units in a single alternative in the machine description.
15204 Ensure that this constraint holds to avoid unexpected surprises.
15206 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
15207 enforce these sanity checks. */
15210 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
15211 enum machine_mode mode, int strict)
15213 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
15214 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
15215 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
15216 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
15217 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
15218 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
15220 gcc_assert (!strict);
15224 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
15227 /* ??? This is a lie. We do have moves between mmx/general, and for
15228 mmx/sse2. But by saying we need secondary memory we discourage the
15229 register allocator from using the mmx registers unless needed. */
15230 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
15233 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
15235 /* SSE1 doesn't have any direct moves from other classes. */
15239 /* If the target says that inter-unit moves are more expensive
15240 than moving through memory, then don't generate them. */
15241 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
15244 /* Between SSE and general, we have moves no larger than word size. */
15245 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
15248 /* ??? For the cost of one register reformat penalty, we could use
15249 the same instructions to move SFmode and DFmode data, but the
15250 relevant move patterns don't support those alternatives. */
15251 if (mode == SFmode || mode == DFmode)
15258 /* Return true if the registers in CLASS cannot represent the change from
15259 modes FROM to TO. */
15262 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
15263 enum reg_class class)
15268 /* x87 registers can't do subreg at all, as all values are reformatted
15269 to extended precision. */
15270 if (MAYBE_FLOAT_CLASS_P (class))
15273 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
15275 /* Vector registers do not support QI or HImode loads. If we don't
15276 disallow a change to these modes, reload will assume it's ok to
15277 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
15278 the vec_dupv4hi pattern. */
15279 if (GET_MODE_SIZE (from) < 4)
15282 /* Vector registers do not support subreg with nonzero offsets, which
15283 are otherwise valid for integer registers. Since we can't see
15284 whether we have a nonzero offset from here, prohibit all
15285 nonparadoxical subregs changing size. */
15286 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
15293 /* Return the cost of moving data from a register in class CLASS1 to
15294 one in class CLASS2.
15296 It is not required that the cost always equal 2 when FROM is the same as TO;
15297 on some machines it is expensive to move between registers if they are not
15298 general registers. */
15301 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
15302 enum reg_class class2)
15304 /* In case we require secondary memory, compute cost of the store followed
15305 by load. In order to avoid bad register allocation choices, we need
15306 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
15308 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
15312 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
15313 MEMORY_MOVE_COST (mode, class1, 1));
15314 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
15315 MEMORY_MOVE_COST (mode, class2, 1));
15317 /* In case of copying from a general purpose register we may emit multiple
15318 stores followed by a single load, causing a memory size mismatch stall.
15319 Count this as an arbitrarily high cost of 20. */
15320 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
15323 /* In the case of FP/MMX moves, the registers actually overlap, and we
15324 have to switch modes in order to treat them differently. */
15325 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
15326 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
15332 /* Moves between SSE/MMX and integer unit are expensive. */
15333 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
15334 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
15335 return ix86_cost->mmxsse_to_integer;
15336 if (MAYBE_FLOAT_CLASS_P (class1))
15337 return ix86_cost->fp_move;
15338 if (MAYBE_SSE_CLASS_P (class1))
15339 return ix86_cost->sse_move;
15340 if (MAYBE_MMX_CLASS_P (class1))
15341 return ix86_cost->mmx_move;
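/* Worked example (illustrative): an SImode copy between GENERAL_REGS
and SSE_REGS needs secondary memory, so its cost is the sum of the
worst-case store and load costs of both classes; a DImode copy from
two general registers into one SSE register additionally trips the
CLASS_MAX_NREGS check above and picks up the extra cost of 20. */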
15345 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
15348 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
15350 /* The flags register, and only the flags register, can hold CCmode values. */
15351 if (CC_REGNO_P (regno))
15352 return GET_MODE_CLASS (mode) == MODE_CC;
15353 if (GET_MODE_CLASS (mode) == MODE_CC
15354 || GET_MODE_CLASS (mode) == MODE_RANDOM
15355 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
15357 if (FP_REGNO_P (regno))
15358 return VALID_FP_MODE_P (mode);
15359 if (SSE_REGNO_P (regno))
15361 /* We implement the move patterns for all vector modes into and
15362 out of SSE registers, even when no operation instructions are available. */
15364 return (VALID_SSE_REG_MODE (mode)
15365 || VALID_SSE2_REG_MODE (mode)
15366 || VALID_MMX_REG_MODE (mode)
15367 || VALID_MMX_REG_MODE_3DNOW (mode));
15369 if (MMX_REGNO_P (regno))
15371 /* We implement the move patterns for 3DNOW modes even in MMX mode,
15372 so if the register is available at all, then we can move data of
15373 the given mode into or out of it. */
15374 return (VALID_MMX_REG_MODE (mode)
15375 || VALID_MMX_REG_MODE_3DNOW (mode));
15378 if (mode == QImode)
15380 /* Take care for QImode values - they can be in non-QI regs,
15381 but then they do cause partial register stalls. */
15382 if (regno < 4 || TARGET_64BIT)
15384 if (!TARGET_PARTIAL_REG_STALL)
15386 return reload_in_progress || reload_completed;
15388 /* We handle both integers and floats in the general purpose registers. */
15389 else if (VALID_INT_MODE_P (mode))
15391 else if (VALID_FP_MODE_P (mode))
15393 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
15394 on to use that value in smaller contexts, this can easily force a
15395 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
15396 supporting DImode, allow it. */
15397 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
15403 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
15404 tieable integer mode. */
15407 ix86_tieable_integer_mode_p (enum machine_mode mode)
15416 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
15419 return TARGET_64BIT;
15426 /* Return true if MODE1 is accessible in a register that can hold MODE2
15427 without copying. That is, all register classes that can hold MODE2
15428 can also hold MODE1. */
15431 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
15433 if (mode1 == mode2)
15436 if (ix86_tieable_integer_mode_p (mode1)
15437 && ix86_tieable_integer_mode_p (mode2))
15440 /* MODE2 being XFmode implies fp stack or general regs, which means we
15441 can tie any smaller floating point modes to it. Note that we do not
15442 tie this with TFmode. */
15443 if (mode2 == XFmode)
15444 return mode1 == SFmode || mode1 == DFmode;
15446 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
15447 that we can tie it with SFmode. */
15448 if (mode2 == DFmode)
15449 return mode1 == SFmode;
15451 /* If MODE2 is only appropriate for an SSE register, then tie with
15452 any other mode acceptable to SSE registers. */
15453 if (GET_MODE_SIZE (mode2) >= 8
15454 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
15455 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
15457 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
15458 with any other mode acceptable to MMX registers. */
15459 if (GET_MODE_SIZE (mode2) == 8
15460 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
15461 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
15466 /* Return the cost of moving data of mode M between a
15467 register and memory. A value of 2 is the default; this cost is
15468 relative to those in `REGISTER_MOVE_COST'.
15470 If moving between registers and memory is more expensive than
15471 between two registers, you should define this macro to express the
15472 relative cost.
15474 Also model the increased moving costs of QImode registers in
15475 non Q_REGS classes. */
15478 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
15480 if (FLOAT_CLASS_P (class))
15497 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
15499 if (SSE_CLASS_P (class))
15502 switch (GET_MODE_SIZE (mode))
15516 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
15518 if (MMX_CLASS_P (class))
15521 switch (GET_MODE_SIZE (mode))
15532 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
15534 switch (GET_MODE_SIZE (mode))
15538 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
15539 : ix86_cost->movzbl_load);
15541 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
15542 : ix86_cost->int_store[0] + 4);
15545 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
15547 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
15548 if (mode == TFmode)
15550 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
15551 * (((int) GET_MODE_SIZE (mode)
15552 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
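/* For example (illustrative): on ia32, XFmode typically occupies 12
bytes, so the word-count formula above charges int_load[2] (or
int_store[2]) times ceil(12/4) = 3 word moves; TFmode takes the same
path after being treated as XFmode. */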
15556 /* Compute a (partial) cost for rtx X. Return true if the complete
15557 cost has been computed, and false if subexpressions should be
15558 scanned. In either case, *TOTAL contains the cost result. */
15561 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
15563 enum machine_mode mode = GET_MODE (x);
15571 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
15573 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
15575 else if (flag_pic && SYMBOLIC_CONST (x)
15577 || (GET_CODE (x) != LABEL_REF
15578 && (GET_CODE (x) != SYMBOL_REF
15579 || !SYMBOL_REF_LOCAL_P (x)))))
15586 if (mode == VOIDmode)
15589 switch (standard_80387_constant_p (x))
15594 default: /* Other constants */
15599 /* Start with (MEM (SYMBOL_REF)), since that's where
15600 it'll probably end up. Add a penalty for size. */
15601 *total = (COSTS_N_INSNS (1)
15602 + (flag_pic != 0 && !TARGET_64BIT)
15603 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
15609 /* The zero extension is often completely free on x86_64, so make
15610 it as cheap as possible. */
15611 if (TARGET_64BIT && mode == DImode
15612 && GET_MODE (XEXP (x, 0)) == SImode)
15614 else if (TARGET_ZERO_EXTEND_WITH_AND)
15615 *total = COSTS_N_INSNS (ix86_cost->add);
15617 *total = COSTS_N_INSNS (ix86_cost->movzx);
15621 *total = COSTS_N_INSNS (ix86_cost->movsx);
15625 if (GET_CODE (XEXP (x, 1)) == CONST_INT
15626 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
15628 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15631 *total = COSTS_N_INSNS (ix86_cost->add);
15634 if ((value == 2 || value == 3)
15635 && ix86_cost->lea <= ix86_cost->shift_const)
15637 *total = COSTS_N_INSNS (ix86_cost->lea);
15647 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15649 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15651 if (INTVAL (XEXP (x, 1)) > 32)
15652 *total = COSTS_N_INSNS (ix86_cost->shift_const + 2);
15654 *total = COSTS_N_INSNS (ix86_cost->shift_const * 2);
15658 if (GET_CODE (XEXP (x, 1)) == AND)
15659 *total = COSTS_N_INSNS (ix86_cost->shift_var * 2);
15661 *total = COSTS_N_INSNS (ix86_cost->shift_var * 6 + 2);
15666 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15667 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15669 *total = COSTS_N_INSNS (ix86_cost->shift_var);
15674 if (FLOAT_MODE_P (mode))
15676 *total = COSTS_N_INSNS (ix86_cost->fmul);
15681 rtx op0 = XEXP (x, 0);
15682 rtx op1 = XEXP (x, 1);
15684 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15686 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15687 for (nbits = 0; value != 0; value &= value - 1)
15691 /* This is arbitrary. */
15694 /* Compute costs correctly for widening multiplication. */
15695 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
15696 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
15697 == GET_MODE_SIZE (mode))
15699 int is_mulwiden = 0;
15700 enum machine_mode inner_mode = GET_MODE (op0);
15702 if (GET_CODE (op0) == GET_CODE (op1))
15703 is_mulwiden = 1, op1 = XEXP (op1, 0);
15704 else if (GET_CODE (op1) == CONST_INT)
15706 if (GET_CODE (op0) == SIGN_EXTEND)
15707 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
15710 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
15714 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
15717 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15718 + nbits * ix86_cost->mult_bit)
15719 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
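/* The widening shapes recognized above look like (illustrative RTL):

(mult:DI (sign_extend:DI (reg:SI a)) (sign_extend:DI (reg:SI b)))
(mult:DI (zero_extend:DI (reg:SI a)) (const_int c))

where in the constant case C must be representable in the narrow
mode (sign- or zero-extended, matching op0's extension) for the
multiply to be costed as a single widening mult in the inner mode. */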
15728 if (FLOAT_MODE_P (mode))
15729 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15731 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15735 if (FLOAT_MODE_P (mode))
15736 *total = COSTS_N_INSNS (ix86_cost->fadd);
15737 else if (GET_MODE_CLASS (mode) == MODE_INT
15738 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15740 if (GET_CODE (XEXP (x, 0)) == PLUS
15741 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15742 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15743 && CONSTANT_P (XEXP (x, 1)))
15745 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15746 if (val == 2 || val == 4 || val == 8)
15748 *total = COSTS_N_INSNS (ix86_cost->lea);
15749 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15750 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15752 *total += rtx_cost (XEXP (x, 1), outer_code);
15756 else if (GET_CODE (XEXP (x, 0)) == MULT
15757 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15759 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15760 if (val == 2 || val == 4 || val == 8)
15762 *total = COSTS_N_INSNS (ix86_cost->lea);
15763 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15764 *total += rtx_cost (XEXP (x, 1), outer_code);
15768 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15770 *total = COSTS_N_INSNS (ix86_cost->lea);
15771 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15772 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15773 *total += rtx_cost (XEXP (x, 1), outer_code);
15780 if (FLOAT_MODE_P (mode))
15782 *total = COSTS_N_INSNS (ix86_cost->fadd);
15790 if (!TARGET_64BIT && mode == DImode)
15792 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15793 + (rtx_cost (XEXP (x, 0), outer_code)
15794 << (GET_MODE (XEXP (x, 0)) != DImode))
15795 + (rtx_cost (XEXP (x, 1), outer_code)
15796 << (GET_MODE (XEXP (x, 1)) != DImode)));
15802 if (FLOAT_MODE_P (mode))
15804 *total = COSTS_N_INSNS (ix86_cost->fchs);
15810 if (!TARGET_64BIT && mode == DImode)
15811 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15813 *total = COSTS_N_INSNS (ix86_cost->add);
15817 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
15818 && XEXP (XEXP (x, 0), 1) == const1_rtx
15819 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
15820 && XEXP (x, 1) == const0_rtx)
15822 /* This kind of construct is implemented using test[bwl].
15823 Treat it as if we had an AND. */
15824 *total = (COSTS_N_INSNS (ix86_cost->add)
15825 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
15826 + rtx_cost (const1_rtx, outer_code));
15832 if (!TARGET_SSE_MATH
15834 || (mode == DFmode && !TARGET_SSE2))
15839 if (FLOAT_MODE_P (mode))
15840 *total = COSTS_N_INSNS (ix86_cost->fabs);
15844 if (FLOAT_MODE_P (mode))
15845 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15849 if (XINT (x, 1) == UNSPEC_TP)
15860 static int current_machopic_label_num;
15862 /* Given a symbol name and its associated stub, write out the
15863 definition of the stub. */
15866 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15868 unsigned int length;
15869 char *binder_name, *symbol_name, lazy_ptr_name[32];
15870 int label = ++current_machopic_label_num;
15872 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15873 symb = (*targetm.strip_name_encoding) (symb);
15875 length = strlen (stub);
15876 binder_name = alloca (length + 32);
15877 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15879 length = strlen (symb);
15880 symbol_name = alloca (length + 32);
15881 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15883 sprintf (lazy_ptr_name, "L%d$lz", label);
15886 machopic_picsymbol_stub_section ();
15888 machopic_symbol_stub_section ();
15890 fprintf (file, "%s:\n", stub);
15891 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15895 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15896 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15897 fprintf (file, "\tjmp %%edx\n");
15900 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15902 fprintf (file, "%s:\n", binder_name);
15906 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15907 fprintf (file, "\tpushl %%eax\n");
15910 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15912 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15914 machopic_lazy_symbol_ptr_section ();
15915 fprintf (file, "%s:\n", lazy_ptr_name);
15916 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15917 fprintf (file, "\t.long %s\n", binder_name);
15919 #endif /* TARGET_MACHO */
15921 /* Order the registers for the register allocator. */
15924 x86_order_regs_for_local_alloc (void)
15929 /* First allocate the local general purpose registers. */
15930 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15931 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15932 reg_alloc_order [pos++] = i;
15934 /* Global general purpose registers. */
15935 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15936 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15937 reg_alloc_order [pos++] = i;
15939 /* x87 registers come first in case we are doing FP math using them. */
15941 if (!TARGET_SSE_MATH)
15942 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15943 reg_alloc_order [pos++] = i;
15945 /* SSE registers. */
15946 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15947 reg_alloc_order [pos++] = i;
15948 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15949 reg_alloc_order [pos++] = i;
15951 /* x87 registers. */
15952 if (TARGET_SSE_MATH)
15953 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15954 reg_alloc_order [pos++] = i;
15956 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15957 reg_alloc_order [pos++] = i;
15959 /* Initialize the rest of the array, as we do not allocate some registers at all. */
15961 while (pos < FIRST_PSEUDO_REGISTER)
15962 reg_alloc_order [pos++] = 0;
15965 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15966 struct attribute_spec.handler. */
15968 ix86_handle_struct_attribute (tree *node, tree name,
15969 tree args ATTRIBUTE_UNUSED,
15970 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15973 if (DECL_P (*node))
15975 if (TREE_CODE (*node) == TYPE_DECL)
15976 type = &TREE_TYPE (*node);
15981 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15982 || TREE_CODE (*type) == UNION_TYPE)))
15984 warning (OPT_Wattributes, "%qs attribute ignored",
15985 IDENTIFIER_POINTER (name));
15986 *no_add_attrs = true;
15989 else if ((is_attribute_p ("ms_struct", name)
15990 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15991 || ((is_attribute_p ("gcc_struct", name)
15992 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15994 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
15995 IDENTIFIER_POINTER (name));
15996 *no_add_attrs = true;
16003 ix86_ms_bitfield_layout_p (tree record_type)
16005 return (TARGET_MS_BITFIELD_LAYOUT &&
16006 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
16007 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
16010 /* Returns an expression indicating where the this parameter is
16011 located on entry to the FUNCTION. */
16014 x86_this_parameter (tree function)
16016 tree type = TREE_TYPE (function);
16020 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
16021 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
16024 if (ix86_function_regparm (type, function) > 0)
16028 parm = TYPE_ARG_TYPES (type);
16029 /* Figure out whether or not the function has a variable number of arguments. */
16031 for (; parm; parm = TREE_CHAIN (parm))
16032 if (TREE_VALUE (parm) == void_type_node)
16034 /* If not, the this parameter is in the first argument. */
16038 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
16040 return gen_rtx_REG (SImode, regno);
16044 if (aggregate_value_p (TREE_TYPE (type), type))
16045 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
16047 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
16050 /* Determine whether x86_output_mi_thunk can succeed. */
16053 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
16054 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
16055 HOST_WIDE_INT vcall_offset, tree function)
16057 /* 64-bit can handle anything. */
16061 /* For 32-bit, everything's fine if we have one free register. */
16062 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
16065 /* Need a free register for vcall_offset. */
16069 /* Need a free register for GOT references. */
16070 if (flag_pic && !(*targetm.binds_local_p) (function))
16073 /* Otherwise ok. */
16077 /* Output the assembler code for a thunk function. THUNK_DECL is the
16078 declaration for the thunk function itself, FUNCTION is the decl for
16079 the target function. DELTA is an immediate constant offset to be
16080 added to THIS. If VCALL_OFFSET is nonzero, the word at
16081 *(*this + vcall_offset) should be added to THIS. */
16084 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
16085 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
16086 HOST_WIDE_INT vcall_offset, tree function)
16089 rtx this = x86_this_parameter (function);
16092 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
16093 pull it in now and let DELTA benefit. */
16096 else if (vcall_offset)
16098 /* Put the this parameter into %eax. */
16100 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
16101 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
16104 this_reg = NULL_RTX;
16106 /* Adjust the this parameter by a fixed constant. */
16109 xops[0] = GEN_INT (delta);
16110 xops[1] = this_reg ? this_reg : this;
16113 if (!x86_64_general_operand (xops[0], DImode))
16115 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
16117 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
16121 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
16124 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
16127 /* Adjust the this parameter by a value stored in the vtable. */
16131 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
16134 int tmp_regno = 2 /* ECX */;
16135 if (lookup_attribute ("fastcall",
16136 TYPE_ATTRIBUTES (TREE_TYPE (function))))
16137 tmp_regno = 0 /* EAX */;
16138 tmp = gen_rtx_REG (SImode, tmp_regno);
16141 xops[0] = gen_rtx_MEM (Pmode, this_reg);
16144 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
16146 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
16148 /* Adjust the this parameter. */
16149 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
16150 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
16152 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
16153 xops[0] = GEN_INT (vcall_offset);
16155 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
16156 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
16158 xops[1] = this_reg;
16160 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
16162 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
16165 /* If necessary, drop THIS back to its stack slot. */
16166 if (this_reg && this_reg != this)
16168 xops[0] = this_reg;
16170 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
16173 xops[0] = XEXP (DECL_RTL (function), 0);
16176 if (!flag_pic || (*targetm.binds_local_p) (function))
16177 output_asm_insn ("jmp\t%P0", xops);
16180 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
16181 tmp = gen_rtx_CONST (Pmode, tmp);
16182 tmp = gen_rtx_MEM (QImode, tmp);
16184 output_asm_insn ("jmp\t%A0", xops);
16189 if (!flag_pic || (*targetm.binds_local_p) (function))
16190 output_asm_insn ("jmp\t%P0", xops);
16195 rtx sym_ref = XEXP (DECL_RTL (function), 0);
16196 tmp = (gen_rtx_SYMBOL_REF
16198 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
16199 tmp = gen_rtx_MEM (QImode, tmp);
16201 output_asm_insn ("jmp\t%0", xops);
16204 #endif /* TARGET_MACHO */
16206 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
16207 output_set_got (tmp);
16210 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
16211 output_asm_insn ("jmp\t{*}%1", xops);
16217 x86_file_start (void)
16219 default_file_start ();
16220 if (X86_FILE_START_VERSION_DIRECTIVE)
16221 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
16222 if (X86_FILE_START_FLTUSED)
16223 fputs ("\t.global\t__fltused\n", asm_out_file);
16224 if (ix86_asm_dialect == ASM_INTEL)
16225 fputs ("\t.intel_syntax\n", asm_out_file);
16229 x86_field_alignment (tree field, int computed)
16231 enum machine_mode mode;
16232 tree type = TREE_TYPE (field);
16234 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
16236 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
16237 ? get_inner_array_type (type) : type);
16238 if (mode == DFmode || mode == DCmode
16239 || GET_MODE_CLASS (mode) == MODE_INT
16240 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
16241 return MIN (32, computed);
16245 /* Output assembler code to FILE to increment profiler label # LABELNO
16246 for profiling a function entry. */
16248 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
16253 #ifndef NO_PROFILE_COUNTERS
16254 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
16256 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
16260 #ifndef NO_PROFILE_COUNTERS
16261 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
16263 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
16267 #ifndef NO_PROFILE_COUNTERS
16268 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
16269 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
16271 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
16275 #ifndef NO_PROFILE_COUNTERS
16276 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
16277 PROFILE_COUNT_REGISTER);
16279 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
16283 /* We don't have exact information about the insn sizes, but we may assume
16284 quite safely that we are informed about all 1-byte insns and memory
16285 address sizes. This is enough to eliminate unnecessary padding in most cases. */
16289 min_insn_size (rtx insn)
16293 if (!INSN_P (insn) || !active_insn_p (insn))
16296 /* Discard alignments we've emitted, and jump instructions. */
16297 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
16298 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
16300 if (GET_CODE (insn) == JUMP_INSN
16301 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
16302 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
16305 /* Important case - calls are always 5 bytes.
16306 It is common to have many calls in a row. */
16307 if (GET_CODE (insn) == CALL_INSN
16308 && symbolic_reference_mentioned_p (PATTERN (insn))
16309 && !SIBLING_CALL_P (insn))
16311 if (get_attr_length (insn) <= 1)
16314 /* For normal instructions we may rely on the sizes of addresses
16315 and the presence of a symbol to require 4 bytes of encoding.
16316 This is not the case for jumps where references are PC relative. */
16317 if (GET_CODE (insn) != JUMP_INSN)
16319 l = get_attr_length_address (insn);
16320 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
16329 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16-byte window. */
16333 ix86_avoid_jump_misspredicts (void)
16335 rtx insn, start = get_insns ();
16336 int nbytes = 0, njumps = 0;
16339 /* Look for all minimal intervals of instructions containing 4 jumps.
16340 The intervals are bounded by START and INSN. NBYTES is the total
16341 size of instructions in the interval including INSN and not including
16342 START. When NBYTES is smaller than 16 bytes, it is possible
16343 that the ends of START and INSN land in the same 16-byte page.
16345 The smallest offset in the page at which INSN can start is the case where
16346 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
16347 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN).
16349 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16352 nbytes += min_insn_size (insn);
16354 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
16355 INSN_UID (insn), min_insn_size (insn));
16356 if ((GET_CODE (insn) == JUMP_INSN
16357 && GET_CODE (PATTERN (insn)) != ADDR_VEC
16358 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
16359 || GET_CODE (insn) == CALL_INSN)
16366 start = NEXT_INSN (start);
16367 if ((GET_CODE (start) == JUMP_INSN
16368 && GET_CODE (PATTERN (start)) != ADDR_VEC
16369 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
16370 || GET_CODE (start) == CALL_INSN)
16371 njumps--, isjump = 1;
16374 nbytes -= min_insn_size (start);
16376 gcc_assert (njumps >= 0);
16378 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
16379 INSN_UID (start), INSN_UID (insn), nbytes);
16381 if (njumps == 3 && isjump && nbytes < 16)
16383 int padsize = 15 - nbytes + min_insn_size (insn);
16386 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
16387 INSN_UID (insn), padsize);
16388 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
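/* Numeric example (illustrative): if INSN would be the fourth branch
in a window and the interval holds NBYTES == 12 with
min_insn_size (INSN) == 2, we emit a p2align padding of
15 - 12 + 2 = 5 bytes, so the four branches can no longer share one
16-byte line. */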
16393 /* AMD Athlon works faster
16394 when RET is not the destination of a conditional jump or directly preceded
16395 by another jump instruction. We avoid the penalty by inserting a NOP just
16396 before the RET instruction in such cases. */
16398 ix86_pad_returns (void)
16403 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
16405 basic_block bb = e->src;
16406 rtx ret = BB_END (bb);
16408 bool replace = false;
16410 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
16411 || !maybe_hot_bb_p (bb))
16413 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
16414 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
16416 if (prev && GET_CODE (prev) == CODE_LABEL)
16421 FOR_EACH_EDGE (e, ei, bb->preds)
16422 if (EDGE_FREQUENCY (e) && e->src->index >= 0
16423 && !(e->flags & EDGE_FALLTHRU))
16428 prev = prev_active_insn (ret);
16430 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
16431 || GET_CODE (prev) == CALL_INSN))
16433 /* Empty functions get a branch mispredict even when the jump destination
16434 is not visible to us. */
16435 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
16440 emit_insn_before (gen_return_internal_long (), ret);
16446 /* Implement machine specific optimizations. We implement padding of returns
16447 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
16451 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
16452 ix86_pad_returns ();
16453 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
16454 ix86_avoid_jump_misspredicts ();
16457 /* Return nonzero when a QImode register that must be represented via a REX prefix is used. */
16460 x86_extended_QIreg_mentioned_p (rtx insn)
16463 extract_insn_cached (insn);
16464 for (i = 0; i < recog_data.n_operands; i++)
16465 if (REG_P (recog_data.operand[i])
16466 && REGNO (recog_data.operand[i]) >= 4)
16471 /* Return nonzero when P points to a register encoded via a REX prefix.
16472 Called via for_each_rtx. */
16474 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
16476 unsigned int regno;
16479 regno = REGNO (*p);
16480 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
16483 /* Return true when INSN mentions a register that must be encoded using a REX prefix. */
16486 x86_extended_reg_mentioned_p (rtx insn)
16488 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
16491 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
16492 optabs would emit if we didn't have TFmode patterns. */
16495 x86_emit_floatuns (rtx operands[2])
16497 rtx neglab, donelab, i0, i1, f0, in, out;
16498 enum machine_mode mode, inmode;
16500 inmode = GET_MODE (operands[1]);
16501 gcc_assert (inmode == SImode || inmode == DImode);
16504 in = force_reg (inmode, operands[1]);
16505 mode = GET_MODE (out);
16506 neglab = gen_label_rtx ();
16507 donelab = gen_label_rtx ();
16508 i1 = gen_reg_rtx (Pmode);
16509 f0 = gen_reg_rtx (mode);
16511 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
16513 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
16514 emit_jump_insn (gen_jump (donelab));
16517 emit_label (neglab);
16519 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
16520 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
16521 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
16522 expand_float (f0, i0, 0);
16523 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
16525 emit_label (donelab);
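/* The emitted sequence corresponds to this C model (an illustrative
sketch only, for a 64-bit unsigned input):

double model_floatuns64 (unsigned long long u)
{
if ((long long) u >= 0)          // sign bit clear: direct signed convert
return (double) (long long) u;
// Halve the value, folding the low bit back in as a sticky bit so
// rounding of the final result is unaffected; convert, then double.
unsigned long long i = (u >> 1) | (u & 1);
double f = (double) (long long) i;
return f + f;
}
*/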
16528 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
16529 with all elements equal to VAR. Return true if successful. */
16532 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
16533 rtx target, rtx val)
16535 enum machine_mode smode, wsmode, wvmode;
16542 if (!mmx_ok && !TARGET_SSE)
16550 val = force_reg (GET_MODE_INNER (mode), val);
16551 x = gen_rtx_VEC_DUPLICATE (mode, val);
16552 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16558 if (TARGET_SSE || TARGET_3DNOW_A)
16560 val = gen_lowpart (SImode, val);
16561 x = gen_rtx_TRUNCATE (HImode, val);
16562 x = gen_rtx_VEC_DUPLICATE (mode, x);
16563 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16592 /* Replicate the value once into the next wider mode and recurse. */
16593 val = convert_modes (wsmode, smode, val, true);
16594 x = expand_simple_binop (wsmode, ASHIFT, val,
16595 GEN_INT (GET_MODE_BITSIZE (smode)),
16596 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16597 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
16599 x = gen_reg_rtx (wvmode);
16600 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
16601 gcc_unreachable ();
16602 emit_move_insn (target, gen_lowpart (mode, x));
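/* Worked example (illustrative): duplicating the QImode value 0xAB
into a vector proceeds one widening step per recursion:

0xAB -> 0xABAB (QI -> HI: val | (val << 8))
0xABAB -> 0xABABABAB (HI -> SI)

until a mode with a directly supported duplicate pattern is reached. */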
16610 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
16611 whose low element is VAR, and the other elements are zero. Return true if successful. */
16615 ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
16616 rtx target, rtx var)
16618 enum machine_mode vsimode;
16625 if (!mmx_ok && !TARGET_SSE)
16631 var = force_reg (GET_MODE_INNER (mode), var);
16632 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
16633 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16638 var = force_reg (GET_MODE_INNER (mode), var);
16639 x = gen_rtx_VEC_DUPLICATE (mode, var);
16640 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
16641 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16646 vsimode = V4SImode;
16652 vsimode = V2SImode;
16655 /* Zero extend the variable element to SImode and recurse. */
16656 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
16658 x = gen_reg_rtx (vsimode);
16659 if (!ix86_expand_vector_init_low_nonzero (mmx_ok, vsimode, x, var))
16660 gcc_unreachable ();
16662 emit_move_insn (target, gen_lowpart (mode, x));
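/* Added note (an interpretation of the RTL above): for V4SFmode/V4SImode
   the VEC_MERGE with mask const1_rtx keeps only element 0 of the duplicate
   and takes the remaining elements from the zero vector, i.e. the usual
   movss/movd "load low element, zero the rest" idiom.  */
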
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
                                 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  XVECEXP (vals, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
    case V2SFmode:
    case V2SImode:
      /* For the two element vectors, it's just as easy to use
         the general case.  */
      return false;

    case V4SFmode:
    case V4SImode:
    case V8HImode:
    case V4HImode:
      break;

    case V16QImode:
      wmode = V8HImode;
      goto widen;
    case V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
         the variable value with its adjacent constant value, and
         promote to an HImode set.  */
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
        {
          var = convert_modes (HImode, QImode, var, true);
          var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
                                     NULL_RTX, 1, OPTAB_LIB_WIDEN);
          x = GEN_INT (INTVAL (x) & 0xff);
        }
      else
        {
          var = convert_modes (HImode, QImode, var, true);
          x = gen_int_mode (INTVAL (x) << 8, HImode);
        }
      if (x != const0_rtx)
        var = expand_simple_binop (HImode, IOR, var, x, var,
                                   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}

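/* Added worked example (illustrative): to set byte 2 of a V8QImode vector,
   ONE_VAR is 2 and its HImode neighbor is byte 3.  Since one_var is even,
   the variable byte stays in the low half, the constant neighbor is shifted
   into the high half, and the combined HImode value is stored as element
   one_var >> 1 == 1 of the V4HImode image of the vector.  */
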
/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
                                 rtx target, rtx vals)
{
  enum machine_mode half_mode = GET_MODE_INNER (mode);
  rtx op0 = NULL, op1 = NULL;
  bool use_vec_concat = false;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
        break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      /* For the two element vectors, we always implement VEC_CONCAT.  */
      op0 = XVECEXP (vals, 0, 0);
      op1 = XVECEXP (vals, 0, 1);
      use_vec_concat = true;
      break;

    case V4SFmode:
      half_mode = V2SFmode;
      goto half;
    case V4SImode:
      half_mode = V2SImode;
      goto half;
    half:
      {
        rtx v;

        /* For V4SF and V4SI, we implement a concat of two V2 vectors.
           Recurse to load the two halves.  */

        op0 = gen_reg_rtx (half_mode);
        v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
        ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));

        op1 = gen_reg_rtx (half_mode);
        v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
        ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));

        use_vec_concat = true;
      }
      break;

    case V8HImode:
    case V16QImode:
    case V4HImode:
    case V8QImode:
      break;

    default:
      gcc_unreachable ();
    }

  if (use_vec_concat)
    {
      if (!register_operand (op0, half_mode))
        op0 = force_reg (half_mode, op0);
      if (!register_operand (op1, half_mode))
        op1 = force_reg (half_mode, op1);

      emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_rtx_VEC_CONCAT (mode, op0, op1)));
    }
  else
    {
      int i, j, n_elts, n_words, n_elt_per_word;
      enum machine_mode inner_mode;
      rtx words[4], shift;

      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

      for (i = 0; i < n_words; ++i)
        {
          rtx word = NULL_RTX;

          for (j = 0; j < n_elt_per_word; ++j)
            {
              rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
              elt = convert_modes (word_mode, inner_mode, elt, true);

              if (j == 0)
                word = elt;
              else
                {
                  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
                                              word, 1, OPTAB_LIB_WIDEN);
                  word = expand_simple_binop (word_mode, IOR, word, elt,
                                              word, 1, OPTAB_LIB_WIDEN);
                }
            }

          words[i] = word;
        }

      if (n_words == 1)
        emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)
        {
          rtx tmp = gen_reg_rtx (mode);
          emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
          emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
          emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
          emit_move_insn (target, tmp);
        }
      else if (n_words == 4)
        {
          rtx tmp = gen_reg_rtx (V4SImode);
          vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
          ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
          emit_move_insn (target, gen_lowpart (mode, tmp));
        }
      else
        gcc_unreachable ();
    }
}

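/* Added worked example (illustrative): for V8HImode on a 32-bit target,
   n_words is 4 and each SImode word packs two HImode elements, highest
   first, so word = (elt[2i+1] << 16) | elt[2i].  The four words are then
   recombined by recursing through the V4SImode case above.  */
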
/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
        n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
        all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
        all_same = false;
    }

  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
                                            XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */
  if (n_var == 1)
    {
      if (all_const_zero && one_var == 0
          && ix86_expand_vector_init_low_nonzero (mmx_ok, mode, target,
                                                  XVECEXP (vals, 0, 0)))
        return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
        return;
    }

  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}

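/* Added dispatch summary (illustrative): e.g. {1,2,3,4} becomes a
   constant-pool load, {x,x,x,x} a broadcast, {x,0,0,0} the low-nonzero
   path, {x,1,2,3} the one-variable path, and everything else falls
   through to the general expander.  */
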
void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_merge = false;
  rtx tmp;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (mmx_ok)
        {
          tmp = gen_reg_rtx (GET_MODE_INNER (mode));
          ix86_expand_vector_extract (true, tmp, target, 1 - elt);
          if (elt == 1)
            tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
          else
            tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
          emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
          return;
        }
      break;

    case V2DFmode:
    case V2DImode:
      {
        rtx op0, op1;

        /* For the two element vectors, we implement a VEC_CONCAT with
           the extraction of the other element.  */

        tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
        tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

        if (elt == 0)
          op0 = val, op1 = tmp;
        else
          op0 = tmp, op1 = val;

        tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
        emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      switch (elt)
        {
        case 0:
          use_vec_merge = true;
          break;

        case 1:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* target = A A B B */
          emit_insn (gen_sse_unpcklps (target, target, target));
          /* target = X A B B */
          ix86_expand_vector_set (false, target, val, 0);
          /* target = A X C D */
          emit_insn (gen_sse_shufps_1 (target, target, tmp,
                                       GEN_INT (1), GEN_INT (0),
                                       GEN_INT (2+4), GEN_INT (3+4)));
          return;

        case 2:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* tmp = X B C D */
          ix86_expand_vector_set (false, tmp, val, 0);
          /* target = A B X D */
          emit_insn (gen_sse_shufps_1 (target, target, tmp,
                                       GEN_INT (0), GEN_INT (1),
                                       GEN_INT (0+4), GEN_INT (3+4)));
          return;

        case 3:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* tmp = X B C D */
          ix86_expand_vector_set (false, tmp, val, 0);
          /* target = A B C X */
          emit_insn (gen_sse_shufps_1 (target, target, tmp,
                                       GEN_INT (0), GEN_INT (1),
                                       GEN_INT (2+4), GEN_INT (0+4)));
          return;

        default:
          gcc_unreachable ();
        }
      break;

    case V4SImode:
      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
        {
          use_vec_merge = true;
          break;
        }

      if (TARGET_SSE2)
        {
          /* With SSE2, use integer shuffles to swap element 0 and ELT,
             store into element 0, then shuffle them back.  */
          rtx order[4];

          order[0] = GEN_INT (elt);
          order[1] = const1_rtx;
          order[2] = const2_rtx;
          order[3] = GEN_INT (3);
          order[elt] = const0_rtx;

          emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
                                        order[1], order[2], order[3]));

          ix86_expand_vector_set (false, target, val, 0);

          emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
                                        order[1], order[2], order[3]));
        }
      else
        {
          /* For SSE1, we have to reuse the V4SF code.  */
          ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
                                  gen_lowpart (SFmode, val), elt);
        }
      return;

    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
    case V8QImode:
    default:
      break;
    }

  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}

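/* Added note on the shufps sequences above (my reading of the sse_shufps_1
   pattern): the first two element selectors index the first source operand
   and the "+4" selectors index the second, so e.g. (1, 0, 2+4, 3+4) builds
   { op1[1], op1[0], op2[2], op2[3] }.  */
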
void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
        break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;

    case V4SFmode:
      switch (elt)
        {
        case 0:
          tmp = vec;
          break;

        case 1:
        case 3:
          tmp = gen_reg_rtx (mode);
          emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
                                       GEN_INT (elt), GEN_INT (elt),
                                       GEN_INT (elt+4), GEN_INT (elt+4)));
          break;

        case 2:
          tmp = gen_reg_rtx (mode);
          emit_insn (gen_sse_unpckhps (tmp, vec, vec));
          break;

        default:
          gcc_unreachable ();
        }
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      if (TARGET_SSE2)
        {
          switch (elt)
            {
            case 0:
              tmp = vec;
              break;

            case 1:
            case 3:
              tmp = gen_reg_rtx (mode);
              emit_insn (gen_sse2_pshufd_1 (tmp, vec,
                                            GEN_INT (elt), GEN_INT (elt),
                                            GEN_INT (elt), GEN_INT (elt)));
              break;

            case 2:
              tmp = gen_reg_rtx (mode);
              emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
              break;

            default:
              gcc_unreachable ();
            }
          vec = tmp;
          use_vec_extr = true;
          elt = 0;
        }
      else
        {
          /* For SSE1, we have to reuse the V4SF code.  */
          ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
                                      gen_lowpart (V4SFmode, vec), elt);
          return;
        }
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == HImode)
        {
          tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
          target = gen_lowpart (SImode, target);
        }

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}

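/* Added note (illustrative): the V4SFmode/V4SImode cases above never
   extract an upper element directly; they shuffle the wanted element into
   position 0 (shufps/pshufd, or unpckhps/punpckhdq for element 2) and then
   fall through to the generic vec_select of element 0.  */
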
/* Implements target hook vector_mode_supported_p.  */
bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;

  return false;
}

/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
                      tree inputs ATTRIBUTE_UNUSED,
                      tree clobbers)
{
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
                        clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
                        clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
                        clobbers);
  return clobbers;
}

/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  return (mode != CCFPmode && mode != CCFPUmode
          ? reverse_condition (code)
          : reverse_condition_maybe_unordered (code));
}

/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[1])
      && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
    {
      if (REGNO (operands[0]) == FIRST_STACK_REG
          && TARGET_USE_FFREEP)
        return "ffreep\t%y0";
      return "fstp\t%y0";
    }

  if (STACK_TOP_P (operands[0]))
    return "fld%z1\t%y1";

  return "fst\t%y0";
}

/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_USE_SAHF)
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
                               gen_rtx_LABEL_REF (VOIDmode, label),
                               pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
  emit_jump_insn (temp);
}

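/* Added background note: fnstsw leaves the x87 status word in a 16-bit
   register; C2 is bit 10, i.e. bit 2 of the high byte, hence the 0x04
   test of the upper byte.  With TARGET_USE_SAHF that byte is instead
   copied into EFLAGS, where C2 lands in PF and can be tested as an
   unordered comparison.  */
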
/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  emit_insn (gen_absxf2 (tmp, op1));
  emit_insn (gen_cmpxf (tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode)));
  emit_jump_insn (gen_bge (label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));

  emit_label (label2);
}

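/* Added background note: the constant compared against above is
   1 - sqrt(2)/2, the bound below which fyl2xp1 is specified.  So
   log1p(x) is computed as ln(2) * log2(1 + x) via fyl2xp1 for small |x|,
   and via fyl2x on the explicitly formed 1 + x otherwise.  */
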
/* Solaris named-section hook.  Parameters are as for
   named_section_real.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
                                tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
               flags & SECTION_WRITE ? "aw" : "a");
      return;
    }
  default_elf_asm_named_section (name, flags, decl);
}

#include "gt-i386.h"