/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"
#include "tree-gimple.h"
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
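/* Illustrative use (an editorial sketch, not part of the original file):
   the per-mode arrays in struct processor_costs below are indexed with
   MODE_INDEX, e.g.

     int mult_cost = ix86_cost->mult_init[MODE_INDEX (SImode)];

   where mult_init is the "cost of starting a multiply" array.  */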
/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  2,			/* cost of an add instruction */
  3,			/* cost of a lea instruction */
  2,			/* variable shift costs */
  3,			/* constant shift costs */
  {3, 3, 3, 3, 5},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {3, 3, 3, 3, 5},	/* cost of a divide/mod */
  3,			/* cost of movsx */
  3,			/* cost of movzx */
  0,			/* "large" insn */
  2,			/* MOVE_RATIO */
  2,			/* cost for loading QImode using movzbl */
  {2, 2, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 2, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 2},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {2, 2, 2},		/* cost of storing fp registers */
  3,			/* cost of moving MMX register */
  {3, 3},		/* cost of loading MMX registers
			   in SImode and DImode */
  {3, 3},		/* cost of storing MMX registers
			   in SImode and DImode */
  3,			/* cost of moving SSE register */
  {3, 3, 3},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {3, 3, 3},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  1,			/* Branch cost */
  2,			/* cost of FADD and FSUB insns.  */
  2,			/* cost of FMUL instruction.  */
  2,			/* cost of FDIV instruction.  */
  2,			/* cost of FABS instruction.  */
  2,			/* cost of FCHS instruction.  */
  2,			/* cost of FSQRT instruction.  */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  3,			/* variable shift costs */
  2,			/* constant shift costs */
  {6, 6, 6, 6, 6},	/* cost of starting a multiply */
  1,			/* cost of multiply per each bit set */
  {23, 23, 23, 23, 23},	/* cost of a divide/mod */
  3,			/* cost of movsx */
  2,			/* cost of movzx */
  15,			/* "large" insn */
  3,			/* MOVE_RATIO */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {8, 8, 8},		/* cost of storing fp registers */
  2,			/* cost of moving MMX register */
  {4, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  1,			/* Branch cost */
  23,			/* cost of FADD and FSUB insns.  */
  27,			/* cost of FMUL instruction.  */
  88,			/* cost of FDIV instruction.  */
  22,			/* cost of FABS instruction.  */
  24,			/* cost of FCHS instruction.  */
  122,			/* cost of FSQRT instruction.  */
};
static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  3,			/* variable shift costs */
  2,			/* constant shift costs */
  {12, 12, 12, 12, 12},	/* cost of starting a multiply */
  1,			/* cost of multiply per each bit set */
  {40, 40, 40, 40, 40},	/* cost of a divide/mod */
  3,			/* cost of movsx */
  2,			/* cost of movzx */
  15,			/* "large" insn */
  3,			/* MOVE_RATIO */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {8, 8, 8},		/* cost of storing fp registers */
  2,			/* cost of moving MMX register */
  {4, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  1,			/* Branch cost */
  8,			/* cost of FADD and FSUB insns.  */
  16,			/* cost of FMUL instruction.  */
  73,			/* cost of FDIV instruction.  */
  3,			/* cost of FABS instruction.  */
  3,			/* cost of FCHS instruction.  */
  83,			/* cost of FSQRT instruction.  */
};
static const
struct processor_costs pentium_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  4,			/* variable shift costs */
  1,			/* constant shift costs */
  {11, 11, 11, 11, 11},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {25, 25, 25, 25, 25},	/* cost of a divide/mod */
  3,			/* cost of movsx */
  2,			/* cost of movzx */
  8,			/* "large" insn */
  6,			/* MOVE_RATIO */
  6,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers */
  8,			/* cost of moving MMX register */
  {8, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {8, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  2,			/* Branch cost */
  3,			/* cost of FADD and FSUB insns.  */
  3,			/* cost of FMUL instruction.  */
  39,			/* cost of FDIV instruction.  */
  1,			/* cost of FABS instruction.  */
  1,			/* cost of FCHS instruction.  */
  70,			/* cost of FSQRT instruction.  */
};
static const
struct processor_costs pentiumpro_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  {4, 4, 4, 4, 4},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {17, 17, 17, 17, 17},	/* cost of a divide/mod */
  1,			/* cost of movsx */
  1,			/* cost of movzx */
  8,			/* "large" insn */
  6,			/* MOVE_RATIO */
  2,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 2, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {2, 2, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  32,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  2,			/* Branch cost */
  3,			/* cost of FADD and FSUB insns.  */
  5,			/* cost of FMUL instruction.  */
  56,			/* cost of FDIV instruction.  */
  2,			/* cost of FABS instruction.  */
  2,			/* cost of FCHS instruction.  */
  56,			/* cost of FSQRT instruction.  */
};
static const
struct processor_costs k6_cost = {
  1,			/* cost of an add instruction */
  2,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  {3, 3, 3, 3, 3},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {18, 18, 18, 18, 18},	/* cost of a divide/mod */
  2,			/* cost of movsx */
  2,			/* cost of movzx */
  8,			/* "large" insn */
  4,			/* MOVE_RATIO */
  3,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {6, 6, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 4},		/* cost of storing fp registers */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {2, 2, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  6,			/* MMX or SSE register to integer */
  32,			/* size of prefetch block */
  1,			/* number of parallel prefetches */
  1,			/* Branch cost */
  2,			/* cost of FADD and FSUB insns.  */
  2,			/* cost of FMUL instruction.  */
  56,			/* cost of FDIV instruction.  */
  2,			/* cost of FABS instruction.  */
  2,			/* cost of FCHS instruction.  */
  56,			/* cost of FSQRT instruction.  */
};
static const
struct processor_costs athlon_cost = {
  1,			/* cost of an add instruction */
  2,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  {5, 5, 5, 5, 5},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},	/* cost of a divide/mod */
  1,			/* cost of movsx */
  1,			/* cost of movzx */
  8,			/* "large" insn */
  9,			/* MOVE_RATIO */
  4,			/* cost for loading QImode using movzbl */
  {3, 4, 3},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {3, 4, 3},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {4, 4, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {6, 6, 8},		/* cost of storing fp registers */
  2,			/* cost of moving MMX register */
  {4, 4},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 4},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 4, 6},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 4, 5},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  5,			/* MMX or SSE register to integer */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  5,			/* Branch cost */
  4,			/* cost of FADD and FSUB insns.  */
  4,			/* cost of FMUL instruction.  */
  24,			/* cost of FDIV instruction.  */
  2,			/* cost of FABS instruction.  */
  2,			/* cost of FCHS instruction.  */
  35,			/* cost of FSQRT instruction.  */
};
static const
struct processor_costs k8_cost = {
  1,			/* cost of an add instruction */
  2,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  {3, 4, 3, 4, 5},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},	/* cost of a divide/mod */
  1,			/* cost of movsx */
  1,			/* cost of movzx */
  8,			/* "large" insn */
  9,			/* MOVE_RATIO */
  4,			/* cost for loading QImode using movzbl */
  {3, 4, 3},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {3, 4, 3},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {4, 4, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {6, 6, 8},		/* cost of storing fp registers */
  2,			/* cost of moving MMX register */
  {3, 3},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 4},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 3, 6},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 4, 5},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  5,			/* MMX or SSE register to integer */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  5,			/* Branch cost */
  4,			/* cost of FADD and FSUB insns.  */
  4,			/* cost of FMUL instruction.  */
  19,			/* cost of FDIV instruction.  */
  2,			/* cost of FABS instruction.  */
  2,			/* cost of FCHS instruction.  */
  35,			/* cost of FSQRT instruction.  */
};
static const
struct processor_costs pentium4_cost = {
  1,			/* cost of an add instruction */
  3,			/* cost of a lea instruction */
  4,			/* variable shift costs */
  4,			/* constant shift costs */
  {15, 15, 15, 15, 15},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {56, 56, 56, 56, 56},	/* cost of a divide/mod */
  1,			/* cost of movsx */
  1,			/* cost of movzx */
  16,			/* "large" insn */
  6,			/* MOVE_RATIO */
  2,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  12,			/* cost of moving SSE register */
  {12, 12, 12},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  10,			/* MMX or SSE register to integer */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  2,			/* Branch cost */
  5,			/* cost of FADD and FSUB insns.  */
  7,			/* cost of FMUL instruction.  */
  43,			/* cost of FDIV instruction.  */
  2,			/* cost of FABS instruction.  */
  2,			/* cost of FCHS instruction.  */
  43,			/* cost of FSQRT instruction.  */
};
static const
struct processor_costs nocona_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  {10, 10, 10, 10, 10},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {66, 66, 66, 66, 66},	/* cost of a divide/mod */
  1,			/* cost of movsx */
  1,			/* cost of movzx */
  16,			/* "large" insn */
  17,			/* MOVE_RATIO */
  4,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {4, 4, 4},		/* cost of storing integer registers */
  3,			/* cost of reg,reg fld/fst */
  {12, 12, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 4},		/* cost of storing fp registers */
  6,			/* cost of moving MMX register */
  {12, 12},		/* cost of loading MMX registers
			   in SImode and DImode */
  {12, 12},		/* cost of storing MMX registers
			   in SImode and DImode */
  6,			/* cost of moving SSE register */
  {12, 12, 12},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {12, 12, 12},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  8,			/* MMX or SSE register to integer */
  128,			/* size of prefetch block */
  8,			/* number of parallel prefetches */
  1,			/* Branch cost */
  6,			/* cost of FADD and FSUB insns.  */
  8,			/* cost of FMUL instruction.  */
  40,			/* cost of FDIV instruction.  */
  3,			/* cost of FABS instruction.  */
  3,			/* cost of FCHS instruction.  */
  44,			/* cost of FSQRT instruction.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;
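/* How these tables are consumed (an editorial sketch; the actual
   lookups appear in the rtx cost hooks later in this file): once
   override_options points ix86_cost at the table for the selected
   -mtune target, a cost query is a plain field access, e.g.

     int idiv_cost = ix86_cost->divide[MODE_INDEX (mode)];
     int fdiv_cost = ix86_cost->fdiv;
*/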
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
/* Branch hints were put in P4 based on simulation results.  But after
   P4 was made, no performance benefit was observed with branch hints;
   they also increase code size.  As a result, icc never generates
   branch hints.  */
const int x86_branch_hints = 0;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
const int x86_decompose_lea = m_PENT4 | m_NOCONA;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just
   lower part of scalar values in proper format leaving the upper part
   undefined.  */
const int x86_sse_split_regs = m_ATHLON_K8;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;

/* ??? Allowing interunit moves makes it all too easy for the compiler to put
   integer data in xmm registers, which results in pretty abysmal code.  */
const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;

const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
/* Some CPU cores are not able to predict more than 4 branch instructions in
   the 16-byte window.  */
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
const int x86_use_bt = m_ATHLON_K8;
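/* Editorial note on how these masks are consumed: each is tested
   against the bit of the CPU selected by -mtune.  i386.h wraps them
   in TARGET_* predicates following this existing pattern (a sketch):

     #define TUNEMASK (1 << ix86_tune)
     #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)

   so TARGET_USE_LEAVE, for instance, holds when tuning for the 386,
   K6, Athlon or K8.  */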
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
/* The "default" register map used in 32-bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
/* The "default" register map used in 64-bit mode.  */

int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to
   understand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
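/* Reading the table above (an editorial note): index by gcc regno to
   obtain the DWARF number, e.g. %ecx is gcc regno 2 and
   svr4_dbx_register_map[2] == 1, matching "1 for %ecx" in the comment.  */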
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
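/* Worked size (editorial note), assuming the 64-bit ABI values
   REGPARM_MAX == 6, UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8:
   6 * 8 + 8 * 16 = 176 bytes of register save area.  */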
/* Define the structure for the machine field in struct function.  */

struct stack_local_entry GTY(())
{
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
			)
   [saved SSE regs]	(
			) to_allocate	      <- FRAME_POINTER
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;

/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialects.  */
const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which CPU are we scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which CPU and instruction set architecture to use.  */
const char *ix86_tune_string;		/* for -mtune=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* True if SSE prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number.  */
static int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
static void output_pic_addr_const (FILE *, rtx, int);
static void put_condition_code (enum rtx_code, enum machine_mode,
				int, int, FILE *);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
						   rtx *);
static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
						   enum machine_mode);
static rtx get_thread_pointer (int);
static rtx legitimize_tls_address (rtx, enum tls_model, int);
static void get_pc_thunk_name (char [32], unsigned int);
static rtx gen_push (rtx);
static int ix86_flags_dependant (rtx, rtx, enum attr_type);
static int ix86_agi_dependant (rtx, rtx, enum attr_type);
static struct machine_function * ix86_init_machine_status (void);
static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
static int ix86_nsaved_regs (void);
static void ix86_emit_save_regs (void);
static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
static HOST_WIDE_INT ix86_GOT_alias_set (void);
static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
static rtx ix86_expand_aligntest (rtx, int);
static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
static int ix86_issue_rate (void);
static int ix86_adjust_cost (rtx, rtx, rtx, int);
static int ia32_multipass_dfa_lookahead (void);
static void ix86_init_mmx_sse_builtins (void);
static rtx x86_this_parameter (tree);
static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				 HOST_WIDE_INT, tree);
static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void x86_file_start (void);
static void ix86_reorg (void);
static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
static tree ix86_build_builtin_va_list (void);
static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					 tree, int *, int);
static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
static bool ix86_vector_mode_supported_p (enum machine_mode);

static int ix86_address_cost (rtx);
static bool ix86_cannot_force_const_mem (rtx);
static rtx ix86_delegitimize_address (rtx);

struct builtin_description;
static rtx ix86_expand_sse_comi (const struct builtin_description *,
				 tree, rtx);
static rtx ix86_expand_sse_compare (const struct builtin_description *,
				    tree, rtx);
static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_store_builtin (enum insn_code, tree);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
static int ix86_fp_comparison_cost (enum rtx_code code);
static unsigned int ix86_select_alt_pic_regnum (void);
static int ix86_save_reg (unsigned int, int);
static void ix86_compute_frame_layout (struct ix86_frame *);
static int ix86_comp_type_attributes (tree, tree);
static int ix86_function_regparm (tree, tree);
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall (tree, tree);
static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
static int ix86_value_regno (enum machine_mode);
static bool contains_128bit_aligned_vector_p (tree);
static rtx ix86_struct_value_rtx (tree, int);
static bool ix86_ms_bitfield_layout_p (tree);
static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
static int extended_reg_mentioned_1 (rtx *, void *);
static bool ix86_rtx_costs (rtx, int, int, int *);
static int min_insn_size (rtx);
static tree ix86_md_asm_clobbers (tree clobbers);
static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				    tree, bool);
static void ix86_init_builtins (void);
static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);

/* This function is only used on Solaris.  */
static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
  ATTRIBUTE_UNUSED;
/* Register class used for passing a given 64-bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF and SSEDF, which are basically the SSE class; gcc just uses
   SF or DFmode moves instead of DImode moves to avoid reformatting
   penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (the upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] = {
  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
  "sseup", "x87", "x87up", "cplx87", "no"
};

#define MAX_CLASSES 4

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;
static void init_ext_80387_constants (void);
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif
struct gcc_target targetm = TARGET_INITIALIZER;

/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options (void)
{
  int i;
  int ix86_tune_defaulted = 0;
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;	/* Processor costs */
      const int target_enable;			/* Target flags to enable.  */
      const int target_disable;			/* Target flags to disable.  */
      const int align_loop;			/* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
      {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
      {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
      {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
    };
  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;

  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	{
	  PTA_SSE = 1,
	  PTA_SSE2 = 2,
	  PTA_SSE3 = 4,
	  PTA_MMX = 8,
	  PTA_PREFETCH_SSE = 16,
	  PTA_3DNOW = 32,
	  PTA_3DNOW_A = 64,
	  PTA_64BIT = 128
	} flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE
				| PTA_SSE2},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
				| PTA_MMX | PTA_PREFETCH_SSE},
      {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
				| PTA_MMX | PTA_PREFETCH_SSE},
      {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
				| PTA_MMX | PTA_PREFETCH_SSE},
      {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
				| PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				| PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
				| PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				| PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				| PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				| PTA_3DNOW_A | PTA_SSE},
      {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
				| PTA_SSE | PTA_SSE2 },
      {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
				| PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
				| PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
				| PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
				| PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);
#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif
  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 1;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 0;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 0;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }
  if (!ix86_tune_string && ix86_arch_string)
    ix86_tune_string = ix86_arch_string;
  if (!ix86_tune_string)
    {
      ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
      ix86_tune_defaulted = 1;
    }
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (!strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model %qs not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model %<large%> not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_tune = ix86_arch;
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(target_flags_explicit & MASK_MMX))
	  target_flags |= MASK_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(target_flags_explicit & MASK_3DNOW))
	  target_flags |= MASK_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(target_flags_explicit & MASK_3DNOW_A))
	  target_flags |= MASK_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(target_flags_explicit & MASK_SSE))
	  target_flags |= MASK_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(target_flags_explicit & MASK_SSE2))
	  target_flags |= MASK_SSE2;
	if (processor_alias_table[i].flags & PTA_SSE3
	    && !(target_flags_explicit & MASK_SSE3))
	  target_flags |= MASK_SSE3;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 "
		 "instruction set");
	break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  {
	    if (ix86_tune_defaulted)
	      {
		ix86_tune_string = "x86-64";
		for (i = 0; i < pta_size; i++)
		  if (! strcmp (ix86_tune_string,
				processor_alias_table[i].name))
		    break;
		ix86_tune = processor_alias_table[i].processor;
	      }
	    else
	      error ("CPU you selected does not support x86-64 "
		     "instruction set");
	  }
	/* Intel CPUs have always interpreted SSE prefetch instructions as
	   NOPs; so, we can enable SSE prefetch instructions even when
	   -mtune (rather than -march) points us to a processor that has them.
	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
	   higher processors.  */
	if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
	  x86_prefetch_sse = true;
	break;
      }
  if (i == pta_size)
    error ("bad value (%s) for -mtune= switch", ix86_tune_string);
  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;
  target_flags |= processor_target_table[ix86_tune].target_enable;
  target_flags &= ~processor_target_table[ix86_tune].target_disable;
  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;
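  /* Illustration (editorial, not from the original sources): with
     -mregparm=3, a function such as

	 int f (int a, int b, int c);

     receives A in %eax, B in %edx and C in %ecx instead of on the
     stack.  */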
  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }
  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_tune].align_func;
    }
  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
				   ? TARGET_64BIT ? 128 : 32
				   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
	       TARGET_64BIT ? 4 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }
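  /* Example (editorial note): -mpreferred-stack-boundary=4 yields
     (1 << 4) * 8 = 128 bits, i.e. the default 16-byte alignment.  */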
  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }
  if (ix86_tls_dialect_string)
    {
      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
	ix86_tls_dialect = TLS_DIALECT_GNU;
      else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
	ix86_tls_dialect = TLS_DIALECT_SUN;
      else
	error ("bad value (%s) for -mtls-dialect= switch",
	       ix86_tls_dialect_string);
    }
  /* Keep nonleaf frame pointers.  */
  if (flag_omit_frame_pointer)
    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_unsafe_math_optimizations)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  /* Likewise, if the target doesn't have a 387, or we've specified
     software floating point, don't use 387 inline intrinsics.  */
  if (!TARGET_80387)
    target_flags |= MASK_NO_FANCY_MATH_387;
  /* Turn on SSE2 builtins for -msse3.  */
  if (TARGET_SSE3)
    target_flags |= MASK_SSE2;

  /* Turn on SSE builtins for -msse2.  */
  if (TARGET_SSE2)
    target_flags |= MASK_SSE;

  /* Turn on MMX builtins for -msse.  */
  if (TARGET_SSE)
    {
      target_flags |= MASK_MMX & ~target_flags_explicit;
      x86_prefetch_sse = true;
    }

  /* Turn on MMX builtins for 3Dnow.  */
  if (TARGET_3DNOW)
    target_flags |= MASK_MMX;
  if (TARGET_64BIT)
    {
      if (TARGET_ALIGN_DOUBLE)
	error ("-malign-double makes no sense in the 64bit mode");
      if (TARGET_RTD)
	error ("-mrtd calling convention not supported in the 64bit mode");

      /* Enable by default the SSE and MMX builtins.  Do allow the user to
	 explicitly disable any of these.  In particular, disabling SSE and
	 MMX for kernel code is extremely useful.  */
      target_flags
	|= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
	    & ~target_flags_explicit);

      if (TARGET_SSE)
	ix86_fpmath = FPMATH_SSE;
      else
	ix86_fpmath = FPMATH_387;
      /* The i386 ABI does not specify a red zone.  It still makes sense
	 to use one when the programmer takes care to keep the stack from
	 being destroyed.  */
      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
	target_flags |= MASK_NO_RED_ZONE;
    }
  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
	ix86_fpmath = FPMATH_387;
      else if (! strcmp (ix86_fpmath_string, "sse"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE;
	}
      else if (! strcmp (ix86_fpmath_string, "387,sse")
	       || ! strcmp (ix86_fpmath_string, "sse,387"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if (!TARGET_80387)
	    {
	      warning ("387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
	}
      else
	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
    }
  /* If the i387 is disabled, then do not return values in it.  */
  if (!TARGET_80387)
    target_flags &= ~MASK_FLOAT_RETURNS;

  if ((x86_accumulate_outgoing_args & TUNEMASK)
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
  /* When scheduling description is not available, disable scheduler pass
     so it won't slow down the compilation and make x87 code slower.  */
  if (!TARGET_SCHEDULE)
    flag_schedule_insns_after_reload = flag_schedule_insns = 0;
}
void
optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif

  /* The default values of these switches depend on TARGET_64BIT,
     which is not known at this moment.  Mark these values with 2 and
     let the user override them.  In case there is no command line
     option specifying them, we will set the defaults in
     override_options.  */
  if (optimize >= 1)
    flag_omit_frame_pointer = 2;
  flag_pcc_struct_return = 2;
  flag_asynchronous_unwind_tables = 2;
#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
  SUBTARGET_OPTIMIZATION_OPTIONS;
#endif
}
1611 /* Table of valid machine attributes. */
1612 const struct attribute_spec ix86_attribute_table[] =
1614 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1615 /* Stdcall attribute says callee is responsible for popping arguments
1616 if they are not variable. */
1617 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1618 /* Fastcall attribute says callee is responsible for popping arguments
1619 if they are not variable. */
1620 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1621 /* Cdecl attribute says the callee is a normal C declaration */
1622 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1623 /* Regparm attribute specifies how many integer arguments are to be
1624 passed in registers. */
1625 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1626 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1627 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
1628 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
1629 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1631 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1632 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1633 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1634 SUBTARGET_ATTRIBUTE_TABLE,
1636 { NULL, 0, 0, false, false, false, NULL }
1639 /* Decide whether we can make a sibling call to a function. DECL is the
1640 declaration of the function being targeted by the call and EXP is the
1641 CALL_EXPR representing the call. */
1644 ix86_function_ok_for_sibcall (tree decl, tree exp)
1646 /* If we are generating position-independent code, we cannot sibcall
1647 optimize any indirect call, or a direct call to a global function,
1648 as the PLT requires %ebx be live. */
1649 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1652 /* If we are returning floats on the 80387 register stack, we cannot
1653 make a sibcall from a function that doesn't return a float to a
1654 function that does or, conversely, from a function that does return
1655 a float to a function that doesn't; the necessary stack adjustment
1656 would not be executed. */
1657 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1658 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1661 /* If this call is indirect, we'll need to be able to use a call-clobbered
1662 register for the address of the target function. Make sure that all
1663 such registers are not used for passing parameters. */
1664 if (!decl && !TARGET_64BIT)
1668 /* We're looking at the CALL_EXPR, we need the type of the function. */
1669 type = TREE_OPERAND (exp, 0); /* pointer expression */
1670 type = TREE_TYPE (type); /* pointer type */
1671 type = TREE_TYPE (type); /* function type */
1673 if (ix86_function_regparm (type, NULL) >= 3)
1675 /* ??? Need to count the actual number of registers to be used,
1676 not the possible number of registers. Fix later. */
1681 /* Otherwise okay. That also includes certain types of indirect calls. */
1685 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1686 arguments as in struct attribute_spec.handler. */
1688 ix86_handle_cdecl_attribute (tree *node, tree name,
1689 tree args ATTRIBUTE_UNUSED,
1690 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1692 if (TREE_CODE (*node) != FUNCTION_TYPE
1693 && TREE_CODE (*node) != METHOD_TYPE
1694 && TREE_CODE (*node) != FIELD_DECL
1695 && TREE_CODE (*node) != TYPE_DECL)
1697 warning ("%qs attribute only applies to functions",
1698 IDENTIFIER_POINTER (name));
1699 *no_add_attrs = true;
1703 if (is_attribute_p ("fastcall", name))
1705 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1707 error ("fastcall and stdcall attributes are not compatible");
1709 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1711 error ("fastcall and regparm attributes are not compatible");
1714 else if (is_attribute_p ("stdcall", name))
1716 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1718 error ("fastcall and stdcall attributes are not compatible");
1725 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
1726 *no_add_attrs = true;
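/* Editor's illustration (a sketch, not original GCC text): the checks
   above accept and reject declarations such as

     void __attribute__((fastcall)) f (int a, int b);
         // OK: a is passed in %ecx, b in %edx (see init_cumulative_args)
     void __attribute__((fastcall, stdcall)) g (int a);
         // rejected: "fastcall and stdcall attributes are not compatible"
*/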
1732 /* Handle a "regparm" attribute;
1733 arguments as in struct attribute_spec.handler. */
1735 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1736 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1738 if (TREE_CODE (*node) != FUNCTION_TYPE
1739 && TREE_CODE (*node) != METHOD_TYPE
1740 && TREE_CODE (*node) != FIELD_DECL
1741 && TREE_CODE (*node) != TYPE_DECL)
1743 warning ("%qs attribute only applies to functions",
1744 IDENTIFIER_POINTER (name));
1745 *no_add_attrs = true;
1751 cst = TREE_VALUE (args);
1752 if (TREE_CODE (cst) != INTEGER_CST)
1754 warning ("%qs attribute requires an integer constant argument",
1755 IDENTIFIER_POINTER (name));
1756 *no_add_attrs = true;
1758 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1760 warning ("argument to %qs attribute larger than %d",
1761 IDENTIFIER_POINTER (name), REGPARM_MAX);
1762 *no_add_attrs = true;
1765 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1767 error ("fastcall and regparm attributes are not compatible");
1774 /* Return 0 if the attributes for two types are incompatible, 1 if they
1775 are compatible, and 2 if they are nearly compatible (which causes a
1776 warning to be generated). */
1779 ix86_comp_type_attributes (tree type1, tree type2)
1781 /* Check for mismatch of non-default calling convention. */
1782 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1784 if (TREE_CODE (type1) != FUNCTION_TYPE)
1787 /* Check for mismatched fastcall types */
1788 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1789 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1792 /* Check for mismatched return types (cdecl vs stdcall). */
1793 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1794 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1796 if (ix86_function_regparm (type1, NULL)
1797 != ix86_function_regparm (type2, NULL))
1802 /* Return the regparm value for a function with the indicated TYPE and DECL.
1803 DECL may be NULL when calling the function indirectly
1804 or considering a libcall. */
1807 ix86_function_regparm (tree type, tree decl)
1810 int regparm = ix86_regparm;
1811 bool user_convention = false;
1815 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1818 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1819 user_convention = true;
1822 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1825 user_convention = true;
1828 /* Use register calling convention for local functions when possible. */
1829 if (!TARGET_64BIT && !user_convention && decl
1830 && flag_unit_at_a_time && !profile_flag)
1832 struct cgraph_local_info *i = cgraph_local_info (decl);
1835 /* We can't use regparm(3) for nested functions, as these pass the
1836 static chain pointer in the third argument. */
1837 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
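/* Hedged example of the convention computed above: with

     int __attribute__((regparm (3))) add3 (int a, int b, int c);

   the three integer arguments travel in %eax, %edx and %ecx. Nested
   functions cannot use all three slots because the static chain
   pointer occupies the third one, which is why the code above refuses
   regparm(3) for them.  */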
1847 /* Return true if EAX is live at the start of the function. Used by
1848 ix86_expand_prologue to determine if we need special help before
1849 calling allocate_stack_worker. */
1852 ix86_eax_live_at_start_p (void)
1854 /* Cheat. Don't bother working forward from ix86_function_regparm
1855 to the function type to whether an actual argument is located in
1856 eax. Instead just look at cfg info, which is still close enough
1857 to correct at this point. This gives false positives for broken
1858 functions that might use uninitialized data that happens to be
1859 allocated in eax, but who cares? */
1860 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1863 /* Value is the number of bytes of arguments automatically
1864 popped when returning from a subroutine call.
1865 FUNDECL is the declaration node of the function (as a tree),
1866 FUNTYPE is the data type of the function (as a tree),
1867 or for a library call it is an identifier node for the subroutine name.
1868 SIZE is the number of bytes of arguments passed on the stack.
1870 On the 80386, the RTD insn may be used to pop them if the number
1871 of args is fixed, but if the number is variable then the caller
1872 must pop them all. RTD can't be used for library calls now
1873 because the library is compiled with the Unix compiler.
1874 Use of RTD is a selectable option, since it is incompatible with
1875 standard Unix calling sequences. If the option is not selected,
1876 the caller must always pop the args.
1878 The attribute stdcall is equivalent to RTD on a per module basis. */
1881 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1883 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1885 /* Cdecl functions override -mrtd, and never pop the stack. */
1886 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1888 /* Stdcall and fastcall functions will pop the stack if not
1889 variable args. */
1890 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1891 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1895 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1896 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1897 == void_type_node)))
1901 /* Lose any fake structure return argument if it is passed on the stack. */
1902 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1904 && !KEEP_AGGREGATE_RETURN_POINTER)
1906 int nregs = ix86_function_regparm (funtype, fundecl);
1909 return GET_MODE_SIZE (Pmode);
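/* Worked example (illustrative only): for

     int __attribute__((stdcall)) f (int a, int b);

   the pushed arguments occupy 8 bytes, so ix86_return_pops_args
   returns 8 and the callee pops them with "ret $8"; a plain cdecl
   function returns 0 here and leaves the popping to the caller.  */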
1915 /* Argument support functions. */
1917 /* Return true when register may be used to pass function parameters. */
1919 ix86_function_arg_regno_p (int regno)
1923 return (regno < REGPARM_MAX
1924 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1925 if (SSE_REGNO_P (regno) && TARGET_SSE)
1927 /* RAX is used as a hidden argument to va_arg functions. */
1930 for (i = 0; i < REGPARM_MAX; i++)
1931 if (regno == x86_64_int_parameter_registers[i])
1936 /* Return true if we do not know how to pass TYPE solely in registers. */
1939 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
1941 if (must_pass_in_stack_var_size_or_pad (mode, type))
1944 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1945 The layout_type routine is crafty and tries to trick us into passing
1946 currently unsupported vector types on the stack by using TImode. */
1947 return (!TARGET_64BIT && mode == TImode
1948 && type && TREE_CODE (type) != VECTOR_TYPE);
1951 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1952 for a call to a function whose data type is FNTYPE.
1953 For a library call, FNTYPE is 0. */
1956 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1957 tree fntype, /* tree ptr for function decl */
1958 rtx libname, /* SYMBOL_REF of library name or 0 */
1961 static CUMULATIVE_ARGS zero_cum;
1962 tree param, next_param;
1964 if (TARGET_DEBUG_ARG)
1966 fprintf (stderr, "\ninit_cumulative_args (");
1968 fprintf (stderr, "fntype code = %s, ret code = %s",
1969 tree_code_name[(int) TREE_CODE (fntype)],
1970 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1972 fprintf (stderr, "no fntype");
1975 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1980 /* Set up the number of registers to use for passing arguments. */
1982 cum->nregs = ix86_function_regparm (fntype, fndecl);
1984 cum->nregs = ix86_regparm;
1986 cum->sse_nregs = SSE_REGPARM_MAX;
1988 cum->mmx_nregs = MMX_REGPARM_MAX;
1989 cum->warn_sse = true;
1990 cum->warn_mmx = true;
1991 cum->maybe_vaarg = false;
1993 /* Use ecx and edx registers if the function has the fastcall attribute. */
1994 if (fntype && !TARGET_64BIT)
1996 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2003 /* Determine if this function has variable arguments. This is
2004 indicated by the last argument being 'void_type_node' if there
2005 are no variable arguments. If there are variable arguments, then
2006 we won't pass anything in registers in 32-bit mode. */
2008 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2010 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2011 param != 0; param = next_param)
2013 next_param = TREE_CHAIN (param);
2014 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2025 cum->maybe_vaarg = true;
2029 if ((!fntype && !libname)
2030 || (fntype && !TYPE_ARG_TYPES (fntype)))
2031 cum->maybe_vaarg = 1;
2033 if (TARGET_DEBUG_ARG)
2034 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2039 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2040 But in the case of vector types, it is some vector mode.
2042 When we have only some of our vector ISA extensions enabled, then there
2043 are some modes for which vector_mode_supported_p is false. For these
2044 modes, the generic vector support in gcc will choose some non-vector mode
2045 in order to implement the type. By computing the natural mode, we'll
2046 select the proper ABI location for the operand and not depend on whatever
2047 the middle-end decides to do with these vector types. */
2049 static enum machine_mode
2050 type_natural_mode (tree type)
2052 enum machine_mode mode = TYPE_MODE (type);
2054 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2056 HOST_WIDE_INT size = int_size_in_bytes (type);
2057 if ((size == 8 || size == 16)
2058 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2059 && TYPE_VECTOR_SUBPARTS (type) > 1)
2061 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2063 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2064 mode = MIN_MODE_VECTOR_FLOAT;
2066 mode = MIN_MODE_VECTOR_INT;
2068 /* Get the mode which has this inner mode and number of units. */
2069 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2070 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2071 && GET_MODE_INNER (mode) == innermode)
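/* Example of the mode search above (a sketch): for

     typedef float v4sf __attribute__((vector_size (16)));

   the inner mode is SFmode and TYPE_VECTOR_SUBPARTS is 4, so the loop
   settles on V4SFmode, regardless of the (possibly non-vector) mode
   the middle end picked to implement the type without SSE.  */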
2081 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2082 this may not agree with the mode that the type system has chosen for the
2083 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2084 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2087 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2092 if (orig_mode != BLKmode)
2093 tmp = gen_rtx_REG (orig_mode, regno);
2096 tmp = gen_rtx_REG (mode, regno);
2097 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2098 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2104 /* x86-64 register passing implementation. See the x86-64 ABI for details. The
2105 goal of this code is to classify each 8-byte chunk of the incoming argument
2106 by register class and assign registers accordingly. */
2108 /* Return the union class of CLASS1 and CLASS2.
2109 See the x86-64 PS ABI for details. */
2111 static enum x86_64_reg_class
2112 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2114 /* Rule #1: If both classes are equal, this is the resulting class. */
2115 if (class1 == class2)
2118 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2119 the other class. */
2120 if (class1 == X86_64_NO_CLASS)
2122 if (class2 == X86_64_NO_CLASS)
2125 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2126 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2127 return X86_64_MEMORY_CLASS;
2129 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2130 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2131 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2132 return X86_64_INTEGERSI_CLASS;
2133 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2134 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2135 return X86_64_INTEGER_CLASS;
2137 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2138 MEMORY is used. */
2139 if (class1 == X86_64_X87_CLASS
2140 || class1 == X86_64_X87UP_CLASS
2141 || class1 == X86_64_COMPLEX_X87_CLASS
2142 || class2 == X86_64_X87_CLASS
2143 || class2 == X86_64_X87UP_CLASS
2144 || class2 == X86_64_COMPLEX_X87_CLASS)
2145 return X86_64_MEMORY_CLASS;
2147 /* Rule #6: Otherwise class SSE is used. */
2148 return X86_64_SSE_CLASS;
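/* Worked case for the rules above (illustrative): for a union such as

     union u { int i; float f; };    // a single eightbyte

   the int member classifies as X86_64_INTEGERSI_CLASS and the float
   member as X86_64_SSESF_CLASS; rule #4 merges them to
   X86_64_INTEGERSI_CLASS, so the union is passed in an integer
   register.  */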
2151 /* Classify the argument of type TYPE and mode MODE.
2152 CLASSES will be filled by the register class used to pass each word
2153 of the operand. The number of words is returned. In case the parameter
2154 should be passed in memory, 0 is returned. As a special case for zero
2155 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2157 BIT_OFFSET is used internally for handling records; it specifies the
2158 offset in bits modulo 256, to avoid overflow cases.
2160 See the x86-64 PS ABI for details.
2164 classify_argument (enum machine_mode mode, tree type,
2165 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2167 HOST_WIDE_INT bytes =
2168 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2169 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2171 /* Variable sized entities are always passed/returned in memory. */
2175 if (mode != VOIDmode
2176 && targetm.calls.must_pass_in_stack (mode, type))
2179 if (type && AGGREGATE_TYPE_P (type))
2183 enum x86_64_reg_class subclasses[MAX_CLASSES];
2185 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2189 for (i = 0; i < words; i++)
2190 classes[i] = X86_64_NO_CLASS;
2192 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2193 signal the memory class, so handle this as a special case. */
2196 classes[0] = X86_64_NO_CLASS;
2200 /* Classify each field of record and merge classes. */
2201 if (TREE_CODE (type) == RECORD_TYPE)
2203 /* For classes first merge in the field of the subclasses. */
2204 if (TYPE_BINFO (type))
2206 tree binfo, base_binfo;
2209 for (binfo = TYPE_BINFO (type), basenum = 0;
2210 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2213 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2214 tree type = BINFO_TYPE (base_binfo);
2216 num = classify_argument (TYPE_MODE (type),
2218 (offset + bit_offset) % 256);
2221 for (i = 0; i < num; i++)
2223 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2225 merge_classes (subclasses[i], classes[i + pos]);
2229 /* And now merge the fields of the structure. */
2230 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2232 if (TREE_CODE (field) == FIELD_DECL)
2236 /* Bitfields are always classified as integer. Handle them
2237 early, since later code would consider them to be
2238 misaligned integers. */
2239 if (DECL_BIT_FIELD (field))
2241 for (i = int_bit_position (field) / 8 / 8;
2242 i < (int_bit_position (field)
2243 + tree_low_cst (DECL_SIZE (field), 0)
2246 merge_classes (X86_64_INTEGER_CLASS,
2251 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2252 TREE_TYPE (field), subclasses,
2253 (int_bit_position (field)
2254 + bit_offset) % 256);
2257 for (i = 0; i < num; i++)
2260 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2262 merge_classes (subclasses[i], classes[i + pos]);
2268 /* Arrays are handled as small records. */
2269 else if (TREE_CODE (type) == ARRAY_TYPE)
2272 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2273 TREE_TYPE (type), subclasses, bit_offset);
2277 /* The partial classes are now full classes. */
2278 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2279 subclasses[0] = X86_64_SSE_CLASS;
2280 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2281 subclasses[0] = X86_64_INTEGER_CLASS;
2283 for (i = 0; i < words; i++)
2284 classes[i] = subclasses[i % num];
2286 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2287 else if (TREE_CODE (type) == UNION_TYPE
2288 || TREE_CODE (type) == QUAL_UNION_TYPE)
2290 /* For classes first merge in the field of the subclasses. */
2291 if (TYPE_BINFO (type))
2293 tree binfo, base_binfo;
2296 for (binfo = TYPE_BINFO (type), basenum = 0;
2297 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2300 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2301 tree type = BINFO_TYPE (base_binfo);
2303 num = classify_argument (TYPE_MODE (type),
2305 (offset + (bit_offset % 64)) % 256);
2308 for (i = 0; i < num; i++)
2310 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2312 merge_classes (subclasses[i], classes[i + pos]);
2316 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2318 if (TREE_CODE (field) == FIELD_DECL)
2321 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2322 TREE_TYPE (field), subclasses,
2326 for (i = 0; i < num; i++)
2327 classes[i] = merge_classes (subclasses[i], classes[i]);
2334 /* Final merger cleanup. */
2335 for (i = 0; i < words; i++)
2337 /* If one class is MEMORY, everything should be passed in
2338 memory. */
2339 if (classes[i] == X86_64_MEMORY_CLASS)
2342 /* The X86_64_SSEUP_CLASS should always be preceded by
2343 X86_64_SSE_CLASS. */
2344 if (classes[i] == X86_64_SSEUP_CLASS
2345 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2346 classes[i] = X86_64_SSE_CLASS;
2348 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2349 if (classes[i] == X86_64_X87UP_CLASS
2350 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2351 classes[i] = X86_64_SSE_CLASS;
2356 /* Compute the alignment needed. We align all types to their natural boundaries,
2357 with the exception of XFmode, which is aligned to 64 bits. */
2358 if (mode != VOIDmode && mode != BLKmode)
2360 int mode_alignment = GET_MODE_BITSIZE (mode);
2363 mode_alignment = 128;
2364 else if (mode == XCmode)
2365 mode_alignment = 256;
2366 if (COMPLEX_MODE_P (mode))
2367 mode_alignment /= 2;
2368 /* Misaligned fields are always returned in memory. */
2369 if (bit_offset % mode_alignment)
2373 /* For V1xx modes, just use the base mode. */
2374 if (VECTOR_MODE_P (mode)
2375 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2376 mode = GET_MODE_INNER (mode);
2378 /* Classification of atomic types. */
2388 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2389 classes[0] = X86_64_INTEGERSI_CLASS;
2391 classes[0] = X86_64_INTEGER_CLASS;
2395 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2400 if (!(bit_offset % 64))
2401 classes[0] = X86_64_SSESF_CLASS;
2403 classes[0] = X86_64_SSE_CLASS;
2406 classes[0] = X86_64_SSEDF_CLASS;
2409 classes[0] = X86_64_X87_CLASS;
2410 classes[1] = X86_64_X87UP_CLASS;
2413 classes[0] = X86_64_SSE_CLASS;
2414 classes[1] = X86_64_SSEUP_CLASS;
2417 classes[0] = X86_64_SSE_CLASS;
2420 classes[0] = X86_64_SSEDF_CLASS;
2421 classes[1] = X86_64_SSEDF_CLASS;
2424 classes[0] = X86_64_COMPLEX_X87_CLASS;
2427 /* These modes are larger than 16 bytes. */
2435 classes[0] = X86_64_SSE_CLASS;
2436 classes[1] = X86_64_SSEUP_CLASS;
2442 classes[0] = X86_64_SSE_CLASS;
2448 if (VECTOR_MODE_P (mode))
2452 if (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT)
2454 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2455 classes[0] = X86_64_INTEGERSI_CLASS;
2457 classes[0] = X86_64_INTEGER_CLASS;
2458 classes[1] = X86_64_INTEGER_CLASS;
2459 return 1 + (bytes > 8);
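/* End-to-end example (editor's sketch): classifying

     struct s { double d; long l; };   // 16 bytes, two eightbytes

   gives classes[0] = X86_64_SSEDF_CLASS and classes[1] =
   X86_64_INTEGER_CLASS, so the struct is split between an SSE
   register and a general-purpose register. Anything larger than 16
   bytes, or containing an x87 field, goes to memory instead.  */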
2466 /* Examine the argument and return the number of registers required in each
2467 class. Return 0 iff the parameter should be passed in memory. */
2469 examine_argument (enum machine_mode mode, tree type, int in_return,
2470 int *int_nregs, int *sse_nregs)
2472 enum x86_64_reg_class class[MAX_CLASSES];
2473 int n = classify_argument (mode, type, class, 0);
2479 for (n--; n >= 0; n--)
2482 case X86_64_INTEGER_CLASS:
2483 case X86_64_INTEGERSI_CLASS:
2486 case X86_64_SSE_CLASS:
2487 case X86_64_SSESF_CLASS:
2488 case X86_64_SSEDF_CLASS:
2491 case X86_64_NO_CLASS:
2492 case X86_64_SSEUP_CLASS:
2494 case X86_64_X87_CLASS:
2495 case X86_64_X87UP_CLASS:
2499 case X86_64_COMPLEX_X87_CLASS:
2500 return in_return ? 2 : 0;
2501 case X86_64_MEMORY_CLASS:
2507 /* Construct a container for the argument as used by the GCC interface. See
2508 FUNCTION_ARG for the detailed description. */
2511 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
2512 tree type, int in_return, int nintregs, int nsseregs,
2513 const int *intreg, int sse_regno)
2515 enum machine_mode tmpmode;
2517 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2518 enum x86_64_reg_class class[MAX_CLASSES];
2522 int needed_sseregs, needed_intregs;
2523 rtx exp[MAX_CLASSES];
2526 n = classify_argument (mode, type, class, 0);
2527 if (TARGET_DEBUG_ARG)
2530 fprintf (stderr, "Memory class\n");
2533 fprintf (stderr, "Classes:");
2534 for (i = 0; i < n; i++)
2536 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2538 fprintf (stderr, "\n");
2543 if (!examine_argument (mode, type, in_return, &needed_intregs,
2546 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2549 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2550 some less clueful developer tries to use floating-point anyway. */
2551 if (needed_sseregs && !TARGET_SSE)
2553 static bool issued_error;
2556 issued_error = true;
2558 error ("SSE register return with SSE disabled");
2560 error ("SSE register argument with SSE disabled");
2565 /* First construct the simple cases. Avoid SCmode, since we want to use a
2566 single register to pass this type. */
2567 if (n == 1 && mode != SCmode)
2570 case X86_64_INTEGER_CLASS:
2571 case X86_64_INTEGERSI_CLASS:
2572 return gen_rtx_REG (mode, intreg[0]);
2573 case X86_64_SSE_CLASS:
2574 case X86_64_SSESF_CLASS:
2575 case X86_64_SSEDF_CLASS:
2576 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
2577 case X86_64_X87_CLASS:
2578 case X86_64_COMPLEX_X87_CLASS:
2579 return gen_rtx_REG (mode, FIRST_STACK_REG);
2580 case X86_64_NO_CLASS:
2581 /* Zero sized array, struct or class. */
2586 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2588 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2590 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2591 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2592 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2593 && class[1] == X86_64_INTEGER_CLASS
2594 && (mode == CDImode || mode == TImode || mode == TFmode)
2595 && intreg[0] + 1 == intreg[1])
2596 return gen_rtx_REG (mode, intreg[0]);
2598 /* Otherwise figure out the entries of the PARALLEL. */
2599 for (i = 0; i < n; i++)
2603 case X86_64_NO_CLASS:
2605 case X86_64_INTEGER_CLASS:
2606 case X86_64_INTEGERSI_CLASS:
2607 /* Merge TImodes on aligned occasions here too. */
2608 if (i * 8 + 8 > bytes)
2609 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2610 else if (class[i] == X86_64_INTEGERSI_CLASS)
2614 /* We've requested a size (such as 24 bits) with no integer mode. Use DImode. */
2615 if (tmpmode == BLKmode)
2617 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2618 gen_rtx_REG (tmpmode, *intreg),
2622 case X86_64_SSESF_CLASS:
2623 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2624 gen_rtx_REG (SFmode,
2625 SSE_REGNO (sse_regno)),
2629 case X86_64_SSEDF_CLASS:
2630 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2631 gen_rtx_REG (DFmode,
2632 SSE_REGNO (sse_regno)),
2636 case X86_64_SSE_CLASS:
2637 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2641 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2642 gen_rtx_REG (tmpmode,
2643 SSE_REGNO (sse_regno)),
2645 if (tmpmode == TImode)
2653 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2654 for (i = 0; i < nexps; i++)
2655 XVECEXP (ret, 0, i) = exp [i];
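/* Continuing the struct { double d; long l; } example (a sketch):
   construct_container builds a PARALLEL holding a DFmode SSE register
   at byte offset 0 and a DImode integer register at byte offset 8,
   e.g. %xmm0 and %rdi for the first such argument.  */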
2659 /* Update the data in CUM to advance over an argument
2660 of mode MODE and data type TYPE.
2661 (TYPE is null for libcalls where that information may not be available.) */
2664 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2665 tree type, int named)
2668 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2669 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2672 mode = type_natural_mode (type);
2674 if (TARGET_DEBUG_ARG)
2675 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
2676 "mode=%s, named=%d)\n\n",
2677 words, cum->words, cum->nregs, cum->sse_nregs,
2678 GET_MODE_NAME (mode), named);
2682 int int_nregs, sse_nregs;
2683 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2684 cum->words += words;
2685 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2687 cum->nregs -= int_nregs;
2688 cum->sse_nregs -= sse_nregs;
2689 cum->regno += int_nregs;
2690 cum->sse_regno += sse_nregs;
2693 cum->words += words;
2711 cum->words += words;
2712 cum->nregs -= words;
2713 cum->regno += words;
2715 if (cum->nregs <= 0)
2729 if (!type || !AGGREGATE_TYPE_P (type))
2731 cum->sse_words += words;
2732 cum->sse_nregs -= 1;
2733 cum->sse_regno += 1;
2734 if (cum->sse_nregs <= 0)
2746 if (!type || !AGGREGATE_TYPE_P (type))
2748 cum->mmx_words += words;
2749 cum->mmx_nregs -= 1;
2750 cum->mmx_regno += 1;
2751 if (cum->mmx_nregs <= 0)
2762 /* Define where to put the arguments to a function.
2763 Value is zero to push the argument on the stack,
2764 or a hard register in which to store the argument.
2766 MODE is the argument's machine mode.
2767 TYPE is the data type of the argument (as a tree).
2768 This is null for libcalls where that information may
2770 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2771 the preceding args and about the function being called.
2772 NAMED is nonzero if this argument is a named parameter
2773 (otherwise it is an extra parameter matching an ellipsis). */
2776 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
2777 tree type, int named)
2779 enum machine_mode mode = orig_mode;
2782 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2783 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2784 static bool warnedsse, warnedmmx;
2786 /* To simplify the code below, represent vector types with a vector mode
2787 even if MMX/SSE are not active. */
2788 if (type && TREE_CODE (type) == VECTOR_TYPE)
2789 mode = type_natural_mode (type);
2791 /* Handle a hidden AL argument containing the number of registers for varargs
2792 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid any
2793 AL settings. */
2794 if (mode == VOIDmode)
2797 return GEN_INT (cum->maybe_vaarg
2798 ? (cum->sse_nregs < 0
2806 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
2808 &x86_64_int_parameter_registers [cum->regno],
2813 /* For now, pass fp/complex values on the stack. */
2825 if (words <= cum->nregs)
2827 int regno = cum->regno;
2829 /* Fastcall allocates the first two DWORD (SImode) or
2830 smaller arguments to ECX and EDX. */
2833 if (mode == BLKmode || mode == DImode)
2836 /* ECX not EAX is the first allocated register. */
2840 ret = gen_rtx_REG (mode, regno);
2850 if (!type || !AGGREGATE_TYPE_P (type))
2852 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2855 warning ("SSE vector argument without SSE enabled "
2859 ret = gen_reg_or_parallel (mode, orig_mode,
2860 cum->sse_regno + FIRST_SSE_REG);
2867 if (!type || !AGGREGATE_TYPE_P (type))
2869 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2872 warning ("MMX vector argument without MMX enabled "
2876 ret = gen_reg_or_parallel (mode, orig_mode,
2877 cum->mmx_regno + FIRST_MMX_REG);
2882 if (TARGET_DEBUG_ARG)
2885 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2886 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2889 print_simple_rtl (stderr, ret);
2891 fprintf (stderr, ", stack");
2893 fprintf (stderr, " )\n");
2899 /* A C expression that indicates when an argument must be passed by
2900 reference. If nonzero for an argument, a copy of that argument is
2901 made in memory and a pointer to the argument is passed instead of
2902 the argument itself. The pointer is passed in whatever way is
2903 appropriate for passing a pointer to that type. */
2906 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2907 enum machine_mode mode ATTRIBUTE_UNUSED,
2908 tree type, bool named ATTRIBUTE_UNUSED)
2913 if (type && int_size_in_bytes (type) == -1)
2915 if (TARGET_DEBUG_ARG)
2916 fprintf (stderr, "function_arg_pass_by_reference\n");
2923 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
2924 passing ABI. Only called if TARGET_SSE. */
2926 contains_128bit_aligned_vector_p (tree type)
2928 enum machine_mode mode = TYPE_MODE (type);
2929 if (SSE_REG_MODE_P (mode)
2930 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2932 if (TYPE_ALIGN (type) < 128)
2935 if (AGGREGATE_TYPE_P (type))
2937 /* Walk the aggregates recursively. */
2938 if (TREE_CODE (type) == RECORD_TYPE
2939 || TREE_CODE (type) == UNION_TYPE
2940 || TREE_CODE (type) == QUAL_UNION_TYPE)
2944 if (TYPE_BINFO (type))
2946 tree binfo, base_binfo;
2949 for (binfo = TYPE_BINFO (type), i = 0;
2950 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2951 if (contains_128bit_aligned_vector_p (BINFO_TYPE (base_binfo)))
2954 /* And now merge the fields of the structure. */
2955 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2957 if (TREE_CODE (field) == FIELD_DECL
2958 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2962 /* Just in case some languages pass arrays by value. */
2963 else if (TREE_CODE (type) == ARRAY_TYPE)
2965 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2974 /* Gives the alignment boundary, in bits, of an argument with the
2975 specified mode and type. */
2978 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2982 align = TYPE_ALIGN (type);
2984 align = GET_MODE_ALIGNMENT (mode);
2985 if (align < PARM_BOUNDARY)
2986 align = PARM_BOUNDARY;
2989 /* The i386 ABI defines all arguments to be 4 byte aligned. We have to
2990 make an exception for SSE modes since these require 128 bit
2991 alignment.
2993 The handling here differs from field_alignment. ICC aligns MMX
2994 arguments to 4 byte boundaries, while structure fields are aligned
2995 to 8 byte boundaries. */
2997 align = PARM_BOUNDARY;
3000 if (!SSE_REG_MODE_P (mode))
3001 align = PARM_BOUNDARY;
3005 if (!contains_128bit_aligned_vector_p (type))
3006 align = PARM_BOUNDARY;
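/* Example (illustrative): in 32-bit mode an __m128 argument is
   aligned here to 128 bits as SSE requires, whereas a double or an
   MMX __m64 argument stays on the 4-byte PARM_BOUNDARY, matching the
   ICC behavior described above.  */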
3014 /* Return true if N is a possible register number for a function value. */
3016 ix86_function_value_regno_p (int regno)
3020 return ((regno) == 0
3021 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3022 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
3024 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
3025 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
3026 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
3029 /* Define how to find the value returned by a function.
3030 VALTYPE is the data type of the value (as a tree).
3031 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3032 otherwise, FUNC is 0. */
3034 ix86_function_value (tree valtype)
3036 enum machine_mode natmode = type_natural_mode (valtype);
3040 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3041 1, REGPARM_MAX, SSE_REGPARM_MAX,
3042 x86_64_int_return_registers, 0);
3043 /* For zero sized structures, construct_container returns NULL, but we
3044 need to keep the rest of the compiler happy by returning a meaningful value. */
3046 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3050 return gen_rtx_REG (TYPE_MODE (valtype), ix86_value_regno (natmode));
3053 /* Return true iff TYPE is returned in memory. */
3055 ix86_return_in_memory (tree type)
3057 int needed_intregs, needed_sseregs, size;
3058 enum machine_mode mode = type_natural_mode (type);
3061 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3063 if (mode == BLKmode)
3066 size = int_size_in_bytes (type);
3068 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3071 if (VECTOR_MODE_P (mode) || mode == TImode)
3073 /* User-created vectors small enough to fit in EAX. */
3077 /* MMX/3dNow values are returned on the stack, since we've
3078 got to EMMS/FEMMS before returning. */
3082 /* SSE values are returned in XMM0, except when it doesn't exist. */
3084 return (TARGET_SSE ? 0 : 1);
3095 /* When returning SSE vector types, we have a choice of either
3096 (1) being ABI incompatible with a -march switch, or
3097 (2) generating an error.
3098 Given no good solution, I think the safest thing is one warning.
3099 The user won't be able to use -Werror, but....
3101 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3102 called in response to actually generating a caller or callee that
3103 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3104 via aggregate_value_p for general type probing from tree-ssa. */
3107 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3111 if (!TARGET_SSE && type && !warned)
3113 /* Look at the return type of the function, not the function type. */
3114 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3117 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3120 warning ("SSE vector return without SSE enabled changes the ABI");
3127 /* Define how to find the value returned by a library function
3128 assuming the value has mode MODE. */
3130 ix86_libcall_value (enum machine_mode mode)
3141 return gen_rtx_REG (mode, FIRST_SSE_REG);
3144 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3148 return gen_rtx_REG (mode, 0);
3152 return gen_rtx_REG (mode, ix86_value_regno (mode));
3155 /* Given a mode, return the register to use for a return value. */
3158 ix86_value_regno (enum machine_mode mode)
3160 /* Floating point return values in %st(0). */
3161 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
3162 return FIRST_FLOAT_REG;
3163 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3164 we prevent this case when SSE is not available. */
3165 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3166 return FIRST_SSE_REG;
3167 /* Everything else in %eax. */
3171 /* Create the va_list data type. */
3174 ix86_build_builtin_va_list (void)
3176 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3178 /* For i386 we use a plain pointer to the argument area. */
3180 return build_pointer_type (char_type_node);
3182 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3183 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3185 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3186 unsigned_type_node);
3187 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3188 unsigned_type_node);
3189 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3191 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3194 DECL_FIELD_CONTEXT (f_gpr) = record;
3195 DECL_FIELD_CONTEXT (f_fpr) = record;
3196 DECL_FIELD_CONTEXT (f_ovf) = record;
3197 DECL_FIELD_CONTEXT (f_sav) = record;
3199 TREE_CHAIN (record) = type_decl;
3200 TYPE_NAME (record) = type_decl;
3201 TYPE_FIELDS (record) = f_gpr;
3202 TREE_CHAIN (f_gpr) = f_fpr;
3203 TREE_CHAIN (f_fpr) = f_ovf;
3204 TREE_CHAIN (f_ovf) = f_sav;
3206 layout_type (record);
3208 /* The correct type is an array type of one element. */
3209 return build_array_type (record, build_index_type (size_zero_node));
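/* For reference, the record built above matches the va_list layout
   given in the x86-64 psABI:

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];
*/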
3212 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3215 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3216 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3219 CUMULATIVE_ARGS next_cum;
3220 rtx save_area = NULL_RTX, mem;
3233 /* Indicate that we need to allocate space on the stack for the varargs save area. */
3234 ix86_save_varrargs_registers = 1;
3236 cfun->stack_alignment_needed = 128;
3238 fntype = TREE_TYPE (current_function_decl);
3239 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3240 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3241 != void_type_node));
3243 /* For varargs, we do not want to skip the dummy va_dcl argument.
3244 For stdargs, we do want to skip the last named argument. */
3247 function_arg_advance (&next_cum, mode, type, 1);
3250 save_area = frame_pointer_rtx;
3252 set = get_varargs_alias_set ();
3254 for (i = next_cum.regno; i < ix86_regparm; i++)
3256 mem = gen_rtx_MEM (Pmode,
3257 plus_constant (save_area, i * UNITS_PER_WORD));
3258 set_mem_alias_set (mem, set);
3259 emit_move_insn (mem, gen_rtx_REG (Pmode,
3260 x86_64_int_parameter_registers[i]));
3263 if (next_cum.sse_nregs)
3265 /* Now emit code to save SSE registers. The AX parameter contains the
3266 number of SSE parameter registers used to call this function. We use the
3267 sse_prologue_save insn template, which produces a computed jump across
3268 the SSE saves. We need some preparation work to get this working. */
3270 label = gen_label_rtx ();
3271 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3273 /* Compute the address to jump to:
3274 label - 5*eax + nnamed_sse_arguments*5 */
3275 tmp_reg = gen_reg_rtx (Pmode);
3276 nsse_reg = gen_reg_rtx (Pmode);
3277 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3278 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3279 gen_rtx_MULT (Pmode, nsse_reg,
3281 if (next_cum.sse_regno)
3284 gen_rtx_CONST (DImode,
3285 gen_rtx_PLUS (DImode,
3287 GEN_INT (next_cum.sse_regno * 4))));
3289 emit_move_insn (nsse_reg, label_ref);
3290 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3292 /* Compute the address of the memory block we save into. We always use a
3293 pointer pointing 127 bytes after the first byte to store; this is needed
3294 to keep instruction sizes limited to 4 bytes. */
3295 tmp_reg = gen_reg_rtx (Pmode);
3296 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3297 plus_constant (save_area,
3298 8 * REGPARM_MAX + 127)));
3299 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3300 set_mem_alias_set (mem, set);
3301 set_mem_align (mem, BITS_PER_WORD);
3303 /* And finally do the dirty job! */
3304 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3305 GEN_INT (next_cum.sse_regno), label));
3310 /* Implement va_start. */
3313 ix86_va_start (tree valist, rtx nextarg)
3315 HOST_WIDE_INT words, n_gpr, n_fpr;
3316 tree f_gpr, f_fpr, f_ovf, f_sav;
3317 tree gpr, fpr, ovf, sav, t;
3319 /* Only the 64-bit target needs something special. */
3322 std_expand_builtin_va_start (valist, nextarg);
3326 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3327 f_fpr = TREE_CHAIN (f_gpr);
3328 f_ovf = TREE_CHAIN (f_fpr);
3329 f_sav = TREE_CHAIN (f_ovf);
3331 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3332 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3333 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3334 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3335 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3337 /* Count number of gp and fp argument registers used. */
3338 words = current_function_args_info.words;
3339 n_gpr = current_function_args_info.regno;
3340 n_fpr = current_function_args_info.sse_regno;
3342 if (TARGET_DEBUG_ARG)
3343 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3344 (int) words, (int) n_gpr, (int) n_fpr);
3346 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3347 build_int_cst (NULL_TREE, n_gpr * 8));
3348 TREE_SIDE_EFFECTS (t) = 1;
3349 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3351 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3352 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
3353 TREE_SIDE_EFFECTS (t) = 1;
3354 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3356 /* Find the overflow area. */
3357 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3359 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3360 build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
3361 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3362 TREE_SIDE_EFFECTS (t) = 1;
3363 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3365 /* Find the register save area.
3366 The prologue of the function saves it right above the stack frame. */
3367 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3368 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3369 TREE_SIDE_EFFECTS (t) = 1;
3370 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3373 /* Implement va_arg. */
3376 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3378 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3379 tree f_gpr, f_fpr, f_ovf, f_sav;
3380 tree gpr, fpr, ovf, sav, t;
3382 tree lab_false, lab_over = NULL_TREE;
3387 enum machine_mode nat_mode;
3389 /* Only the 64-bit target needs something special. */
3391 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3393 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3394 f_fpr = TREE_CHAIN (f_gpr);
3395 f_ovf = TREE_CHAIN (f_fpr);
3396 f_sav = TREE_CHAIN (f_ovf);
3398 valist = build_va_arg_indirect_ref (valist);
3399 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3400 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3401 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3402 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3404 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3406 type = build_pointer_type (type);
3407 size = int_size_in_bytes (type);
3408 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3410 nat_mode = type_natural_mode (type);
3411 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
3412 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3414 /* Pull the value out of the saved registers. */
3416 addr = create_tmp_var (ptr_type_node, "addr");
3417 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3421 int needed_intregs, needed_sseregs;
3423 tree int_addr, sse_addr;
3425 lab_false = create_artificial_label ();
3426 lab_over = create_artificial_label ();
3428 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
3430 need_temp = (!REG_P (container)
3431 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3432 || TYPE_ALIGN (type) > 128));
3434 /* In case we are passing a structure, verify that it is a consecutive block
3435 in the register save area. If not, we need to do moves. */
3436 if (!need_temp && !REG_P (container))
3438 /* Verify that all registers are strictly consecutive */
3439 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3443 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3445 rtx slot = XVECEXP (container, 0, i);
3446 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3447 || INTVAL (XEXP (slot, 1)) != i * 16)
3455 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3457 rtx slot = XVECEXP (container, 0, i);
3458 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3459 || INTVAL (XEXP (slot, 1)) != i * 8)
3471 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3472 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3473 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3474 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3477 /* First ensure that we fit completely in registers. */
3480 t = build_int_cst (TREE_TYPE (gpr),
3481 (REGPARM_MAX - needed_intregs + 1) * 8);
3482 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3483 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3484 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3485 gimplify_and_add (t, pre_p);
3489 t = build_int_cst (TREE_TYPE (fpr),
3490 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3492 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3493 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3494 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3495 gimplify_and_add (t, pre_p);
3498 /* Compute the index to the start of the area used for integer regs. */
3501 /* int_addr = gpr + sav; */
3502 t = fold_convert (ptr_type_node, gpr);
3503 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3504 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3505 gimplify_and_add (t, pre_p);
3509 /* sse_addr = fpr + sav; */
3510 t = fold_convert (ptr_type_node, fpr);
3511 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3512 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3513 gimplify_and_add (t, pre_p);
3518 tree temp = create_tmp_var (type, "va_arg_tmp");
3521 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3522 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3523 gimplify_and_add (t, pre_p);
3525 for (i = 0; i < XVECLEN (container, 0); i++)
3527 rtx slot = XVECEXP (container, 0, i);
3528 rtx reg = XEXP (slot, 0);
3529 enum machine_mode mode = GET_MODE (reg);
3530 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3531 tree addr_type = build_pointer_type (piece_type);
3534 tree dest_addr, dest;
3536 if (SSE_REGNO_P (REGNO (reg)))
3538 src_addr = sse_addr;
3539 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3543 src_addr = int_addr;
3544 src_offset = REGNO (reg) * 8;
3546 src_addr = fold_convert (addr_type, src_addr);
3547 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3548 size_int (src_offset)));
3549 src = build_va_arg_indirect_ref (src_addr);
3551 dest_addr = fold_convert (addr_type, addr);
3552 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3553 size_int (INTVAL (XEXP (slot, 1)))));
3554 dest = build_va_arg_indirect_ref (dest_addr);
3556 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3557 gimplify_and_add (t, pre_p);
3563 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3564 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
3565 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3566 gimplify_and_add (t, pre_p);
3570 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3571 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
3572 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3573 gimplify_and_add (t, pre_p);
3576 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3577 gimplify_and_add (t, pre_p);
3579 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3580 append_to_statement_list (t, pre_p);
3583 /* ... otherwise out of the overflow area. */
3585 /* Care for on-stack alignment if needed. */
3586 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3590 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3591 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
3592 build_int_cst (TREE_TYPE (ovf), align - 1));
3593 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3594 build_int_cst (TREE_TYPE (t), -align));
3596 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3598 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3599 gimplify_and_add (t2, pre_p);
3601 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3602 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
3603 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3604 gimplify_and_add (t, pre_p);
3608 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3609 append_to_statement_list (t, pre_p);
3612 ptrtype = build_pointer_type (type);
3613 addr = fold_convert (ptrtype, addr);
3616 addr = build_va_arg_indirect_ref (addr);
3617 return build_va_arg_indirect_ref (addr);
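/* Rough C-level sketch of what the GIMPLE built above does for
   va_arg (ap, int) (illustrative, with the bound computed from
   REGPARM_MAX as in the check above):

     if (ap->gp_offset < 48)           // 6 GPRs * 8 bytes
       {
         addr = ap->reg_save_area + ap->gp_offset;
         ap->gp_offset += 8;
       }
     else
       {
         addr = ap->overflow_arg_area;
         ap->overflow_arg_area += 8;
       }
     result = *(int *) addr;
*/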
3620 /* Return nonzero if OPNUM's MEM should be matched
3621 in movabs* patterns. */
3624 ix86_check_movabs (rtx insn, int opnum)
3628 set = PATTERN (insn);
3629 if (GET_CODE (set) == PARALLEL)
3630 set = XVECEXP (set, 0, 0);
3631 if (GET_CODE (set) != SET)
3633 mem = XEXP (set, opnum);
3634 while (GET_CODE (mem) == SUBREG)
3635 mem = SUBREG_REG (mem);
3636 if (GET_CODE (mem) != MEM)
3638 return (volatile_ok || !MEM_VOLATILE_P (mem));
3641 /* Initialize the table of extra 80387 mathematical constants. */
3644 init_ext_80387_constants (void)
3646 static const char * cst[5] =
3648 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
3649 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
3650 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
3651 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
3652 "3.1415926535897932385128089594061862044", /* 4: fldpi */
3656 for (i = 0; i < 5; i++)
3658 real_from_string (&ext_80387_constants_table[i], cst[i]);
3659 /* Ensure each constant is rounded to XFmode precision. */
3660 real_convert (&ext_80387_constants_table[i],
3661 XFmode, &ext_80387_constants_table[i]);
3664 ext_80387_constants_init = 1;
3667 /* Return true if the constant is something that can be loaded with
3668 a special instruction. */
3671 standard_80387_constant_p (rtx x)
3673 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3676 if (x == CONST0_RTX (GET_MODE (x)))
3678 if (x == CONST1_RTX (GET_MODE (x)))
3681 /* For XFmode constants, try to find a special 80387 instruction when
3682 optimizing for size or on those CPUs that benefit from them. */
3683 if (GET_MODE (x) == XFmode
3684 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
3689 if (! ext_80387_constants_init)
3690 init_ext_80387_constants ();
3692 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3693 for (i = 0; i < 5; i++)
3694 if (real_identical (&r, &ext_80387_constants_table[i]))
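/* Usage sketch: the index returned here feeds
   standard_80387_constant_opcode below, e.g. 0.0 is loadable with
   "fldz", 1.0 with "fld1", and the XFmode value of pi (entry 4 in
   the table above) with "fldpi".  */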
3701 /* Return the opcode of the special instruction to be used to load
3702 the constant X. */
3705 standard_80387_constant_opcode (rtx x)
3707 switch (standard_80387_constant_p (x))
3727 /* Return the CONST_DOUBLE representing the 80387 constant that is
3728 loaded by the specified special instruction. The argument IDX
3729 matches the return value from standard_80387_constant_p. */
3732 standard_80387_constant_rtx (int idx)
3736 if (! ext_80387_constants_init)
3737 init_ext_80387_constants ();
3753 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
3757 /* Return 1 if X is an FP constant we can load into an SSE register
3758 without using memory. */
3760 standard_sse_constant_p (rtx x)
3762 if (x == const0_rtx)
3764 return (x == CONST0_RTX (GET_MODE (x)));
3767 /* Return 1 if OP contains a symbol reference. */
3770 symbolic_reference_mentioned_p (rtx op)
3775 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3778 fmt = GET_RTX_FORMAT (GET_CODE (op));
3779 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3785 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3786 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3790 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3797 /* Return 1 if it is appropriate to emit `ret' instructions in the
3798 body of a function. Do this only if the epilogue is simple, needing a
3799 couple of insns. Prior to reloading, we can't tell how many registers
3800 must be saved, so return 0 then. Return 0 if there is no frame
3801 marker to de-allocate. */
3804 ix86_can_use_return_insn_p (void)
3806 struct ix86_frame frame;
3808 if (! reload_completed || frame_pointer_needed)
3811 /* Don't allow more than 32K bytes of pops, since that's all we can do
3812 with one instruction. */
3813 if (current_function_pops_args
3814 && current_function_args_size >= 32768)
3817 ix86_compute_frame_layout (&frame);
3818 return frame.to_allocate == 0 && frame.nregs == 0;
3821 /* Value should be nonzero if functions must have frame pointers.
3822 Zero means the frame pointer need not be set up (and parms may
3823 be accessed via the stack pointer) in functions that seem suitable. */
3826 ix86_frame_pointer_required (void)
3828 /* If we accessed previous frames, then the generated code expects
3829 to be able to access the saved ebp value in our frame. */
3830 if (cfun->machine->accesses_prev_frame)
3833 /* Several x86 OSes need a frame pointer for other reasons,
3834 usually pertaining to setjmp. */
3835 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3838 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3839 the frame pointer by default. Turn it back on now if we've not
3840 got a leaf function. */
3841 if (TARGET_OMIT_LEAF_FRAME_POINTER
3842 && (!current_function_is_leaf))
3845 if (current_function_profile)
3851 /* Record that the current function accesses previous call frames. */
3854 ix86_setup_frame_addresses (void)
3856 cfun->machine->accesses_prev_frame = 1;
3859 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3860 # define USE_HIDDEN_LINKONCE 1
3862 # define USE_HIDDEN_LINKONCE 0
3865 static int pic_labels_used;
3867 /* Fills in the label name that should be used for a pc thunk for
3868 the given register. */
3871 get_pc_thunk_name (char name[32], unsigned int regno)
3873 if (USE_HIDDEN_LINKONCE)
3874 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3876 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3880 /* This function generates code for -fpic that loads %ebx with
3881 the return address of the caller and then returns. */
3884 ix86_file_end (void)
3889 for (regno = 0; regno < 8; ++regno)
3893 if (! ((pic_labels_used >> regno) & 1))
3896 get_pc_thunk_name (name, regno);
3898 if (USE_HIDDEN_LINKONCE)
3902 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3904 TREE_PUBLIC (decl) = 1;
3905 TREE_STATIC (decl) = 1;
3906 DECL_ONE_ONLY (decl) = 1;
3908 (*targetm.asm_out.unique_section) (decl, 0);
3909 named_section (decl, NULL, 0);
3911 (*targetm.asm_out.globalize_label) (asm_out_file, name);
3912 fputs ("\t.hidden\t", asm_out_file);
3913 assemble_name (asm_out_file, name);
3914 fputc ('\n', asm_out_file);
3915 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
3920 ASM_OUTPUT_LABEL (asm_out_file, name);
3923 xops[0] = gen_rtx_REG (SImode, regno);
3924 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3925 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3926 output_asm_insn ("ret", xops);
3929 if (NEED_INDICATE_EXEC_STACK)
3930 file_end_indicate_exec_stack ();
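/* Each thunk emitted above is simply (an illustrative sketch):
	__i686.get_pc_thunk.bx:
		mov	(%esp), %ebx
		ret
   i.e. it copies its own return address - the address of the
   instruction following the call - into the destination register.  */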
3933 /* Emit code for the SET_GOT patterns. */
3936 output_set_got (rtx dest)
3941 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
3943 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
3945 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
3948 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3950 output_asm_insn ("call\t%a2", xops);
3953 /* Output the "canonical" label name ("Lxx$pb") here too. This
3954 is what will be referred to by the Mach-O PIC subsystem. */
3955 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
3957 (*targetm.asm_out.internal_label) (asm_out_file, "L",
3958 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
3961 output_asm_insn ("pop{l}\t%0", xops);
3966 get_pc_thunk_name (name, REGNO (dest));
3967 pic_labels_used |= 1 << REGNO (dest);
3969 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
3970 xops[2] = gen_rtx_MEM (QImode, xops[2]);
3971 output_asm_insn ("call\t%X2", xops);
3974 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
3975 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
3976 else if (!TARGET_MACHO)
3977 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
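/* Without deep branch prediction this yields the classic sequence
   (an illustrative sketch):
	call	1f
   1:	pop	%ebx
	add	$_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx
   With it, we instead call the pc thunk for the destination register
   and do the add afterwards, keeping the processor's return-address
   stack balanced.  */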
/* Generate a "push" pattern for input ARG.  */
3987 return gen_rtx_SET (VOIDmode,
3989 gen_rtx_PRE_DEC (Pmode,
3990 stack_pointer_rtx)),
3994 /* Return >= 0 if there is an unused call-clobbered register available
3995 for the entire function. */
3998 ix86_select_alt_pic_regnum (void)
4000 if (current_function_is_leaf && !current_function_profile)
4003 for (i = 2; i >= 0; --i)
4004 if (!regs_ever_live[i])
4008 return INVALID_REGNUM;
4011 /* Return 1 if we need to save REGNO. */
4013 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4015 if (pic_offset_table_rtx
4016 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4017 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4018 || current_function_profile
4019 || current_function_calls_eh_return
4020 || current_function_uses_const_pool))
4022 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4027 if (current_function_calls_eh_return && maybe_eh_return)
4032 unsigned test = EH_RETURN_DATA_REGNO (i);
4033 if (test == INVALID_REGNUM)
4040 return (regs_ever_live[regno]
4041 && !call_used_regs[regno]
4042 && !fixed_regs[regno]
4043 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4046 /* Return number of registers to be saved on the stack. */
4049 ix86_nsaved_regs (void)
4054 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4055 if (ix86_save_reg (regno, true))
4060 /* Return the offset between two registers, one to be eliminated, and the other
4061 its replacement, at the start of a routine. */
4064 ix86_initial_elimination_offset (int from, int to)
4066 struct ix86_frame frame;
4067 ix86_compute_frame_layout (&frame);
4069 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4070 return frame.hard_frame_pointer_offset;
4071 else if (from == FRAME_POINTER_REGNUM
4072 && to == HARD_FRAME_POINTER_REGNUM)
4073 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4076 if (to != STACK_POINTER_REGNUM)
4078 else if (from == ARG_POINTER_REGNUM)
4079 return frame.stack_pointer_offset;
4080 else if (from != FRAME_POINTER_REGNUM)
4083 return frame.stack_pointer_offset - frame.frame_pointer_offset;
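/* For orientation, an illustrative sketch (not from the original
   sources) of the frame layout ix86_compute_frame_layout fills in,
   from higher toward lower addresses; the arrows are approximate:

	return address
	saved frame pointer	<- hard_frame_pointer_offset
	register save area
	va_arg save area
	padding1		<- frame_pointer_offset
	local variables
	outgoing arguments
	padding2		<- stack_pointer_offset  */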
/* Fill the structure ix86_frame describing the frame of the currently
   computed function.  */
4090 ix86_compute_frame_layout (struct ix86_frame *frame)
4092 HOST_WIDE_INT total_size;
4093 unsigned int stack_alignment_needed;
4094 HOST_WIDE_INT offset;
4095 unsigned int preferred_alignment;
4096 HOST_WIDE_INT size = get_frame_size ();
4098 frame->nregs = ix86_nsaved_regs ();
4101 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4102 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
/* During the reload iteration the number of registers saved can change.
   Recompute the value as needed.  Do not recompute when the number of
   registers didn't change, as reload does multiple calls to the function
   and does not expect the decision to change within a single iteration.  */
4109 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4111 int count = frame->nregs;
4113 cfun->machine->use_fast_prologue_epilogue_nregs = count;
/* The fast prologue uses move instead of push to save registers.  This
   is significantly longer, but also executes faster, as modern hardware
   can execute the moves in parallel, but can't do that for push/pop.

   Be careful about choosing which prologue to emit: when the function
   takes many instructions to execute, we may use the slow version, as
   well as when the function is known to be outside a hot spot (this is
   known with profile feedback only).  Weight the size of the function by
   the number of registers to save, as it is cheap to use one or two push
   instructions but very slow to use many of them.  */
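/* For instance (an illustrative sketch, assuming FAST_PROLOGUE_INSN_COUNT
   is 20): a function that saves three registers is treated as
   (3 - 1) * 20 = 40 instructions longer when expensive_function_p
   decides whether the fast prologue pays off.  */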
4125 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4126 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4127 || (flag_branch_probabilities
4128 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4129 cfun->machine->use_fast_prologue_epilogue = false;
4131 cfun->machine->use_fast_prologue_epilogue
4132 = !expensive_function_p (count);
4134 if (TARGET_PROLOGUE_USING_MOVE
4135 && cfun->machine->use_fast_prologue_epilogue)
4136 frame->save_regs_using_mov = true;
4138 frame->save_regs_using_mov = false;
4141 /* Skip return address and saved base pointer. */
4142 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4144 frame->hard_frame_pointer_offset = offset;
/* Do some sanity checking of stack_alignment_needed and
   preferred_alignment, since the i386 port is the only one using these
   features, and they may break easily.  */
4150 if (size && !stack_alignment_needed)
4152 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4154 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4156 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4159 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4160 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4162 /* Register save area */
4163 offset += frame->nregs * UNITS_PER_WORD;
4166 if (ix86_save_varrargs_registers)
4168 offset += X86_64_VARARGS_SIZE;
4169 frame->va_arg_size = X86_64_VARARGS_SIZE;
4172 frame->va_arg_size = 0;
4174 /* Align start of frame for local function. */
4175 frame->padding1 = ((offset + stack_alignment_needed - 1)
4176 & -stack_alignment_needed) - offset;
4178 offset += frame->padding1;
4180 /* Frame pointer points here. */
4181 frame->frame_pointer_offset = offset;
/* Add outgoing arguments area.  Can be skipped if we eliminated
   all the function calls as dead code.
   Skipping is however impossible when the function calls alloca: the
   alloca expander assumes that the last current_function_outgoing_args_size
   bytes of the stack frame are unused.  */
4190 if (ACCUMULATE_OUTGOING_ARGS
4191 && (!current_function_is_leaf || current_function_calls_alloca))
4193 offset += current_function_outgoing_args_size;
4194 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4197 frame->outgoing_arguments_size = 0;
/* Align stack boundary.  Only needed if we're calling another function
   or using alloca.  */
4201 if (!current_function_is_leaf || current_function_calls_alloca)
4202 frame->padding2 = ((offset + preferred_alignment - 1)
4203 & -preferred_alignment) - offset;
4205 frame->padding2 = 0;
4207 offset += frame->padding2;
/* We've reached the end of the stack frame.  */
4210 frame->stack_pointer_offset = offset;
/* Size the prologue needs to allocate.  */
4213 frame->to_allocate =
4214 (size + frame->padding1 + frame->padding2
4215 + frame->outgoing_arguments_size + frame->va_arg_size);
4217 if ((!frame->to_allocate && frame->nregs <= 1)
4218 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
4219 frame->save_regs_using_mov = false;
4221 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
4222 && current_function_is_leaf)
4224 frame->red_zone_size = frame->to_allocate;
4225 if (frame->save_regs_using_mov)
4226 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
4227 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4228 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4231 frame->red_zone_size = 0;
4232 frame->to_allocate -= frame->red_zone_size;
4233 frame->stack_pointer_offset -= frame->red_zone_size;
4235 fprintf (stderr, "nregs: %i\n", frame->nregs);
  fprintf (stderr, "size: " HOST_WIDE_INT_PRINT_DEC "\n", size);
4237 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4238 fprintf (stderr, "padding1: %i\n", frame->padding1);
4239 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4240 fprintf (stderr, "padding2: %i\n", frame->padding2);
4241 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4242 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4243 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4244 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4245 frame->hard_frame_pointer_offset);
4246 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4250 /* Emit code to save registers in the prologue. */
4253 ix86_emit_save_regs (void)
4258 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4259 if (ix86_save_reg (regno, true))
4261 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4262 RTX_FRAME_RELATED_P (insn) = 1;
/* Emit code to save registers using MOV insns.  First register
   is saved at POINTER + OFFSET.  */
4269 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
4274 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4275 if (ix86_save_reg (regno, true))
4277 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4279 gen_rtx_REG (Pmode, regno));
4280 RTX_FRAME_RELATED_P (insn) = 1;
4281 offset += UNITS_PER_WORD;
4285 /* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory
   accesses.  STYLE should be negative if instructions should be marked
   as frame related, zero if the %r11 register is live and cannot be
   freely used, and positive otherwise.  */
4292 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
4297 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
4298 else if (x86_64_immediate_operand (offset, DImode))
4299 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
4303 /* r11 is used by indirect sibcall return as well, set before the
epilogue and used after the epilogue.  At the moment an indirect sibcall
4305 shouldn't be used together with huge frame sizes in one
4306 function because of the frame_size check in sibcall.c. */
4309 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4310 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
4312 RTX_FRAME_RELATED_P (insn) = 1;
4313 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
4317 RTX_FRAME_RELATED_P (insn) = 1;
4320 /* Expand the prologue into a bunch of separate insns. */
4323 ix86_expand_prologue (void)
4327 struct ix86_frame frame;
4328 HOST_WIDE_INT allocate;
4330 ix86_compute_frame_layout (&frame);
4332 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4333 slower on all targets. Also sdb doesn't like it. */
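/* For reference, the frame-pointer prologue expanded below amounts to
   (an illustrative sketch, register saves omitted):
	push	%ebp
	mov	%esp, %ebp
	sub	$frame.to_allocate, %esp  */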
4335 if (frame_pointer_needed)
4337 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4338 RTX_FRAME_RELATED_P (insn) = 1;
4340 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4341 RTX_FRAME_RELATED_P (insn) = 1;
4344 allocate = frame.to_allocate;
4346 if (!frame.save_regs_using_mov)
4347 ix86_emit_save_regs ();
4349 allocate += frame.nregs * UNITS_PER_WORD;
/* When using the red zone we may start saving registers before allocating
   the stack frame, saving one cycle of the prologue.  */
4353 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
4354 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
4355 : stack_pointer_rtx,
4356 -frame.nregs * UNITS_PER_WORD);
4360 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4361 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4362 GEN_INT (-allocate), -1);
4365 /* Only valid for Win32. */
4366 rtx eax = gen_rtx_REG (SImode, 0);
4367 bool eax_live = ix86_eax_live_at_start_p ();
4375 emit_insn (gen_push (eax));
4379 emit_move_insn (eax, GEN_INT (allocate));
4381 insn = emit_insn (gen_allocate_stack_worker (eax));
4382 RTX_FRAME_RELATED_P (insn) = 1;
4383 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
4384 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
4385 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
4386 t, REG_NOTES (insn));
4390 if (frame_pointer_needed)
4391 t = plus_constant (hard_frame_pointer_rtx,
4394 - frame.nregs * UNITS_PER_WORD);
4396 t = plus_constant (stack_pointer_rtx, allocate);
4397 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
4401 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
4403 if (!frame_pointer_needed || !frame.to_allocate)
4404 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4406 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4407 -frame.nregs * UNITS_PER_WORD);
4410 pic_reg_used = false;
4411 if (pic_offset_table_rtx
4412 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4413 || current_function_profile))
4415 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4417 if (alt_pic_reg_used != INVALID_REGNUM)
4418 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4420 pic_reg_used = true;
4425 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4427 /* Even with accurate pre-reload life analysis, we can wind up
4428 deleting all references to the pic register after reload.
4429 Consider if cross-jumping unifies two sides of a branch
4430 controlled by a comparison vs the only read from a global.
4431 In which case, allow the set_got to be deleted, though we're
4432 too late to do anything about the ebx save in the prologue. */
4433 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
/* Prevent function calls from being scheduled before the call to mcount.
4437 In the pic_reg_used case, make sure that the got load isn't deleted. */
4438 if (current_function_profile)
4439 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4442 /* Emit code to restore saved registers using MOV insns. First register
4443 is restored from POINTER + OFFSET. */
4445 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
4446 int maybe_eh_return)
4449 rtx base_address = gen_rtx_MEM (Pmode, pointer);
4451 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4452 if (ix86_save_reg (regno, maybe_eh_return))
/* Ensure that adjust_address won't be forced to produce a pointer
   outside the range allowed by the x86-64 instruction set.  */
4456 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
4460 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4461 emit_move_insn (r11, GEN_INT (offset));
4462 emit_insn (gen_adddi3 (r11, r11, pointer));
4463 base_address = gen_rtx_MEM (Pmode, r11);
4466 emit_move_insn (gen_rtx_REG (Pmode, regno),
4467 adjust_address (base_address, Pmode, offset));
4468 offset += UNITS_PER_WORD;
4472 /* Restore function stack, frame, and registers. */
4475 ix86_expand_epilogue (int style)
4478 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4479 struct ix86_frame frame;
4480 HOST_WIDE_INT offset;
4482 ix86_compute_frame_layout (&frame);
4484 /* Calculate start of saved registers relative to ebp. Special care
4485 must be taken for the normal return case of a function using
4486 eh_return: the eax and edx registers are marked as saved, but not
4487 restored along this path. */
4488 offset = frame.nregs;
4489 if (current_function_calls_eh_return && style != 2)
4491 offset *= -UNITS_PER_WORD;
/* If we're only restoring one register and sp is not valid then
   use a move instruction to restore the register, since it's
   less work than reloading sp and popping the register.

   The default code results in a stack adjustment using an add/lea
   instruction, while this code results in a LEAVE instruction (or
   discrete equivalent), so it is profitable in some other cases as
   well.  Especially when there are no registers to restore.  We also
   use this code when TARGET_USE_LEAVE and there is exactly one
   register to pop.  This heuristic may need some tuning in the
   future.  */
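/* E.g. with a frame pointer, one saved register and TARGET_USE_LEAVE,
   this path boils down to (an illustrative sketch):
	mov	-4(%ebp), %ebx
	leave
   rather than an add/pop/pop sequence.  */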
4503 if ((!sp_valid && frame.nregs <= 1)
4504 || (TARGET_EPILOGUE_USING_MOVE
4505 && cfun->machine->use_fast_prologue_epilogue
4506 && (frame.nregs > 1 || frame.to_allocate))
4507 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4508 || (frame_pointer_needed && TARGET_USE_LEAVE
4509 && cfun->machine->use_fast_prologue_epilogue
4510 && frame.nregs == 1)
4511 || current_function_calls_eh_return)
4513 /* Restore registers. We can use ebp or esp to address the memory
4514 locations. If both are available, default to ebp, since offsets
   are known to be small.  The only exception is esp pointing directly
   to the end of the block of saved registers, where we may simplify
   the addressing mode.  */
4519 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4520 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4521 frame.to_allocate, style == 2);
4523 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4524 offset, style == 2);
4526 /* eh_return epilogues need %ecx added to the stack pointer. */
4529 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4531 if (frame_pointer_needed)
4533 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4534 tmp = plus_constant (tmp, UNITS_PER_WORD);
4535 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4537 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4538 emit_move_insn (hard_frame_pointer_rtx, tmp);
4540 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
4545 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4546 tmp = plus_constant (tmp, (frame.to_allocate
4547 + frame.nregs * UNITS_PER_WORD));
4548 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4551 else if (!frame_pointer_needed)
4552 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4553 GEN_INT (frame.to_allocate
4554 + frame.nregs * UNITS_PER_WORD),
4556 /* If not an i386, mov & pop is faster than "leave". */
4557 else if (TARGET_USE_LEAVE || optimize_size
4558 || !cfun->machine->use_fast_prologue_epilogue)
4559 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4562 pro_epilogue_adjust_stack (stack_pointer_rtx,
4563 hard_frame_pointer_rtx,
4566 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4568 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
/* The first step is to deallocate the stack frame so that we can
4574 pop the registers. */
4577 if (!frame_pointer_needed)
4579 pro_epilogue_adjust_stack (stack_pointer_rtx,
4580 hard_frame_pointer_rtx,
4581 GEN_INT (offset), style);
4583 else if (frame.to_allocate)
4584 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4585 GEN_INT (frame.to_allocate), style);
4587 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4588 if (ix86_save_reg (regno, false))
4591 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4593 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4595 if (frame_pointer_needed)
4597 /* Leave results in shorter dependency chains on CPUs that are
4598 able to grok it fast. */
4599 if (TARGET_USE_LEAVE)
4600 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4601 else if (TARGET_64BIT)
4602 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4604 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4608 /* Sibcall epilogues don't want a return instruction. */
4612 if (current_function_pops_args && current_function_args_size)
4614 rtx popc = GEN_INT (current_function_pops_args);
/* i386 can only pop 64K bytes.  If asked to pop more, pop the return
   address, do an explicit add, and jump indirectly to the caller.  */
4620 if (current_function_pops_args >= 65536)
4622 rtx ecx = gen_rtx_REG (SImode, 2);
/* There is no "pascal" calling convention in the 64-bit ABI.  */
4628 emit_insn (gen_popsi1 (ecx));
4629 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4630 emit_jump_insn (gen_return_indirect_internal (ecx));
4633 emit_jump_insn (gen_return_pop_internal (popc));
4636 emit_jump_insn (gen_return_internal ());
4639 /* Reset from the function's potential modifications. */
4642 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4643 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4645 if (pic_offset_table_rtx)
4646 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4649 /* Extract the parts of an RTL expression that is a valid memory address
4650 for an instruction. Return 0 if the structure of the address is
4651 grossly off. Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing the length of the lea
   instruction.  */
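/* For example (an illustrative sketch): the SImode address
   12(%ebx,%eax,4), i.e.
	(plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx))
	      (const_int 12)),
   decomposes into base %ebx, index %eax, scale 4 and displacement 12.  */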
4655 ix86_decompose_address (rtx addr, struct ix86_address *out)
4657 rtx base = NULL_RTX;
4658 rtx index = NULL_RTX;
4659 rtx disp = NULL_RTX;
4660 HOST_WIDE_INT scale = 1;
4661 rtx scale_rtx = NULL_RTX;
4663 enum ix86_address_seg seg = SEG_DEFAULT;
4665 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4667 else if (GET_CODE (addr) == PLUS)
4677 addends[n++] = XEXP (op, 1);
4680 while (GET_CODE (op) == PLUS);
4685 for (i = n; i >= 0; --i)
4688 switch (GET_CODE (op))
4693 index = XEXP (op, 0);
4694 scale_rtx = XEXP (op, 1);
4698 if (XINT (op, 1) == UNSPEC_TP
4699 && TARGET_TLS_DIRECT_SEG_REFS
4700 && seg == SEG_DEFAULT)
4701 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
4730 else if (GET_CODE (addr) == MULT)
4732 index = XEXP (addr, 0); /* index*scale */
4733 scale_rtx = XEXP (addr, 1);
4735 else if (GET_CODE (addr) == ASHIFT)
4739 /* We're called for lea too, which implements ashift on occasion. */
4740 index = XEXP (addr, 0);
4741 tmp = XEXP (addr, 1);
4742 if (GET_CODE (tmp) != CONST_INT)
4744 scale = INTVAL (tmp);
4745 if ((unsigned HOST_WIDE_INT) scale > 3)
4751 disp = addr; /* displacement */
4753 /* Extract the integral value of scale. */
4756 if (GET_CODE (scale_rtx) != CONST_INT)
4758 scale = INTVAL (scale_rtx);
/* Allow arg pointer and stack pointer as index if there is no scaling.  */
4762 if (base && index && scale == 1
4763 && (index == arg_pointer_rtx
4764 || index == frame_pointer_rtx
4765 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
4772 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4773 if ((base == hard_frame_pointer_rtx
4774 || base == frame_pointer_rtx
4775 || base == arg_pointer_rtx) && !disp)
/* Special case: on K6, [%esi] causes the instruction to be vector decoded.
4779 Avoid this by transforming to [%esi+0]. */
4780 if (ix86_tune == PROCESSOR_K6 && !optimize_size
4781 && base && !index && !disp
4783 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4786 /* Special case: encode reg+reg instead of reg*2. */
4787 if (!base && index && scale && scale == 2)
4788 base = index, scale = 1;
4790 /* Special case: scaling cannot be encoded without base or displacement. */
4791 if (!base && !disp && index && scale != 1)
4803 /* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
4809 ix86_address_cost (rtx x)
4811 struct ix86_address parts;
4814 if (!ix86_decompose_address (x, &parts))
4817 /* More complex memory references are better. */
4818 if (parts.disp && parts.disp != const0_rtx)
4820 if (parts.seg != SEG_DEFAULT)
4823 /* Attempt to minimize number of registers in the address. */
4825 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4827 && (!REG_P (parts.index)
4828 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4832 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4834 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4835 && parts.base != parts.index)
/* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
   since its predecode logic can't detect the length of instructions
   and it degenerates to vector decoding.  Increase the cost of such
   addresses here.  The penalty is minimally 2 cycles.  It may be
   worthwhile to split such addresses or even refuse them entirely.

   The following addressing modes are affected:
    [base+scale*index]
    [scale*index+disp]
    [base+index]

   The first and last case may be avoidable by explicitly coding the
   zero in the memory address, but I don't have an AMD-K6 machine
   handy to check this theory.  */
4854 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4855 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4856 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4862 /* If X is a machine specific address (i.e. a symbol or label being
4863 referenced as a displacement from the GOT implemented using an
4864 UNSPEC), then return the base term. Otherwise return X. */
4867 ix86_find_base_term (rtx x)
4873 if (GET_CODE (x) != CONST)
4876 if (GET_CODE (term) == PLUS
4877 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4878 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4879 term = XEXP (term, 0);
4880 if (GET_CODE (term) != UNSPEC
4881 || XINT (term, 1) != UNSPEC_GOTPCREL)
4884 term = XVECEXP (term, 0, 0);
4886 if (GET_CODE (term) != SYMBOL_REF
4887 && GET_CODE (term) != LABEL_REF)
4893 term = ix86_delegitimize_address (x);
4895 if (GET_CODE (term) != SYMBOL_REF
4896 && GET_CODE (term) != LABEL_REF)
4902 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in use.  */
4907 darwin_local_data_pic (rtx disp)
4909 if (GET_CODE (disp) == MINUS)
4911 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
4912 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
4913 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
4915 const char *sym_name = XSTR (XEXP (disp, 1), 0);
4916 if (! strcmp (sym_name, "<pic base>"))
4924 /* Determine if a given RTX is a valid constant. We already know this
4925 satisfies CONSTANT_P. */
4928 legitimate_constant_p (rtx x)
4930 switch (GET_CODE (x))
4935 if (GET_CODE (x) == PLUS)
4937 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
4942 if (TARGET_MACHO && darwin_local_data_pic (x))
4945 /* Only some unspecs are valid as "constants". */
4946 if (GET_CODE (x) == UNSPEC)
4947 switch (XINT (x, 1))
4951 return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
4953 return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
4958 /* We must have drilled down to a symbol. */
4959 if (!symbolic_operand (x, Pmode))
4964 /* TLS symbols are never valid. */
4965 if (tls_symbolic_operand (x, Pmode))
4973 /* Otherwise we handle everything else in the move patterns. */
4977 /* Determine if it's legal to put X into the constant pool. This
4978 is not possible for the address of thread-local symbols, which
4979 is checked above. */
4982 ix86_cannot_force_const_mem (rtx x)
4984 return !legitimate_constant_p (x);
4987 /* Determine if a given RTX is a valid constant address. */
4990 constant_address_p (rtx x)
4992 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
4995 /* Nonzero if the constant value X is a legitimate general operand
4996 when generating PIC code. It is given that flag_pic is on and
4997 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5000 legitimate_pic_operand_p (rtx x)
5004 switch (GET_CODE (x))
5007 inner = XEXP (x, 0);
5009 /* Only some unspecs are valid as "constants". */
5010 if (GET_CODE (inner) == UNSPEC)
5011 switch (XINT (inner, 1))
5014 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5022 return legitimate_pic_address_disp_p (x);
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */
5033 legitimate_pic_address_disp_p (rtx disp)
/* In 64-bit mode we can allow direct addresses of symbols and labels
5038 when they are not dynamic symbols. */
5041 /* TLS references should always be enclosed in UNSPEC. */
5042 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5044 if (GET_CODE (disp) == SYMBOL_REF
5045 && ix86_cmodel == CM_SMALL_PIC
5046 && SYMBOL_REF_LOCAL_P (disp))
5048 if (GET_CODE (disp) == LABEL_REF)
5050 if (GET_CODE (disp) == CONST
5051 && GET_CODE (XEXP (disp, 0)) == PLUS)
5053 rtx op0 = XEXP (XEXP (disp, 0), 0);
5054 rtx op1 = XEXP (XEXP (disp, 0), 1);
5056 /* TLS references should always be enclosed in UNSPEC. */
5057 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5059 if (((GET_CODE (op0) == SYMBOL_REF
5060 && ix86_cmodel == CM_SMALL_PIC
5061 && SYMBOL_REF_LOCAL_P (op0))
5062 || GET_CODE (op0) == LABEL_REF)
5063 && GET_CODE (op1) == CONST_INT
5064 && INTVAL (op1) < 16*1024*1024
5065 && INTVAL (op1) >= -16*1024*1024)
5069 if (GET_CODE (disp) != CONST)
5071 disp = XEXP (disp, 0);
/* It is not safe to allow PLUS expressions here; they would defeat the
   limit on the allowed distance of GOT references.  We should not need
   these anyway.  */
5077 if (GET_CODE (disp) != UNSPEC
5078 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5081 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5082 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5088 if (GET_CODE (disp) == PLUS)
5090 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5092 disp = XEXP (disp, 0);
5096 if (TARGET_MACHO && darwin_local_data_pic (disp))
5099 if (GET_CODE (disp) != UNSPEC)
5102 switch (XINT (disp, 1))
5107 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5109 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5110 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5111 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5113 case UNSPEC_GOTTPOFF:
5114 case UNSPEC_GOTNTPOFF:
5115 case UNSPEC_INDNTPOFF:
5118 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5120 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5122 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5128 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5129 memory address for an instruction. The MODE argument is the machine mode
5130 for the MEM expression that wants to use this address.
   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS
   should convert common non-canonical forms to canonical form so that
   they will be recognized.  */
5137 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5139 struct ix86_address parts;
5140 rtx base, index, disp;
5141 HOST_WIDE_INT scale;
5142 const char *reason = NULL;
5143 rtx reason_rtx = NULL_RTX;
5145 if (TARGET_DEBUG_ADDR)
5148 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5149 GET_MODE_NAME (mode), strict);
5153 if (ix86_decompose_address (addr, &parts) <= 0)
5155 reason = "decomposition failed";
5160 index = parts.index;
5162 scale = parts.scale;
5164 /* Validate base register.
   Don't allow SUBREGs here; they can lead to spill failures when the base
   is one word out of a two-word structure, which is represented internally
   as a DImode int.  */
5174 if (GET_CODE (base) != REG)
5176 reason = "base is not a register";
5180 if (GET_MODE (base) != Pmode)
5182 reason = "base is not in Pmode";
5186 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
5187 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
5189 reason = "base is not valid";
5194 /* Validate index register.
   Don't allow SUBREGs here; they can lead to spill failures when the index
   is one word out of a two-word structure, which is represented internally
   as a DImode int.  */
5204 if (GET_CODE (index) != REG)
5206 reason = "index is not a register";
5210 if (GET_MODE (index) != Pmode)
5212 reason = "index is not in Pmode";
5216 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
5217 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
5219 reason = "index is not valid";
5224 /* Validate scale factor. */
5227 reason_rtx = GEN_INT (scale);
5230 reason = "scale without index";
5234 if (scale != 2 && scale != 4 && scale != 8)
5236 reason = "scale is not a valid multiplier";
5241 /* Validate displacement. */
5246 if (GET_CODE (disp) == CONST
5247 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5248 switch (XINT (XEXP (disp, 0), 1))
5252 case UNSPEC_GOTPCREL:
5255 goto is_legitimate_pic;
5257 case UNSPEC_GOTTPOFF:
5258 case UNSPEC_GOTNTPOFF:
5259 case UNSPEC_INDNTPOFF:
5265 reason = "invalid address unspec";
5269 else if (flag_pic && (SYMBOLIC_CONST (disp)
5271 && !machopic_operand_p (disp)
5276 if (TARGET_64BIT && (index || base))
5278 /* foo@dtpoff(%rX) is ok. */
5279 if (GET_CODE (disp) != CONST
5280 || GET_CODE (XEXP (disp, 0)) != PLUS
5281 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5282 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5283 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5284 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5286 reason = "non-constant pic memory reference";
5290 else if (! legitimate_pic_address_disp_p (disp))
5292 reason = "displacement is an invalid pic construct";
5296 /* This code used to verify that a symbolic pic displacement
5297 includes the pic_offset_table_rtx register.
   While this is a good idea, unfortunately these constructs may
   be created by the "adds using lea" optimization for incorrect code.

   Such code is nonsensical, but results in addressing the
   GOT table with a pic_offset_table_rtx base.  We can't
   just refuse it easily, since it gets matched by the
   "addsi3" pattern, which later gets split to lea in the
   case the output register differs from the input.  While this
   could be handled by a separate addsi pattern for this case
   that never results in lea, disabling this test seems to be the
   easier and correct fix for the crash.  */
5318 else if (GET_CODE (disp) != LABEL_REF
5319 && GET_CODE (disp) != CONST_INT
5320 && (GET_CODE (disp) != CONST
5321 || !legitimate_constant_p (disp))
5322 && (GET_CODE (disp) != SYMBOL_REF
5323 || !legitimate_constant_p (disp)))
5325 reason = "displacement is not constant";
5328 else if (TARGET_64BIT
5329 && !x86_64_immediate_operand (disp, VOIDmode))
5331 reason = "displacement is out of range";
5336 /* Everything looks valid. */
5337 if (TARGET_DEBUG_ADDR)
5338 fprintf (stderr, "Success.\n");
5342 if (TARGET_DEBUG_ADDR)
5344 fprintf (stderr, "Error: %s\n", reason);
5345 debug_rtx (reason_rtx);
/* Return a unique alias set for the GOT.  */
5352 static HOST_WIDE_INT
5353 ix86_GOT_alias_set (void)
5355 static HOST_WIDE_INT set = -1;
5357 set = new_alias_set ();
5361 /* Return a legitimate reference for ORIG (an address) using the
5362 register REG. If REG is 0, a new pseudo is generated.
5364 There are two types of references that must be handled:
5366 1. Global data references must load the address from the GOT, via
5367 the PIC reg. An insn is emitted to do this load, and the reg is
5370 2. Static data references, constant pool addresses, and code labels
5371 compute the address as an offset from the GOT, whose base is in
5372 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5373 differentiate them from global data objects. The returned
5374 address is the PIC reg + an unspec constant.
5376 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5377 reg also appears in the address. */
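/* For example (an illustrative sketch of the generated code): a
   reference to a global `foo' becomes a load through the GOT,
	movl	foo@GOT(%ebx), %reg
   while a file-local `bar' is formed relative to the PIC base,
	leal	bar@GOTOFF(%ebx), %reg  */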
5380 legitimize_pic_address (rtx orig, rtx reg)
5388 reg = gen_reg_rtx (Pmode);
5389 /* Use the generic Mach-O PIC machinery. */
5390 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5393 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5395 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5397 /* This symbol may be referenced via a displacement from the PIC
5398 base address (@GOTOFF). */
5400 if (reload_in_progress)
5401 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5402 if (GET_CODE (addr) == CONST)
5403 addr = XEXP (addr, 0);
5404 if (GET_CODE (addr) == PLUS)
5406 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
5407 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
5410 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5411 new = gen_rtx_CONST (Pmode, new);
5412 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5416 emit_move_insn (reg, new);
5420 else if (GET_CODE (addr) == SYMBOL_REF)
5424 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5425 new = gen_rtx_CONST (Pmode, new);
5426 new = gen_const_mem (Pmode, new);
5427 set_mem_alias_set (new, ix86_GOT_alias_set ());
5430 reg = gen_reg_rtx (Pmode);
/* Use gen_movsi directly; otherwise the address is loaded
   into a register for CSE.  We don't want to CSE these addresses;
   instead we CSE addresses from the GOT table, so skip this.  */
5434 emit_insn (gen_movsi (reg, new));
5439 /* This symbol must be referenced via a load from the
5440 Global Offset Table (@GOT). */
5442 if (reload_in_progress)
5443 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5444 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5445 new = gen_rtx_CONST (Pmode, new);
5446 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5447 new = gen_const_mem (Pmode, new);
5448 set_mem_alias_set (new, ix86_GOT_alias_set ());
5451 reg = gen_reg_rtx (Pmode);
5452 emit_move_insn (reg, new);
5458 if (GET_CODE (addr) == CONST)
5460 addr = XEXP (addr, 0);
5462 /* We must match stuff we generate before. Assume the only
5463 unspecs that can get here are ours. Not that we could do
5464 anything with them anyway.... */
5465 if (GET_CODE (addr) == UNSPEC
5466 || (GET_CODE (addr) == PLUS
5467 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5469 if (GET_CODE (addr) != PLUS)
5472 if (GET_CODE (addr) == PLUS)
5474 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5476 /* Check first to see if this is a constant offset from a @GOTOFF
5477 symbol reference. */
5478 if (local_symbolic_operand (op0, Pmode)
5479 && GET_CODE (op1) == CONST_INT)
5483 if (reload_in_progress)
5484 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5485 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5487 new = gen_rtx_PLUS (Pmode, new, op1);
5488 new = gen_rtx_CONST (Pmode, new);
5489 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5493 emit_move_insn (reg, new);
5499 if (INTVAL (op1) < -16*1024*1024
5500 || INTVAL (op1) >= 16*1024*1024)
5501 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
5506 base = legitimize_pic_address (XEXP (addr, 0), reg);
5507 new = legitimize_pic_address (XEXP (addr, 1),
5508 base == reg ? NULL_RTX : reg);
5510 if (GET_CODE (new) == CONST_INT)
5511 new = plus_constant (base, INTVAL (new));
5514 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5516 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5517 new = XEXP (new, 1);
5519 new = gen_rtx_PLUS (Pmode, base, new);
5527 /* Load the thread pointer. If TO_REG is true, force it into a register. */
5530 get_thread_pointer (int to_reg)
5534 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5538 reg = gen_reg_rtx (Pmode);
5539 insn = gen_rtx_SET (VOIDmode, reg, tp);
5540 insn = emit_insn (insn);
5545 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
5546 false if we expect this to be used for a memory address and true if
5547 we expect to load the address into a register. */
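/* As a rough guide to what the four models expand to on IA-32 with
   GNU TLS (an illustrative sketch, not the literal output of this
   function):
     global dynamic:  leal x@TLSGD(,%ebx,1), %eax; call ___tls_get_addr
     local dynamic:   leal x@TLSLDM(%ebx), %eax; call ___tls_get_addr
     initial exec:    movl %gs:0, %eax; addl x@GOTNTPOFF(%ebx), %eax
     local exec:      movl %gs:0, %eax; leal x@NTPOFF(%eax), %eax  */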
5550 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
5552 rtx dest, base, off, pic;
5557 case TLS_MODEL_GLOBAL_DYNAMIC:
5558 dest = gen_reg_rtx (Pmode);
5561 rtx rax = gen_rtx_REG (Pmode, 0), insns;
5564 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5565 insns = get_insns ();
5568 emit_libcall_block (insns, dest, rax, x);
5571 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5574 case TLS_MODEL_LOCAL_DYNAMIC:
5575 base = gen_reg_rtx (Pmode);
5578 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5581 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5582 insns = get_insns ();
5585 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5586 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5587 emit_libcall_block (insns, base, rax, note);
5590 emit_insn (gen_tls_local_dynamic_base_32 (base));
5592 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5593 off = gen_rtx_CONST (Pmode, off);
5595 return gen_rtx_PLUS (Pmode, base, off);
5597 case TLS_MODEL_INITIAL_EXEC:
5601 type = UNSPEC_GOTNTPOFF;
5605 if (reload_in_progress)
5606 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5607 pic = pic_offset_table_rtx;
5608 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
5610 else if (!TARGET_GNU_TLS)
5612 pic = gen_reg_rtx (Pmode);
5613 emit_insn (gen_set_got (pic));
5614 type = UNSPEC_GOTTPOFF;
5619 type = UNSPEC_INDNTPOFF;
5622 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
5623 off = gen_rtx_CONST (Pmode, off);
5625 off = gen_rtx_PLUS (Pmode, pic, off);
5626 off = gen_const_mem (Pmode, off);
5627 set_mem_alias_set (off, ix86_GOT_alias_set ());
5629 if (TARGET_64BIT || TARGET_GNU_TLS)
5631 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5632 off = force_reg (Pmode, off);
5633 return gen_rtx_PLUS (Pmode, base, off);
5637 base = get_thread_pointer (true);
5638 dest = gen_reg_rtx (Pmode);
5639 emit_insn (gen_subsi3 (dest, base, off));
5643 case TLS_MODEL_LOCAL_EXEC:
5644 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5645 (TARGET_64BIT || TARGET_GNU_TLS)
5646 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5647 off = gen_rtx_CONST (Pmode, off);
5649 if (TARGET_64BIT || TARGET_GNU_TLS)
5651 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5652 return gen_rtx_PLUS (Pmode, base, off);
5656 base = get_thread_pointer (true);
5657 dest = gen_reg_rtx (Pmode);
5658 emit_insn (gen_subsi3 (dest, base, off));
5669 /* Try machine-dependent ways of modifying an illegitimate address
5670 to be legitimate. If we find one, return the new, valid address.
5671 This macro is used in only one place: `memory_address' in explow.c.
5673 OLDX is the address as it was before break_out_memory_refs was called.
5674 In some cases it is useful to look at this to decide what needs to be done.
5676 MODE and WIN are passed so that this macro can use
5677 GO_IF_LEGITIMATE_ADDRESS.
5679 It is always safe for this macro to do nothing. It exists to recognize
5680 opportunities to optimize the output.
5682 For the 80386, we handle X+REG by loading X into a register R and
5683 using R+REG. R will go in a general reg and indexing will be used.
5684 However, if REG is a broken-out memory address or multiplication,
5685 nothing needs to be done because REG can certainly go in a general reg.
5687 When -fpic is used, special handling is needed for symbolic references.
5688 See comments by legitimize_pic_address in i386.c for details. */
5691 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
5696 if (TARGET_DEBUG_ADDR)
5698 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5699 GET_MODE_NAME (mode));
5703 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
5705 return legitimize_tls_address (x, log, false);
5706 if (GET_CODE (x) == CONST
5707 && GET_CODE (XEXP (x, 0)) == PLUS
5708 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
5709 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
5711 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
5712 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
5715 if (flag_pic && SYMBOLIC_CONST (x))
5716 return legitimize_pic_address (x, 0);
5718 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5719 if (GET_CODE (x) == ASHIFT
5720 && GET_CODE (XEXP (x, 1)) == CONST_INT
5721 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5724 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5725 GEN_INT (1 << log));
5728 if (GET_CODE (x) == PLUS)
5730 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5732 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5733 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5734 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5737 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5738 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5739 GEN_INT (1 << log));
5742 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5743 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5744 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5747 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5748 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5749 GEN_INT (1 << log));
5752 /* Put multiply first if it isn't already. */
5753 if (GET_CODE (XEXP (x, 1)) == MULT)
5755 rtx tmp = XEXP (x, 0);
5756 XEXP (x, 0) = XEXP (x, 1);
5761 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5762 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5763 created by virtual register instantiation, register elimination, and
5764 similar optimizations. */
5765 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5768 x = gen_rtx_PLUS (Pmode,
5769 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5770 XEXP (XEXP (x, 1), 0)),
5771 XEXP (XEXP (x, 1), 1));
      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5776 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5777 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5778 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5779 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5780 && CONSTANT_P (XEXP (x, 1)))
5783 rtx other = NULL_RTX;
5785 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5787 constant = XEXP (x, 1);
5788 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5790 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5792 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5793 other = XEXP (x, 1);
5801 x = gen_rtx_PLUS (Pmode,
5802 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5803 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5804 plus_constant (other, INTVAL (constant)));
5808 if (changed && legitimate_address_p (mode, x, FALSE))
5811 if (GET_CODE (XEXP (x, 0)) == MULT)
5814 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5817 if (GET_CODE (XEXP (x, 1)) == MULT)
5820 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5824 && GET_CODE (XEXP (x, 1)) == REG
5825 && GET_CODE (XEXP (x, 0)) == REG)
5828 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5831 x = legitimize_pic_address (x, 0);
5834 if (changed && legitimate_address_p (mode, x, FALSE))
5837 if (GET_CODE (XEXP (x, 0)) == REG)
5839 rtx temp = gen_reg_rtx (Pmode);
5840 rtx val = force_operand (XEXP (x, 1), temp);
5842 emit_move_insn (temp, val);
5848 else if (GET_CODE (XEXP (x, 1)) == REG)
5850 rtx temp = gen_reg_rtx (Pmode);
5851 rtx val = force_operand (XEXP (x, 0), temp);
5853 emit_move_insn (temp, val);
5863 /* Print an integer constant expression in assembler syntax. Addition
5864 and subtraction are the only arithmetic that may appear in these
5865 expressions. FILE is the stdio stream to write to, X is the rtx, and
5866 CODE is the operand print code from the output string. */
5869 output_pic_addr_const (FILE *file, rtx x, int code)
5873 switch (GET_CODE (x))
5883 /* Mark the decl as referenced so that cgraph will output the function. */
5884 if (SYMBOL_REF_DECL (x))
5885 mark_decl_referenced (SYMBOL_REF_DECL (x));
5887 assemble_name (file, XSTR (x, 0));
5888 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
5889 fputs ("@PLT", file);
5896 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5897 assemble_name (asm_out_file, buf);
5901 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5905 /* This used to output parentheses around the expression,
5906 but that does not work on the 386 (either ATT or BSD assembler). */
5907 output_pic_addr_const (file, XEXP (x, 0), code);
5911 if (GET_MODE (x) == VOIDmode)
5913 /* We can use %d if the number is <32 bits and positive. */
5914 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5915 fprintf (file, "0x%lx%08lx",
5916 (unsigned long) CONST_DOUBLE_HIGH (x),
5917 (unsigned long) CONST_DOUBLE_LOW (x));
5919 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5922 /* We can't handle floating point constants;
5923 PRINT_OPERAND must handle them. */
5924 output_operand_lossage ("floating constant misused");
5928 /* Some assemblers need integer constants to appear first. */
5929 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5931 output_pic_addr_const (file, XEXP (x, 0), code);
5933 output_pic_addr_const (file, XEXP (x, 1), code);
5935 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5937 output_pic_addr_const (file, XEXP (x, 1), code);
5939 output_pic_addr_const (file, XEXP (x, 0), code);
5947 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5948 output_pic_addr_const (file, XEXP (x, 0), code);
5950 output_pic_addr_const (file, XEXP (x, 1), code);
5952 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5956 if (XVECLEN (x, 0) != 1)
5958 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5959 switch (XINT (x, 1))
5962 fputs ("@GOT", file);
5965 fputs ("@GOTOFF", file);
5967 case UNSPEC_GOTPCREL:
5968 fputs ("@GOTPCREL(%rip)", file);
5970 case UNSPEC_GOTTPOFF:
5971 /* FIXME: This might be @TPOFF in Sun ld too. */
5972 fputs ("@GOTTPOFF", file);
5975 fputs ("@TPOFF", file);
5979 fputs ("@TPOFF", file);
5981 fputs ("@NTPOFF", file);
5984 fputs ("@DTPOFF", file);
5986 case UNSPEC_GOTNTPOFF:
5988 fputs ("@GOTTPOFF(%rip)", file);
5990 fputs ("@GOTNTPOFF", file);
5992 case UNSPEC_INDNTPOFF:
5993 fputs ("@INDNTPOFF", file);
5996 output_operand_lossage ("invalid UNSPEC as operand");
6002 output_operand_lossage ("invalid expression as operand");
6006 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6007 We need to emit DTP-relative relocations. */
6010 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6012 fputs (ASM_LONG, file);
6013 output_addr_const (file, x);
6014 fputs ("@DTPOFF", file);
6020 fputs (", 0", file);
6027 /* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
6029 into a direct symbol reference. */
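/* E.g. (an illustrative sketch):
	(plus (reg %ebx) (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)))
   is turned back into a plain (symbol_ref "x").  */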
6032 ix86_delegitimize_address (rtx orig_x)
6036 if (GET_CODE (x) == MEM)
6041 if (GET_CODE (x) != CONST
6042 || GET_CODE (XEXP (x, 0)) != UNSPEC
6043 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6044 || GET_CODE (orig_x) != MEM)
6046 return XVECEXP (XEXP (x, 0), 0, 0);
6049 if (GET_CODE (x) != PLUS
6050 || GET_CODE (XEXP (x, 1)) != CONST)
6053 if (GET_CODE (XEXP (x, 0)) == REG
6054 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6055 /* %ebx + GOT/GOTOFF */
6057 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6059 /* %ebx + %reg * scale + GOT/GOTOFF */
6061 if (GET_CODE (XEXP (y, 0)) == REG
6062 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6064 else if (GET_CODE (XEXP (y, 1)) == REG
6065 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6069 if (GET_CODE (y) != REG
6070 && GET_CODE (y) != MULT
6071 && GET_CODE (y) != ASHIFT)
6077 x = XEXP (XEXP (x, 1), 0);
6078 if (GET_CODE (x) == UNSPEC
6079 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6080 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6083 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6084 return XVECEXP (x, 0, 0);
6087 if (GET_CODE (x) == PLUS
6088 && GET_CODE (XEXP (x, 0)) == UNSPEC
6089 && GET_CODE (XEXP (x, 1)) == CONST_INT
6090 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6091 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6092 && GET_CODE (orig_x) != MEM)))
6094 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6096 return gen_rtx_PLUS (Pmode, y, x);
6104 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6109 if (mode == CCFPmode || mode == CCFPUmode)
6111 enum rtx_code second_code, bypass_code;
6112 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6113 if (bypass_code != UNKNOWN || second_code != UNKNOWN)
6115 code = ix86_fp_compare_code_to_integer (code);
6119 code = reverse_condition (code);
6130 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
/* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
   Those same assemblers have the same but opposite lossage on cmov.  */
6139 suffix = fp ? "nbe" : "a";
6142 if (mode == CCNOmode || mode == CCGOCmode)
6144 else if (mode == CCmode || mode == CCGCmode)
6155 if (mode == CCNOmode || mode == CCGOCmode)
6157 else if (mode == CCmode || mode == CCGCmode)
6166 suffix = fp ? "nb" : "ae";
6169 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6179 suffix = fp ? "u" : "p";
6182 suffix = fp ? "nu" : "np";
6187 fputs (suffix, file);
6190 /* Print the name of register X to FILE based on its machine mode and number.
6191 If CODE is 'w', pretend the mode is HImode.
6192 If CODE is 'b', pretend the mode is QImode.
6193 If CODE is 'k', pretend the mode is SImode.
6194 If CODE is 'q', pretend the mode is DImode.
6195 If CODE is 'h', pretend the reg is the `high' byte register.
6196 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
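/* For example (an illustrative sketch): for (reg:SI 0), the a register,
   code 'b' prints "al", 'w' prints "ax", 'k' prints "eax" and, in
   64-bit mode, 'q' prints "rax".  */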
6199 print_reg (rtx x, int code, FILE *file)
6201 if (REGNO (x) == ARG_POINTER_REGNUM
6202 || REGNO (x) == FRAME_POINTER_REGNUM
6203 || REGNO (x) == FLAGS_REG
6204 || REGNO (x) == FPSR_REG)
6207 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6210 if (code == 'w' || MMX_REG_P (x))
6212 else if (code == 'b')
6214 else if (code == 'k')
6216 else if (code == 'q')
6218 else if (code == 'y')
6220 else if (code == 'h')
6223 code = GET_MODE_SIZE (GET_MODE (x));
  /* Irritatingly, AMD extended registers use a different naming
     convention from the normal registers.  */
6227 if (REX_INT_REG_P (x))
6234 error ("extended registers have no high halves");
6237 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6240 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6243 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6246 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6249 error ("unsupported operand size for extended register");
6257 if (STACK_TOP_P (x))
6259 fputs ("st(0)", file);
6266 if (! ANY_FP_REG_P (x))
6267 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6272 fputs (hi_reg_name[REGNO (x)], file);
6275 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
6277 fputs (qi_reg_name[REGNO (x)], file);
6280 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
6282 fputs (qi_high_reg_name[REGNO (x)], file);
6289 /* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   pattern.  */
6294 get_some_local_dynamic_name (void)
6298 if (cfun->machine->some_ld_name)
6299 return cfun->machine->some_ld_name;
6301 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6303 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6304 return cfun->machine->some_ld_name;
6310 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
6314 if (GET_CODE (x) == SYMBOL_REF
6315 && local_dynamic_symbolic_operand (x, Pmode))
6317 cfun->machine->some_ld_name = XSTR (x, 0);
6325 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6326 C -- print opcode suffix for set/cmov insn.
6327 c -- like C, but print reversed condition
6328 F,f -- likewise, but for floating-point.
6329 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6331 R -- print the prefix for register names.
6332 z -- print the opcode suffix for the size of the current operand.
6333 * -- print a star (in certain assembler syntax)
6334 A -- print an absolute memory reference.
6335 w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assembler's argument
	delimiter.
6338 b -- print the QImode name of the register for the indicated operand.
6339 %b0 would print %al if operands[0] is reg 0.
6340 w -- likewise, print the HImode name of the register.
6341 k -- likewise, print the SImode name of the register.
6342 q -- likewise, print the DImode name of the register.
6343 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6344 y -- print "st(0)" instead of "st" as a register.
6345 D -- print condition for SSE cmp instruction.
6346 P -- if PIC, print an @PLT suffix.
6347 X -- don't print any sort of PIC '@' suffix for a symbol.
6348 & -- print some in-use local-dynamic symbol name.
   H -- print a memory address offset by 8; used for sse high-parts.  */
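/* For example (an illustrative sketch): the 'z' code applied to an
   SImode integer operand prints the suffix "l", so a hypothetical
   template "add%z0" would assemble to "addl" for such an operand.  */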
6353 print_operand (FILE *file, rtx x, int code)
6360 if (ASSEMBLER_DIALECT == ASM_ATT)
6365 assemble_name (file, get_some_local_dynamic_name ());
6369 if (ASSEMBLER_DIALECT == ASM_ATT)
6371 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6373 /* Intel syntax. For absolute addresses, registers should not
6374 be surrounded by braces. */
6375 if (GET_CODE (x) != REG)
6378 PRINT_OPERAND (file, x, 0);
6386 PRINT_OPERAND (file, x, 0);
6391 if (ASSEMBLER_DIALECT == ASM_ATT)
6396 if (ASSEMBLER_DIALECT == ASM_ATT)
6401 if (ASSEMBLER_DIALECT == ASM_ATT)
6406 if (ASSEMBLER_DIALECT == ASM_ATT)
6411 if (ASSEMBLER_DIALECT == ASM_ATT)
6416 if (ASSEMBLER_DIALECT == ASM_ATT)
6421 /* 387 opcodes don't get size suffixes if the operands are registers.  */
6423 if (STACK_REG_P (x))
6426 /* Likewise if using Intel opcodes. */
6427 if (ASSEMBLER_DIALECT == ASM_INTEL)
6430 /* Derive the opcode suffix from the size of the operand.  */
6431 switch (GET_MODE_SIZE (GET_MODE (x)))
6434 #ifdef HAVE_GAS_FILDS_FISTS
6440 if (GET_MODE (x) == SFmode)
6455 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6457 #ifdef GAS_MNEMONICS
6483 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6485 PRINT_OPERAND (file, x, 0);
6491 /* A little bit of braindamage here.  The SSE compare instructions
6492    use completely different names for the comparisons than the
6493    fp conditional moves do.  */
6494 switch (GET_CODE (x))
6509 fputs ("unord", file);
6513 fputs ("neq", file);
6517 fputs ("nlt", file);
6521 fputs ("nle", file);
6524 fputs ("ord", file);
6532 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6533 if (ASSEMBLER_DIALECT == ASM_ATT)
6535 switch (GET_MODE (x))
6537 case HImode: putc ('w', file); break;
6539 case SFmode: putc ('l', file); break;
6541 case DFmode: putc ('q', file); break;
6549 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6552 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6553 if (ASSEMBLER_DIALECT == ASM_ATT)
6556 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6559 /* Like above, but reverse condition */
6561 /* Check to see if argument to %c is really a constant
6562 and not a condition code which needs to be reversed. */
6563 if (!COMPARISON_P (x))
6565 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6568 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6571 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6572 if (ASSEMBLER_DIALECT == ASM_ATT)
6575 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6579 /* It doesn't actually matter what mode we use here, as we're
6580 only going to use this for printing. */
6581 x = adjust_address_nv (x, DImode, 8);
6588 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6591 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6594 int pred_val = INTVAL (XEXP (x, 0));
6596 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6597 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6599 int taken = pred_val > REG_BR_PROB_BASE / 2;
6600 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6602 /* Emit hints only in cases where the default branch prediction
6603    heuristics would fail.  */
6604 if (taken != cputaken)
6606 /* We use 3e (DS) prefix for taken branches and
6607 2e (CS) prefix for not taken branches. */
6609 fputs ("ds ; ", file);
6611 fputs ("cs ; ", file);
6618 output_operand_lossage ("invalid operand code '%c'", code);
6622 if (GET_CODE (x) == REG)
6623 print_reg (x, code, file);
6625 else if (GET_CODE (x) == MEM)
6627 /* No `byte ptr' prefix for call instructions. */
6628 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6631 switch (GET_MODE_SIZE (GET_MODE (x)))
6633 case 1: size = "BYTE"; break;
6634 case 2: size = "WORD"; break;
6635 case 4: size = "DWORD"; break;
6636 case 8: size = "QWORD"; break;
6637 case 12: size = "XWORD"; break;
6638 case 16: size = "XMMWORD"; break;
6643 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6646 else if (code == 'w')
6648 else if (code == 'k')
6652 fputs (" PTR ", file);
6656 /* Avoid (%rip) for call operands. */
6657 if (CONSTANT_ADDRESS_P (x) && code == 'P'
6658 && GET_CODE (x) != CONST_INT)
6659 output_addr_const (file, x);
6660 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6661 output_operand_lossage ("invalid constraints for operand");
6666 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6671 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6672 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6674 if (ASSEMBLER_DIALECT == ASM_ATT)
6676 fprintf (file, "0x%08lx", l);
6679 /* These float cases don't actually occur as immediate operands. */
6680 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6684 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6685 fprintf (file, "%s", dstr);
6688 else if (GET_CODE (x) == CONST_DOUBLE
6689 && GET_MODE (x) == XFmode)
6693 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6694 fprintf (file, "%s", dstr);
6701 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6703 if (ASSEMBLER_DIALECT == ASM_ATT)
6706 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6707 || GET_CODE (x) == LABEL_REF)
6709 if (ASSEMBLER_DIALECT == ASM_ATT)
6712 fputs ("OFFSET FLAT:", file);
6715 if (GET_CODE (x) == CONST_INT)
6716 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6718 output_pic_addr_const (file, x, code);
6720 output_addr_const (file, x);
6724 /* Print a memory operand whose address is ADDR. */
6727 print_operand_address (FILE *file, rtx addr)
6729 struct ix86_address parts;
6730 rtx base, index, disp;
6733 if (! ix86_decompose_address (addr, &parts))
6737 index = parts.index;
6739 scale = parts.scale;
6747 if (USER_LABEL_PREFIX[0] == 0)
6749 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
6755 if (!base && !index)
6757 /* A displacement-only address requires special attention.  */
6759 if (GET_CODE (disp) == CONST_INT)
6761 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
6763 if (USER_LABEL_PREFIX[0] == 0)
6765 fputs ("ds:", file);
6767 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
6770 output_pic_addr_const (file, disp, 0);
6772 output_addr_const (file, disp);
6774 /* Use one-byte-shorter RIP-relative addressing for 64-bit mode.  */
6776 && ((GET_CODE (disp) == SYMBOL_REF
6777 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
6778 || GET_CODE (disp) == LABEL_REF
6779 || (GET_CODE (disp) == CONST
6780 && GET_CODE (XEXP (disp, 0)) == PLUS
6781 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
6782 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
6783 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
6784 fputs ("(%rip)", file);
6788 if (ASSEMBLER_DIALECT == ASM_ATT)
6793 output_pic_addr_const (file, disp, 0);
6794 else if (GET_CODE (disp) == LABEL_REF)
6795 output_asm_label (disp);
6797 output_addr_const (file, disp);
6802 print_reg (base, 0, file);
6806 print_reg (index, 0, file);
6808 fprintf (file, ",%d", scale);
6814 rtx offset = NULL_RTX;
6818 /* Pull out the offset of a symbol; print any symbol itself. */
6819 if (GET_CODE (disp) == CONST
6820 && GET_CODE (XEXP (disp, 0)) == PLUS
6821 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6823 offset = XEXP (XEXP (disp, 0), 1);
6824 disp = gen_rtx_CONST (VOIDmode,
6825 XEXP (XEXP (disp, 0), 0));
6829 output_pic_addr_const (file, disp, 0);
6830 else if (GET_CODE (disp) == LABEL_REF)
6831 output_asm_label (disp);
6832 else if (GET_CODE (disp) == CONST_INT)
6835 output_addr_const (file, disp);
6841 print_reg (base, 0, file);
6844 if (INTVAL (offset) >= 0)
6846 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6850 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6857 print_reg (index, 0, file);
6859 fprintf (file, "*%d", scale);
6867 output_addr_const_extra (FILE *file, rtx x)
6871 if (GET_CODE (x) != UNSPEC)
6874 op = XVECEXP (x, 0, 0);
6875 switch (XINT (x, 1))
6877 case UNSPEC_GOTTPOFF:
6878 output_addr_const (file, op);
6879 /* FIXME: This might be @TPOFF in Sun ld. */
6880 fputs ("@GOTTPOFF", file);
6883 output_addr_const (file, op);
6884 fputs ("@TPOFF", file);
6887 output_addr_const (file, op);
6889 fputs ("@TPOFF", file);
6891 fputs ("@NTPOFF", file);
6894 output_addr_const (file, op);
6895 fputs ("@DTPOFF", file);
6897 case UNSPEC_GOTNTPOFF:
6898 output_addr_const (file, op);
6900 fputs ("@GOTTPOFF(%rip)", file);
6902 fputs ("@GOTNTPOFF", file);
6904 case UNSPEC_INDNTPOFF:
6905 output_addr_const (file, op);
6906 fputs ("@INDNTPOFF", file);
6916 /* Split one or more DImode RTL references into pairs of SImode
6917 references. The RTL can be REG, offsettable MEM, integer constant, or
6918 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6919 split and "num" is its length. lo_half and hi_half are output arrays
6920 that parallel "operands". */
6923 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
6927 rtx op = operands[num];
6929 /* simplify_subreg refuses to split volatile memory addresses,
6930    but we still have to handle them.  */
6931 if (GET_CODE (op) == MEM)
6933 lo_half[num] = adjust_address (op, SImode, 0);
6934 hi_half[num] = adjust_address (op, SImode, 4);
6938 lo_half[num] = simplify_gen_subreg (SImode, op,
6939 GET_MODE (op) == VOIDmode
6940 ? DImode : GET_MODE (op), 0);
6941 hi_half[num] = simplify_gen_subreg (SImode, op,
6942 GET_MODE (op) == VOIDmode
6943 ? DImode : GET_MODE (op), 4);
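/* Usage sketch (illustration only; this helper is not part of GCC and
   is never called): emitting a DImode move as two SImode moves with
   split_di.  */
static void
example_split_di_move (rtx operands[2])
{
  rtx lo[2], hi[2];

  split_di (operands, 2, lo, hi);	/* Split dest and src in parallel.  */
  emit_move_insn (lo[0], lo[1]);	/* Low 32 bits.  */
  emit_move_insn (hi[0], hi[1]);	/* High 32 bits.  */
}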
6947 /* Split one or more TImode RTL references into pairs of DImode
6948    references.  The RTL can be REG, offsettable MEM, integer constant, or
6949    CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
6950 split and "num" is its length. lo_half and hi_half are output arrays
6951 that parallel "operands". */
6954 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
6958 rtx op = operands[num];
6960 /* simplify_subreg refuses to split volatile memory addresses, but we
6961    still have to handle them.  */
6962 if (GET_CODE (op) == MEM)
6964 lo_half[num] = adjust_address (op, DImode, 0);
6965 hi_half[num] = adjust_address (op, DImode, 8);
6969 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6970 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6975 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6976 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6977 is the expression of the binary operation. The output may either be
6978 emitted here, or returned to the caller, like all output_* functions.
6980 There is no guarantee that the operands are the same mode, as they
6981 might be within FLOAT or FLOAT_EXTEND expressions. */
6983 #ifndef SYSV386_COMPAT
6984 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6985 wants to fix the assemblers because that causes incompatibility
6986 with gcc. No-one wants to fix gcc because that causes
6987 incompatibility with assemblers... You can use the option of
6988 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6989 #define SYSV386_COMPAT 1
6993 output_387_binary_op (rtx insn, rtx *operands)
6995 static char buf[30];
6998 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
7000 #ifdef ENABLE_CHECKING
7001 /* Even if we do not want to check the inputs, this documents the input
7002    constraints, which helps in understanding the following code.  */
7003 if (STACK_REG_P (operands[0])
7004 && ((REG_P (operands[1])
7005 && REGNO (operands[0]) == REGNO (operands[1])
7006 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7007 || (REG_P (operands[2])
7008 && REGNO (operands[0]) == REGNO (operands[2])
7009 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7010 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7016 switch (GET_CODE (operands[3]))
7019 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7020 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7028 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7029 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7037 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7038 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7046 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7047 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7061 if (GET_MODE (operands[0]) == SFmode)
7062 strcat (buf, "ss\t{%2, %0|%0, %2}");
7064 strcat (buf, "sd\t{%2, %0|%0, %2}");
7069 switch (GET_CODE (operands[3]))
7073 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7075 rtx temp = operands[2];
7076 operands[2] = operands[1];
7080 /* We know operands[0] == operands[1].  */
7082 if (GET_CODE (operands[2]) == MEM)
7088 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7090 if (STACK_TOP_P (operands[0]))
7091 /* How is it that we are storing to a dead operand[2]?
7092 Well, presumably operands[1] is dead too. We can't
7093 store the result to st(0) as st(0) gets popped on this
7094 instruction. Instead store to operands[2] (which I
7095 think has to be st(1)). st(1) will be popped later.
7096 gcc <= 2.8.1 didn't have this check and generated
7097 assembly code that the Unixware assembler rejected. */
7098 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7100 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7104 if (STACK_TOP_P (operands[0]))
7105 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7107 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7112 if (GET_CODE (operands[1]) == MEM)
7118 if (GET_CODE (operands[2]) == MEM)
7124 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7127 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7128 derived assemblers, confusingly reverse the direction of
7129 the operation for fsub{r} and fdiv{r} when the
7130 destination register is not st(0). The Intel assembler
7131 doesn't have this brain damage. Read !SYSV386_COMPAT to
7132 figure out what the hardware really does. */
7133 if (STACK_TOP_P (operands[0]))
7134 p = "{p\t%0, %2|rp\t%2, %0}";
7136 p = "{rp\t%2, %0|p\t%0, %2}";
7138 if (STACK_TOP_P (operands[0]))
7139 /* As above for fmul/fadd, we can't store to st(0). */
7140 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7142 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7147 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7150 if (STACK_TOP_P (operands[0]))
7151 p = "{rp\t%0, %1|p\t%1, %0}";
7153 p = "{p\t%1, %0|rp\t%0, %1}";
7155 if (STACK_TOP_P (operands[0]))
7156 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7158 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7163 if (STACK_TOP_P (operands[0]))
7165 if (STACK_TOP_P (operands[1]))
7166 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7168 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7171 else if (STACK_TOP_P (operands[1]))
7174 p = "{\t%1, %0|r\t%0, %1}";
7176 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7182 p = "{r\t%2, %0|\t%0, %2}";
7184 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7197 /* Output code to initialize control word copies used by trunc?f?i and
7198 rounding patterns.  CURRENT_MODE is set to the current control word,
7199 while NEW_MODE is set to the new control word.  */
7202 emit_i387_cw_initialization (rtx current_mode, rtx new_mode, int mode)
7204 rtx reg = gen_reg_rtx (HImode);
7206 emit_insn (gen_x86_fnstcw_1 (current_mode));
7207 emit_move_insn (reg, current_mode);
7209 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7215 /* round down toward -oo */
7216 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
7220 /* round up toward +oo */
7221 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
7225 /* round toward zero (truncate) */
7226 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7229 case I387_CW_MASK_PM:
7230 /* mask precision exception for nearbyint() */
7231 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7243 /* round down toward -oo */
7244 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7245 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
7249 /* round up toward +oo */
7250 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7251 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
7255 /* round toward zero (truncate) */
7256 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
7259 case I387_CW_MASK_PM:
7260 /* mask precision exception for nearbyint() */
7261 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7269 emit_move_insn (new_mode, reg);
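/* Background sketch (illustration only, not used by the compiler): the
   magic constants above are fields of the x87 control word.  Bits
   10-11 are the rounding control (00 nearest, 01 down, 10 up, 11
   truncate) and bit 5 (0x0020) is the precision exception mask, which
   is why nearbyint() only needs an OR.  A scalar model of the update:  */
static unsigned short
example_set_x87_rounding (unsigned short cw, unsigned short rc_bits)
{
  /* Clear the old rounding-control field, then install the new one.  */
  return (unsigned short) ((cw & ~0x0c00) | (rc_bits & 0x0c00));
}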
7272 /* Output code for INSN to convert a float to a signed int. OPERANDS
7273 are the insn operands. The output may be [HSD]Imode and the input
7274 operand may be [SDX]Fmode. */
7277 output_fix_trunc (rtx insn, rtx *operands)
7279 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7280 int dimode_p = GET_MODE (operands[0]) == DImode;
7282 /* Jump through a hoop or two for DImode, since the hardware has no
7283 non-popping instruction. We used to do this a different way, but
7284 that was somewhat fragile and broke with post-reload splitters. */
7285 if (dimode_p && !stack_top_dies)
7286 output_asm_insn ("fld\t%y1", operands);
7288 if (!STACK_TOP_P (operands[1]))
7291 if (GET_CODE (operands[0]) != MEM)
7294 output_asm_insn ("fldcw\t%3", operands);
7295 if (stack_top_dies || dimode_p)
7296 output_asm_insn ("fistp%z0\t%0", operands);
7298 output_asm_insn ("fist%z0\t%0", operands);
7299 output_asm_insn ("fldcw\t%2", operands);
7304 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7305 should be used. UNORDERED_P is true when fucom should be used. */
7308 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
7311 rtx cmp_op0, cmp_op1;
7312 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
7316 cmp_op0 = operands[0];
7317 cmp_op1 = operands[1];
7321 cmp_op0 = operands[1];
7322 cmp_op1 = operands[2];
7327 if (GET_MODE (operands[0]) == SFmode)
7329 return "ucomiss\t{%1, %0|%0, %1}";
7331 return "comiss\t{%1, %0|%0, %1}";
7334 return "ucomisd\t{%1, %0|%0, %1}";
7336 return "comisd\t{%1, %0|%0, %1}";
7339 if (! STACK_TOP_P (cmp_op0))
7342 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7344 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
7348 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
7349 return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
7352 return "ftst\n\tfnstsw\t%0";
7355 if (STACK_REG_P (cmp_op1)
7357 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7358 && REGNO (cmp_op1) != FIRST_STACK_REG)
7360 /* If both the top of the 387 stack and the other operand, itself a
7361    stack register, die, then this must be an `fcompp' float
7362    compare.  */
7366 /* There is no double popping fcomi variant. Fortunately,
7367 eflags is immune from the fstp's cc clobbering. */
7369 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7371 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7372 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
7377 return "fucompp\n\tfnstsw\t%0";
7379 return "fcompp\n\tfnstsw\t%0";
7384 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7386 static const char * const alt[16] =
7388 "fcom%z2\t%y2\n\tfnstsw\t%0",
7389 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7390 "fucom%z2\t%y2\n\tfnstsw\t%0",
7391 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7393 "ficom%z2\t%y2\n\tfnstsw\t%0",
7394 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7398 "fcomi\t{%y1, %0|%0, %y1}",
7399 "fcomip\t{%y1, %0|%0, %y1}",
7400 "fucomi\t{%y1, %0|%0, %y1}",
7401 "fucomip\t{%y1, %0|%0, %y1}",
7412 mask = eflags_p << 3;
7413 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
7414 mask |= unordered_p << 1;
7415 mask |= stack_top_dies;
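/* Worked example (sketch): eflags_p = 1, an FP (not integer) operand,
   unordered_p = 1 and a dying stack top give
   mask = (1 << 3) | (0 << 2) | (1 << 1) | 1 = 11, which selects
   "fucomip\t{%y1, %0|%0, %y1}" from the alt[] table above.  */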
7428 ix86_output_addr_vec_elt (FILE *file, int value)
7430 const char *directive = ASM_LONG;
7435 directive = ASM_QUAD;
7441 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7445 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
7448 fprintf (file, "%s%s%d-%s%d\n",
7449 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7450 else if (HAVE_AS_GOTOFF_IN_DATA)
7451 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7453 else if (TARGET_MACHO)
7455 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
7456 machopic_output_function_base_name (file);
7457 fprintf(file, "\n");
7461 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7462 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7465 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7469 ix86_expand_clear (rtx dest)
7473 /* We play register width games, which are only valid after reload. */
7474 if (!reload_completed)
7477 /* Avoid HImode and its attendant prefix byte. */
7478 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7479 dest = gen_rtx_REG (SImode, REGNO (dest));
7481 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7483 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7484 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7486 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7487 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
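/* For example (sketch): after reload, clearing SImode hard register AX
   emits the xor form, "xorl %eax, %eax" with a flags clobber, unless
   TARGET_USE_MOV0 is set and we are not optimizing for size, in which
   case the plain "movl $0, %eax" is used instead.  */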
7493 /* X is an unchanging MEM. If it is a constant pool reference, return
7494 the constant pool rtx, else NULL. */
7497 maybe_get_pool_constant (rtx x)
7499 x = ix86_delegitimize_address (XEXP (x, 0));
7501 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7502 return get_pool_constant (x);
7508 ix86_expand_move (enum machine_mode mode, rtx operands[])
7510 int strict = (reload_in_progress || reload_completed);
7512 enum tls_model model;
7517 if (GET_CODE (op1) == SYMBOL_REF)
7519 model = SYMBOL_REF_TLS_MODEL (op1);
7522 op1 = legitimize_tls_address (op1, model, true);
7523 op1 = force_operand (op1, op0);
7528 else if (GET_CODE (op1) == CONST
7529 && GET_CODE (XEXP (op1, 0)) == PLUS
7530 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
7532 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
7535 rtx addend = XEXP (XEXP (op1, 0), 1);
7536 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
7537 op1 = force_operand (op1, NULL);
7538 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
7539 op0, 1, OPTAB_DIRECT);
7545 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7550 rtx temp = ((reload_in_progress
7551 || ((op0 && GET_CODE (op0) == REG)
7553 ? op0 : gen_reg_rtx (Pmode));
7554 op1 = machopic_indirect_data_reference (op1, temp);
7555 op1 = machopic_legitimize_pic_address (op1, mode,
7556 temp == op1 ? 0 : temp);
7558 else if (MACHOPIC_INDIRECT)
7559 op1 = machopic_indirect_data_reference (op1, 0);
7563 if (GET_CODE (op0) == MEM)
7564 op1 = force_reg (Pmode, op1);
7566 op1 = legitimize_address (op1, op1, Pmode);
7567 #endif /* TARGET_MACHO */
7571 if (GET_CODE (op0) == MEM
7572 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7573 || !push_operand (op0, mode))
7574 && GET_CODE (op1) == MEM)
7575 op1 = force_reg (mode, op1);
7577 if (push_operand (op0, mode)
7578 && ! general_no_elim_operand (op1, mode))
7579 op1 = copy_to_mode_reg (mode, op1);
7581 /* Force large constants in 64-bit compilation into a register
7582    to get them CSEed.  */
7583 if (TARGET_64BIT && mode == DImode
7584 && immediate_operand (op1, mode)
7585 && !x86_64_zext_immediate_operand (op1, VOIDmode)
7586 && !register_operand (op0, mode)
7587 && optimize && !reload_completed && !reload_in_progress)
7588 op1 = copy_to_mode_reg (mode, op1);
7590 if (FLOAT_MODE_P (mode))
7592 /* If we are loading a floating point constant to a register,
7593 force the value to memory now, since we'll get better code
7594 out the back end. */
7598 else if (GET_CODE (op1) == CONST_DOUBLE)
7600 op1 = validize_mem (force_const_mem (mode, op1));
7601 if (!register_operand (op0, mode))
7603 rtx temp = gen_reg_rtx (mode);
7604 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
7605 emit_move_insn (op0, temp);
7612 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7616 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
7618 rtx op0 = operands[0], op1 = operands[1];
7620 /* Force constants other than zero into memory. We do not know how
7621 the instructions used to build constants modify the upper 64 bits
7622 of the register; once we have that information we may be able
7623 to handle some of them more efficiently. */
7624 if ((reload_in_progress | reload_completed) == 0
7625 && register_operand (op0, mode)
7626 && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
7627 op1 = validize_mem (force_const_mem (mode, op1));
7629 /* Make operand1 a register if it isn't already. */
7631 && !register_operand (op0, mode)
7632 && !register_operand (op1, mode))
7634 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
7638 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7641 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
7642 straight to ix86_expand_vector_move. */
7645 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
7654 /* If we're optimizing for size, movups is the smallest. */
7657 op0 = gen_lowpart (V4SFmode, op0);
7658 op1 = gen_lowpart (V4SFmode, op1);
7659 emit_insn (gen_sse_movups (op0, op1));
7663 /* ??? If we have typed data, then it would appear that using
7664 movdqu is the only way to get unaligned data loaded with integer type.  */
7666 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
7668 op0 = gen_lowpart (V16QImode, op0);
7669 op1 = gen_lowpart (V16QImode, op1);
7670 emit_insn (gen_sse2_movdqu (op0, op1));
7674 if (TARGET_SSE2 && mode == V2DFmode)
7678 /* When SSE registers are split into halves, we can avoid
7679 writing to the top half twice. */
7680 if (TARGET_SSE_SPLIT_REGS)
7682 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
7687 /* ??? Not sure about the best option for the Intel chips.
7688 The following would seem to satisfy; the register is
7689 entirely cleared, breaking the dependency chain. We
7690 then store to the upper half, with a dependency depth
7691 of one. A rumor has it that Intel recommends two movsd
7692 followed by an unpacklpd, but this is unconfirmed. And
7693 given that the dependency depth of the unpacklpd would
7694 still be one, I'm not sure why this would be better. */
7695 zero = CONST0_RTX (V2DFmode);
7698 m = adjust_address (op1, DFmode, 0);
7699 emit_insn (gen_sse2_loadlpd (op0, zero, m));
7700 m = adjust_address (op1, DFmode, 8);
7701 emit_insn (gen_sse2_loadhpd (op0, op0, m));
7705 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
7706 emit_move_insn (op0, CONST0_RTX (mode));
7708 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
7710 if (mode != V4SFmode)
7711 op0 = gen_lowpart (V4SFmode, op0);
7712 m = adjust_address (op1, V2SFmode, 0);
7713 emit_insn (gen_sse_loadlps (op0, op0, m));
7714 m = adjust_address (op1, V2SFmode, 8);
7715 emit_insn (gen_sse_loadhps (op0, op0, m));
7718 else if (MEM_P (op0))
7720 /* If we're optimizing for size, movups is the smallest. */
7723 op0 = gen_lowpart (V4SFmode, op0);
7724 op1 = gen_lowpart (V4SFmode, op1);
7725 emit_insn (gen_sse_movups (op0, op1));
7729 /* ??? Similar to above, only less clear because of
7730    "typeless stores".  */
7731 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
7732 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
7734 op0 = gen_lowpart (V16QImode, op0);
7735 op1 = gen_lowpart (V16QImode, op1);
7736 emit_insn (gen_sse2_movdqu (op0, op1));
7740 if (TARGET_SSE2 && mode == V2DFmode)
7742 m = adjust_address (op0, DFmode, 0);
7743 emit_insn (gen_sse2_storelpd (m, op1));
7744 m = adjust_address (op0, DFmode, 8);
7745 emit_insn (gen_sse2_storehpd (m, op1));
7749 if (mode != V4SFmode)
7750 op1 = gen_lowpart (V4SFmode, op1);
7751 m = adjust_address (op0, V2SFmode, 0);
7752 emit_insn (gen_sse_storelps (m, op1));
7753 m = adjust_address (op0, V2SFmode, 8);
7754 emit_insn (gen_sse_storehps (m, op1));
7761 /* Expand a push in MODE. This is some mode for which we do not support
7762 proper push instructions, at least from the registers that we expect
7763 the value to live in. */
7766 ix86_expand_push (enum machine_mode mode, rtx x)
7770 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
7771 GEN_INT (-GET_MODE_SIZE (mode)),
7772 stack_pointer_rtx, 1, OPTAB_DIRECT);
7773 if (tmp != stack_pointer_rtx)
7774 emit_move_insn (stack_pointer_rtx, tmp);
7776 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
7777 emit_move_insn (tmp, x);
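/* Emitted shape (sketch): pushing a DFmode value this way on ia32
   comes out roughly as
	subl	$8, %esp
   followed by an ordinary 8-byte store to the new (%esp), since no
   push instruction covers the registers such a value lives in.  */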
7780 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
7781 destination to use for the operation. If different from the true
7782 destination in operands[0], a copy operation will be required. */
7785 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
7788 int matching_memory;
7789 rtx src1, src2, dst;
7795 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7796 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7797 && (rtx_equal_p (dst, src2)
7798 || immediate_operand (src1, mode)))
7805 /* If the destination is memory, and we do not have matching source
7806 operands, do things in registers. */
7807 matching_memory = 0;
7808 if (GET_CODE (dst) == MEM)
7810 if (rtx_equal_p (dst, src1))
7811 matching_memory = 1;
7812 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7813 && rtx_equal_p (dst, src2))
7814 matching_memory = 2;
7816 dst = gen_reg_rtx (mode);
7819 /* The two source operands cannot both be in memory.  */
7820 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7822 if (matching_memory != 2)
7823 src2 = force_reg (mode, src2);
7825 src1 = force_reg (mode, src1);
7828 /* If the operation is not commutable, source 1 cannot be a constant
7829 or non-matching memory. */
7830 if ((CONSTANT_P (src1)
7831 || (!matching_memory && GET_CODE (src1) == MEM))
7832 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7833 src1 = force_reg (mode, src1);
7835 /* If optimizing, copy to regs to improve CSE */
7836 if (optimize && ! no_new_pseudos)
7838 if (GET_CODE (dst) == MEM)
7839 dst = gen_reg_rtx (mode);
7840 if (GET_CODE (src1) == MEM)
7841 src1 = force_reg (mode, src1);
7842 if (GET_CODE (src2) == MEM)
7843 src2 = force_reg (mode, src2);
7846 operands[1] = src1;
7847 operands[2] = src2;
7851 /* Similarly, but assume that the destination has already been set up correctly.  */
7855 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
7856 enum machine_mode mode, rtx operands[])
7858 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
7859 gcc_assert (dst == operands[0]);
7862 /* Attempt to expand a binary operator. Make the expansion closer to the
7863 actual machine, than just general_operand, which will allow 3 separate
7864 memory references (one output, two input) in a single insn. */
7867 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
7870 rtx src1, src2, dst, op, clob;
7872 dst = ix86_fixup_binary_operands (code, mode, operands);
7876 /* Emit the instruction. */
7878 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7879 if (reload_in_progress)
7881 /* Reload doesn't know about the flags register, and doesn't know that
7882 it doesn't want to clobber it. We can only do this with PLUS. */
7889 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7890 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7893 /* Fix up the destination if needed. */
7894 if (dst != operands[0])
7895 emit_move_insn (operands[0], dst);
7898 /* Return TRUE or FALSE depending on whether the binary operator meets the
7899 appropriate constraints. */
7902 ix86_binary_operator_ok (enum rtx_code code,
7903 enum machine_mode mode ATTRIBUTE_UNUSED,
7906 /* The two source operands cannot both be in memory.  */
7907 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7909 /* If the operation is not commutable, source 1 cannot be a constant. */
7910 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7912 /* If the destination is memory, we must have a matching source operand. */
7913 if (GET_CODE (operands[0]) == MEM
7914 && ! (rtx_equal_p (operands[0], operands[1])
7915 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7916 && rtx_equal_p (operands[0], operands[2]))))
7918 /* If the operation is not commutable and the source 1 is memory, we must
7919 have a matching destination. */
7920 if (GET_CODE (operands[1]) == MEM
7921 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
7922 && ! rtx_equal_p (operands[0], operands[1]))
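/* Examples (sketch): for the non-commutative MINUS these tests reject
   "reg = const - reg" and any memory source 1 that does not match the
   destination, while "reg = reg - mem" and "mem = mem - reg" with a
   matching destination are accepted.  */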
7927 /* Attempt to expand a unary operator. Make the expansion closer to the
7928 actual machine, than just general_operand, which will allow 2 separate
7929 memory references (one output, one input) in a single insn. */
7932 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
7935 int matching_memory;
7936 rtx src, dst, op, clob;
7941 /* If the destination is memory, and we do not have matching source
7942 operands, do things in registers. */
7943 matching_memory = 0;
7946 if (rtx_equal_p (dst, src))
7947 matching_memory = 1;
7949 dst = gen_reg_rtx (mode);
7952 /* When source operand is memory, destination must match. */
7953 if (MEM_P (src) && !matching_memory)
7954 src = force_reg (mode, src);
7956 /* If optimizing, copy to regs to improve CSE. */
7957 if (optimize && ! no_new_pseudos)
7959 if (GET_CODE (dst) == MEM)
7960 dst = gen_reg_rtx (mode);
7961 if (GET_CODE (src) == MEM)
7962 src = force_reg (mode, src);
7965 /* Emit the instruction. */
7967 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7968 if (reload_in_progress || code == NOT)
7970 /* Reload doesn't know about the flags register, and doesn't know that
7971 it doesn't want to clobber it. */
7978 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7979 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7982 /* Fix up the destination if needed. */
7983 if (dst != operands[0])
7984 emit_move_insn (operands[0], dst);
7987 /* Return TRUE or FALSE depending on whether the unary operator meets the
7988 appropriate constraints. */
7991 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
7992 enum machine_mode mode ATTRIBUTE_UNUSED,
7993 rtx operands[2] ATTRIBUTE_UNUSED)
7995 /* If one of the operands is memory, source and destination must match.  */
7996 if ((GET_CODE (operands[0]) == MEM
7997 || GET_CODE (operands[1]) == MEM)
7998 && ! rtx_equal_p (operands[0], operands[1]))
8003 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
8004 Create a mask for the sign bit in MODE for an SSE register. If VECT is
8005 true, then replicate the mask for all elements of the vector register.
8006 If INVERT is true, then create a mask excluding the sign bit. */
8009 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
8011 enum machine_mode vec_mode;
8012 HOST_WIDE_INT hi, lo;
8017 /* Find the sign bit, sign extended to 2*HWI. */
8019 lo = 0x80000000, hi = lo < 0;
8020 else if (HOST_BITS_PER_WIDE_INT >= 64)
8021 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
8023 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
8028 /* Force this value into the low part of a fp vector constant. */
8029 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
8030 mask = gen_lowpart (mode, mask);
8035 v = gen_rtvec (4, mask, mask, mask, mask);
8037 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
8038 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
8039 vec_mode = V4SFmode;
8044 v = gen_rtvec (2, mask, mask);
8046 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
8047 vec_mode = V2DFmode;
8050 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
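/* Worked example (sketch): for SFmode the shift is 31, so lo ends up
   0x80000000.  NEG uses that mask directly (XOR flips the sign bit),
   ABS uses the inverted form 0x7fffffff (AND clears it), and VECT
   replicates the chosen mask into all four V4SF lanes.  */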
8053 /* Generate code for floating point ABS or NEG. */
8056 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
8059 rtx mask, set, use, clob, dst, src;
8060 bool matching_memory;
8061 bool use_sse = false;
8062 bool vector_mode = VECTOR_MODE_P (mode);
8063 enum machine_mode elt_mode = mode;
8067 elt_mode = GET_MODE_INNER (mode);
8070 else if (TARGET_SSE_MATH)
8071 use_sse = SSE_REG_MODE_P (mode);
8073 /* NEG and ABS performed with SSE use bitwise mask operations.
8074 Create the appropriate mask now. */
8076 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
8079 /* When not using SSE, we don't use the mask, but prefer to keep the
8080 same general form of the insn pattern to reduce duplication when
8081 it comes time to split. */
8088 /* If the destination is memory, and we don't have matching source
8089 operands, do things in registers. */
8090 matching_memory = false;
8093 if (rtx_equal_p (dst, src) && (!optimize || no_new_pseudos))
8094 matching_memory = true;
8096 dst = gen_reg_rtx (mode);
8098 if (MEM_P (src) && !matching_memory)
8099 src = force_reg (mode, src);
8103 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
8104 set = gen_rtx_SET (VOIDmode, dst, set);
8109 set = gen_rtx_fmt_e (code, mode, src);
8110 set = gen_rtx_SET (VOIDmode, dst, set);
8111 use = gen_rtx_USE (VOIDmode, mask);
8112 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8113 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
8116 if (dst != operands[0])
8117 emit_move_insn (operands[0], dst);
8120 /* Deconstruct a copysign operation into bit masks. */
8123 ix86_split_copysign (rtx operands[])
8125 enum machine_mode mode, vmode;
8126 rtx dest, scratch, op0, op1, mask, nmask, x;
8129 scratch = operands[1];
8131 nmask = operands[3];
8135 mode = GET_MODE (dest);
8136 vmode = GET_MODE (mask);
8138 if (rtx_equal_p (op0, op1))
8140 /* Shouldn't happen often (it's useless, obviously), but when it does
8141 we'd generate incorrect code if we continue below. */
8142 emit_move_insn (dest, op0);
8146 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
8148 gcc_assert (REGNO (op1) == REGNO (scratch));
8150 x = gen_rtx_AND (vmode, scratch, mask);
8151 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
8154 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
8155 x = gen_rtx_NOT (vmode, dest);
8156 x = gen_rtx_AND (vmode, x, op0);
8157 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8161 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
8163 x = gen_rtx_AND (vmode, scratch, mask);
8165 else /* alternative 2,4 */
8167 gcc_assert (REGNO (mask) == REGNO (scratch));
8168 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
8169 x = gen_rtx_AND (vmode, scratch, op1);
8171 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
8173 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
8175 dest = simplify_gen_subreg (vmode, op0, mode, 0);
8176 x = gen_rtx_AND (vmode, dest, nmask);
8178 else /* alternative 3,4 */
8180 gcc_assert (REGNO (nmask) == REGNO (dest));
8182 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
8183 x = gen_rtx_AND (vmode, dest, op0);
8185 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8188 x = gen_rtx_IOR (vmode, dest, scratch);
8189 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
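/* Scalar model of the split above (illustration only; this helper is
   not part of GCC and is never called): with MASK the sign bit and
   NMASK its complement, copysign reduces to two ANDs and an IOR.  */
static unsigned int
example_copysign_bits (unsigned int op0, unsigned int op1)
{
  const unsigned int mask = 0x80000000u;	/* sign bit, taken from op1  */
  const unsigned int nmask = ~mask;		/* magnitude, taken from op0 */

  return (op0 & nmask) | (op1 & mask);
}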
8192 /* Return TRUE or FALSE depending on whether the first SET in INSN
8193 has source and destination with matching CC modes and a CC mode that
8194 is at least as constrained as REQ_MODE.  */
8197 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8200 enum machine_mode set_mode;
8202 set = PATTERN (insn);
8203 if (GET_CODE (set) == PARALLEL)
8204 set = XVECEXP (set, 0, 0);
8205 if (GET_CODE (set) != SET)
8207 if (GET_CODE (SET_SRC (set)) != COMPARE)
8210 set_mode = GET_MODE (SET_DEST (set));
8214 if (req_mode != CCNOmode
8215 && (req_mode != CCmode
8216 || XEXP (SET_SRC (set), 1) != const0_rtx))
8220 if (req_mode == CCGCmode)
8224 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8228 if (req_mode == CCZmode)
8238 return (GET_MODE (SET_SRC (set)) == set_mode);
8241 /* Generate insn patterns to do an integer compare of OPERANDS. */
8244 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8246 enum machine_mode cmpmode;
8249 cmpmode = SELECT_CC_MODE (code, op0, op1);
8250 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8252 /* This is very simple, but making the interface the same as in the
8253 FP case makes the rest of the code easier. */
8254 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8255 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8257 /* Return the test that should be put into the flags user, i.e.
8258 the bcc, scc, or cmov instruction. */
8259 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8262 /* Figure out whether to use ordered or unordered fp comparisons.
8263 Return the appropriate mode to use. */
8266 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8268 /* ??? In order to make all comparisons reversible, we do all comparisons
8269 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8270 all forms of trapping and nontrapping comparisons, we can make inequality
8271 comparisons trapping again, since it results in better code when using
8272 FCOM based compares. */
8273 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8277 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8279 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8280 return ix86_fp_compare_mode (code);
8283 /* Only zero flag is needed. */
8285 case NE: /* ZF!=0 */
8287 /* Codes needing carry flag. */
8288 case GEU: /* CF=0 */
8289 case GTU: /* CF=0 & ZF=0 */
8290 case LTU: /* CF=1 */
8291 case LEU: /* CF=1 | ZF=1 */
8293 /* Codes possibly doable only with sign flag when
8294 comparing against zero. */
8295 case GE: /* SF=OF or SF=0 */
8296 case LT: /* SF<>OF or SF=1 */
8297 if (op1 == const0_rtx)
8300 /* For other cases the carry flag is not required.  */
8302 /* Codes doable only with the sign flag when comparing
8303    against zero, but for which we miss the jump instruction,
8304    so we need to use relational tests against overflow,
8305    which thus needs to be zero.  */
8306 case GT: /* ZF=0 & SF=OF */
8307 case LE: /* ZF=1 | SF<>OF */
8308 if (op1 == const0_rtx)
8312 /* The strcmp pattern does (use flags), and combine may ask us for the proper mode.  */
8321 /* Return the fixed registers used for condition codes. */
8324 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8331 /* If two condition code modes are compatible, return a condition code
8332 mode which is compatible with both. Otherwise, return
8335 static enum machine_mode
8336 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8341 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8344 if ((m1 == CCGCmode && m2 == CCGOCmode)
8345 || (m1 == CCGOCmode && m2 == CCGCmode))
8373 /* These are only compatible with themselves, which we already
8379 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8382 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8384 enum rtx_code swapped_code = swap_condition (code);
8385 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8386 || (ix86_fp_comparison_cost (swapped_code)
8387 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8390 /* Swap, force into registers, or otherwise massage the two operands
8391 to a fp comparison. The operands are updated in place; the new
8392 comparison code is returned. */
8394 static enum rtx_code
8395 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8397 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8398 rtx op0 = *pop0, op1 = *pop1;
8399 enum machine_mode op_mode = GET_MODE (op0);
8400 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
8402 /* All of the unordered compare instructions only work on registers.
8403 The same is true of the fcomi compare instructions. The same is
8404 true of the XFmode compare instructions if not comparing with
8405 zero (ftst insn is used in this case). */
8408 && (fpcmp_mode == CCFPUmode
8409 || (op_mode == XFmode
8410 && ! (standard_80387_constant_p (op0) == 1
8411 || standard_80387_constant_p (op1) == 1))
8412 || ix86_use_fcomi_compare (code)))
8414 op0 = force_reg (op_mode, op0);
8415 op1 = force_reg (op_mode, op1);
8419 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8420 things around if they appear profitable, otherwise force op0 into a register.  */
8423 if (standard_80387_constant_p (op0) == 0
8424 || (GET_CODE (op0) == MEM
8425 && ! (standard_80387_constant_p (op1) == 0
8426 || GET_CODE (op1) == MEM)))
8429 tmp = op0, op0 = op1, op1 = tmp;
8430 code = swap_condition (code);
8433 if (GET_CODE (op0) != REG)
8434 op0 = force_reg (op_mode, op0);
8436 if (CONSTANT_P (op1))
8438 int tmp = standard_80387_constant_p (op1);
8440 op1 = validize_mem (force_const_mem (op_mode, op1));
8444 op1 = force_reg (op_mode, op1);
8447 op1 = force_reg (op_mode, op1);
8451 /* Try to rearrange the comparison to make it cheaper. */
8452 if (ix86_fp_comparison_cost (code)
8453 > ix86_fp_comparison_cost (swap_condition (code))
8454 && (GET_CODE (op1) == REG || !no_new_pseudos))
8457 tmp = op0, op0 = op1, op1 = tmp;
8458 code = swap_condition (code);
8459 if (GET_CODE (op0) != REG)
8460 op0 = force_reg (op_mode, op0);
8468 /* Convert comparison codes we use to represent FP comparison to the integer
8469    code that will result in a proper branch.  Return UNKNOWN if no such code is available.  */
8473 ix86_fp_compare_code_to_integer (enum rtx_code code)
8502 /* Split comparison code CODE into comparisons we can do using branch
8503 instructions.  BYPASS_CODE is the comparison code for a branch that will
8504 branch around FIRST_CODE and SECOND_CODE.  If one of the branches
8505 is not required, its value is set to UNKNOWN.
8506 We never require more than two branches. */
8509 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8510 enum rtx_code *first_code,
8511 enum rtx_code *second_code)
8514 *bypass_code = UNKNOWN;
8515 *second_code = UNKNOWN;
8517 /* The fcomi comparison sets flags as follows:
8527 case GT: /* GTU - CF=0 & ZF=0 */
8528 case GE: /* GEU - CF=0 */
8529 case ORDERED: /* PF=0 */
8530 case UNORDERED: /* PF=1 */
8531 case UNEQ: /* EQ - ZF=1 */
8532 case UNLT: /* LTU - CF=1 */
8533 case UNLE: /* LEU - CF=1 | ZF=1 */
8534 case LTGT: /* EQ - ZF=0 */
8536 case LT: /* LTU - CF=1 - fails on unordered */
8538 *bypass_code = UNORDERED;
8540 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8542 *bypass_code = UNORDERED;
8544 case EQ: /* EQ - ZF=1 - fails on unordered */
8546 *bypass_code = UNORDERED;
8548 case NE: /* NE - ZF=0 - fails on unordered */
8550 *second_code = UNORDERED;
8552 case UNGE: /* GEU - CF=0 - fails on unordered */
8554 *second_code = UNORDERED;
8556 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8558 *second_code = UNORDERED;
8563 if (!TARGET_IEEE_FP)
8565 *second_code = UNKNOWN;
8566 *bypass_code = UNKNOWN;
8570 /* Return the cost of a comparison done using fcom + arithmetic operations on AX.
8571    All the following functions use the number of instructions as a cost metric.
8572    In the future this should be tweaked to compute bytes for optimize_size and
8573    to take into account the performance of various instructions on various CPUs.  */
8575 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8577 if (!TARGET_IEEE_FP)
8579 /* The cost of code output by ix86_expand_fp_compare. */
8607 /* Return cost of comparison done using fcomi operation.
8608 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8610 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
8612 enum rtx_code bypass_code, first_code, second_code;
8613 /* Return an arbitrarily high cost when the instruction is not supported; this
8614 prevents gcc from using it. */
8617 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8618 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
8621 /* Return cost of comparison done using sahf operation.
8622 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8624 ix86_fp_comparison_sahf_cost (enum rtx_code code)
8626 enum rtx_code bypass_code, first_code, second_code;
8627 /* Return an arbitrarily high cost when the instruction is not preferred; this
8628    keeps gcc from using it.  */
8629 if (!TARGET_USE_SAHF && !optimize_size)
8631 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8632 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
8635 /* Compute cost of the comparison done using any method.
8636 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8638 ix86_fp_comparison_cost (enum rtx_code code)
8640 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8643 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8644 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8646 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8647 if (min > sahf_cost)
8649 if (min > fcomi_cost)
8654 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8657 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8658 rtx *second_test, rtx *bypass_test)
8660 enum machine_mode fpcmp_mode, intcmp_mode;
8662 int cost = ix86_fp_comparison_cost (code);
8663 enum rtx_code bypass_code, first_code, second_code;
8665 fpcmp_mode = ix86_fp_compare_mode (code);
8666 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8669 *second_test = NULL_RTX;
8671 *bypass_test = NULL_RTX;
8673 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8675 /* Do fcomi/sahf based test when profitable. */
8676 if ((bypass_code == UNKNOWN || bypass_test)
8677 && (second_code == UNKNOWN || second_test)
8678 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8682 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8683 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8689 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8690 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8692 scratch = gen_reg_rtx (HImode);
8693 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8694 emit_insn (gen_x86_sahf_1 (scratch));
8697 /* The FP codes work out to act like unsigned. */
8698 intcmp_mode = fpcmp_mode;
8700 if (bypass_code != UNKNOWN)
8701 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8702 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8704 if (second_code != UNKNOWN)
8705 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8706 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8711 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8712 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8713 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8715 scratch = gen_reg_rtx (HImode);
8716 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8718 /* In the unordered case, we have to check C2 for NaN's, which
8719 doesn't happen to work out to anything nice combination-wise.
8720 So do some bit twiddling on the value we've got in AH to come
8721 up with an appropriate set of condition codes. */
8723 intcmp_mode = CCNOmode;
8728 if (code == GT || !TARGET_IEEE_FP)
8730 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8735 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8736 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8737 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8738 intcmp_mode = CCmode;
8744 if (code == LT && TARGET_IEEE_FP)
8746 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8747 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8748 intcmp_mode = CCmode;
8753 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8759 if (code == GE || !TARGET_IEEE_FP)
8761 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8766 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8767 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8774 if (code == LE && TARGET_IEEE_FP)
8776 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8777 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8778 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8779 intcmp_mode = CCmode;
8784 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8790 if (code == EQ && TARGET_IEEE_FP)
8792 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8793 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8794 intcmp_mode = CCmode;
8799 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8806 if (code == NE && TARGET_IEEE_FP)
8808 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8809 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8815 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8821 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8825 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
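/* Key to the magic constants above (sketch): fnstsw leaves the 387
   condition bits in AH as C0 = 0x01, C2 = 0x04 and C3 = 0x40, so 0x45
   masks C0|C2|C3, 0x44 tests C3|C2, 0x05 tests C2|C0 and 0x40 tests
   C3 alone; C2 by itself (0x04) is the "unordered" bit checked for
   ORDERED/UNORDERED just above.  */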
8834 /* Return the test that should be put into the flags user, i.e.
8835 the bcc, scc, or cmov instruction. */
8836 return gen_rtx_fmt_ee (code, VOIDmode,
8837 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8842 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
8845 op0 = ix86_compare_op0;
8846 op1 = ix86_compare_op1;
8849 *second_test = NULL_RTX;
8851 *bypass_test = NULL_RTX;
8853 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8854 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8855 second_test, bypass_test);
8857 ret = ix86_expand_int_compare (code, op0, op1);
8862 /* Return true if the CODE will result in a nontrivial jump sequence.  */
8864 ix86_fp_jump_nontrivial_p (enum rtx_code code)
8866 enum rtx_code bypass_code, first_code, second_code;
8869 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8870 return bypass_code != UNKNOWN || second_code != UNKNOWN;
8874 ix86_expand_branch (enum rtx_code code, rtx label)
8878 switch (GET_MODE (ix86_compare_op0))
8884 tmp = ix86_expand_compare (code, NULL, NULL);
8885 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8886 gen_rtx_LABEL_REF (VOIDmode, label),
8888 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8897 enum rtx_code bypass_code, first_code, second_code;
8899 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8902 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8904 /* Check whether we will use the natural sequence with one jump. If
8905 so, we can expand the jump early.  Otherwise delay expansion by
8906 creating a compound insn so as not to confuse optimizers.  */
8907 if (bypass_code == UNKNOWN && second_code == UNKNOWN
8910 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8911 gen_rtx_LABEL_REF (VOIDmode, label),
8912 pc_rtx, NULL_RTX, NULL_RTX);
8916 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8917 ix86_compare_op0, ix86_compare_op1);
8918 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8919 gen_rtx_LABEL_REF (VOIDmode, label),
8921 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8923 use_fcomi = ix86_use_fcomi_compare (code);
8924 vec = rtvec_alloc (3 + !use_fcomi);
8925 RTVEC_ELT (vec, 0) = tmp;
8927 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8929 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8932 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8934 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8942 /* Expand DImode branch into multiple compare+branch. */
8944 rtx lo[2], hi[2], label2;
8945 enum rtx_code code1, code2, code3;
8947 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8949 tmp = ix86_compare_op0;
8950 ix86_compare_op0 = ix86_compare_op1;
8951 ix86_compare_op1 = tmp;
8952 code = swap_condition (code);
8954 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8955 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8957 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8958 avoid two branches. This costs one extra insn, so disable when
8959 optimizing for size. */
8961 if ((code == EQ || code == NE)
8963 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8968 if (hi[1] != const0_rtx)
8969 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8970 NULL_RTX, 0, OPTAB_WIDEN);
8973 if (lo[1] != const0_rtx)
8974 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8975 NULL_RTX, 0, OPTAB_WIDEN);
8977 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8978 NULL_RTX, 0, OPTAB_WIDEN);
8980 ix86_compare_op0 = tmp;
8981 ix86_compare_op1 = const0_rtx;
8982 ix86_expand_branch (code, label);
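/* Scalar model (sketch): the sequence just emitted computes
   (hi0 ^ hi1) | (lo0 ^ lo1), which is zero exactly when both halves
   are equal, so a single SImode compare against zero now decides the
   DImode EQ/NE.  */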
8986 /* Otherwise, if we are doing a less-than or greater-or-equal comparison,
8987    op1 is a constant, and the low word is zero, then we can just
8988 examine the high word. */
8990 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8993 case LT: case LTU: case GE: case GEU:
8994 ix86_compare_op0 = hi[0];
8995 ix86_compare_op1 = hi[1];
8996 ix86_expand_branch (code, label);
9002 /* Otherwise, we need two or three jumps. */
9004 label2 = gen_label_rtx ();
9007 code2 = swap_condition (code);
9008 code3 = unsigned_condition (code);
9012 case LT: case GT: case LTU: case GTU:
9015 case LE: code1 = LT; code2 = GT; break;
9016 case GE: code1 = GT; code2 = LT; break;
9017 case LEU: code1 = LTU; code2 = GTU; break;
9018 case GEU: code1 = GTU; code2 = LTU; break;
9020 case EQ: code1 = UNKNOWN; code2 = NE; break;
9021 case NE: code2 = UNKNOWN; break;
9029 * if (hi(a) < hi(b)) goto true;
9030 * if (hi(a) > hi(b)) goto false;
9031 * if (lo(a) < lo(b)) goto true;
9035 ix86_compare_op0 = hi[0];
9036 ix86_compare_op1 = hi[1];
9038 if (code1 != UNKNOWN)
9039 ix86_expand_branch (code1, label);
9040 if (code2 != UNKNOWN)
9041 ix86_expand_branch (code2, label2);
9043 ix86_compare_op0 = lo[0];
9044 ix86_compare_op1 = lo[1];
9045 ix86_expand_branch (code3, label);
9047 if (code2 != UNKNOWN)
9048 emit_label (label2);
/* Split a branch based on a floating point condition.  */
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
                      rtx target1, rtx target2, rtx tmp, rtx pushed)
{
  rtx label = NULL_RTX;
  int bypass_probability = -1, second_probability = -1, probability = -1;

  if (target2 != pc_rtx)
      code = reverse_condition_maybe_unordered (code);

  condition = ix86_expand_fp_compare (code, op1, op2,
                                      tmp, &second, &bypass);

  /* Remove the pushed operand from the stack.  */
    ix86_free_from_memory (GET_MODE (pushed));

  if (split_branch_probability >= 0)
      /* Distribute the probabilities across the jumps.
         Assume the BYPASS and SECOND to be always test
         for UNORDERED.  */
      probability = split_branch_probability;

      /* A value of 1 is low enough that there is no need for the
         probability to be updated.  Later we may run some experiments
         and see if unordered values are more frequent in practice.  */
        bypass_probability = 1;
        second_probability = 1;

  if (bypass != NULL_RTX)
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
                          gen_rtx_IF_THEN_ELSE (VOIDmode,
                                                gen_rtx_LABEL_REF (VOIDmode,
      if (bypass_probability >= 0)
          = gen_rtx_EXPR_LIST (REG_BR_PROB,
                               GEN_INT (bypass_probability),

  i = emit_jump_insn (gen_rtx_SET
                      gen_rtx_IF_THEN_ELSE (VOIDmode,
                                            condition, target1, target2)));
  if (probability >= 0)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
                           GEN_INT (probability),
  if (second != NULL_RTX)
      i = emit_jump_insn (gen_rtx_SET
                          gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
      if (second_probability >= 0)
          = gen_rtx_EXPR_LIST (REG_BR_PROB,
                               GEN_INT (second_probability),
  if (label != NULL_RTX)
ix86_expand_setcc (enum rtx_code code, rtx dest)
{
  rtx ret, tmp, tmpreg, equiv;
  rtx second_test, bypass_test;

  if (GET_MODE (ix86_compare_op0) == DImode
    return 0; /* FAIL */

  if (GET_MODE (dest) != QImode)

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  if (bypass_test || second_test)
    {
      rtx test = second_test;
      rtx tmp2 = gen_reg_rtx (QImode);
        PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
        emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
        emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  /* Attach a REG_EQUAL note describing the comparison result.  */
  equiv = simplify_gen_relational (code, QImode,
                                   GET_MODE (ix86_compare_op0),
                                   ix86_compare_op0, ix86_compare_op1);
  set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);

  return 1; /* DONE */
/* Expand a comparison setting or clearing the carry flag.  Return true
   when successful, and set *POP to the resulting comparison.  */
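/* As a sketch: an unsigned "a < b" maps directly onto the carry flag,
     cmpl  b, a        ; CF = (a < b)
   which callers can then consume with sbb/adc; this is why only LTU
   and GEU survive the checks below.  */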
ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
{
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle DImode compares that go through the special path.
     Also, we can't deal with FP compares yet; adding that is possible.  */
  if ((mode == DImode && !TARGET_64BIT))

  if (FLOAT_MODE_P (mode))
    {
      rtx second_test = NULL, bypass_test = NULL;
      rtx compare_op, compare_seq;

      /* Shortcut: the following common codes never translate into
         carry flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
          || code == ORDERED || code == UNORDERED)

      /* These comparisons require the zero flag; swap the operands so
         they no longer do.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
          code = swap_condition (code);

      /* Try to expand the comparison and verify that we end up with a
         carry flag based comparison.  This fails to be true only when
         we decide to expand the comparison using arithmetic, which is
         not a common scenario.  */
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
                                           &second_test, &bypass_test);
      compare_seq = get_insns ();

      if (second_test || bypass_test)
      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
          || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
        code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
        code = GET_CODE (compare_op);
      if (code != LTU && code != GEU)
      emit_insn (compare_seq);

  if (!INTEGRAL_MODE_P (mode))

      /* Convert a==0 into (unsigned)a<1.  */
      if (op1 != const0_rtx)
      code = (code == EQ ? LTU : GEU);

      /* Convert a>b into b<a or a>=b+1.  */
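      /* E.g. unsigned "a > 4" becomes "a >= 5" (GEU), which the carry
         flag can test directly; merely swapping to "4 < a" would
         instead force the constant into a register.  */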
      if (GET_CODE (op1) == CONST_INT)
        {
          op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
          /* Bail out on overflow.  We can still swap the operands, but
             that would force loading of the constant into a register.  */
          if (op1 == const0_rtx
              || !x86_64_immediate_operand (op1, GET_MODE (op1)))
          code = (code == GTU ? GEU : LTU);
          code = (code == GTU ? LTU : GEU);

      /* Convert a>=0 into (unsigned)a<0x80000000.  */
      if (mode == DImode || op1 != const0_rtx)
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LT ? GEU : LTU);
      if (mode == DImode || op1 != constm1_rtx)
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LE ? GEU : LTU);

  /* Swapping operands may cause the constant to appear as the first
     operand.  */
  if (!nonimmediate_operand (op0, VOIDmode))
      op0 = force_reg (mode, op0);
  ix86_compare_op0 = op0;
  ix86_compare_op1 = op1;
  *pop = ix86_expand_compare (code, NULL, NULL);
  if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
ix86_expand_int_movcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  rtx second_test, bypass_test;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;

  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
  compare_seq = get_insns ();

  compare_code = GET_CODE (compare_op);

  if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
      || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
    sign_bit_compare_p = true;

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */
  if ((mode != HImode || TARGET_FAST_PREFIX)
      && (mode != DImode || TARGET_64BIT)
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      /* Sign bit compares are better done using shifts than we do by
         using sbb.  */
      if (sign_bit_compare_p
          || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
                                             ix86_compare_op1, &compare_op))
        {
          /* Detect overlap between destination and compare sources.  */

          if (!sign_bit_compare_p)
              compare_code = GET_CODE (compare_op);

              if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
                  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
                compare_code = ix86_fp_compare_code_to_integer (compare_code);

              /* To simplify the rest of the code, restrict to the GEU
                 case.  */
              if (compare_code == LTU)
                {
                  HOST_WIDE_INT tmp = ct;
                  compare_code = reverse_condition (compare_code);
                  code = reverse_condition (code);
                  PUT_CODE (compare_op,
                            reverse_condition_maybe_unordered
                            (GET_CODE (compare_op)));
                PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));

          if (reg_overlap_mentioned_p (out, ix86_compare_op0)
              || reg_overlap_mentioned_p (out, ix86_compare_op1))
            tmp = gen_reg_rtx (mode);

            emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
            emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));

          if (code == GT || code == GE)
            code = reverse_condition (code);
              HOST_WIDE_INT tmp = ct;
          tmp = emit_store_flag (tmp, code, ix86_compare_op0,
                                 ix86_compare_op1, VOIDmode, 0, -1);

              tmp = expand_simple_binop (mode, PLUS,
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);
              tmp = expand_simple_binop (mode, IOR,
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);
          else if (diff == -1 && ct)
              tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
              tmp = expand_simple_binop (mode, PLUS,
                                         copy_rtx (tmp), GEN_INT (cf),
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);

              /*
               * andl cf - ct, dest
               */
              tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
              tmp = expand_simple_binop (mode, AND,
                                         gen_int_mode (cf - ct, mode),
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);
              tmp = expand_simple_binop (mode, PLUS,
                                         copy_rtx (tmp), GEN_INT (ct),
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);

          if (!rtx_equal_p (tmp, out))
            emit_move_insn (copy_rtx (out), copy_rtx (tmp));

          return 1; /* DONE */
          tmp = ct, ct = cf, cf = tmp;

          if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
            {
              /* We may be reversing an unordered compare to a normal
                 compare; that is not valid in general (we may convert a
                 non-trapping condition into a trapping one), but on i386
                 we currently emit all comparisons unordered.  */
              compare_code = reverse_condition_maybe_unordered (compare_code);
              code = reverse_condition_maybe_unordered (code);
              compare_code = reverse_condition (compare_code);
              code = reverse_condition (code);

      compare_code = UNKNOWN;
      if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
          && GET_CODE (ix86_compare_op1) == CONST_INT)
        {
          if (ix86_compare_op1 == const0_rtx
              && (code == LT || code == GE))
            compare_code = code;
          else if (ix86_compare_op1 == constm1_rtx)
            else if (code == GT)
      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
      if (compare_code != UNKNOWN
          && GET_MODE (ix86_compare_op0) == GET_MODE (out)
          && (cf == -1 || ct == -1))
        {
          /* If the lea code below could be used, only optimize if it
             results in a 2-insn sequence.  */

          if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
                 || diff == 3 || diff == 5 || diff == 9)
              || (compare_code == LT && ct == -1)
              || (compare_code == GE && cf == -1))
            {
              /*
               * notl op1 (if necessary)
               */
                  code = reverse_condition (code);

              out = emit_store_flag (out, code, ix86_compare_op0,
                                     ix86_compare_op1, VOIDmode, 0, -1);

              out = expand_simple_binop (mode, IOR,
                                         out, 1, OPTAB_DIRECT);
              if (out != operands[0])
                emit_move_insn (operands[0], out);

              return 1; /* DONE */

      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
           || diff == 3 || diff == 5 || diff == 9)
          && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
              || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
        {
          /*
           * lea cf(dest*(ct-cf)),dest
           *
           * This also catches the degenerate setcc-only case.
           */
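          /* E.g. (a sketch) for ct = 5, cf = 2, so diff = 3:
               setcc dest                  ; dest = 0 or 1
               lea   2(dest,dest,2), dest  ; dest = 2 or 5
             i.e. one setcc plus one lea, with no branch.  */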
          out = emit_store_flag (out, code, ix86_compare_op0,
                                 ix86_compare_op1, VOIDmode, 0, 1);

          /* On x86_64 the lea instruction operates on Pmode, so we need
             to get the arithmetic done in the proper mode to match.  */
            tmp = copy_rtx (out);
              out1 = copy_rtx (out);
              tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
                tmp = gen_rtx_PLUS (mode, tmp, out1);
            tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));

          if (!rtx_equal_p (tmp, out))
              out = force_operand (tmp, copy_rtx (out));
              emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));

          if (!rtx_equal_p (out, operands[0]))
            emit_move_insn (operands[0], copy_rtx (out));

          return 1; /* DONE */

      /*
       * General case:                  Jumpful:
       *   xorl dest,dest                 cmpl op1, op2
       *   cmpl op1, op2                  movl ct, dest
       *   decl dest                      movl cf, dest
       *   andl (cf-ct),dest            1:
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for size.
       */

      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
          && BRANCH_COST >= 2)
          if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
            /* We may be reversing an unordered compare to a normal
               compare; that is not valid in general (we may convert a
               non-trapping condition into a trapping one), but on i386
               we currently emit all comparisons unordered.  */
            code = reverse_condition_maybe_unordered (code);
              code = reverse_condition (code);
              if (compare_code != UNKNOWN)
                compare_code = reverse_condition (compare_code);

          if (compare_code != UNKNOWN)
            {
              /* notl op1 (if needed)

                 For x < 0 (resp. x <= -1) there will be no notl,
                 so if possible swap the constants to get rid of the
                 complement.
                 True/false will be -1/0 while the code below (store flag
                 followed by decrement) is 0/-1, so the constants need
                 to be exchanged once more.  */

              if (compare_code == GE || !cf)
                  code = reverse_condition (code);
                  HOST_WIDE_INT tmp = cf;

              out = emit_store_flag (out, code, ix86_compare_op0,
                                     ix86_compare_op1, VOIDmode, 0, -1);

              out = emit_store_flag (out, code, ix86_compare_op0,
                                     ix86_compare_op1, VOIDmode, 0, 1);

              out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
                                         copy_rtx (out), 1, OPTAB_DIRECT);

          out = expand_simple_binop (mode, AND, copy_rtx (out),
                                     gen_int_mode (cf - ct, mode),
                                     copy_rtx (out), 1, OPTAB_DIRECT);
          out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
                                     copy_rtx (out), 1, OPTAB_DIRECT);
          if (!rtx_equal_p (out, operands[0]))
            emit_move_insn (operands[0], copy_rtx (out));

          return 1; /* DONE */
  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
    {
      /* Try a few more things with specific constants and a variable.  */

      rtx var, orig_out, out, tmp;

      if (BRANCH_COST <= 2)
        return 0; /* FAIL */

      /* If one of the two operands is an interesting constant, load -1
         or 0 with the constant-only conditional move above and mask the
         variable in with a logical operation.  */
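      /* E.g. (a sketch) "x = cond ? 0 : v" is rewritten below as
           t = cond ? 0 : -1;   /- via the recursive movcc call -/
           x = t & v;
         and "x = cond ? -1 : v" likewise becomes "t | v".  */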
      if (GET_CODE (operands[2]) == CONST_INT)
        {
          if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
            operands[3] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
            operands[3] = const0_rtx, op = ior_optab;
            return 0; /* FAIL */
        }
      else if (GET_CODE (operands[3]) == CONST_INT)
        {
          if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
            operands[2] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
            operands[2] = const0_rtx, op = ior_optab;
            return 0; /* FAIL */
        }
        return 0; /* FAIL */

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
        return 0; /* FAIL */

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
      if (!rtx_equal_p (out, orig_out))
        emit_move_insn (copy_rtx (orig_out), copy_rtx (out));

      return 1; /* DONE */
  /*
   * For comparison with above,
   */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);
    }

  if (! register_operand (operands[2], VOIDmode)
        || ! register_operand (operands[3], VOIDmode)))
    operands[2] = force_reg (mode, operands[2]);

      && ! register_operand (operands[3], VOIDmode))
    operands[3] = force_reg (mode, operands[3]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode,
                                                compare_op, operands[2],

    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
                            gen_rtx_IF_THEN_ELSE (mode,
                                                  copy_rtx (operands[3]),
                                                  copy_rtx (operands[0]))));
    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
                            gen_rtx_IF_THEN_ELSE (mode,
                                                  copy_rtx (operands[2]),
                                                  copy_rtx (operands[0]))));

  return 1; /* DONE */
ix86_expand_fp_movcc (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp, compare_op, second_test, bypass_test;

  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    {
      rtx cmp_op0, cmp_op1, if_true, if_false;
      enum machine_mode vmode, cmode;
      bool is_minmax = false;

      cmp_op0 = ix86_compare_op0;
      cmp_op1 = ix86_compare_op1;
      if_true = operands[2];
      if_false = operands[3];

      /* Since we have no cmove for SSE registers, don't force bad
         register allocation just to gain access to it.  Deny movcc when
         the comparison mode doesn't match the move mode.  */
      cmode = GET_MODE (cmp_op0);
      if (cmode == VOIDmode)
        cmode = GET_MODE (cmp_op1);

      /* We have no LTGT as an operator.  We could implement it with
         NE & ORDERED, but this requires an extra temporary.  It's
         not clear that it's worth it.  */
      if (code == LTGT || code == UNEQ)

      /* Massage the condition to satisfy sse_comparison_operator.  Try
         to canonicalize the destination operand to be first in the
         comparison - this helps reload to avoid extra moves.  */
      if (!sse_comparison_operator (operands[1], VOIDmode)
          || (COMMUTATIVE_P (operands[1])
              && rtx_equal_p (operands[0], cmp_op1)))
          code = swap_condition (code);

      /* Detect conditional moves that exactly match min/max operational
         semantics.  Note that this is IEEE safe, as long as we don't
         interchange the operands.  That is why we keep this in the form
         of an IF_THEN_ELSE instead of reducing it to SMIN/SMAX.  */
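      /* E.g. "t = a < b ? a : b" matches the SSE minss/minsd semantics
         "dest = dest < src ? dest : src" exactly, including the
         unordered (NaN) case, where the second operand is returned.  */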
      if ((code == LT || code == UNGE) && REG_P (cmp_op0) && REG_P (cmp_op1))
        {
          if (((cmp_op0 == if_true && cmp_op1 == if_false)
               || (cmp_op0 == if_false && cmp_op1 == if_true)))

      else if (mode == DFmode)

      cmp_op0 = force_reg (mode, cmp_op0);
      if (!nonimmediate_operand (cmp_op1, mode))
        cmp_op1 = force_reg (mode, cmp_op1);

      tmp = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
      gcc_assert (sse_comparison_operator (tmp, VOIDmode));

      tmp = gen_rtx_IF_THEN_ELSE (mode, tmp, if_true, if_false);
      tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);

          clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (vmode));
          tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      if (second_test != NULL || bypass_test != NULL)

      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);

      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode, compare_op,
                                                operands[2], operands[3])));
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                            gen_rtx_IF_THEN_ELSE (mode, bypass_test,
                                                  operands[3], operands[0])));
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                            gen_rtx_IF_THEN_ELSE (mode, second_test,
                                                  operands[2], operands[0])));
ix86_split_sse_movcc (rtx operands[])
{
  rtx dest, scratch, cmp, op_true, op_false, x;
  enum machine_mode mode, vmode;

  /* Note that the operator CMP has been set up with matching constraints
     such that dest is valid for the comparison.  Unless one of the true
     or false operands is zero, the true operand has already been placed
     in SCRATCH.  */
  dest = operands[0];
  scratch = operands[1];
  op_true = operands[2];
  op_false = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (scratch);

  /* We need to make sure that the TRUE and FALSE operands are out of the
     way of the destination.  Marking the destination earlyclobber doesn't
     work, since we want matching constraints for the actual comparison, so
     at some point we always wind up having to do a copy ourselves here.
     We very much prefer the TRUE value to be in SCRATCH.  If it turns out
     that FALSE overlaps DEST, then we invert the comparison so that we
     still only have to do one move.  */
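  /* Schematically, the masked sequence built in the general case below
     is:
       dest    = cmp (...)        ; all-ones where the condition holds
       scratch = scratch & dest   ; scratch already holds the TRUE value
       dest    = ~dest & op_false
       dest    = dest | scratch  */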
  if (rtx_equal_p (op_false, dest))
    {
      if (rtx_equal_p (op_true, dest))
        {
          /* ??? Really ought not happen.  It means some optimizer managed
             to prove the operands were identical, but failed to fold the
             conditional move to a straight move.  Do so here, because
             otherwise we'll generate incorrect code.  And since they're
             both already in the destination register, nothing to do.  */

      x = gen_rtx_REG (mode, REGNO (scratch));
      emit_move_insn (x, op_false);
      op_false = op_true;

      code = GET_CODE (cmp);
      code = reverse_condition_maybe_unordered (code);
      cmp = gen_rtx_fmt_ee (code, mode, XEXP (cmp, 0), XEXP (cmp, 1));
    }
  else if (op_true == CONST0_RTX (mode))
  else if (op_false == CONST0_RTX (mode) && !rtx_equal_p (op_true, dest))
    {
      x = gen_rtx_REG (mode, REGNO (scratch));
      emit_move_insn (x, op_true);
    }

  emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
  dest = simplify_gen_subreg (vmode, dest, mode, 0);

  if (op_false == CONST0_RTX (mode))
    {
      op_true = simplify_gen_subreg (vmode, op_true, mode, 0);
      x = gen_rtx_AND (vmode, dest, op_true);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));

      op_false = simplify_gen_subreg (vmode, op_false, mode, 0);

      if (op_true == CONST0_RTX (mode))
        {
          x = gen_rtx_NOT (vmode, dest);
          x = gen_rtx_AND (vmode, x, op_false);
          emit_insn (gen_rtx_SET (VOIDmode, dest, x));

          x = gen_rtx_AND (vmode, scratch, dest);
          emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

          x = gen_rtx_NOT (vmode, dest);
          x = gen_rtx_AND (vmode, x, op_false);
          emit_insn (gen_rtx_SET (VOIDmode, dest, x));

          x = gen_rtx_IOR (vmode, dest, scratch);
          emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
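/* E.g. (a sketch) an unsigned "x = y + (a < b)" becomes:
     cmpl  b, a       ; CF = (a < b)
     adcl  $0, x      ; x = y + CF   (with x preloaded from y)
   avoiding both a setcc and a branch.  */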
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode = GET_MODE (operands[0]);

  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
                                       ix86_compare_op1, &compare_op))
  code = GET_CODE (compare_op);

  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
      code = ix86_fp_compare_code_to_integer (code);

      PUT_CODE (compare_op,
                reverse_condition_maybe_unordered
                (GET_CODE (compare_op)));
    PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
  PUT_MODE (compare_op, mode);

  /* Construct either the adc or the sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (GET_MODE (operands[0]))
          emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
          emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
          emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
          emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));

      switch (GET_MODE (operands[0]))
          emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
          emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
          emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
          emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));

  return 1; /* DONE */
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point operands and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  At most three parts are generated.  */

ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
    size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
  if (size < 2 || size > 3)

  /* Optimize constant pool references to immediates.  This is used by
     fp moves that force all constants to memory to allow combining.  */
  if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsettable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))

      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else if (!TARGET_64BIT)
    {
      if (mode == DImode)
        split_di (&operand, 1, &parts[0], &parts[1]);
          if (REG_P (operand))
            {
              if (!reload_completed)
              parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
              parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
                parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, SImode, 0);
              parts[0] = operand;
              parts[1] = adjust_address (operand, SImode, 4);
                parts[2] = adjust_address (operand, SImode, 8);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
                  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
                  parts[2] = gen_int_mode (l[2], SImode);
                  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
              parts[1] = gen_int_mode (l[1], SImode);
              parts[0] = gen_int_mode (l[0], SImode);
      if (mode == TImode)
        split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
        {
          enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
          if (REG_P (operand))
            {
              if (!reload_completed)
              parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
              parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, DImode, 0);
              parts[0] = operand;
              parts[1] = adjust_address (operand, upper_mode, 8);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              real_to_target (l, &r, mode);

              /* Do not use shift by 32 to avoid warning on 32bit systems.  */
              if (HOST_BITS_PER_WIDE_INT >= 64)
                  ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
                   + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
                parts[0] = immed_double_const (l[0], l[1], DImode);

              if (upper_mode == SImode)
                parts[1] = gen_int_mode (l[2], SImode);
              else if (HOST_BITS_PER_WIDE_INT >= 64)
                  ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
                   + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
                parts[1] = immed_double_const (l[2], l[3], DImode);
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */

ix86_split_long_move (rtx operands[])
{
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move a double.
     For a 64-bit target this is a single move.  By hiding the fact
     here we simplify the i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool references to immediates.  This is used by
         fp moves that force all constants to memory to allow combining.  */
      if (GET_CODE (operands[1]) == MEM
          && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
          && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
        operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
        {
          operands[0] = copy_rtx (operands[0]);
          PUT_MODE (operands[0], Pmode);
          operands[0] = gen_lowpart (DImode, operands[0]);
          operands[1] = gen_lowpart (DImode, operands[1]);
          emit_move_insn (operands[0], operands[1]);

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
  else if (GET_CODE (operands[0]) == MEM
           && ! offsettable_memref_p (operands[0]))

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
  /* When emitting a push, take care of source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
                                   XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
                                   XEXP (part[1][1], 0));
    }

  /* We need to do the copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
          && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))

      /* A collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
          && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
        {
          tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
          tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
        }

      /* If there are more collisions, we can't handle it by reordering.
         Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
        {
          base = part[0][nparts - 1];

          /* Handle the case when the last part isn't valid for lea.
             This happens in 64-bit mode storing the 12-byte XFmode.  */
          if (GET_MODE (base) != Pmode)
            base = gen_rtx_REG (Pmode, REGNO (base));

          emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
          part[1][0] = replace_equiv_address (part[1][0], base);
          part[1][1] = replace_equiv_address (part[1][1],
                                              plus_constant (base, UNITS_PER_WORD));
            part[1][2] = replace_equiv_address (part[1][2],
                                                plus_constant (base, 8));
      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
        emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
      emit_move_insn (part[0][2], part[1][2]);

          /* In 64-bit mode we don't have a 32-bit push available.  If
             this is a register, it is OK - we will just use the larger
             counterpart.  We also retype memory - these come from an
             attempt to avoid the REX prefix on moving the second half of
             a TFmode value.  */
          if (GET_MODE (part[1][1]) == SImode)
            {
              if (GET_CODE (part[1][1]) == MEM)
                part[1][1] = adjust_address (part[1][1], DImode, 0);
              else if (REG_P (part[1][1]))
                part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));

              if (GET_MODE (part[1][0]) == SImode)
                part[1][0] = part[1][1];

      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);

  /* Choose the correct order so we don't overwrite the source before
     it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
               && REGNO (part[0][0]) == REGNO (part[1][2]))))
          && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      operands[2] = part[0][2];
      operands[3] = part[0][1];
      operands[4] = part[0][0];
      operands[5] = part[1][2];
      operands[6] = part[1][1];
      operands[7] = part[1][0];

      operands[2] = part[0][1];
      operands[3] = part[0][0];
      operands[5] = part[1][1];
      operands[6] = part[1][0];

      operands[2] = part[0][0];
      operands[3] = part[0][1];
      operands[4] = part[0][2];
      operands[5] = part[1][0];
      operands[6] = part[1][1];
      operands[7] = part[1][2];

      operands[2] = part[0][0];
      operands[3] = part[0][1];
      operands[5] = part[1][0];
      operands[6] = part[1][1];
  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
      if (GET_CODE (operands[5]) == CONST_INT
          && operands[5] != const0_rtx
          && REG_P (operands[2]))
        {
          if (GET_CODE (operands[6]) == CONST_INT
              && INTVAL (operands[6]) == INTVAL (operands[5]))
            operands[6] = operands[2];

              && GET_CODE (operands[7]) == CONST_INT
              && INTVAL (operands[7]) == INTVAL (operands[5]))
            operands[7] = operands[2];

          && GET_CODE (operands[6]) == CONST_INT
          && operands[6] != const0_rtx
          && REG_P (operands[3])
          && GET_CODE (operands[7]) == CONST_INT
          && INTVAL (operands[7]) == INTVAL (operands[6]))
        operands[7] = operands[3];

  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
    emit_move_insn (operands[4], operands[7]);
/* Helper function of ix86_split_ashldi used to generate an SImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.  */

ix86_expand_ashlsi3_const (rtx operand, int count)
{
    emit_insn (gen_addsi3 (operand, operand, operand));
  else if (!optimize_size
           && count * ix86_cost->add <= ix86_cost->shift_const)
    {
      for (i = 0; i < count; i++)
        emit_insn (gen_addsi3 (operand, operand, operand));
    }
    emit_insn (gen_ashlsi3 (operand, operand, GEN_INT (count)));
ix86_split_ashldi (rtx *operands, rtx scratch)
{
  rtx low[2], high[2];

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

          emit_move_insn (high[0], low[1]);
          emit_move_insn (low[0], const0_rtx);

            ix86_expand_ashlsi3_const (high[0], count - 32);

          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);
          emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
          ix86_expand_ashlsi3_const (low[0], count);

      split_di (operands, 1, low, high);

      if (operands[1] == const1_rtx)
        {
          /* Assuming we've chosen QImode-capable registers, then 1LL << N
             can be done with two 32-bit shifts, no branches, no cmoves.  */
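          /* Roughly (a sketch of the idea, not the exact insns):
               xorl  low, low
               xorl  high, high
               testb $32, %cl
               sete  low_b       ; low  = (count < 32)
               setne high_b      ; high = (count >= 32)
               shll  %cl, low    ; the hardware masks the count to 5 bits,
               shll  %cl, high   ; so each shift is by count mod 32.  */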
          if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
            {
              rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);

              ix86_expand_clear (low[0]);
              ix86_expand_clear (high[0]);
              emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));

              d = gen_lowpart (QImode, low[0]);
              d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
              s = gen_rtx_EQ (QImode, flags, const0_rtx);
              emit_insn (gen_rtx_SET (VOIDmode, d, s));

              d = gen_lowpart (QImode, high[0]);
              d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
              s = gen_rtx_NE (QImode, flags, const0_rtx);
              emit_insn (gen_rtx_SET (VOIDmode, d, s));

          /* Otherwise, we can get the same results by manually performing
             a bit extract operation on bit 5, and then performing the two
             shifts.  The two methods of getting 0/1 into low/high are
             exactly the same size.  Avoiding the shift in the bit extract
             case helps pentium4 a bit; no one else seems to care much
             either way.  */

              if (TARGET_PARTIAL_REG_STALL && !optimize_size)
                x = gen_rtx_ZERO_EXTEND (SImode, operands[2]);
                x = gen_lowpart (SImode, operands[2]);
              emit_insn (gen_rtx_SET (VOIDmode, high[0], x));

              emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (5)));
              emit_insn (gen_andsi3 (high[0], high[0], GEN_INT (1)));
              emit_move_insn (low[0], high[0]);
              emit_insn (gen_xorsi3 (low[0], low[0], GEN_INT (1)));

          emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
          emit_insn (gen_ashlsi3 (high[0], high[0], operands[2]));

      if (operands[1] == constm1_rtx)
        {
          /* For -1LL << N, we can avoid the shld instruction, because we
             know that we're shifting 0...31 ones into a -1.  */
          emit_move_insn (low[0], constm1_rtx);
            emit_move_insn (high[0], low[0]);
            emit_move_insn (high[0], constm1_rtx);

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);
      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));

      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));

      if (TARGET_CMOVE && scratch)
        {
          ix86_expand_clear (scratch);
          emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));

        emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
ix86_split_ashrdi (rtx *operands, rtx scratch)
{
  rtx low[2], high[2];

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

          emit_move_insn (high[0], high[1]);
          emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
          emit_move_insn (low[0], high[0]);

      else if (count >= 32)
        {
          emit_move_insn (low[0], high[1]);
          emit_move_insn (high[0], low[0]);
          emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
            emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));

          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);
          emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
          emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
        {
          emit_move_insn (scratch, high[0]);
          emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
          emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
                                          scratch));

        emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
ix86_split_lshrdi (rtx *operands, rtx scratch)
{
  rtx low[2], high[2];

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

          emit_move_insn (low[0], high[1]);
          ix86_expand_clear (high[0]);

            emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));

          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);
          emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
          emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && scratch)
        {
          ix86_expand_clear (scratch);
          emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
                                          scratch));

        emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
/* Helper function for the string operations below.  Tests whether
   VARIABLE is aligned to VALUE bytes.  If so, jumps to the returned
   label.  */
ix86_expand_aligntest (rtx variable, int value)
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),

/* Adjust COUNTREG by VALUE.  */
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  if (GET_MODE (countreg) == DImode)
    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
}

/* Zero-extend the possibly-SImode EXP to a Pmode register.  */
ix86_zero_extend_to_Pmode (rtx exp)
{
  if (GET_MODE (exp) == VOIDmode)
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
/* Expand a string move (memcpy) operation.  Use i386 string operations
   when profitable.  expand_clrmem contains similar code.  */
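/* E.g. (a sketch) a known 32-byte copy with 4-byte alignment on ia32
   typically expands to:
     cld
     movl $8, %ecx
     rep movsl
   with no residual byte copies, since 32 is a multiple of 4.  */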
ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
{
  rtx srcreg, destreg, countreg, srcexp, destexp;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi or edi.  */
  if (global_regs[4] || global_regs[5])

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)

  /* Figure out the proper mode for the counter.  For 32 bits it is
     always SImode; for 64 bits use SImode when possible, otherwise
     DImode.  Set count to the number of bytes copied when known at
     compile time.  */
      || GET_MODE (count_exp) == SImode
      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
    counter_mode = SImode;
    counter_mode = DImode;

  if (counter_mode != SImode && counter_mode != DImode)

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  if (destreg != XEXP (dst, 0))
    dst = replace_equiv_address_nv (dst, destreg);
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
  if (srcreg != XEXP (src, 0))
    src = replace_equiv_address_nv (src, srcreg);

  /* When optimizing for size, emit a simple "rep ; movsb" instruction
     for counts not divisible by 4.  */
  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      emit_insn (gen_cld ());
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
      srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
      emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,

  /* For constant aligned (or small unaligned) copies use rep movsl
     followed by code copying the rest.  For PentiumPro ensure 8 byte
     alignment to allow rep movsl acceleration.  */
  else if (count != 0
           || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
           || optimize_size || count < (unsigned int) 64))
    {
      unsigned HOST_WIDE_INT offset = 0;
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      rtx srcmem, dstmem;

      emit_insn (gen_cld ());
      if (count & ~(size - 1))
        {
          countreg = copy_to_mode_reg (counter_mode,
                                       GEN_INT ((count >> (size == 4 ? 2 : 3))
                                                & (TARGET_64BIT ? -1 : 0x3fffffff)));
          countreg = ix86_zero_extend_to_Pmode (countreg);

          destexp = gen_rtx_ASHIFT (Pmode, countreg,
                                    GEN_INT (size == 4 ? 2 : 3));
          srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
          destexp = gen_rtx_PLUS (Pmode, destexp, destreg);

          emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
                                  countreg, destexp, srcexp));
          offset = count & ~(size - 1);
        }
      if (size == 8 && (count & 0x04))
        {
          srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
          dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));

          srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
          dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));

          srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
          dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
  /* The generic code based on the glibc implementation:
     - align the destination to 4 bytes (8-byte alignment is used for
       PentiumPro, allowing accelerated copying there)
     - copy the data using rep movsl
     - copy the rest.  */
    {
      rtx srcmem, dstmem;
      int desired_alignment = (TARGET_PENTIUMPRO
                               && (count == 0 || count >= (unsigned int) 260)
                               ? 8 : UNITS_PER_WORD);

      /* Get rid of MEM_OFFSETs, they won't be accurate.  */
      dst = change_address (dst, BLKmode, destreg);
      src = change_address (src, BLKmode, srcreg);

      /* In case we don't know anything about the alignment, default to
         the library version, since it is usually equally fast and
         results in shorter code.

         Also emit the call when we know that the count is large and the
         call overhead will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
          && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))

      if (TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);

      /* We don't use loops to align the destination or to copy parts
         smaller than 4 bytes, because gcc is able to optimize such code
         better (in the case the destination or the count really is
         aligned, gcc is often able to predict the branches) and it is
         friendlier to the hardware branch prediction.

         Using loops is beneficial for the generic case, because we can
         handle small counts using the loops.  Many CPUs (such as Athlon)
         have large REP prefix setup costs.

         This is quite costly.  Maybe we can revisit this decision later
         or add some customizability to this code.  */
      if (count == 0 && align < desired_alignment)
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
                                   LEU, 0, counter_mode, 1, label);

          rtx label = ix86_expand_aligntest (destreg, 1);
          srcmem = change_address (src, QImode, srcreg);
          dstmem = change_address (dst, QImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          ix86_adjust_counter (countreg, 1);
          emit_label (label);
          LABEL_NUSES (label) = 1;

          rtx label = ix86_expand_aligntest (destreg, 2);
          srcmem = change_address (src, HImode, srcreg);
          dstmem = change_address (dst, HImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          ix86_adjust_counter (countreg, 2);
          emit_label (label);
          LABEL_NUSES (label) = 1;

      if (align <= 4 && desired_alignment > 4)
        {
          rtx label = ix86_expand_aligntest (destreg, 4);
          srcmem = change_address (src, SImode, srcreg);
          dstmem = change_address (dst, SImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          ix86_adjust_counter (countreg, 4);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }

      if (label && desired_alignment > 4 && !TARGET_64BIT)
        {
          emit_label (label);
          LABEL_NUSES (label) = 1;

      if (!TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());

          emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
          destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));

          emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
          destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);

      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
                              countreg2, destexp, srcexp));

          emit_label (label);
          LABEL_NUSES (label) = 1;

      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
        {
          srcmem = change_address (src, SImode, srcreg);
          dstmem = change_address (dst, SImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
        }
      if ((align <= 4 || count == 0) && TARGET_64BIT)
        {
          rtx label = ix86_expand_aligntest (countreg, 4);
          srcmem = change_address (src, SImode, srcreg);
          dstmem = change_address (dst, SImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 2 && count != 0 && (count & 2))
        {
          srcmem = change_address (src, HImode, srcreg);
          dstmem = change_address (dst, HImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
        }
      if (align <= 2 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 2);
          srcmem = change_address (src, HImode, srcreg);
          dstmem = change_address (dst, HImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 1 && count != 0 && (count & 1))
        {
          srcmem = change_address (src, QImode, srcreg);
          dstmem = change_address (dst, QImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
        }
      if (align <= 1 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 1);
          srcmem = change_address (src, QImode, srcreg);
          dstmem = change_address (dst, QImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
/* Expand a string clear operation (bzero).  Use i386 string operations
   when profitable.  expand_movmem contains similar code.  */
ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
{
  rtx destreg, zeroreg, countreg, destexp;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi.  */
  if (global_regs[4])

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)

  /* Figure out the proper mode for the counter.  For 32 bits it is
     always SImode; for 64 bits use SImode when possible, otherwise
     DImode.  Set count to the number of bytes cleared when known at
     compile time.  */
      || GET_MODE (count_exp) == SImode
      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
    counter_mode = SImode;
    counter_mode = DImode;

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  if (destreg != XEXP (dst, 0))
    dst = replace_equiv_address_nv (dst, destreg);

  /* When optimizing for size, emit a simple "rep ; stosb" instruction
     for counts not divisible by 4.  The "movl $N, %ecx; rep; stosb"
     sequence is 7 bytes long, so if optimizing for size and the count
     is small enough that some stosl, stosw and stosb instructions
     without rep are shorter, fall back into the next if.  */
  if ((!optimize || optimize_size)
      && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
    {
      emit_insn (gen_cld ());

      countreg = ix86_zero_extend_to_Pmode (count_exp);
      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
    }
  else if (count != 0
           || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
           || optimize_size || count < (unsigned int) 64))
    {
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      unsigned HOST_WIDE_INT offset = 0;

      emit_insn (gen_cld ());

      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
      if (count & ~(size - 1))
        {
          unsigned HOST_WIDE_INT repcount;
          unsigned int max_nonrep;

          repcount = count >> (size == 4 ? 2 : 3);
            repcount &= 0x3fffffff;

          /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N
             bytes.  movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq
             is 2xN bytes.  In both cases the latter seems to be faster
             for small values of N.  */
          max_nonrep = size == 4 ? 7 : 4;
          if (!optimize_size)
              case PROCESSOR_PENTIUM4:
              case PROCESSOR_NOCONA:

          if (repcount <= max_nonrep)
            while (repcount-- > 0)
              {
                rtx mem = adjust_automodify_address_nv (dst,
                                                        GET_MODE (zeroreg),
                emit_insn (gen_strset (destreg, mem, zeroreg));
              }
            {
              countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
              countreg = ix86_zero_extend_to_Pmode (countreg);
              destexp = gen_rtx_ASHIFT (Pmode, countreg,
                                        GEN_INT (size == 4 ? 2 : 3));
              destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
              emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
              offset = count & ~(size - 1);
            }
        }
      if (size == 8 && (count & 0x04))
        {
          rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
          emit_insn (gen_strset (destreg, mem,
                                 gen_rtx_SUBREG (SImode, zeroreg, 0)));

          rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
          emit_insn (gen_strset (destreg, mem,
                                 gen_rtx_SUBREG (HImode, zeroreg, 0)));

          rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
          emit_insn (gen_strset (destreg, mem,
                                 gen_rtx_SUBREG (QImode, zeroreg, 0)));
      /* Compute the desired alignment of the string operation.  */
      int desired_alignment = (TARGET_PENTIUMPRO
                               && (count == 0 || count >= (unsigned int) 260)
                               ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
         the library version, since it is usually equally fast and
         results in shorter code.

         Also emit the call when we know that the count is large and the
         call overhead will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
          && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
      if (TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);
      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
      /* Get rid of MEM_OFFSET, it won't be accurate.  */
      dst = change_address (dst, BLKmode, destreg);

      if (count == 0 && align < desired_alignment)
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
                                   LEU, 0, counter_mode, 1, label);

          rtx label = ix86_expand_aligntest (destreg, 1);
          emit_insn (gen_strset (destreg, dst,
                                 gen_rtx_SUBREG (QImode, zeroreg, 0)));
          ix86_adjust_counter (countreg, 1);
          emit_label (label);
          LABEL_NUSES (label) = 1;

          rtx label = ix86_expand_aligntest (destreg, 2);
          emit_insn (gen_strset (destreg, dst,
                                 gen_rtx_SUBREG (HImode, zeroreg, 0)));
          ix86_adjust_counter (countreg, 2);
          emit_label (label);
          LABEL_NUSES (label) = 1;

      if (align <= 4 && desired_alignment > 4)
        {
          rtx label = ix86_expand_aligntest (destreg, 4);
          emit_insn (gen_strset (destreg, dst,
                                 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
          ix86_adjust_counter (countreg, 4);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }

      if (label && desired_alignment > 4 && !TARGET_64BIT)
        {
          emit_label (label);
          LABEL_NUSES (label) = 1;

      if (!TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());

          emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
          destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));

          emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
          destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);

      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
      emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));

          emit_label (label);
          LABEL_NUSES (label) = 1;

      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
        emit_insn (gen_strset (destreg, dst,
                               gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (TARGET_64BIT && (align <= 4 || count == 0))
        {
          rtx label = ix86_expand_aligntest (countreg, 4);
          emit_insn (gen_strset (destreg, dst,
                                 gen_rtx_SUBREG (SImode, zeroreg, 0)));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 2 && count != 0 && (count & 2))
        emit_insn (gen_strset (destreg, dst,
                               gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (align <= 2 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 2);
          emit_insn (gen_strset (destreg, dst,
                                 gen_rtx_SUBREG (HImode, zeroreg, 0)));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 1 && count != 0 && (count & 1))
        emit_insn (gen_strset (destreg, dst,
                               gen_rtx_SUBREG (QImode, zeroreg, 0)));
      if (align <= 1 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 1);
          emit_insn (gen_strset (destreg, dst,
                                 gen_rtx_SUBREG (QImode, zeroreg, 0)));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
11337 /* Expand strlen. */
11339 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11341 rtx addr, scratch1, scratch2, scratch3, scratch4;
11343 /* The generic case of the strlen expander is long. Avoid expanding
11344 it unless TARGET_INLINE_ALL_STRINGOPS. */
11346 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11347 && !TARGET_INLINE_ALL_STRINGOPS
11349 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11352 addr = force_reg (Pmode, XEXP (src, 0));
11353 scratch1 = gen_reg_rtx (Pmode);
11355 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11358 /* Well it seems that some optimizer does not combine a call like
11359 foo(strlen(bar), strlen(bar));
11360 when the move and the subtraction are done here. It does calculate
11361 the length just once when these instructions are done inside of
11362 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11363 often used and I use one fewer register for the lifetime of
11364 output_strlen_unroll(), this is better. */
11366 emit_move_insn (out, addr);
11368 ix86_expand_strlensi_unroll_1 (out, src, align);
11370 /* strlensi_unroll_1 returns the address of the zero at the end of
11371 the string, like memchr(), so compute the length by subtracting
11372 the start address. */
11374 emit_insn (gen_subdi3 (out, out, addr));
11376 emit_insn (gen_subsi3 (out, out, addr));
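/* In C terms the sequence just emitted computes, for strlen (bar):

     end = <address of the terminating NUL, found by the unrolled scan>;
     len = end - bar;

   OUT was initialized with the start address above for exactly this
   reason.  */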
11381 scratch2 = gen_reg_rtx (Pmode);
11382 scratch3 = gen_reg_rtx (Pmode);
11383 scratch4 = force_reg (Pmode, constm1_rtx);
11385 emit_move_insn (scratch3, addr);
11386 eoschar = force_reg (QImode, eoschar);
11388 emit_insn (gen_cld ());
11389 src = replace_equiv_address_nv (src, scratch3);
11391 /* If .md starts supporting :P, this can be done in .md. */
11392 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11393 scratch4), UNSPEC_SCAS);
11394 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
11397 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11398 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11402 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11403 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
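/* Why ~count - 1 works: the count register starts at -1 and "repnz scasb"
   decrements it once per byte scanned, including the terminating zero, so
   afterwards it holds -(len + 2).  Hence len == ~count - 1, which is
   exactly what the one_cmpl + add(-1) pair above computes.  */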
11409 /* Expand the appropriate insns for doing strlen if not just doing
11410 repnz; scasb
11412 out = result, initialized with the start address
11413 align_rtx = alignment of the address.
11414 scratch = scratch register, initialized with the start address when
11415 not aligned, otherwise undefined
11417 This is just the body. It needs the initializations mentioned above and
11418 some address computing at the end. These things are done in i386.md. */
11421 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11425 rtx align_2_label = NULL_RTX;
11426 rtx align_3_label = NULL_RTX;
11427 rtx align_4_label = gen_label_rtx ();
11428 rtx end_0_label = gen_label_rtx ();
11430 rtx tmpreg = gen_reg_rtx (SImode);
11431 rtx scratch = gen_reg_rtx (SImode);
11435 if (GET_CODE (align_rtx) == CONST_INT)
11436 align = INTVAL (align_rtx);
11438 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11440 /* Is there a known alignment and is it less than 4? */
11443 rtx scratch1 = gen_reg_rtx (Pmode);
11444 emit_move_insn (scratch1, out);
11445 /* Is there a known alignment and is it not 2? */
11448 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11449 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11451 /* Leave just the 3 lower bits. */
11452 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11453 NULL_RTX, 0, OPTAB_WIDEN);
11455 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11456 Pmode, 1, align_4_label);
11457 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
11458 Pmode, 1, align_2_label);
11459 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
11460 Pmode, 1, align_3_label);
11464 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11465 check whether it is aligned to a 4-byte boundary. */
11467 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
11468 NULL_RTX, 0, OPTAB_WIDEN);
11470 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11471 Pmode, 1, align_4_label);
11474 mem = change_address (src, QImode, out);
11476 /* Now compare the bytes. */
11478 /* Compare the first n unaligned bytes one byte at a time. */
11479 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11480 QImode, 1, end_0_label);
11482 /* Increment the address. */
11484 emit_insn (gen_adddi3 (out, out, const1_rtx));
11486 emit_insn (gen_addsi3 (out, out, const1_rtx));
11488 /* Not needed with an alignment of 2 */
11491 emit_label (align_2_label);
11493 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11497 emit_insn (gen_adddi3 (out, out, const1_rtx));
11499 emit_insn (gen_addsi3 (out, out, const1_rtx));
11501 emit_label (align_3_label);
11504 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11508 emit_insn (gen_adddi3 (out, out, const1_rtx));
11510 emit_insn (gen_addsi3 (out, out, const1_rtx));
11513 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11514 align this loop. It only makes programs bigger and does not help
11515 them run faster. */
11516 emit_label (align_4_label);
11518 mem = change_address (src, SImode, out);
11519 emit_move_insn (scratch, mem);
11521 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11523 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11525 /* This formula yields a nonzero result iff one of the bytes is zero.
11526 This saves three branches inside the loop and many cycles. */
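/* A scalar C sketch of this test (hypothetical helper, not part of GCC):

     static int
     has_zero_byte (unsigned int v)
     {
       return ((v - 0x01010101U) & ~v & 0x80808080U) != 0;
     }

   The subtraction borrows through any zero byte, ~v rejects bytes that
   were already 0x80 or larger, and the final mask keeps only each byte's
   sign bit.  */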
11528 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11529 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11530 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11531 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11532 gen_int_mode (0x80808080, SImode)));
11533 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11538 rtx reg = gen_reg_rtx (SImode);
11539 rtx reg2 = gen_reg_rtx (Pmode);
11540 emit_move_insn (reg, tmpreg);
11541 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11543 /* If zero is not in the first two bytes, move two bytes forward. */
11544 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11545 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11546 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11547 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11548 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11551 /* Emit lea manually to avoid clobbering of flags. */
11552 emit_insn (gen_rtx_SET (SImode, reg2,
11553 gen_rtx_PLUS (Pmode, out, const2_rtx)));
11555 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11556 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11557 emit_insn (gen_rtx_SET (VOIDmode, out,
11558 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11565 rtx end_2_label = gen_label_rtx ();
11566 /* Is zero in the first two bytes? */
11568 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11569 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11570 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11571 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11572 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11574 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11575 JUMP_LABEL (tmp) = end_2_label;
11577 /* Not in the first two. Move two bytes forward. */
11578 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11580 emit_insn (gen_adddi3 (out, out, const2_rtx));
11582 emit_insn (gen_addsi3 (out, out, const2_rtx));
11584 emit_label (end_2_label);
11588 /* Avoid branch in fixing the byte. */
11589 tmpreg = gen_lowpart (QImode, tmpreg);
11590 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11591 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11593 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11595 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
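/* The fix-up just emitted is branchless: doubling the low byte of TMPREG
   moves its bit 7 (set iff the earlier of the two candidate bytes is the
   zero) into the carry flag, and the subtract-with-borrow computes
   out -= 3 + carry.  Since OUT points 4 bytes past the start of the
   candidate byte pair, OUT ends up pointing exactly at the zero byte.  */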
11597 emit_label (end_0_label);
11601 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11602 rtx callarg2 ATTRIBUTE_UNUSED,
11603 rtx pop, int sibcall)
11605 rtx use = NULL, call;
11607 if (pop == const0_rtx)
11609 if (TARGET_64BIT && pop)
11613 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11614 fnaddr = machopic_indirect_call_target (fnaddr);
11616 /* Static functions and indirect calls don't need the pic register. */
11617 if (! TARGET_64BIT && flag_pic
11618 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11619 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11620 use_reg (&use, pic_offset_table_rtx);
11622 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11624 rtx al = gen_rtx_REG (QImode, 0);
11625 emit_move_insn (al, callarg2);
11626 use_reg (&use, al);
11628 #endif /* TARGET_MACHO */
11630 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11632 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11633 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11635 if (sibcall && TARGET_64BIT
11636 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11639 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11640 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11641 emit_move_insn (fnaddr, addr);
11642 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11645 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11647 call = gen_rtx_SET (VOIDmode, retval, call);
11650 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11651 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11652 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11655 call = emit_call_insn (call);
11657 CALL_INSN_FUNCTION_USAGE (call) = use;
11661 /* Clear stack slot assignments remembered from previous functions.
11662 This is called from INIT_EXPANDERS once before RTL is emitted for each
11663 function. */
11665 static struct machine_function *
11666 ix86_init_machine_status (void)
11668 struct machine_function *f;
11670 f = ggc_alloc_cleared (sizeof (struct machine_function));
11671 f->use_fast_prologue_epilogue_nregs = -1;
11676 /* Return a MEM corresponding to a stack slot with mode MODE.
11677 Allocate a new slot if necessary.
11679 The RTL for a function can have several slots available: N is
11680 which slot to use. */
11683 assign_386_stack_local (enum machine_mode mode, int n)
11685 struct stack_local_entry *s;
11687 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11690 for (s = ix86_stack_locals; s; s = s->next)
11691 if (s->mode == mode && s->n == n)
11694 s = (struct stack_local_entry *)
11695 ggc_alloc (sizeof (struct stack_local_entry));
11698 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11700 s->next = ix86_stack_locals;
11701 ix86_stack_locals = s;
11705 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11707 static GTY(()) rtx ix86_tls_symbol;
11709 ix86_tls_get_addr (void)
11712 if (!ix86_tls_symbol)
11714 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11715 (TARGET_GNU_TLS && !TARGET_64BIT)
11716 ? "___tls_get_addr"
11717 : "__tls_get_addr");
11720 return ix86_tls_symbol;
11723 /* Calculate the length of the memory address in the instruction
11724 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11727 memory_address_length (rtx addr)
11729 struct ix86_address parts;
11730 rtx base, index, disp;
11733 if (GET_CODE (addr) == PRE_DEC
11734 || GET_CODE (addr) == POST_INC
11735 || GET_CODE (addr) == PRE_MODIFY
11736 || GET_CODE (addr) == POST_MODIFY)
11739 if (! ix86_decompose_address (addr, &parts))
11743 index = parts.index;
11747 /* Rule of thumb:
11748 - esp as the base always wants an index,
11749 - ebp as the base always wants a displacement. */
11751 /* Register Indirect. */
11752 if (base && !index && !disp)
11754 /* esp (for its index) and ebp (for its displacement) need
11755 the two-byte modrm form. */
11756 if (addr == stack_pointer_rtx
11757 || addr == arg_pointer_rtx
11758 || addr == frame_pointer_rtx
11759 || addr == hard_frame_pointer_rtx)
11763 /* Direct Addressing. */
11764 else if (disp && !base && !index)
11769 /* Find the length of the displacement constant. */
11772 if (GET_CODE (disp) == CONST_INT
11773 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11779 /* ebp always wants a displacement. */
11780 else if (base == hard_frame_pointer_rtx)
11783 /* An index requires the two-byte modrm form.... */
11785 /* ...like esp, which always wants an index. */
11786 || base == stack_pointer_rtx
11787 || base == arg_pointer_rtx
11788 || base == frame_pointer_rtx)
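/* Illustrative results (bytes counted beyond the one-byte modrm):
     (%eax)        -> 0   plain register indirect
     (%esp)        -> 1   needs a SIB byte
     (%ebp)        -> 1   needs a zero disp8
     4(%eax)       -> 1   disp8
     foo           -> 4   disp32, direct addressing
     foo(,%eax,4)  -> 5   SIB byte plus disp32  */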
11795 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11796 is set, expect that the insn has an 8-bit immediate alternative. */
11798 ix86_attr_length_immediate_default (rtx insn, int shortform)
11802 extract_insn_cached (insn);
11803 for (i = recog_data.n_operands - 1; i >= 0; --i)
11804 if (CONSTANT_P (recog_data.operand[i]))
11809 && GET_CODE (recog_data.operand[i]) == CONST_INT
11810 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11814 switch (get_attr_mode (insn))
11825 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
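/* E.g. "addq $0x7fffffff, %rax" can be encoded with a 4-byte immediate,
   while 0x80000000 would sign-extend to 0xffffffff80000000 and has to be
   loaded into a register first.  */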
11830 fatal_insn ("unknown insn mode", insn);
11836 /* Compute default value for "length_address" attribute. */
11838 ix86_attr_length_address_default (rtx insn)
11842 if (get_attr_type (insn) == TYPE_LEA)
11844 rtx set = PATTERN (insn);
11845 if (GET_CODE (set) == SET)
11847 else if (GET_CODE (set) == PARALLEL
11848 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11849 set = XVECEXP (set, 0, 0);
11852 #ifdef ENABLE_CHECKING
11858 return memory_address_length (SET_SRC (set));
11861 extract_insn_cached (insn);
11862 for (i = recog_data.n_operands - 1; i >= 0; --i)
11863 if (GET_CODE (recog_data.operand[i]) == MEM)
11865 return memory_address_length (XEXP (recog_data.operand[i], 0));
11871 /* Return the maximum number of instructions a cpu can issue. */
11874 ix86_issue_rate (void)
11878 case PROCESSOR_PENTIUM:
11882 case PROCESSOR_PENTIUMPRO:
11883 case PROCESSOR_PENTIUM4:
11884 case PROCESSOR_ATHLON:
11886 case PROCESSOR_NOCONA:
11894 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11895 by DEP_INSN and nothing else set by DEP_INSN. */
11898 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11902 /* Simplify the test for uninteresting insns. */
11903 if (insn_type != TYPE_SETCC
11904 && insn_type != TYPE_ICMOV
11905 && insn_type != TYPE_FCMOV
11906 && insn_type != TYPE_IBR)
11909 if ((set = single_set (dep_insn)) != 0)
11911 set = SET_DEST (set);
11914 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11915 && XVECLEN (PATTERN (dep_insn), 0) == 2
11916 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11917 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11919 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11920 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
11925 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11928 /* This test is true if the dependent insn reads the flags but
11929 not any other potentially set register. */
11930 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11933 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11939 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11940 address with operands set by DEP_INSN. */
11943 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11947 if (insn_type == TYPE_LEA
11950 addr = PATTERN (insn);
11951 if (GET_CODE (addr) == SET)
11953 else if (GET_CODE (addr) == PARALLEL
11954 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11955 addr = XVECEXP (addr, 0, 0);
11958 addr = SET_SRC (addr);
11963 extract_insn_cached (insn);
11964 for (i = recog_data.n_operands - 1; i >= 0; --i)
11965 if (GET_CODE (recog_data.operand[i]) == MEM)
11967 addr = XEXP (recog_data.operand[i], 0);
11974 return modified_in_p (addr, dep_insn);
11978 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
11980 enum attr_type insn_type, dep_insn_type;
11981 enum attr_memory memory;
11983 int dep_insn_code_number;
11985 /* Anti and output dependencies have zero cost on all CPUs. */
11986 if (REG_NOTE_KIND (link) != 0)
11989 dep_insn_code_number = recog_memoized (dep_insn);
11991 /* If we can't recognize the insns, we can't really do anything. */
11992 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11995 insn_type = get_attr_type (insn);
11996 dep_insn_type = get_attr_type (dep_insn);
12000 case PROCESSOR_PENTIUM:
12001 /* Address Generation Interlock adds a cycle of latency. */
12002 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12005 /* ??? Compares pair with jump/setcc. */
12006 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12009 /* Floating point stores require the value to be ready one cycle earlier. */
12010 if (insn_type == TYPE_FMOV
12011 && get_attr_memory (insn) == MEMORY_STORE
12012 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12016 case PROCESSOR_PENTIUMPRO:
12017 memory = get_attr_memory (insn);
12019 /* INT->FP conversion is expensive. */
12020 if (get_attr_fp_int_src (dep_insn))
12023 /* There is one cycle extra latency between an FP op and a store. */
12024 if (insn_type == TYPE_FMOV
12025 && (set = single_set (dep_insn)) != NULL_RTX
12026 && (set2 = single_set (insn)) != NULL_RTX
12027 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12028 && GET_CODE (SET_DEST (set2)) == MEM)
12031 /* Show the ability of the reorder buffer to hide the latency of a load
12032 by executing it in parallel with the previous instruction when the
12033 previous instruction is not needed to compute the address. */
12034 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12035 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12037 /* Claim moves take one cycle, as the core can issue one load
12038 at a time and the next load can start a cycle later. */
12039 if (dep_insn_type == TYPE_IMOV
12040 || dep_insn_type == TYPE_FMOV)
12048 memory = get_attr_memory (insn);
12050 /* The esp dependency is resolved before the instruction is really
12051 finished. */
12052 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12053 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12056 /* INT->FP conversion is expensive. */
12057 if (get_attr_fp_int_src (dep_insn))
12060 /* Show the ability of the reorder buffer to hide the latency of a load
12061 by executing it in parallel with the previous instruction when the
12062 previous instruction is not needed to compute the address. */
12063 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12064 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12066 /* Claim moves take one cycle, as the core can issue one load
12067 at a time and the next load can start a cycle later. */
12068 if (dep_insn_type == TYPE_IMOV
12069 || dep_insn_type == TYPE_FMOV)
12078 case PROCESSOR_ATHLON:
12080 memory = get_attr_memory (insn);
12082 /* Show the ability of the reorder buffer to hide the latency of a load
12083 by executing it in parallel with the previous instruction when the
12084 previous instruction is not needed to compute the address. */
12085 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12086 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12088 enum attr_unit unit = get_attr_unit (insn);
12091 /* Because of the difference between the lengths of the integer and
12092 floating-point unit pipeline preparation stages, the memory operands
12093 for floating point are cheaper.
12095 ??? For Athlon the difference is most probably 2. */
12096 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12099 loadcost = TARGET_ATHLON ? 2 : 0;
12101 if (cost >= loadcost)
12114 /* How many alternative schedules to try. This should be as wide as the
12115 scheduling freedom in the DFA, but no wider. Making this value too
12116 large results in extra work for the scheduler. */
12119 ia32_multipass_dfa_lookahead (void)
12121 if (ix86_tune == PROCESSOR_PENTIUM)
12124 if (ix86_tune == PROCESSOR_PENTIUMPRO
12125 || ix86_tune == PROCESSOR_K6)
12133 /* Compute the alignment given to a constant that is being placed in memory.
12134 EXP is the constant and ALIGN is the alignment that the object would
12135 ordinarily have.
12136 The value of this function is used instead of that alignment to align
12137 the object. */
12140 ix86_constant_alignment (tree exp, int align)
12142 if (TREE_CODE (exp) == REAL_CST)
12144 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12146 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12149 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12150 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12151 return BITS_PER_WORD;
12156 /* Compute the alignment for a static variable.
12157 TYPE is the data type, and ALIGN is the alignment that
12158 the object would ordinarily have. The value of this function is used
12159 instead of that alignment to align the object. */
12162 ix86_data_alignment (tree type, int align)
12164 if (AGGREGATE_TYPE_P (type)
12165 && TYPE_SIZE (type)
12166 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12167 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12168 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12171 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12172 to a 16-byte boundary. */
12175 if (AGGREGATE_TYPE_P (type)
12176 && TYPE_SIZE (type)
12177 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12178 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12179 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12183 if (TREE_CODE (type) == ARRAY_TYPE)
12185 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12187 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12190 else if (TREE_CODE (type) == COMPLEX_TYPE)
12193 if (TYPE_MODE (type) == DCmode && align < 64)
12195 if (TYPE_MODE (type) == XCmode && align < 128)
12198 else if ((TREE_CODE (type) == RECORD_TYPE
12199 || TREE_CODE (type) == UNION_TYPE
12200 || TREE_CODE (type) == QUAL_UNION_TYPE)
12201 && TYPE_FIELDS (type))
12203 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12205 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12208 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12209 || TREE_CODE (type) == INTEGER_TYPE)
12211 if (TYPE_MODE (type) == DFmode && align < 64)
12213 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
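/* For example, the first test above fires for "static char buf[32];"
   (256 bits), raising its alignment to 256 bits, while "static double d;"
   gets 64-bit alignment from the final test even where DFmode's default
   alignment is only 32 bits.  */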
12220 /* Compute the alignment for a local variable.
12221 TYPE is the data type, and ALIGN is the alignment that
12222 the object would ordinarily have. The value of this macro is used
12223 instead of that alignment to align the object. */
12226 ix86_local_alignment (tree type, int align)
12228 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12229 to a 16-byte boundary. */
12232 if (AGGREGATE_TYPE_P (type)
12233 && TYPE_SIZE (type)
12234 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12235 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12236 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12239 if (TREE_CODE (type) == ARRAY_TYPE)
12241 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12243 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12246 else if (TREE_CODE (type) == COMPLEX_TYPE)
12248 if (TYPE_MODE (type) == DCmode && align < 64)
12250 if (TYPE_MODE (type) == XCmode && align < 128)
12253 else if ((TREE_CODE (type) == RECORD_TYPE
12254 || TREE_CODE (type) == UNION_TYPE
12255 || TREE_CODE (type) == QUAL_UNION_TYPE)
12256 && TYPE_FIELDS (type))
12258 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12260 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12263 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12264 || TREE_CODE (type) == INTEGER_TYPE)
12267 if (TYPE_MODE (type) == DFmode && align < 64)
12269 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12275 /* Emit RTL insns to initialize the variable parts of a trampoline.
12276 FNADDR is an RTX for the address of the function's pure code.
12277 CXT is an RTX for the static chain value for the function. */
12279 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12283 /* Compute offset from the end of the jmp to the target function. */
12284 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12285 plus_constant (tramp, 10),
12286 NULL_RTX, 1, OPTAB_DIRECT);
12287 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12288 gen_int_mode (0xb9, QImode));
12289 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12290 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12291 gen_int_mode (0xe9, QImode));
12292 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
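/* The 10-byte 32-bit trampoline therefore assembles to:
     b9 <cxt:4>    movl $CXT, %ecx
     e9 <disp:4>   jmp  FNADDR   (rel32 measured from the end of the jmp)  */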
12297 /* Try to load address using shorter movl instead of movabs.
12298 We may want to support movq for kernel mode, but the kernel does not use
12299 trampolines at the moment. */
12300 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
12302 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12303 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12304 gen_int_mode (0xbb41, HImode));
12305 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12306 gen_lowpart (SImode, fnaddr));
12311 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12312 gen_int_mode (0xbb49, HImode));
12313 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12317 /* Load static chain using movabs to r10. */
12318 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12319 gen_int_mode (0xba49, HImode));
12320 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12323 /* Jump via r11. */
12324 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12325 gen_int_mode (0xff49, HImode));
12326 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12327 gen_int_mode (0xe3, QImode));
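/* So the 64-bit trampoline is (with the shorter movl form when FNADDR
   fits in an unsigned 32-bit immediate):
     49 bb <fnaddr:8>   movabs $FNADDR, %r11   (or 41 bb <fnaddr:4>)
     49 ba <cxt:8>      movabs $CXT, %r10
     49 ff e3           rex jmp *%r11  */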
12329 if (offset > TRAMPOLINE_SIZE)
12333 #ifdef ENABLE_EXECUTE_STACK
12334 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
12335 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12339 /* Codes for all the SSE/MMX builtins. */
12342 IX86_BUILTIN_ADDPS,
12343 IX86_BUILTIN_ADDSS,
12344 IX86_BUILTIN_DIVPS,
12345 IX86_BUILTIN_DIVSS,
12346 IX86_BUILTIN_MULPS,
12347 IX86_BUILTIN_MULSS,
12348 IX86_BUILTIN_SUBPS,
12349 IX86_BUILTIN_SUBSS,
12351 IX86_BUILTIN_CMPEQPS,
12352 IX86_BUILTIN_CMPLTPS,
12353 IX86_BUILTIN_CMPLEPS,
12354 IX86_BUILTIN_CMPGTPS,
12355 IX86_BUILTIN_CMPGEPS,
12356 IX86_BUILTIN_CMPNEQPS,
12357 IX86_BUILTIN_CMPNLTPS,
12358 IX86_BUILTIN_CMPNLEPS,
12359 IX86_BUILTIN_CMPNGTPS,
12360 IX86_BUILTIN_CMPNGEPS,
12361 IX86_BUILTIN_CMPORDPS,
12362 IX86_BUILTIN_CMPUNORDPS,
12363 IX86_BUILTIN_CMPNEPS,
12364 IX86_BUILTIN_CMPEQSS,
12365 IX86_BUILTIN_CMPLTSS,
12366 IX86_BUILTIN_CMPLESS,
12367 IX86_BUILTIN_CMPNEQSS,
12368 IX86_BUILTIN_CMPNLTSS,
12369 IX86_BUILTIN_CMPNLESS,
12370 IX86_BUILTIN_CMPNGTSS,
12371 IX86_BUILTIN_CMPNGESS,
12372 IX86_BUILTIN_CMPORDSS,
12373 IX86_BUILTIN_CMPUNORDSS,
12374 IX86_BUILTIN_CMPNESS,
12376 IX86_BUILTIN_COMIEQSS,
12377 IX86_BUILTIN_COMILTSS,
12378 IX86_BUILTIN_COMILESS,
12379 IX86_BUILTIN_COMIGTSS,
12380 IX86_BUILTIN_COMIGESS,
12381 IX86_BUILTIN_COMINEQSS,
12382 IX86_BUILTIN_UCOMIEQSS,
12383 IX86_BUILTIN_UCOMILTSS,
12384 IX86_BUILTIN_UCOMILESS,
12385 IX86_BUILTIN_UCOMIGTSS,
12386 IX86_BUILTIN_UCOMIGESS,
12387 IX86_BUILTIN_UCOMINEQSS,
12389 IX86_BUILTIN_CVTPI2PS,
12390 IX86_BUILTIN_CVTPS2PI,
12391 IX86_BUILTIN_CVTSI2SS,
12392 IX86_BUILTIN_CVTSI642SS,
12393 IX86_BUILTIN_CVTSS2SI,
12394 IX86_BUILTIN_CVTSS2SI64,
12395 IX86_BUILTIN_CVTTPS2PI,
12396 IX86_BUILTIN_CVTTSS2SI,
12397 IX86_BUILTIN_CVTTSS2SI64,
12399 IX86_BUILTIN_MAXPS,
12400 IX86_BUILTIN_MAXSS,
12401 IX86_BUILTIN_MINPS,
12402 IX86_BUILTIN_MINSS,
12404 IX86_BUILTIN_LOADUPS,
12405 IX86_BUILTIN_STOREUPS,
12406 IX86_BUILTIN_MOVSS,
12408 IX86_BUILTIN_MOVHLPS,
12409 IX86_BUILTIN_MOVLHPS,
12410 IX86_BUILTIN_LOADHPS,
12411 IX86_BUILTIN_LOADLPS,
12412 IX86_BUILTIN_STOREHPS,
12413 IX86_BUILTIN_STORELPS,
12415 IX86_BUILTIN_MASKMOVQ,
12416 IX86_BUILTIN_MOVMSKPS,
12417 IX86_BUILTIN_PMOVMSKB,
12419 IX86_BUILTIN_MOVNTPS,
12420 IX86_BUILTIN_MOVNTQ,
12422 IX86_BUILTIN_LOADDQU,
12423 IX86_BUILTIN_STOREDQU,
12425 IX86_BUILTIN_PACKSSWB,
12426 IX86_BUILTIN_PACKSSDW,
12427 IX86_BUILTIN_PACKUSWB,
12429 IX86_BUILTIN_PADDB,
12430 IX86_BUILTIN_PADDW,
12431 IX86_BUILTIN_PADDD,
12432 IX86_BUILTIN_PADDQ,
12433 IX86_BUILTIN_PADDSB,
12434 IX86_BUILTIN_PADDSW,
12435 IX86_BUILTIN_PADDUSB,
12436 IX86_BUILTIN_PADDUSW,
12437 IX86_BUILTIN_PSUBB,
12438 IX86_BUILTIN_PSUBW,
12439 IX86_BUILTIN_PSUBD,
12440 IX86_BUILTIN_PSUBQ,
12441 IX86_BUILTIN_PSUBSB,
12442 IX86_BUILTIN_PSUBSW,
12443 IX86_BUILTIN_PSUBUSB,
12444 IX86_BUILTIN_PSUBUSW,
12447 IX86_BUILTIN_PANDN,
12451 IX86_BUILTIN_PAVGB,
12452 IX86_BUILTIN_PAVGW,
12454 IX86_BUILTIN_PCMPEQB,
12455 IX86_BUILTIN_PCMPEQW,
12456 IX86_BUILTIN_PCMPEQD,
12457 IX86_BUILTIN_PCMPGTB,
12458 IX86_BUILTIN_PCMPGTW,
12459 IX86_BUILTIN_PCMPGTD,
12461 IX86_BUILTIN_PMADDWD,
12463 IX86_BUILTIN_PMAXSW,
12464 IX86_BUILTIN_PMAXUB,
12465 IX86_BUILTIN_PMINSW,
12466 IX86_BUILTIN_PMINUB,
12468 IX86_BUILTIN_PMULHUW,
12469 IX86_BUILTIN_PMULHW,
12470 IX86_BUILTIN_PMULLW,
12472 IX86_BUILTIN_PSADBW,
12473 IX86_BUILTIN_PSHUFW,
12475 IX86_BUILTIN_PSLLW,
12476 IX86_BUILTIN_PSLLD,
12477 IX86_BUILTIN_PSLLQ,
12478 IX86_BUILTIN_PSRAW,
12479 IX86_BUILTIN_PSRAD,
12480 IX86_BUILTIN_PSRLW,
12481 IX86_BUILTIN_PSRLD,
12482 IX86_BUILTIN_PSRLQ,
12483 IX86_BUILTIN_PSLLWI,
12484 IX86_BUILTIN_PSLLDI,
12485 IX86_BUILTIN_PSLLQI,
12486 IX86_BUILTIN_PSRAWI,
12487 IX86_BUILTIN_PSRADI,
12488 IX86_BUILTIN_PSRLWI,
12489 IX86_BUILTIN_PSRLDI,
12490 IX86_BUILTIN_PSRLQI,
12492 IX86_BUILTIN_PUNPCKHBW,
12493 IX86_BUILTIN_PUNPCKHWD,
12494 IX86_BUILTIN_PUNPCKHDQ,
12495 IX86_BUILTIN_PUNPCKLBW,
12496 IX86_BUILTIN_PUNPCKLWD,
12497 IX86_BUILTIN_PUNPCKLDQ,
12499 IX86_BUILTIN_SHUFPS,
12501 IX86_BUILTIN_RCPPS,
12502 IX86_BUILTIN_RCPSS,
12503 IX86_BUILTIN_RSQRTPS,
12504 IX86_BUILTIN_RSQRTSS,
12505 IX86_BUILTIN_SQRTPS,
12506 IX86_BUILTIN_SQRTSS,
12508 IX86_BUILTIN_UNPCKHPS,
12509 IX86_BUILTIN_UNPCKLPS,
12511 IX86_BUILTIN_ANDPS,
12512 IX86_BUILTIN_ANDNPS,
12514 IX86_BUILTIN_XORPS,
12517 IX86_BUILTIN_LDMXCSR,
12518 IX86_BUILTIN_STMXCSR,
12519 IX86_BUILTIN_SFENCE,
12521 /* 3DNow! Original */
12522 IX86_BUILTIN_FEMMS,
12523 IX86_BUILTIN_PAVGUSB,
12524 IX86_BUILTIN_PF2ID,
12525 IX86_BUILTIN_PFACC,
12526 IX86_BUILTIN_PFADD,
12527 IX86_BUILTIN_PFCMPEQ,
12528 IX86_BUILTIN_PFCMPGE,
12529 IX86_BUILTIN_PFCMPGT,
12530 IX86_BUILTIN_PFMAX,
12531 IX86_BUILTIN_PFMIN,
12532 IX86_BUILTIN_PFMUL,
12533 IX86_BUILTIN_PFRCP,
12534 IX86_BUILTIN_PFRCPIT1,
12535 IX86_BUILTIN_PFRCPIT2,
12536 IX86_BUILTIN_PFRSQIT1,
12537 IX86_BUILTIN_PFRSQRT,
12538 IX86_BUILTIN_PFSUB,
12539 IX86_BUILTIN_PFSUBR,
12540 IX86_BUILTIN_PI2FD,
12541 IX86_BUILTIN_PMULHRW,
12543 /* 3DNow! Athlon Extensions */
12544 IX86_BUILTIN_PF2IW,
12545 IX86_BUILTIN_PFNACC,
12546 IX86_BUILTIN_PFPNACC,
12547 IX86_BUILTIN_PI2FW,
12548 IX86_BUILTIN_PSWAPDSI,
12549 IX86_BUILTIN_PSWAPDSF,
12552 IX86_BUILTIN_ADDPD,
12553 IX86_BUILTIN_ADDSD,
12554 IX86_BUILTIN_DIVPD,
12555 IX86_BUILTIN_DIVSD,
12556 IX86_BUILTIN_MULPD,
12557 IX86_BUILTIN_MULSD,
12558 IX86_BUILTIN_SUBPD,
12559 IX86_BUILTIN_SUBSD,
12561 IX86_BUILTIN_CMPEQPD,
12562 IX86_BUILTIN_CMPLTPD,
12563 IX86_BUILTIN_CMPLEPD,
12564 IX86_BUILTIN_CMPGTPD,
12565 IX86_BUILTIN_CMPGEPD,
12566 IX86_BUILTIN_CMPNEQPD,
12567 IX86_BUILTIN_CMPNLTPD,
12568 IX86_BUILTIN_CMPNLEPD,
12569 IX86_BUILTIN_CMPNGTPD,
12570 IX86_BUILTIN_CMPNGEPD,
12571 IX86_BUILTIN_CMPORDPD,
12572 IX86_BUILTIN_CMPUNORDPD,
12573 IX86_BUILTIN_CMPNEPD,
12574 IX86_BUILTIN_CMPEQSD,
12575 IX86_BUILTIN_CMPLTSD,
12576 IX86_BUILTIN_CMPLESD,
12577 IX86_BUILTIN_CMPNEQSD,
12578 IX86_BUILTIN_CMPNLTSD,
12579 IX86_BUILTIN_CMPNLESD,
12580 IX86_BUILTIN_CMPORDSD,
12581 IX86_BUILTIN_CMPUNORDSD,
12582 IX86_BUILTIN_CMPNESD,
12584 IX86_BUILTIN_COMIEQSD,
12585 IX86_BUILTIN_COMILTSD,
12586 IX86_BUILTIN_COMILESD,
12587 IX86_BUILTIN_COMIGTSD,
12588 IX86_BUILTIN_COMIGESD,
12589 IX86_BUILTIN_COMINEQSD,
12590 IX86_BUILTIN_UCOMIEQSD,
12591 IX86_BUILTIN_UCOMILTSD,
12592 IX86_BUILTIN_UCOMILESD,
12593 IX86_BUILTIN_UCOMIGTSD,
12594 IX86_BUILTIN_UCOMIGESD,
12595 IX86_BUILTIN_UCOMINEQSD,
12597 IX86_BUILTIN_MAXPD,
12598 IX86_BUILTIN_MAXSD,
12599 IX86_BUILTIN_MINPD,
12600 IX86_BUILTIN_MINSD,
12602 IX86_BUILTIN_ANDPD,
12603 IX86_BUILTIN_ANDNPD,
12605 IX86_BUILTIN_XORPD,
12607 IX86_BUILTIN_SQRTPD,
12608 IX86_BUILTIN_SQRTSD,
12610 IX86_BUILTIN_UNPCKHPD,
12611 IX86_BUILTIN_UNPCKLPD,
12613 IX86_BUILTIN_SHUFPD,
12615 IX86_BUILTIN_LOADUPD,
12616 IX86_BUILTIN_STOREUPD,
12617 IX86_BUILTIN_MOVSD,
12619 IX86_BUILTIN_LOADHPD,
12620 IX86_BUILTIN_LOADLPD,
12622 IX86_BUILTIN_CVTDQ2PD,
12623 IX86_BUILTIN_CVTDQ2PS,
12625 IX86_BUILTIN_CVTPD2DQ,
12626 IX86_BUILTIN_CVTPD2PI,
12627 IX86_BUILTIN_CVTPD2PS,
12628 IX86_BUILTIN_CVTTPD2DQ,
12629 IX86_BUILTIN_CVTTPD2PI,
12631 IX86_BUILTIN_CVTPI2PD,
12632 IX86_BUILTIN_CVTSI2SD,
12633 IX86_BUILTIN_CVTSI642SD,
12635 IX86_BUILTIN_CVTSD2SI,
12636 IX86_BUILTIN_CVTSD2SI64,
12637 IX86_BUILTIN_CVTSD2SS,
12638 IX86_BUILTIN_CVTSS2SD,
12639 IX86_BUILTIN_CVTTSD2SI,
12640 IX86_BUILTIN_CVTTSD2SI64,
12642 IX86_BUILTIN_CVTPS2DQ,
12643 IX86_BUILTIN_CVTPS2PD,
12644 IX86_BUILTIN_CVTTPS2DQ,
12646 IX86_BUILTIN_MOVNTI,
12647 IX86_BUILTIN_MOVNTPD,
12648 IX86_BUILTIN_MOVNTDQ,
12651 IX86_BUILTIN_MASKMOVDQU,
12652 IX86_BUILTIN_MOVMSKPD,
12653 IX86_BUILTIN_PMOVMSKB128,
12655 IX86_BUILTIN_PACKSSWB128,
12656 IX86_BUILTIN_PACKSSDW128,
12657 IX86_BUILTIN_PACKUSWB128,
12659 IX86_BUILTIN_PADDB128,
12660 IX86_BUILTIN_PADDW128,
12661 IX86_BUILTIN_PADDD128,
12662 IX86_BUILTIN_PADDQ128,
12663 IX86_BUILTIN_PADDSB128,
12664 IX86_BUILTIN_PADDSW128,
12665 IX86_BUILTIN_PADDUSB128,
12666 IX86_BUILTIN_PADDUSW128,
12667 IX86_BUILTIN_PSUBB128,
12668 IX86_BUILTIN_PSUBW128,
12669 IX86_BUILTIN_PSUBD128,
12670 IX86_BUILTIN_PSUBQ128,
12671 IX86_BUILTIN_PSUBSB128,
12672 IX86_BUILTIN_PSUBSW128,
12673 IX86_BUILTIN_PSUBUSB128,
12674 IX86_BUILTIN_PSUBUSW128,
12676 IX86_BUILTIN_PAND128,
12677 IX86_BUILTIN_PANDN128,
12678 IX86_BUILTIN_POR128,
12679 IX86_BUILTIN_PXOR128,
12681 IX86_BUILTIN_PAVGB128,
12682 IX86_BUILTIN_PAVGW128,
12684 IX86_BUILTIN_PCMPEQB128,
12685 IX86_BUILTIN_PCMPEQW128,
12686 IX86_BUILTIN_PCMPEQD128,
12687 IX86_BUILTIN_PCMPGTB128,
12688 IX86_BUILTIN_PCMPGTW128,
12689 IX86_BUILTIN_PCMPGTD128,
12691 IX86_BUILTIN_PMADDWD128,
12693 IX86_BUILTIN_PMAXSW128,
12694 IX86_BUILTIN_PMAXUB128,
12695 IX86_BUILTIN_PMINSW128,
12696 IX86_BUILTIN_PMINUB128,
12698 IX86_BUILTIN_PMULUDQ,
12699 IX86_BUILTIN_PMULUDQ128,
12700 IX86_BUILTIN_PMULHUW128,
12701 IX86_BUILTIN_PMULHW128,
12702 IX86_BUILTIN_PMULLW128,
12704 IX86_BUILTIN_PSADBW128,
12705 IX86_BUILTIN_PSHUFHW,
12706 IX86_BUILTIN_PSHUFLW,
12707 IX86_BUILTIN_PSHUFD,
12709 IX86_BUILTIN_PSLLW128,
12710 IX86_BUILTIN_PSLLD128,
12711 IX86_BUILTIN_PSLLQ128,
12712 IX86_BUILTIN_PSRAW128,
12713 IX86_BUILTIN_PSRAD128,
12714 IX86_BUILTIN_PSRLW128,
12715 IX86_BUILTIN_PSRLD128,
12716 IX86_BUILTIN_PSRLQ128,
12717 IX86_BUILTIN_PSLLDQI128,
12718 IX86_BUILTIN_PSLLWI128,
12719 IX86_BUILTIN_PSLLDI128,
12720 IX86_BUILTIN_PSLLQI128,
12721 IX86_BUILTIN_PSRAWI128,
12722 IX86_BUILTIN_PSRADI128,
12723 IX86_BUILTIN_PSRLDQI128,
12724 IX86_BUILTIN_PSRLWI128,
12725 IX86_BUILTIN_PSRLDI128,
12726 IX86_BUILTIN_PSRLQI128,
12728 IX86_BUILTIN_PUNPCKHBW128,
12729 IX86_BUILTIN_PUNPCKHWD128,
12730 IX86_BUILTIN_PUNPCKHDQ128,
12731 IX86_BUILTIN_PUNPCKHQDQ128,
12732 IX86_BUILTIN_PUNPCKLBW128,
12733 IX86_BUILTIN_PUNPCKLWD128,
12734 IX86_BUILTIN_PUNPCKLDQ128,
12735 IX86_BUILTIN_PUNPCKLQDQ128,
12737 IX86_BUILTIN_CLFLUSH,
12738 IX86_BUILTIN_MFENCE,
12739 IX86_BUILTIN_LFENCE,
12741 /* Prescott New Instructions. */
12742 IX86_BUILTIN_ADDSUBPS,
12743 IX86_BUILTIN_HADDPS,
12744 IX86_BUILTIN_HSUBPS,
12745 IX86_BUILTIN_MOVSHDUP,
12746 IX86_BUILTIN_MOVSLDUP,
12747 IX86_BUILTIN_ADDSUBPD,
12748 IX86_BUILTIN_HADDPD,
12749 IX86_BUILTIN_HSUBPD,
12750 IX86_BUILTIN_LDDQU,
12752 IX86_BUILTIN_MONITOR,
12753 IX86_BUILTIN_MWAIT,
12755 IX86_BUILTIN_VEC_INIT_V2SI,
12756 IX86_BUILTIN_VEC_INIT_V4HI,
12757 IX86_BUILTIN_VEC_INIT_V8QI,
12758 IX86_BUILTIN_VEC_EXT_V2DF,
12759 IX86_BUILTIN_VEC_EXT_V2DI,
12760 IX86_BUILTIN_VEC_EXT_V4SF,
12761 IX86_BUILTIN_VEC_EXT_V4SI,
12762 IX86_BUILTIN_VEC_EXT_V8HI,
12763 IX86_BUILTIN_VEC_EXT_V2SI,
12764 IX86_BUILTIN_VEC_EXT_V4HI,
12765 IX86_BUILTIN_VEC_SET_V8HI,
12766 IX86_BUILTIN_VEC_SET_V4HI,
12771 #define def_builtin(MASK, NAME, TYPE, CODE) \
12773 if ((MASK) & target_flags \
12774 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12775 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12776 NULL, NULL_TREE); \
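/* For example (using one of the function-type trees built elsewhere in
   this file):

     def_builtin (MASK_SSE, "__builtin_ia32_addps",
                  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   registers the builtin only when -msse (and, for MASK_64BIT entries,
   64-bit mode) is enabled.  */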
12779 /* Bits for builtin_description.flag. */
12781 /* Set when we don't support the comparison natively, and should
12782 swap the comparison operands in order to support it. */
12783 #define BUILTIN_DESC_SWAP_OPERANDS 1
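/* E.g. there is no native CMPGTPS comparison; __builtin_ia32_cmpgtps is
   therefore listed below as LT with BUILTIN_DESC_SWAP_OPERANDS, so that
   a > b is computed as b < a.  */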
12785 struct builtin_description
12787 const unsigned int mask;
12788 const enum insn_code icode;
12789 const char *const name;
12790 const enum ix86_builtins code;
12791 const enum rtx_code comparison;
12792 const unsigned int flag;
12795 static const struct builtin_description bdesc_comi[] =
12797 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12798 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12799 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12800 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12801 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12802 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12803 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12804 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12805 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12806 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12807 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12808 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12809 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12810 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12811 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12812 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12813 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12814 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12815 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12816 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12817 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12818 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12819 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12820 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12823 static const struct builtin_description bdesc_2arg[] =
12826 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12827 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12828 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12829 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12830 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12831 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12832 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12833 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12835 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12836 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12837 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12838 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
12839 BUILTIN_DESC_SWAP_OPERANDS },
12840 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
12841 BUILTIN_DESC_SWAP_OPERANDS },
12842 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12843 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
12844 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
12845 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
12846 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
12847 BUILTIN_DESC_SWAP_OPERANDS },
12848 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
12849 BUILTIN_DESC_SWAP_OPERANDS },
12850 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
12851 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12852 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12853 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12854 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12855 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
12856 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
12857 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
12858 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
12859 BUILTIN_DESC_SWAP_OPERANDS },
12860 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
12861 BUILTIN_DESC_SWAP_OPERANDS },
12862 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12864 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12865 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12866 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12867 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12869 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12870 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12871 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12872 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12874 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12875 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12876 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12877 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12878 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12881 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12882 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12883 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12884 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12885 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12886 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12887 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12888 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12890 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12891 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12892 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12893 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12894 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12895 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12896 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12897 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12899 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12900 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12901 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12903 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12904 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12905 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12906 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12908 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12909 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12911 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12912 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12913 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12914 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12915 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12916 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12918 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12919 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12920 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12921 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12923 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12924 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12925 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12926 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12927 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12928 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12931 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12932 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12933 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12935 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12936 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12937 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12939 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12940 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12941 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12942 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12943 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12944 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12946 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12947 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12948 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12949 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12950 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12951 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12953 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12954 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12955 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12956 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12958 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12959 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12962 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12963 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12964 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12965 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12966 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12967 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12968 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12969 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12971 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12972 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12973 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12974 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
12975 BUILTIN_DESC_SWAP_OPERANDS },
12976 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
12977 BUILTIN_DESC_SWAP_OPERANDS },
12978 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12979 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
12980 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
12981 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
12982 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
12983 BUILTIN_DESC_SWAP_OPERANDS },
12984 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
12985 BUILTIN_DESC_SWAP_OPERANDS },
12986 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
12987 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12988 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12989 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12990 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12991 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
12992 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
12993 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
12994 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
12996 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12997 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12998 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12999 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
13001 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
13002 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
13003 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
13004 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
13006 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
13007 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
13008 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
13011 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
13012 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
13013 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
13014 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
13015 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
13016 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
13017 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
13018 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
13020 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
13021 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
13022 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
13023 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
13024 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
13025 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
13026 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
13027 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
  { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
  { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },

  /* SSE3 */
  { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
};
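
/* Illustrative sketch (not from the original sources): each bdesc_2arg
   row just ties a builtin name to a named insn pattern.  With the
   __builtin_ia32_andpd row above, a use such as

       __v2df a, b, c;
       c = __builtin_ia32_andpd (a, b);

   is funneled through ix86_expand_binop_builtin with CODE_FOR_andv2df3
   and emits a single ANDPD.  Here __v2df stands for the two-double
   vector typedef that emmintrin.h layers on top of these builtins.  */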
static const struct builtin_description bdesc_1arg[] =
{
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },

  /* SSE3 */
  { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 }
};
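
/* Note that most bdesc_1arg rows leave the name field 0; such rows are
   consumed only by the expanders, and the user-visible builtin name is
   registered separately by an explicit def_builtin call in
   ix86_init_mmx_sse_builtins below (e.g. "__builtin_ia32_sqrtps" for
   IX86_BUILTIN_SQRTPS, which maps onto CODE_FOR_sqrtv4sf2 above).  */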
static void
ix86_init_builtins (void)
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}

/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */

static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  size_t i;

  tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
  tree V2DI_type_node
    = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
  tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
  tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
  tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
  tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);

  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pcchar_type_node = build_pointer_type (
    build_type_variant (char_type_node, 1, 0));
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pcfloat_type_node = build_pointer_type (
    build_type_variant (float_type_node, 1, 0));
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);

  /* Comparisons.  */
  tree int_ftype_v4sf_v4sf
    = build_function_type_list (integer_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v4si_ftype_v4sf_v4sf
    = build_function_type_list (V4SI_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type_list (integer_type_node,
                                V4SF_type_node, NULL_TREE);
  tree int64_ftype_v4sf
    = build_function_type_list (long_long_integer_type_node,
                                V4SF_type_node, NULL_TREE);
  tree int_ftype_v8qi
    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, integer_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int64
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, long_long_integer_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v4sf_v2si
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V2SI_type_node, NULL_TREE);

  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type_list (V8QI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v2si_v2si
    = build_function_type_list (V4HI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V4SF_type_node,
                                integer_type_node, NULL_TREE);
  tree v2si_ftype_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_di
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree v2si_ftype_v2si_di
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree void_ftype_void
    = build_function_type (void_type_node, void_list_node);
  tree void_ftype_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
  tree void_ftype_unsigned_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node,
                                unsigned_type_node, NULL_TREE);
  tree void_ftype_pcvoid_unsigned_unsigned
    = build_function_type_list (void_type_node, const_ptr_type_node,
                                unsigned_type_node, unsigned_type_node,
                                NULL_TREE);
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, void_list_node);
  tree v2si_ftype_v4sf
    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
  /* Loads/stores.  */
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type_list (void_type_node,
                                V8QI_type_node, V8QI_type_node,
                                pchar_type_node, NULL_TREE);
  tree v4sf_ftype_pcfloat
    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v4sf
    = build_function_type_list (void_type_node,
                                pv2si_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pfloat_v4sf
    = build_function_type_list (void_type_node,
                                pfloat_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pdi_di
    = build_function_type_list (void_type_node,
                                pdi_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree void_ftype_pv2di_v2di
    = build_function_type_list (void_type_node,
                                pv2di_type_node, V2DI_type_node, NULL_TREE);
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type_list (V8QI_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree di_ftype_di_di
    = build_function_type_list (long_long_unsigned_type_node,
                                long_long_unsigned_type_node,
                                long_long_unsigned_type_node, NULL_TREE);

  tree v2si_ftype_v2sf
    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2si
    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf
    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type_list (V2SF_type_node,
                                V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2si_ftype_v2sf_v2sf
    = build_function_type_list (V2SI_type_node,
                                V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree pint_type_node = build_pointer_type (integer_type_node);
  tree pdouble_type_node = build_pointer_type (double_type_node);
  tree pcdouble_type_node = build_pointer_type (
    build_type_variant (double_type_node, 1, 0));
  tree int_ftype_v2df_v2df
    = build_function_type_list (integer_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree ti_ftype_ti_ti
    = build_function_type_list (intTI_type_node,
                                intTI_type_node, intTI_type_node, NULL_TREE);
  tree void_ftype_pcvoid
    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
  tree v4sf_ftype_v4si
    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v4sf
    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v4si
    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v2df
    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
  tree v2si_ftype_v2df
    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v2df
    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2si
    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
  tree v2df_ftype_v4sf
    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree int_ftype_v2df
    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
  tree int64_ftype_v2df
    = build_function_type_list (long_long_integer_type_node,
                                V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int64
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, long_long_integer_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v4sf_v2df
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v4sf
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df_int
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node,
                                integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_pcdouble
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree void_ftype_pdouble_v2df
    = build_function_type_list (void_type_node,
                                pdouble_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pint_int
    = build_function_type_list (void_type_node,
                                pint_type_node, integer_type_node, NULL_TREE);
  tree void_ftype_v16qi_v16qi_pchar
    = build_function_type_list (void_type_node,
                                V16QI_type_node, V16QI_type_node,
                                pchar_type_node, NULL_TREE);
  tree v2df_ftype_pcdouble
    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi
    = build_function_type_list (V16QI_type_node,
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v4si
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree v2di_ftype_v2df_v2df
    = build_function_type_list (V2DI_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df
    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2di_ftype_v2di_int
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v4si_int
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v2di
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v2di
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v8hi_v8hi
    = build_function_type_list (V4SI_type_node,
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree di_ftype_v8qi_v8qi
    = build_function_type_list (long_long_unsigned_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree di_ftype_v2si_v2si
    = build_function_type_list (long_long_unsigned_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2di_ftype_v16qi_v16qi
    = build_function_type_list (V2DI_type_node,
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v2di_ftype_v4si_v4si
    = build_function_type_list (V2DI_type_node,
                                V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree int_ftype_v16qi
    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
  tree v16qi_ftype_pcchar
    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
  tree void_ftype_pchar_v16qi
    = build_function_type_list (void_type_node,
                                pchar_type_node, V16QI_type_node, NULL_TREE);

  tree float80_type;
  tree float128_type;
  tree ftype;

  /* The __float80 type.  */
  if (TYPE_MODE (long_double_type_node) == XFmode)
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                               "__float80");
  else
    {
      float80_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float80_type) = 80;
      layout_type (float80_type);
      (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
    }

  float128_type = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type) = 128;
  layout_type (float128_type);
  (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
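
  /* A sketch of what the registrations above provide: after this
     function runs, both names can be used directly in C source,

         __float80  e;      80-bit extended precision
         __float128 q;      128-bit format

     and on targets whose long double is already XFmode, __float80 is
     simply another name for long double.  */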

  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
         mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
        continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
        {
        case V16QImode:
          type = v16qi_ftype_v16qi_v16qi;
          break;
        case V8HImode:
          type = v8hi_ftype_v8hi_v8hi;
          break;
        case V4SImode:
          type = v4si_ftype_v4si_v4si;
          break;
        case V2DImode:
          type = v2di_ftype_v2di_v2di;
          break;
        case V2DFmode:
          type = v2df_ftype_v2df_v2df;
          break;
        case TImode:
          type = ti_ftype_ti_ti;
          break;
        case V4SFmode:
          type = v4sf_ftype_v4sf_v4sf;
          break;
        case V8QImode:
          type = v8qi_ftype_v8qi_v8qi;
          break;
        case V4HImode:
          type = v4hi_ftype_v4hi_v4hi;
          break;
        case V2SImode:
          type = v2si_ftype_v2si_v2si;
          break;
        case DImode:
          type = di_ftype_di_di;
          break;

        default:
          abort ();
        }

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_sse_maskcmpv4sf3
          || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
        type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_sse2_maskcmpv2df3
          || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
        type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }
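
  /* Worked example for the loop above: for the __builtin_ia32_pmullw128
     row, insn_data[CODE_FOR_mulv8hi3].operand[1].mode is V8HImode, so
     the switch picks v8hi_ftype_v8hi_v8hi and the builtin is registered
     with signature v8hi (v8hi, v8hi).  */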

  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
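
  /* The shift-by-count builtins above take the count as a DImode value
     (v4hi_ftype_v4hi_di and friends) because the MMX PSLLW/PSLLD/PSLLQ
     instructions read the count from a 64-bit MMX register or memory
     operand, e.g. (sketch):

         __v4hi v;
         long long n;
         v = __builtin_ia32_psllw (v, n);
  */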

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == MASK_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);

  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);

  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
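
  /* Example of a builtin taking an immediate (a sketch, using the __v4sf
     typedef from xmmintrin.h):

         __v4sf a, b;
         __v4sf r = __builtin_ia32_shufps (a, b, 0x1b);

     The last argument is an 8-bit selector and must fold to a
     compile-time constant; the SHUFPS case in ix86_expand_builtin below
     rejects anything else with "mask must be an immediate".  */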

  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);

  /* SSE2 */
  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);

  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);

  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);

  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);

  /* Prescott New Instructions.  */
  def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
               void_ftype_pcvoid_unsigned_unsigned,
               IX86_BUILTIN_MONITOR);
  def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
               void_ftype_unsigned_unsigned,
               IX86_BUILTIN_MWAIT);
  def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
               v4sf_ftype_v4sf,
               IX86_BUILTIN_MOVSHDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
               v4sf_ftype_v4sf,
               IX86_BUILTIN_MOVSLDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
               v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
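
  /* Usage sketch for the monitor/mwait builtins just registered; these
     are what _mm_monitor and _mm_mwait in pmmintrin.h expand to:

         void const *p;
         __builtin_ia32_monitor (p, 0, 0);    arm the monitor on P
         __builtin_ia32_mwait (0, 0);         wait for a store to it
  */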

  /* Access to the vec_init patterns.  */
  ftype = build_function_type_list (V2SI_type_node, integer_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
               ftype, IX86_BUILTIN_VEC_INIT_V2SI);

  ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
                                    short_integer_type_node,
                                    short_integer_type_node,
                                    short_integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
               ftype, IX86_BUILTIN_VEC_INIT_V4HI);

  ftype = build_function_type_list (V8QI_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
               ftype, IX86_BUILTIN_VEC_INIT_V8QI);
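
  /* Usage sketch: mmintrin.h builds the MMX "set" intrinsics on top of
     these, roughly

         static __inline __m64
         _mm_set_pi32 (int __i1, int __i0)
         {
           return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
         }

     which is what gives the compiler explicit license to use MMX
     registers (see the comment before ix86_expand_vec_init_builtin
     below).  */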

  /* Access to the vec_extract patterns.  */
  ftype = build_function_type_list (double_type_node, V2DF_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
               ftype, IX86_BUILTIN_VEC_EXT_V2DF);

  ftype = build_function_type_list (long_long_integer_type_node,
                                    V2DI_type_node, integer_type_node,
                                    NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
               ftype, IX86_BUILTIN_VEC_EXT_V2DI);

  ftype = build_function_type_list (float_type_node, V4SF_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
               ftype, IX86_BUILTIN_VEC_EXT_V4SF);

  ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
               ftype, IX86_BUILTIN_VEC_EXT_V4SI);

  ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
               ftype, IX86_BUILTIN_VEC_EXT_V8HI);

  ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
               ftype, IX86_BUILTIN_VEC_EXT_V4HI);

  ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
               ftype, IX86_BUILTIN_VEC_EXT_V2SI);

  /* Access to the vec_set patterns.  */
  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
                                    intHI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
               ftype, IX86_BUILTIN_VEC_SET_V8HI);

  ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
                                    intHI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
               ftype, IX86_BUILTIN_VEC_SET_V4HI);
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
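
/* For example (a sketch), given

       __v4sf f (__v4sf x) { return __builtin_ia32_addps (x, y); }

   with Y undeclared, the error is diagnosed elsewhere and expand_expr
   hands back const0_rtx for the bad operand; substituting CONST0_RTX
   of the vector mode keeps the expanders below from tripping over a
   scalar zero where a vector is expected.  */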

/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat, xops[3];
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* In case the insn wants input operands in modes different from
     the result, abort.  */
  if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
      || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
    abort ();

  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* ??? Using ix86_fixup_binary_operands is problematic when
     we've got mismatched modes.  Fake it.  */

  xops[0] = target;
  xops[1] = op0;
  xops[2] = op1;

  if (tmode == mode0 && tmode == mode1)
    {
      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
      op0 = xops[1];
      op1 = xops[2];
    }
  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
    {
      op0 = force_reg (mode0, op0);
      op1 = force_reg (mode1, op1);
      target = gen_reg_rtx (tmode);
    }

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (enum insn_code icode, tree arglist)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}

/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
                          rtx target, int do_load)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
        op0 = safe_vector_operand (op0, mode0);

      if ((optimize && !register_operand (op0, mode0))
          || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  /* These insns use the input both as the source and as the merged-in
     upper elements, so the second operand is just a copy of the first.  */
  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
                         rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
                      rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  /* Set the low byte of TARGET according to the comparison result.  */
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          SET_DEST (pat),
                                          const0_rtx)));

  return SUBREG_REG (target);
}

/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static unsigned HOST_WIDE_INT
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%i", max);
      return 0;
    }

  return elt;
}
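
/* For example, __builtin_ia32_vec_ext_v4sf (x, 4) is rejected here:
   TYPE_VECTOR_SUBPARTS is 4, so MAX is 3 and the error reads
   "selector must be an integer constant in the range 0..3".  The same
   happens for a selector that is not a compile-time constant.  */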

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that give us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));

  for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
    {
      rtx x = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  gcc_assert (arglist == NULL);

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}
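
/* For non-MMX vectors the same effect is available straight from the
   language, e.g. (sketch, with __v4sf being a GCC vector typedef):

       __v4sf v = (__v4sf) { 1.0f, 2.0f, 3.0f, 4.0f };

   The builtins handled here exist only so that initializing an MMX-mode
   vector is an explicit request for MMX register use.  */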

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree arglist, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));

  op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree arglist)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
  op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  ix86_expand_vector_set (true, op0, op1, elt);

  return op0;
}
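
/* Usage sketch: xmmintrin.h wraps the vec_set builtin as roughly

       static __inline __m64
       _mm_insert_pi16 (__m64 __A, int __D, int __N)
       {
         return (__m64) __builtin_ia32_vec_set_v4hi ((__v4hi) __A,
                                                     __D, __N);
       }

   with __N constrained to a constant 0..3 by get_element_number.  */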

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                     enum machine_mode mode ATTRIBUTE_UNUSED,
                     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_mmx_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sse_sfence ());
      return 0;

    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
               ? CODE_FOR_mmx_maskmovq
               : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      op0 = force_reg (Pmode, op0);
      op0 = gen_rtx_MEM (mode1, op0);
      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
               : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
               : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
               : CODE_FOR_sse2_loadlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = force_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (optimize || target == 0
          || GET_MODE (target) != tmode
          || !register_operand (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
               : CODE_FOR_sse_storelps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      op1 = force_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return const0_rtx;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
               ? CODE_FOR_sse_shufps
               : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if ((optimize && !register_operand (op1, mode1))
          || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (optimize || target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;
14370 case IX86_BUILTIN_PSHUFW:
14371 case IX86_BUILTIN_PSHUFD:
14372 case IX86_BUILTIN_PSHUFHW:
14373 case IX86_BUILTIN_PSHUFLW:
14374 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14375 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14376 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14377 : CODE_FOR_mmx_pshufw);
14378 arg0 = TREE_VALUE (arglist);
14379 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14380 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14381 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14382 tmode = insn_data[icode].operand[0].mode;
14383 mode1 = insn_data[icode].operand[1].mode;
14384 mode2 = insn_data[icode].operand[2].mode;
14386 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14387 op0 = copy_to_mode_reg (mode1, op0);
14388 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14390 /* @@@ better error message */
14391 error ("mask must be an immediate");
14395 || GET_MODE (target) != tmode
14396 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14397 target = gen_reg_rtx (tmode);
14398 pat = GEN_FCN (icode) (target, op0, op1);
14399 if (! pat)
14400 return 0;
14401 emit_insn (pat);
14402 return target;
14404 case IX86_BUILTIN_PSLLDQI128:
14405 case IX86_BUILTIN_PSRLDQI128:
14406 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14407 : CODE_FOR_sse2_lshrti3);
14408 arg0 = TREE_VALUE (arglist);
14409 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14410 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14411 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14412 tmode = insn_data[icode].operand[0].mode;
14413 mode1 = insn_data[icode].operand[1].mode;
14414 mode2 = insn_data[icode].operand[2].mode;
14416 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14417 {
14418 op0 = copy_to_reg (op0);
14419 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14420 }
14421 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14422 {
14423 error ("shift must be an immediate");
14424 return const0_rtx;
14425 }
14426 target = gen_reg_rtx (V2DImode);
14427 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14428 if (! pat)
14429 return 0;
14430 emit_insn (pat);
14431 return target;
14433 case IX86_BUILTIN_FEMMS:
14434 emit_insn (gen_mmx_femms ());
14435 return NULL_RTX;
14437 case IX86_BUILTIN_PAVGUSB:
14438 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
14440 case IX86_BUILTIN_PF2ID:
14441 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
14443 case IX86_BUILTIN_PFACC:
14444 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
14446 case IX86_BUILTIN_PFADD:
14447 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
14449 case IX86_BUILTIN_PFCMPEQ:
14450 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
14452 case IX86_BUILTIN_PFCMPGE:
14453 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
14455 case IX86_BUILTIN_PFCMPGT:
14456 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
14458 case IX86_BUILTIN_PFMAX:
14459 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
14461 case IX86_BUILTIN_PFMIN:
14462 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
14464 case IX86_BUILTIN_PFMUL:
14465 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
14467 case IX86_BUILTIN_PFRCP:
14468 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
14470 case IX86_BUILTIN_PFRCPIT1:
14471 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
14473 case IX86_BUILTIN_PFRCPIT2:
14474 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
14476 case IX86_BUILTIN_PFRSQIT1:
14477 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
14479 case IX86_BUILTIN_PFRSQRT:
14480 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
14482 case IX86_BUILTIN_PFSUB:
14483 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
14485 case IX86_BUILTIN_PFSUBR:
14486 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
14488 case IX86_BUILTIN_PI2FD:
14489 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
14491 case IX86_BUILTIN_PMULHRW:
14492 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
14494 case IX86_BUILTIN_PF2IW:
14495 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
14497 case IX86_BUILTIN_PFNACC:
14498 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
14500 case IX86_BUILTIN_PFPNACC:
14501 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
14503 case IX86_BUILTIN_PI2FW:
14504 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
14506 case IX86_BUILTIN_PSWAPDSI:
14507 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
14509 case IX86_BUILTIN_PSWAPDSF:
14510 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
14512 case IX86_BUILTIN_SQRTSD:
14513 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
14514 case IX86_BUILTIN_LOADUPD:
14515 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14516 case IX86_BUILTIN_STOREUPD:
14517 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14519 case IX86_BUILTIN_MFENCE:
14520 emit_insn (gen_sse2_mfence ());
14521 return 0;
14522 case IX86_BUILTIN_LFENCE:
14523 emit_insn (gen_sse2_lfence ());
14524 return 0;
14526 case IX86_BUILTIN_CLFLUSH:
14527 arg0 = TREE_VALUE (arglist);
14528 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14529 icode = CODE_FOR_sse2_clflush;
14530 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14531 op0 = copy_to_mode_reg (Pmode, op0);
14533 emit_insn (gen_sse2_clflush (op0));
14534 return 0;
14536 case IX86_BUILTIN_MOVNTPD:
14537 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14538 case IX86_BUILTIN_MOVNTDQ:
14539 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14540 case IX86_BUILTIN_MOVNTI:
14541 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14543 case IX86_BUILTIN_LOADDQU:
14544 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14545 case IX86_BUILTIN_STOREDQU:
14546 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14548 case IX86_BUILTIN_MONITOR:
14549 arg0 = TREE_VALUE (arglist);
14550 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14551 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14552 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14553 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14554 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14555 if (!REG_P (op0))
14556 op0 = copy_to_mode_reg (SImode, op0);
14557 if (!REG_P (op1))
14558 op1 = copy_to_mode_reg (SImode, op1);
14559 if (!REG_P (op2))
14560 op2 = copy_to_mode_reg (SImode, op2);
14561 emit_insn (gen_sse3_monitor (op0, op1, op2));
14562 return 0;
14564 case IX86_BUILTIN_MWAIT:
14565 arg0 = TREE_VALUE (arglist);
14566 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14567 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14568 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14569 if (!REG_P (op0))
14570 op0 = copy_to_mode_reg (SImode, op0);
14571 if (!REG_P (op1))
14572 op1 = copy_to_mode_reg (SImode, op1);
14573 emit_insn (gen_sse3_mwait (op0, op1));
14574 return 0;
14576 case IX86_BUILTIN_LDDQU:
14577 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
14578 target, 1);
14580 case IX86_BUILTIN_VEC_INIT_V2SI:
14581 case IX86_BUILTIN_VEC_INIT_V4HI:
14582 case IX86_BUILTIN_VEC_INIT_V8QI:
14583 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
14585 case IX86_BUILTIN_VEC_EXT_V2DF:
14586 case IX86_BUILTIN_VEC_EXT_V2DI:
14587 case IX86_BUILTIN_VEC_EXT_V4SF:
14588 case IX86_BUILTIN_VEC_EXT_V4SI:
14589 case IX86_BUILTIN_VEC_EXT_V8HI:
14590 case IX86_BUILTIN_VEC_EXT_V2SI:
14591 case IX86_BUILTIN_VEC_EXT_V4HI:
14592 return ix86_expand_vec_ext_builtin (arglist, target);
14594 case IX86_BUILTIN_VEC_SET_V8HI:
14595 case IX86_BUILTIN_VEC_SET_V4HI:
14596 return ix86_expand_vec_set_builtin (arglist);
14598 default:
14599 break;
14600 }
14602 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14603 if (d->code == fcode)
14604 {
14605 /* Compares are treated specially. */
14606 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
14607 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
14608 || d->icode == CODE_FOR_sse2_maskcmpv2df3
14609 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
14610 return ix86_expand_sse_compare (d, arglist, target);
14612 return ix86_expand_binop_builtin (d->icode, arglist, target);
14613 }
14615 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14616 if (d->code == fcode)
14617 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14619 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14620 if (d->code == fcode)
14621 return ix86_expand_sse_comi (d, arglist, target);
14623 gcc_unreachable ();
14624 }
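/* As a rough illustration of the flow above: a call such as
   __builtin_ia32_mfence () arrives here with fcode == IX86_BUILTIN_MFENCE
   and simply emits gen_sse2_mfence (), while a masked builtin like
   __builtin_ia32_shufps (a, b, 0x1b) must present its mask operand as an
   immediate or the expander reports "mask must be an immediate".
   Builtins not special-cased in the switch are matched against the
   bdesc_2arg, bdesc_1arg and bdesc_comi tables instead.  */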
14626 /* Store OPERAND to the memory after reload is completed. This means
14627 that we can't easily use assign_stack_local. */
14628 rtx
14629 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14630 {
14631 rtx result;
14632 if (!reload_completed)
14633 abort ();
14634 if (TARGET_RED_ZONE)
14635 {
14636 result = gen_rtx_MEM (mode,
14637 gen_rtx_PLUS (Pmode,
14638 stack_pointer_rtx,
14639 GEN_INT (-RED_ZONE_SIZE)));
14640 emit_move_insn (result, operand);
14641 }
14642 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14643 {
14644 switch (mode)
14645 {
14646 case HImode:
14647 case SImode:
14648 operand = gen_lowpart (DImode, operand);
14649 /* FALLTHRU */
14650 case DImode:
14651 emit_insn (
14652 gen_rtx_SET (VOIDmode,
14653 gen_rtx_MEM (DImode,
14654 gen_rtx_PRE_DEC (DImode,
14655 stack_pointer_rtx)),
14656 operand));
14657 break;
14658 default:
14659 abort ();
14660 }
14661 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14662 }
14663 else
14664 {
14665 switch (mode)
14666 {
14667 case DImode:
14668 {
14669 rtx operands[2];
14670 split_di (&operand, 1, operands, operands + 1);
14671 emit_insn (
14672 gen_rtx_SET (VOIDmode,
14673 gen_rtx_MEM (SImode,
14674 gen_rtx_PRE_DEC (Pmode,
14675 stack_pointer_rtx)),
14676 operands[1]));
14677 emit_insn (
14678 gen_rtx_SET (VOIDmode,
14679 gen_rtx_MEM (SImode,
14680 gen_rtx_PRE_DEC (Pmode,
14681 stack_pointer_rtx)),
14682 operands[0]));
14683 }
14684 break;
14685 case HImode:
14686 /* It is better to store HImodes as SImodes. */
14687 if (!TARGET_PARTIAL_REG_STALL)
14688 operand = gen_lowpart (SImode, operand);
14689 /* FALLTHRU */
14690 case SImode:
14691 emit_insn (
14692 gen_rtx_SET (VOIDmode,
14693 gen_rtx_MEM (GET_MODE (operand),
14694 gen_rtx_PRE_DEC (SImode,
14695 stack_pointer_rtx)),
14696 operand));
14697 break;
14698 default:
14699 abort ();
14700 }
14701 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14702 }
14703 return result;
14704 }
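/* A sketch of the code this produces: with a red zone (x86-64), the
   operand is stored below the stack pointer without adjusting it, e.g.
     movq %rax, -128(%rsp)
   while without a red zone the PRE_DEC store is a push, e.g. for SImode
     pushl %eax
   and the returned MEM addresses the slot through the stack pointer.  */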
14706 /* Free the operand from memory. */
14707 void
14708 ix86_free_from_memory (enum machine_mode mode)
14709 {
14710 if (!TARGET_RED_ZONE)
14711 {
14712 int size;
14714 if (mode == DImode || TARGET_64BIT)
14715 size = 8;
14716 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14717 size = 2;
14718 else
14719 size = 4;
14720 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14721 to a pop or add instruction if registers are available. */
14722 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14723 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14724 GEN_INT (size))));
14725 }
14726 }
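/* The matching deallocation is thus a plain pointer adjustment; for an
   SImode slot on ia32 it is emitted as
     leal 4(%esp), %esp
   which peephole2 may later turn into "popl reg" or "addl $4, %esp" when
   a scratch register or the flags are known to be available.  */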
14728 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14729 QImode must go into class Q_REGS.
14730 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14731 movdf to do mem-to-mem moves through integer regs. */
14732 enum reg_class
14733 ix86_preferred_reload_class (rtx x, enum reg_class class)
14734 {
14735 /* We're only allowed to return a subclass of CLASS. Many of the
14736 following checks fail for NO_REGS, so eliminate that early. */
14737 if (class == NO_REGS)
14738 return NO_REGS;
14740 /* All classes can load zeros. */
14741 if (x == CONST0_RTX (GET_MODE (x)))
14742 return class;
14744 /* Floating-point constants need more complex checks. */
14745 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14746 {
14747 /* General regs can load everything. */
14748 if (reg_class_subset_p (class, GENERAL_REGS))
14749 return class;
14751 /* Floats can load 0 and 1 plus some others. Note that we eliminated
14752 zero above. We only want to wind up preferring 80387 registers if
14753 we plan on doing computation with them. */
14754 if (TARGET_80387
14755 && (TARGET_MIX_SSE_I387
14756 || !(TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (x))))
14757 && standard_80387_constant_p (x))
14758 {
14759 /* Limit class to non-sse. */
14760 if (class == FLOAT_SSE_REGS)
14761 return FLOAT_REGS;
14762 if (class == FP_TOP_SSE_REGS)
14763 return FP_TOP_REG;
14764 if (class == FP_SECOND_SSE_REGS)
14765 return FP_SECOND_REG;
14766 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
14767 return class;
14768 }
14770 return NO_REGS;
14771 }
14772 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14773 return NO_REGS;
14774 if (MAYBE_SSE_CLASS_P (class) && CONSTANT_P (x))
14775 return NO_REGS;
14777 /* Generally when we see PLUS here, it's the function invariant
14778 (plus soft-fp const_int), which can only be computed into general
14779 regs. */
14780 if (GET_CODE (x) == PLUS)
14781 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
14783 /* QImode constants are easy to load, but non-constant QImode data
14784 must go into Q_REGS. */
14785 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
14786 {
14787 if (reg_class_subset_p (class, Q_REGS))
14788 return class;
14789 if (reg_class_subset_p (Q_REGS, class))
14790 return Q_REGS;
14791 return NO_REGS;
14792 }
14794 return class;
14795 }
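/* For instance (a sketch, not an exhaustive rule): reloading the SFmode
   constant 1.0 into FLOAT_SSE_REGS narrows the class to FLOAT_REGS, since
   fld1 can materialize it directly, whereas an arbitrary double constant
   yields NO_REGS and is therefore loaded from the constant pool.  */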
14797 /* If we are copying between general and FP registers, we need a memory
14798 location. The same is true for SSE and MMX registers.
14800 The macro can't work reliably when one of the CLASSES is a class containing
14801 registers from multiple units (SSE, MMX, integer). We avoid this by never
14802 combining those units in a single alternative in the machine description.
14803 Ensure that this constraint holds to avoid unexpected surprises.
14805 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14806 enforce these sanity checks. */
14808 int
14809 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14810 enum machine_mode mode, int strict)
14811 {
14812 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14813 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14814 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14815 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14816 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14817 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14818 {
14819 if (strict)
14820 abort ();
14821 return true;
14822 }
14824 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
14825 return true;
14827 /* ??? This is a lie. We do have moves between mmx/general, and between
14828 mmx/sse2. But by saying we need secondary memory we discourage the
14829 register allocator from using the mmx registers unless needed. */
14830 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14831 return true;
14833 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14834 {
14835 /* SSE1 doesn't have any direct moves from other classes. */
14836 if (!TARGET_SSE2)
14837 return true;
14839 /* If the target says that inter-unit moves are more expensive
14840 than moving through memory, then don't generate them. */
14841 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
14842 return true;
14844 /* Between SSE and general, we have moves no larger than word size. */
14845 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
14846 return true;
14848 /* ??? For the cost of one register reformat penalty, we could use
14849 the same instructions to move SFmode and DFmode data, but the
14850 relevant move patterns don't support those alternatives. */
14851 if (mode == SFmode || mode == DFmode)
14852 return true;
14853 }
14855 return false;
14856 }
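/* Example: a V4SFmode copy between SSE_REGS and GENERAL_REGS answers true
   (16-byte data exceeds UNITS_PER_WORD), so the register allocator spills
   through a stack slot rather than attempting a direct move, while a
   DImode copy between two general-register subclasses answers false.  */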
14858 /* Return the cost of moving data from a register in class CLASS1 to
14859 one in class CLASS2.
14861 It is not required that the cost always equal 2 when FROM is the same as TO;
14862 on some machines it is expensive to move between registers if they are not
14863 general registers. */
14865 int
14866 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14867 enum reg_class class2)
14868 {
14869 /* In case we require secondary memory, compute the cost of the store followed
14870 by the load. In order to avoid bad register allocation choices, we need
14871 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14873 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14874 {
14875 int cost = 1;
14877 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14878 MEMORY_MOVE_COST (mode, class1, 1));
14879 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14880 MEMORY_MOVE_COST (mode, class2, 1));
14882 /* In case of copying from a general purpose register we may emit multiple
14883 stores followed by a single load, causing a memory size mismatch stall.
14884 Count this as an arbitrarily high cost of 20. */
14885 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14886 cost += 20;
14888 /* In the case of FP/MMX moves, the registers actually overlap, and we
14889 have to switch modes in order to treat them differently. */
14890 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14891 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14892 cost += 20;
14894 return cost;
14895 }
14897 /* Moves between SSE/MMX and the integer unit are expensive. */
14898 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14899 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14900 return ix86_cost->mmxsse_to_integer;
14901 if (MAYBE_FLOAT_CLASS_P (class1))
14902 return ix86_cost->fp_move;
14903 if (MAYBE_SSE_CLASS_P (class1))
14904 return ix86_cost->sse_move;
14905 if (MAYBE_MMX_CLASS_P (class1))
14906 return ix86_cost->mmx_move;
14907 return 2;
14908 }
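/* Worked example under the cost tables at the top of this file: for a
   DFmode copy between FLOAT_REGS and SSE_REGS, secondary memory is needed,
   so the result is roughly 1 + MAX(fp_load, fp_store) + MAX(sse_load,
   sse_store) for the relevant size index -- intentionally at least as
   expensive as an explicit store/load pair through memory.  */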
14910 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14911 int
14912 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14913 {
14914 /* Flags and only flags can hold CCmode values. */
14915 if (CC_REGNO_P (regno))
14916 return GET_MODE_CLASS (mode) == MODE_CC;
14917 if (GET_MODE_CLASS (mode) == MODE_CC
14918 || GET_MODE_CLASS (mode) == MODE_RANDOM
14919 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14920 return 0;
14921 if (FP_REGNO_P (regno))
14922 return VALID_FP_MODE_P (mode);
14923 if (SSE_REGNO_P (regno))
14924 {
14925 /* We implement the move patterns for all vector modes into and
14926 out of SSE registers, even when no operation instructions
14927 are available. */
14928 return (VALID_SSE_REG_MODE (mode)
14929 || VALID_SSE2_REG_MODE (mode)
14930 || VALID_MMX_REG_MODE (mode)
14931 || VALID_MMX_REG_MODE_3DNOW (mode));
14932 }
14933 if (MMX_REGNO_P (regno))
14934 {
14935 /* We implement the move patterns for 3DNOW modes even in MMX mode,
14936 so if the register is available at all, then we can move data of
14937 the given mode into or out of it. */
14938 return (VALID_MMX_REG_MODE (mode)
14939 || VALID_MMX_REG_MODE_3DNOW (mode));
14940 }
14941 /* We handle both integers and floats in the general purpose registers.
14942 In future we should be able to handle vector modes as well. */
14943 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14944 return 0;
14945 /* Take care with QImode values - they can be in non-QI regs, but then
14946 they do cause partial register stalls. */
14947 if (regno < 4 || mode != QImode || TARGET_64BIT)
14948 return 1;
14949 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14950 }
14952 /* Return the cost of moving data of mode M between a
14953 register and memory. A value of 2 is the default; this cost is
14954 relative to those in `REGISTER_MOVE_COST'.
14956 If moving between registers and memory is more expensive than
14957 between two registers, you should define this macro to express the
14958 relative cost.
14960 Also model the increased cost of moving QImode registers in non
14961 Q_REGS classes. */
14962 int
14964 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14965 {
14966 if (FLOAT_CLASS_P (class))
14967 {
14968 int index;
14969 switch (mode)
14970 {
14971 case SFmode:
14972 index = 0;
14973 break;
14974 case DFmode:
14975 index = 1;
14976 break;
14977 case XFmode:
14978 index = 2;
14979 break;
14980 default:
14981 return 100;
14982 }
14983 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14984 }
14985 if (SSE_CLASS_P (class))
14986 {
14987 int index;
14988 switch (GET_MODE_SIZE (mode))
14989 {
14990 case 4:
14991 index = 0;
14992 break;
14993 case 8:
14994 index = 1;
14995 break;
14996 case 16:
14997 index = 2;
14998 break;
14999 default:
15000 return 100;
15001 }
15002 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
15003 }
15004 if (MMX_CLASS_P (class))
15005 {
15006 int index;
15007 switch (GET_MODE_SIZE (mode))
15008 {
15009 case 4:
15010 index = 0;
15011 break;
15012 case 8:
15013 index = 1;
15014 break;
15015 default:
15016 return 100;
15017 }
15018 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
15019 }
15020 switch (GET_MODE_SIZE (mode))
15021 {
15022 case 1:
15023 if (in)
15024 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
15025 : ix86_cost->movzbl_load);
15026 else
15027 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
15028 : ix86_cost->int_store[0] + 4);
15029 break;
15030 case 2:
15031 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
15032 default:
15033 /* Compute the number of 32bit moves needed. TFmode is moved as XFmode. */
15034 if (mode == TFmode)
15035 mode = XFmode;
15036 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
15037 * (((int) GET_MODE_SIZE (mode)
15038 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
15039 }
15040 }
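/* For example, an SImode access falls into the default branch and costs
   int_load[2] or int_store[2] for one word, while a QImode store into a
   non-Q class is charged int_store[0] + 4, modelling the extra cost of
   byte operations on registers without byte subregisters.  */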
15042 /* Compute a (partial) cost for rtx X. Return true if the complete
15043 cost has been computed, and false if subexpressions should be
15044 scanned. In either case, *TOTAL contains the cost result. */
15046 static bool
15047 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
15048 {
15049 enum machine_mode mode = GET_MODE (x);
15051 switch (code)
15052 {
15053 case CONST_INT:
15054 case CONST:
15055 case LABEL_REF:
15056 case SYMBOL_REF:
15057 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
15058 *total = 3;
15059 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
15060 *total = 2;
15061 else if (flag_pic && SYMBOLIC_CONST (x)
15062 && (!TARGET_64BIT
15063 || (GET_CODE (x) != LABEL_REF
15064 && (GET_CODE (x) != SYMBOL_REF
15065 || !SYMBOL_REF_LOCAL_P (x)))))
15066 *total = 1;
15067 else
15068 *total = 0;
15069 break;
15071 case CONST_DOUBLE:
15072 if (mode == VOIDmode)
15073 *total = 0;
15074 else
15075 switch (standard_80387_constant_p (x))
15076 {
15077 case 1: /* 0.0 */
15078 *total = 1;
15079 break;
15080 default: /* Other constants */
15081 *total = 2;
15082 break;
15083 case 0:
15084 case -1:
15085 /* Start with (MEM (SYMBOL_REF)), since that's where
15086 it'll probably end up. Add a penalty for size. */
15087 *total = (COSTS_N_INSNS (1)
15088 + (flag_pic != 0 && !TARGET_64BIT)
15089 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
15090 break;
15091 }
15092 break;
15094 case ZERO_EXTEND:
15095 /* The zero extension is often completely free on x86_64, so make
15096 it as cheap as possible. */
15097 if (TARGET_64BIT && mode == DImode
15098 && GET_MODE (XEXP (x, 0)) == SImode)
15099 *total = 1;
15100 else if (TARGET_ZERO_EXTEND_WITH_AND)
15101 *total = COSTS_N_INSNS (ix86_cost->add);
15102 else
15103 *total = COSTS_N_INSNS (ix86_cost->movzx);
15104 break;
15106 case SIGN_EXTEND:
15107 *total = COSTS_N_INSNS (ix86_cost->movsx);
15108 break;
15110 case ASHIFT:
15111 if (GET_CODE (XEXP (x, 1)) == CONST_INT
15112 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
15113 {
15114 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15115 if (value == 1)
15116 {
15117 *total = COSTS_N_INSNS (ix86_cost->add);
15118 return false;
15119 }
15120 if ((value == 2 || value == 3)
15121 && ix86_cost->lea <= ix86_cost->shift_const)
15122 {
15123 *total = COSTS_N_INSNS (ix86_cost->lea);
15124 return false;
15125 }
15126 }
15127 /* FALLTHRU */
15129 case ROTATE:
15130 case ASHIFTRT:
15131 case LSHIFTRT:
15132 case ROTATERT:
15133 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15134 {
15135 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15136 {
15137 if (INTVAL (XEXP (x, 1)) > 32)
15138 *total = COSTS_N_INSNS (ix86_cost->shift_const + 2);
15139 else
15140 *total = COSTS_N_INSNS (ix86_cost->shift_const * 2);
15141 }
15142 else
15143 {
15144 if (GET_CODE (XEXP (x, 1)) == AND)
15145 *total = COSTS_N_INSNS (ix86_cost->shift_var * 2);
15146 else
15147 *total = COSTS_N_INSNS (ix86_cost->shift_var * 6 + 2);
15148 }
15149 }
15150 else
15151 {
15152 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15153 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15154 else
15155 *total = COSTS_N_INSNS (ix86_cost->shift_var);
15156 }
15157 return false;
15159 case MULT:
15160 if (FLOAT_MODE_P (mode))
15161 {
15162 *total = COSTS_N_INSNS (ix86_cost->fmul);
15163 return false;
15164 }
15165 else
15166 {
15167 rtx op0 = XEXP (x, 0);
15168 rtx op1 = XEXP (x, 1);
15169 int nbits;
15170 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15171 {
15172 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15173 for (nbits = 0; value != 0; value &= value - 1)
15174 nbits++;
15175 }
15176 else
15177 /* This is arbitrary. */
15178 nbits = 7;
15180 /* Compute costs correctly for widening multiplication. */
15181 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
15182 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
15183 == GET_MODE_SIZE (mode))
15184 {
15185 int is_mulwiden = 0;
15186 enum machine_mode inner_mode = GET_MODE (op0);
15188 if (GET_CODE (op0) == GET_CODE (op1))
15189 is_mulwiden = 1, op1 = XEXP (op1, 0);
15190 else if (GET_CODE (op1) == CONST_INT)
15191 {
15192 if (GET_CODE (op0) == SIGN_EXTEND)
15193 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
15194 == INTVAL (op1);
15195 else
15196 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
15197 }
15199 if (is_mulwiden)
15200 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
15201 }
15203 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15204 + nbits * ix86_cost->mult_bit)
15205 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
15207 return true;
15208 }
15210 case DIV:
15211 case UDIV:
15212 case MOD:
15213 case UMOD:
15214 if (FLOAT_MODE_P (mode))
15215 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15216 else
15217 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15218 break;
15220 case PLUS:
15221 if (FLOAT_MODE_P (mode))
15222 *total = COSTS_N_INSNS (ix86_cost->fadd);
15223 else if (GET_MODE_CLASS (mode) == MODE_INT
15224 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15225 {
15226 if (GET_CODE (XEXP (x, 0)) == PLUS
15227 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15228 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15229 && CONSTANT_P (XEXP (x, 1)))
15230 {
15231 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15232 if (val == 2 || val == 4 || val == 8)
15233 {
15234 *total = COSTS_N_INSNS (ix86_cost->lea);
15235 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15236 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15237 outer_code);
15238 *total += rtx_cost (XEXP (x, 1), outer_code);
15239 return true;
15240 }
15241 }
15242 else if (GET_CODE (XEXP (x, 0)) == MULT
15243 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15244 {
15245 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15246 if (val == 2 || val == 4 || val == 8)
15247 {
15248 *total = COSTS_N_INSNS (ix86_cost->lea);
15249 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15250 *total += rtx_cost (XEXP (x, 1), outer_code);
15251 return true;
15252 }
15253 }
15254 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15255 {
15256 *total = COSTS_N_INSNS (ix86_cost->lea);
15257 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15258 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15259 *total += rtx_cost (XEXP (x, 1), outer_code);
15260 return true;
15261 }
15262 }
15263 /* FALLTHRU */
15265 case MINUS:
15266 if (FLOAT_MODE_P (mode))
15267 {
15268 *total = COSTS_N_INSNS (ix86_cost->fadd);
15269 return false;
15270 }
15271 /* FALLTHRU */
15273 case AND:
15274 case IOR:
15275 case XOR:
15276 if (!TARGET_64BIT && mode == DImode)
15277 {
15278 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15279 + (rtx_cost (XEXP (x, 0), outer_code)
15280 << (GET_MODE (XEXP (x, 0)) != DImode))
15281 + (rtx_cost (XEXP (x, 1), outer_code)
15282 << (GET_MODE (XEXP (x, 1)) != DImode)));
15283 return true;
15284 }
15285 /* FALLTHRU */
15287 case NEG:
15288 if (FLOAT_MODE_P (mode))
15289 {
15290 *total = COSTS_N_INSNS (ix86_cost->fchs);
15291 return false;
15292 }
15293 /* FALLTHRU */
15295 case NOT:
15296 if (!TARGET_64BIT && mode == DImode)
15297 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15298 else
15299 *total = COSTS_N_INSNS (ix86_cost->add);
15300 break;
15302 case COMPARE:
15303 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
15304 && XEXP (XEXP (x, 0), 1) == const1_rtx
15305 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
15306 && XEXP (x, 1) == const0_rtx)
15307 {
15308 /* This kind of construct is implemented using test[bwl].
15309 Treat it as if we had an AND. */
15310 *total = (COSTS_N_INSNS (ix86_cost->add)
15311 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
15312 + rtx_cost (const1_rtx, outer_code));
15313 }
15314 break;
15317 case FLOAT_EXTEND:
15318 if (!TARGET_SSE_MATH
15319 || mode == XFmode
15320 || (mode == DFmode && !TARGET_SSE2))
15321 *total = 0;
15322 break;
15324 case ABS:
15325 if (FLOAT_MODE_P (mode))
15326 *total = COSTS_N_INSNS (ix86_cost->fabs);
15327 break;
15329 case SQRT:
15330 if (FLOAT_MODE_P (mode))
15331 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15332 break;
15334 case UNSPEC:
15335 if (XINT (x, 1) == UNSPEC_TP)
15336 *total = 0;
15337 break;
15339 default:
15340 return false;
15341 }
15342 return false;
15343 }
15345 #if TARGET_MACHO
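/* Two illustrative queries (sketches): for (mult:SI (reg) (const_int 5)),
   nbits is 2 (two bits set in 5), so the cost is mult_init[2] +
   2*mult_bit plus the operand costs; for
   (plus:SI (mult:SI (reg) (const_int 4)) (reg)) the PLUS case recognizes
   an lea-style address and charges a single ix86_cost->lea.  */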
15346 static int current_machopic_label_num;
15348 /* Given a symbol name and its associated stub, write out the
15349 definition of the stub. */
15351 void
15352 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15353 {
15354 unsigned int length;
15355 char *binder_name, *symbol_name, lazy_ptr_name[32];
15356 int label = ++current_machopic_label_num;
15358 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15359 symb = (*targetm.strip_name_encoding) (symb);
15361 length = strlen (stub);
15362 binder_name = alloca (length + 32);
15363 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15365 length = strlen (symb);
15366 symbol_name = alloca (length + 32);
15367 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15369 sprintf (lazy_ptr_name, "L%d$lz", label);
15371 if (MACHOPIC_PURE)
15372 machopic_picsymbol_stub_section ();
15373 else
15374 machopic_symbol_stub_section ();
15376 fprintf (file, "%s:\n", stub);
15377 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15379 if (MACHOPIC_PURE)
15380 {
15381 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15382 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15383 fprintf (file, "\tjmp %%edx\n");
15384 }
15385 else
15386 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15388 fprintf (file, "%s:\n", binder_name);
15390 if (MACHOPIC_PURE)
15391 {
15392 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15393 fprintf (file, "\tpushl %%eax\n");
15394 }
15395 else
15396 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15398 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15400 machopic_lazy_symbol_ptr_section ();
15401 fprintf (file, "%s:\n", lazy_ptr_name);
15402 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15403 fprintf (file, "\t.long %s\n", binder_name);
15404 }
15405 #endif /* TARGET_MACHO */
15407 /* Order the registers for the register allocator. */
15409 void
15410 x86_order_regs_for_local_alloc (void)
15411 {
15412 int pos = 0;
15413 int i;
15415 /* First allocate the local general purpose registers. */
15416 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15417 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15418 reg_alloc_order [pos++] = i;
15420 /* Global general purpose registers. */
15421 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15422 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15423 reg_alloc_order [pos++] = i;
15425 /* x87 registers come first in case we are doing FP math
15426 using them. */
15427 if (!TARGET_SSE_MATH)
15428 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15429 reg_alloc_order [pos++] = i;
15431 /* SSE registers. */
15432 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15433 reg_alloc_order [pos++] = i;
15434 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15435 reg_alloc_order [pos++] = i;
15437 /* x87 registers. */
15438 if (TARGET_SSE_MATH)
15439 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15440 reg_alloc_order [pos++] = i;
15442 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15443 reg_alloc_order [pos++] = i;
15445 /* Initialize the rest of the array, as we do not allocate some registers
15446 at all. */
15447 while (pos < FIRST_PSEUDO_REGISTER)
15448 reg_alloc_order [pos++] = 0;
15449 }
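/* The resulting order means, roughly (assuming -mfpmath=sse on ia32):
   caller-saved integer registers first (eax, edx, ecx, ...), then
   callee-saved ones, then SSE registers, with x87 and MMX last, so the
   allocator prefers the cheapest classes before the mode-restricted
   stacked registers.  */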
15451 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15452 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15453 #endif
15455 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15456 struct attribute_spec.handler. */
15457 static tree
15458 ix86_handle_struct_attribute (tree *node, tree name,
15459 tree args ATTRIBUTE_UNUSED,
15460 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15461 {
15462 tree *type = NULL;
15463 if (DECL_P (*node))
15464 {
15465 if (TREE_CODE (*node) == TYPE_DECL)
15466 type = &TREE_TYPE (*node);
15467 }
15468 else
15469 type = node;
15471 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15472 || TREE_CODE (*type) == UNION_TYPE)))
15473 {
15474 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
15475 *no_add_attrs = true;
15476 }
15478 else if ((is_attribute_p ("ms_struct", name)
15479 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15480 || ((is_attribute_p ("gcc_struct", name)
15481 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15482 {
15483 warning ("%qs incompatible attribute ignored",
15484 IDENTIFIER_POINTER (name));
15485 *no_add_attrs = true;
15486 }
15488 return NULL_TREE;
15489 }
15491 static bool
15492 ix86_ms_bitfield_layout_p (tree record_type)
15493 {
15494 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15495 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15496 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15497 }
15499 /* Returns an expression indicating where the this parameter is
15500 located on entry to the FUNCTION. */
15502 static rtx
15503 x86_this_parameter (tree function)
15504 {
15505 tree type = TREE_TYPE (function);
15507 if (TARGET_64BIT)
15508 {
15509 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15510 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15511 }
15513 if (ix86_function_regparm (type, function) > 0)
15514 {
15515 tree parm;
15517 parm = TYPE_ARG_TYPES (type);
15518 /* Figure out whether or not the function has a variable number of
15519 arguments. */
15520 for (; parm; parm = TREE_CHAIN (parm))
15521 if (TREE_VALUE (parm) == void_type_node)
15522 break;
15523 /* If not, the this parameter is in the first argument. */
15524 if (parm)
15525 {
15526 int regno = 0;
15527 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15528 regno = 2;
15529 return gen_rtx_REG (SImode, regno);
15530 }
15531 }
15533 if (aggregate_value_p (TREE_TYPE (type), type))
15534 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15536 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15537 }
15539 /* Determine whether x86_output_mi_thunk can succeed. */
15541 static bool
15542 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15543 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15544 HOST_WIDE_INT vcall_offset, tree function)
15545 {
15546 /* 64-bit can handle anything. */
15547 if (TARGET_64BIT)
15548 return true;
15550 /* For 32-bit, everything's fine if we have one free register. */
15551 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15552 return true;
15554 /* Need a free register for vcall_offset. */
15555 if (vcall_offset)
15556 return false;
15558 /* Need a free register for GOT references. */
15559 if (flag_pic && !(*targetm.binds_local_p) (function))
15560 return false;
15562 /* Otherwise ok. */
15563 return true;
15564 }
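/* Example of the 32-bit limit (a sketch): a method compiled with
   __attribute__((regparm(3))) leaves no free scratch register, so a
   thunk needing a vcall offset or a PIC GOT reference cannot be emitted
   as asm text here and the generic RTL thunk path is used instead.  */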
15566 /* Output the assembler code for a thunk function. THUNK_DECL is the
15567 declaration for the thunk function itself, FUNCTION is the decl for
15568 the target function. DELTA is an immediate constant offset to be
15569 added to THIS. If VCALL_OFFSET is nonzero, the word at
15570 *(*this + vcall_offset) should be added to THIS. */
15572 static void
15573 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15574 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15575 HOST_WIDE_INT vcall_offset, tree function)
15576 {
15577 rtx xops[3];
15578 rtx this = x86_this_parameter (function);
15579 rtx this_reg, tmp;
15581 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15582 pull it in now and let DELTA benefit. */
15583 if (REG_P (this))
15584 this_reg = this;
15585 else if (vcall_offset)
15586 {
15587 /* Put the this parameter into %eax. */
15588 xops[0] = this;
15589 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15590 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15591 }
15592 else
15593 this_reg = NULL_RTX;
15595 /* Adjust the this parameter by a fixed constant. */
15596 if (delta)
15597 {
15598 xops[0] = GEN_INT (delta);
15599 xops[1] = this_reg ? this_reg : this;
15600 if (TARGET_64BIT)
15601 {
15602 if (!x86_64_general_operand (xops[0], DImode))
15603 {
15604 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15605 xops[1] = tmp;
15606 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15607 xops[0] = tmp;
15608 xops[1] = this;
15609 }
15610 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15611 }
15612 else
15613 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15614 }
15616 /* Adjust the this parameter by a value stored in the vtable. */
15617 if (vcall_offset)
15618 {
15619 if (TARGET_64BIT)
15620 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15621 else
15622 {
15623 int tmp_regno = 2 /* ECX */;
15624 if (lookup_attribute ("fastcall",
15625 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15626 tmp_regno = 0 /* EAX */;
15627 tmp = gen_rtx_REG (SImode, tmp_regno);
15628 }
15630 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15631 xops[1] = tmp;
15632 if (TARGET_64BIT)
15633 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15634 else
15635 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15637 /* Adjust the this parameter. */
15638 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15639 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15640 {
15641 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15642 xops[0] = GEN_INT (vcall_offset);
15643 xops[1] = tmp2;
15644 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15645 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15646 }
15647 xops[1] = this_reg;
15648 if (TARGET_64BIT)
15649 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15650 else
15651 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15652 }
15654 /* If necessary, drop THIS back to its stack slot. */
15655 if (this_reg && this_reg != this)
15656 {
15657 xops[0] = this_reg;
15658 xops[1] = this;
15659 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15660 }
15662 xops[0] = XEXP (DECL_RTL (function), 0);
15663 if (TARGET_64BIT)
15664 {
15665 if (!flag_pic || (*targetm.binds_local_p) (function))
15666 output_asm_insn ("jmp\t%P0", xops);
15667 else
15668 {
15669 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15670 tmp = gen_rtx_CONST (Pmode, tmp);
15671 tmp = gen_rtx_MEM (QImode, tmp);
15672 xops[0] = tmp;
15673 output_asm_insn ("jmp\t%A0", xops);
15674 }
15675 }
15676 else
15677 {
15678 if (!flag_pic || (*targetm.binds_local_p) (function))
15679 output_asm_insn ("jmp\t%P0", xops);
15680 else
15681 #if TARGET_MACHO
15682 if (TARGET_MACHO)
15683 {
15684 rtx sym_ref = XEXP (DECL_RTL (function), 0);
15685 tmp = (gen_rtx_SYMBOL_REF
15686 (Pmode,
15687 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
15688 tmp = gen_rtx_MEM (QImode, tmp);
15689 xops[0] = tmp;
15690 output_asm_insn ("jmp\t%0", xops);
15691 }
15692 else
15693 #endif /* TARGET_MACHO */
15694 {
15695 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15696 output_set_got (tmp);
15698 xops[1] = tmp;
15699 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15700 output_asm_insn ("jmp\t{*}%1", xops);
15701 }
15702 }
15703 }
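/* A sketch of the ia32 output for delta == -4, no vcall offset, a
   stack-passed `this', and a locally-bound target:
     addl $-4, 4(%esp)
     jmp  target
   With a vcall offset, the this pointer is first pulled into %eax so the
   vtable slot can be read through the scratch register chosen above.  */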
15705 static void
15706 x86_file_start (void)
15707 {
15708 default_file_start ();
15709 if (X86_FILE_START_VERSION_DIRECTIVE)
15710 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15711 if (X86_FILE_START_FLTUSED)
15712 fputs ("\t.global\t__fltused\n", asm_out_file);
15713 if (ix86_asm_dialect == ASM_INTEL)
15714 fputs ("\t.intel_syntax\n", asm_out_file);
15715 }
15717 int
15718 x86_field_alignment (tree field, int computed)
15719 {
15720 enum machine_mode mode;
15721 tree type = TREE_TYPE (field);
15723 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15724 return computed;
15725 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15726 ? get_inner_array_type (type) : type);
15727 if (mode == DFmode || mode == DCmode
15728 || GET_MODE_CLASS (mode) == MODE_INT
15729 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15730 return MIN (32, computed);
15731 return computed;
15732 }
15734 /* Output assembler code to FILE to increment profiler label # LABELNO
15735 for profiling a function entry. */
15736 void
15737 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15738 {
15739 if (TARGET_64BIT)
15740 if (flag_pic)
15741 {
15742 #ifndef NO_PROFILE_COUNTERS
15743 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15744 #endif
15745 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15746 }
15747 else
15748 {
15749 #ifndef NO_PROFILE_COUNTERS
15750 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15751 #endif
15752 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15753 }
15754 else if (flag_pic)
15755 {
15756 #ifndef NO_PROFILE_COUNTERS
15757 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15758 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15759 #endif
15760 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15761 }
15762 else
15763 {
15764 #ifndef NO_PROFILE_COUNTERS
15765 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15766 PROFILE_COUNT_REGISTER);
15767 #endif
15768 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15769 }
15770 }
15772 /* We don't have exact information about the insn sizes, but we may assume
15773 quite safely that we are informed about all 1 byte insns and memory
15774 address sizes. This is enough to eliminate unnecessary padding in
15775 99% of cases. */
15777 static int
15778 min_insn_size (rtx insn)
15779 {
15780 int l = 0;
15782 if (!INSN_P (insn) || !active_insn_p (insn))
15783 return 0;
15785 /* Discard alignments we've emitted and jump instructions. */
15786 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15787 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15788 return 0;
15789 if (GET_CODE (insn) == JUMP_INSN
15790 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15791 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15792 return 0;
15794 /* Important case - calls are always 5 bytes.
15795 It is common to have many calls in a row. */
15796 if (GET_CODE (insn) == CALL_INSN
15797 && symbolic_reference_mentioned_p (PATTERN (insn))
15798 && !SIBLING_CALL_P (insn))
15799 return 5;
15800 if (get_attr_length (insn) <= 1)
15801 return 1;
15803 /* For normal instructions we may rely on the sizes of addresses
15804 and the presence of a symbol to require 4 bytes of encoding.
15805 This is not the case for jumps where references are PC relative. */
15806 if (GET_CODE (insn) != JUMP_INSN)
15807 {
15808 l = get_attr_length_address (insn);
15809 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15810 l = 4;
15811 }
15813 if (l)
15814 return 1+l;
15815 else
15816 return 2;
15817 }
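/* A sketch of the estimate: "call foo" counts as 5 bytes (symbolic,
   non-sibling call); a reg-reg ALU op with no address bytes falls through
   to the final "return 2"; an insn referencing a symbolic memory operand
   is counted as at least 1 + 4 bytes.  Underestimates are tolerable here
   since they only make the padding pass below more conservative.  */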
15818 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte
15819 window. */
15821 static void
15822 ix86_avoid_jump_misspredicts (void)
15823 {
15824 rtx insn, start = get_insns ();
15825 int nbytes = 0, njumps = 0;
15826 int isjump = 0;
15828 /* Look for all minimal intervals of instructions containing 4 jumps.
15829 The intervals are bounded by START and INSN. NBYTES is the total
15830 size of instructions in the interval including INSN and not including
15831 START. When NBYTES is smaller than 16 bytes, it is possible
15832 that the end of START and INSN ends up in the same 16byte page.
15834 The smallest offset in the page INSN can start is the case where START
15835 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15836 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15837 */
15838 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15839 {
15841 nbytes += min_insn_size (insn);
15842 if (dump_file)
15843 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
15844 INSN_UID (insn), min_insn_size (insn));
15845 if ((GET_CODE (insn) == JUMP_INSN
15846 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15847 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15848 || GET_CODE (insn) == CALL_INSN)
15849 njumps++;
15850 else
15851 continue;
15853 while (njumps > 3)
15854 {
15855 start = NEXT_INSN (start);
15856 if ((GET_CODE (start) == JUMP_INSN
15857 && GET_CODE (PATTERN (start)) != ADDR_VEC
15858 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15859 || GET_CODE (start) == CALL_INSN)
15860 njumps--, isjump = 1;
15861 else
15862 isjump = 0;
15863 nbytes -= min_insn_size (start);
15864 }
15865 if (njumps < 0)
15866 abort ();
15867 if (dump_file)
15868 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
15869 INSN_UID (start), INSN_UID (insn), nbytes);
15871 if (njumps == 3 && isjump && nbytes < 16)
15872 {
15873 int padsize = 15 - nbytes + min_insn_size (insn);
15875 if (dump_file)
15876 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
15877 INSN_UID (insn), padsize);
15878 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15879 }
15880 }
15881 }
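/* Sketch of the transformation: if a 16-byte window would otherwise
   contain
     jne .L1 ; jmp .L2 ; call f ; jae .L3
   the pass emits an alignment insn before the fourth branch so that at
   most three branches share any aligned 16-byte block, matching the K8
   branch-prediction limitation described above.  */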
15883 /* AMD Athlon works faster
15884 when RET is not the destination of a conditional jump or directly preceded
15885 by another jump instruction. We avoid the penalty by inserting a NOP just
15886 before the RET instructions in such cases. */
15887 static void
15888 ix86_pad_returns (void)
15889 {
15890 edge e;
15891 edge_iterator ei;
15893 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
15894 {
15895 basic_block bb = e->src;
15896 rtx ret = BB_END (bb);
15897 rtx prev;
15898 bool replace = false;
15900 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15901 || !maybe_hot_bb_p (bb))
15902 continue;
15903 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15904 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15905 break;
15906 if (prev && GET_CODE (prev) == CODE_LABEL)
15907 {
15908 edge e;
15909 edge_iterator ei;
15911 FOR_EACH_EDGE (e, ei, bb->preds)
15912 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15913 && !(e->flags & EDGE_FALLTHRU))
15914 replace = true;
15915 }
15916 if (!replace)
15917 {
15918 prev = prev_active_insn (ret);
15919 if (prev
15920 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15921 || GET_CODE (prev) == CALL_INSN))
15922 replace = true;
15923 /* Empty functions get a branch mispredict even when the jump destination
15924 is not visible to us. */
15925 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15926 replace = true;
15927 }
15928 if (replace)
15929 {
15930 emit_insn_before (gen_return_internal_long (), ret);
15931 delete_insn (ret);
15932 }
15933 }
15934 }
15936 /* Implement machine specific optimizations. We implement padding of returns
15937 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
15938 static void
15939 ix86_reorg (void)
15940 {
15941 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
15942 ix86_pad_returns ();
15943 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
15944 ix86_avoid_jump_misspredicts ();
15945 }
15947 /* Return nonzero when a QImode register that must be represented via a REX prefix
15948 is used. */
15949 bool
15950 x86_extended_QIreg_mentioned_p (rtx insn)
15951 {
15952 int i;
15953 extract_insn_cached (insn);
15954 for (i = 0; i < recog_data.n_operands; i++)
15955 if (REG_P (recog_data.operand[i])
15956 && REGNO (recog_data.operand[i]) >= 4)
15957 return true;
15958 return false;
15959 }
15961 /* Return nonzero when P points to a register encoded via a REX prefix.
15962 Called via for_each_rtx. */
15963 static int
15964 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15965 {
15966 unsigned int regno;
15967 if (!REG_P (*p))
15968 return 0;
15969 regno = REGNO (*p);
15970 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15971 }
15973 /* Return true when INSN mentions a register that must be encoded using a REX
15974 prefix. */
15975 bool
15976 x86_extended_reg_mentioned_p (rtx insn)
15977 {
15978 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15979 }
15981 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15982 optabs would emit if we didn't have TFmode patterns. */
15984 void
15985 x86_emit_floatuns (rtx operands[2])
15986 {
15987 rtx neglab, donelab, i0, i1, f0, in, out;
15988 enum machine_mode mode, inmode;
15990 inmode = GET_MODE (operands[1]);
15991 if (inmode != SImode
15992 && inmode != DImode)
15993 abort ();
15995 out = operands[0];
15996 in = force_reg (inmode, operands[1]);
15997 mode = GET_MODE (out);
15998 neglab = gen_label_rtx ();
15999 donelab = gen_label_rtx ();
16000 i1 = gen_reg_rtx (Pmode);
16001 f0 = gen_reg_rtx (mode);
16003 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
16005 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
16006 emit_jump_insn (gen_jump (donelab));
16007 emit_barrier ();
16009 emit_label (neglab);
16011 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
16012 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
16013 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
16014 expand_float (f0, i0, 0);
16015 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
16017 emit_label (donelab);
16018 }
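/* The negative path implements the usual unsigned-to-FP halving trick:
   for inputs with the sign bit set, float ((x >> 1) | (x & 1)) is
   computed and doubled; or-ing the low bit back in keeps the halved
   value correctly rounded to nearest even, so f0 + f0 equals the exact
   conversion of the full unsigned value.  */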
16020 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
16021 with all elements equal to VAR. Return true if successful. */
16023 static bool
16024 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
16025 rtx target, rtx val)
16026 {
16027 enum machine_mode smode, wsmode, wvmode;
16028 rtx x;
16030 switch (mode)
16031 {
16032 case V2SImode:
16033 case V2SFmode:
16034 if (!mmx_ok && !TARGET_SSE)
16035 return false;
16036 /* FALLTHRU */
16038 case V2DFmode:
16039 case V2DImode:
16040 case V4SFmode:
16041 case V4SImode:
16042 val = force_reg (GET_MODE_INNER (mode), val);
16043 x = gen_rtx_VEC_DUPLICATE (mode, val);
16044 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16045 return true;
16047 case V4HImode:
16048 if (TARGET_SSE || TARGET_3DNOW_A)
16049 {
16050 val = gen_lowpart (SImode, val);
16051 x = gen_rtx_TRUNCATE (HImode, val);
16052 x = gen_rtx_VEC_DUPLICATE (mode, x);
16053 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16054 return true;
16055 }
16056 else
16057 {
16058 smode = HImode;
16059 wsmode = SImode;
16060 wvmode = V2SImode;
16061 goto widen;
16062 }
16064 case V8QImode:
16065 if (!mmx_ok)
16066 return false;
16067 smode = QImode;
16068 wsmode = HImode;
16069 wvmode = V4HImode;
16070 goto widen;
16072 widen:
16074 /* Replicate the value once into the next wider mode and recurse. */
16075 val = convert_modes (wsmode, smode, val, true);
16076 x = expand_simple_binop (wsmode, ASHIFT, val,
16077 GEN_INT (GET_MODE_BITSIZE (smode)),
16078 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16079 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
16081 x = gen_reg_rtx (wvmode);
16082 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
16083 gcc_unreachable ();
16084 emit_move_insn (target, gen_lowpart (mode, x));
16085 return true;
16087 default:
16088 return false;
16089 }
16090 }
16092 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
16093 whose low element is VAR, and other elements are zero. Return true
16094 if successful. */
16096 static bool
16097 ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
16098 rtx target, rtx var)
16099 {
16100 enum machine_mode vsimode;
16101 rtx x;
16103 switch (mode)
16104 {
16105 case V2SFmode:
16106 case V2SImode:
16107 if (!mmx_ok && !TARGET_SSE)
16108 return false;
16109 /* FALLTHRU */
16111 case V2DFmode:
16112 case V2DImode:
16113 var = force_reg (GET_MODE_INNER (mode), var);
16114 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
16115 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16116 return true;
16118 case V4SFmode:
16119 case V4SImode:
16120 var = force_reg (GET_MODE_INNER (mode), var);
16121 x = gen_rtx_VEC_DUPLICATE (mode, var);
16122 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
16123 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16124 return true;
16126 case V8HImode:
16127 case V16QImode:
16128 vsimode = V4SImode;
16129 goto widen;
16130 case V4HImode:
16131 case V8QImode:
16132 if (!mmx_ok)
16133 return false;
16134 vsimode = V2SImode;
16135 goto widen;
16136 widen:
16137 /* Zero extend the variable element to SImode and recurse. */
16138 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
16140 x = gen_reg_rtx (vsimode);
16141 if (!ix86_expand_vector_init_low_nonzero (mmx_ok, vsimode, x, var))
16142 gcc_unreachable ();
16144 emit_move_insn (target, gen_lowpart (mode, x));
16145 return true;
16147 default:
16148 return false;
16149 }
16150 }
16152 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
16153 consisting of the values in VALS. It is known that all elements
16154 except ONE_VAR are constants. Return true if successful. */
16156 static bool
16157 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
16158 rtx target, rtx vals, int one_var)
16159 {
16160 rtx var = XVECEXP (vals, 0, one_var);
16161 enum machine_mode wmode;
16162 rtx const_vec, x;
16164 XVECEXP (vals, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
16165 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
16167 switch (mode)
16168 {
16169 case V2DFmode:
16170 case V2DImode:
16171 case V2SFmode:
16172 case V2SImode:
16173 /* For the two element vectors, it's just as easy to use
16174 the general case. */
16175 return false;
16177 case V4SFmode:
16178 case V4SImode:
16179 case V8HImode:
16180 case V4HImode:
16181 break;
16183 case V16QImode:
16184 wmode = V8HImode;
16185 goto widen;
16186 case V8QImode:
16187 wmode = V4HImode;
16188 goto widen;
16189 widen:
16190 /* There's no way to set one QImode entry easily. Combine
16191 the variable value with its adjacent constant value, and
16192 promote to an HImode set. */
16193 x = XVECEXP (vals, 0, one_var ^ 1);
16194 if (one_var & 1)
16195 {
16196 var = convert_modes (HImode, QImode, var, true);
16197 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
16198 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16199 x = GEN_INT (INTVAL (x) & 0xff);
16200 }
16201 else
16202 {
16203 var = convert_modes (HImode, QImode, var, true);
16204 x = gen_int_mode (INTVAL (x) << 8, HImode);
16205 }
16206 if (x != const0_rtx)
16207 var = expand_simple_binop (HImode, IOR, var, x, var,
16208 1, OPTAB_LIB_WIDEN);
16210 x = gen_reg_rtx (wmode);
16211 emit_move_insn (x, gen_lowpart (wmode, const_vec));
16212 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
16214 emit_move_insn (target, gen_lowpart (mode, x));
16215 return true;
16217 default:
16218 return false;
16219 }
16221 emit_move_insn (target, const_vec);
16222 ix86_expand_vector_set (mmx_ok, target, var, one_var);
16223 return true;
16224 }
16226 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
16227 all values variable, and none identical. */
16229 static void
16230 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
16231 rtx target, rtx vals)
16232 {
16233 enum machine_mode half_mode = GET_MODE_INNER (mode);
16234 rtx op0 = NULL, op1 = NULL;
16235 bool use_vec_concat = false;
16237 switch (mode)
16238 {
16239 case V2SFmode:
16240 case V2SImode:
16241 if (!mmx_ok && !TARGET_SSE)
16242 break;
16243 /* FALLTHRU */
16245 case V2DFmode:
16246 case V2DImode:
16247 /* For the two element vectors, we always implement VEC_CONCAT. */
16248 op0 = XVECEXP (vals, 0, 0);
16249 op1 = XVECEXP (vals, 0, 1);
16250 use_vec_concat = true;
16251 break;
16253 case V4SFmode:
16254 half_mode = V2SFmode;
16255 goto half;
16256 case V4SImode:
16257 half_mode = V2SImode;
16258 goto half;
16259 half:
16260 {
16261 rtvec v;
16263 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
16264 Recurse to load the two halves. */
16266 op0 = gen_reg_rtx (half_mode);
16267 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
16268 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
16270 op1 = gen_reg_rtx (half_mode);
16271 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
16272 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
16274 use_vec_concat = true;
16275 }
16276 break;
16278 case V8HImode:
16279 case V16QImode:
16280 case V4HImode:
16281 case V8QImode:
16282 break;
16284 default:
16285 gcc_unreachable ();
16286 }
16288 if (use_vec_concat)
16289 {
16290 if (!register_operand (op0, half_mode))
16291 op0 = force_reg (half_mode, op0);
16292 if (!register_operand (op1, half_mode))
16293 op1 = force_reg (half_mode, op1);
16295 emit_insn (gen_rtx_SET (VOIDmode, target,
16296 gen_rtx_VEC_CONCAT (mode, op0, op1)));
16297 }
16298 else
16299 {
16300 int i, j, n_elts, n_words, n_elt_per_word;
16301 enum machine_mode inner_mode;
16302 rtx words[4], shift;
16304 inner_mode = GET_MODE_INNER (mode);
16305 n_elts = GET_MODE_NUNITS (mode);
16306 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
16307 n_elt_per_word = n_elts / n_words;
16308 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
16310 for (i = 0; i < n_words; ++i)
16311 {
16312 rtx word = NULL_RTX;
16314 for (j = 0; j < n_elt_per_word; ++j)
16315 {
16316 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
16317 elt = convert_modes (word_mode, inner_mode, elt, true);
16319 if (j == 0)
16320 word = elt;
16321 else
16322 {
16323 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
16324 word, 1, OPTAB_LIB_WIDEN);
16325 word = expand_simple_binop (word_mode, IOR, word, elt,
16326 word, 1, OPTAB_LIB_WIDEN);
16327 }
16328 }
16330 words[i] = word;
16331 }
16333 if (n_words == 1)
16334 emit_move_insn (target, gen_lowpart (mode, words[0]));
16335 else if (n_words == 2)
16336 {
16337 rtx tmp = gen_reg_rtx (mode);
16338 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
16339 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
16340 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
16341 emit_move_insn (target, tmp);
16342 }
16343 else if (n_words == 4)
16344 {
16345 rtx tmp = gen_reg_rtx (V4SImode);
16346 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
16347 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
16348 emit_move_insn (target, gen_lowpart (mode, tmp));
16349 }
16350 else
16351 gcc_unreachable ();
16352 }
16353 }
16355 /* Initialize vector TARGET via VALS. Suppress the use of MMX
16356 instructions unless MMX_OK is true. */
16358 void
16359 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
16360 {
16361 enum machine_mode mode = GET_MODE (target);
16362 enum machine_mode inner_mode = GET_MODE_INNER (mode);
16363 int n_elts = GET_MODE_NUNITS (mode);
16364 int n_var = 0, one_var = -1;
16365 bool all_same = true, all_const_zero = true;
16366 int i;
16367 rtx x;
16369 for (i = 0; i < n_elts; ++i)
16370 {
16371 x = XVECEXP (vals, 0, i);
16372 if (!CONSTANT_P (x))
16373 n_var++, one_var = i;
16374 else if (x != CONST0_RTX (inner_mode))
16375 all_const_zero = false;
16376 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
16377 all_same = false;
16378 }
16380 /* Constants are best loaded from the constant pool. */
16381 if (n_var == 0)
16382 {
16383 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
16384 return;
16385 }
16387 /* If all values are identical, broadcast the value. */
16388 if (all_same
16389 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
16390 XVECEXP (vals, 0, 0)))
16391 return;
16393 /* Values where only one field is non-constant are best loaded from
16394 the pool and overwritten via move later. */
16395 if (n_var == 1)
16396 {
16397 if (all_const_zero && one_var == 0
16398 && ix86_expand_vector_init_low_nonzero (mmx_ok, mode, target,
16399 XVECEXP (vals, 0, 0)))
16400 return;
16402 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
16403 return;
16404 }
16406 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
16407 }
16409 void
16410 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
16411 {
16412 enum machine_mode mode = GET_MODE (target);
16413 enum machine_mode inner_mode = GET_MODE_INNER (mode);
16414 bool use_vec_merge = false;
16415 rtx tmp;
16417 switch (mode)
16418 {
16419 case V2SFmode:
16420 case V2SImode:
16421 if (mmx_ok)
16422 {
16423 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
16424 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
16425 if (elt)
16426 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
16427 else
16428 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
16429 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
16430 return;
16431 }
16432 break;
16434 case V2DFmode:
16435 case V2DImode:
16436 {
16437 rtx op0, op1;
16439 /* For the two element vectors, we implement a VEC_CONCAT with
16440 the extraction of the other element. */
16442 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
16443 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
16445 if (elt == 0)
16446 op0 = val, op1 = tmp;
16447 else
16448 op0 = tmp, op1 = val;
16450 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
16451 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
16452 }
16453 return;
16455 case V4SFmode:
16456 switch (elt)
16457 {
16458 case 0:
16459 use_vec_merge = true;
16460 break;
16462 case 1:
16463 /* tmp = op0 = A B C D */
16464 tmp = copy_to_reg (target);
16466 /* op0 = C C D D */
16467 emit_insn (gen_sse_unpcklps (target, target, target));
16469 /* op0 = C C D X */
16470 ix86_expand_vector_set (false, target, val, 0);
16472 /* op0 = A B X D */
16473 emit_insn (gen_sse_shufps_1 (target, target, tmp,
16474 GEN_INT (1), GEN_INT (0),
16475 GEN_INT (2+4), GEN_INT (3+4)));
16476 return;
16478 case 2:
16479 tmp = copy_to_reg (target);
16480 ix86_expand_vector_set (false, target, val, 0);
16481 emit_insn (gen_sse_shufps_1 (target, target, tmp,
16482 GEN_INT (0), GEN_INT (1),
16483 GEN_INT (0+4), GEN_INT (3+4)));
16484 return;
16486 case 3:
16487 tmp = copy_to_reg (target);
16488 ix86_expand_vector_set (false, target, val, 0);
16489 emit_insn (gen_sse_shufps_1 (target, target, tmp,
16490 GEN_INT (0), GEN_INT (1),
16491 GEN_INT (2+4), GEN_INT (0+4)));
16492 return;
16494 default:
16495 gcc_unreachable ();
16496 }
16497 break;
16499 case V4SImode:
16500 /* Element 0 handled by vec_merge below. */
16501 if (elt == 0)
16502 {
16503 use_vec_merge = true;
16504 break;
16505 }
16507 if (TARGET_SSE2)
16508 {
16509 /* With SSE2, use integer shuffles to swap element 0 and ELT,
16510 store into element 0, then shuffle them back. */
16511 rtx order[4];
16514 order[0] = GEN_INT (elt);
16515 order[1] = const1_rtx;
16516 order[2] = const2_rtx;
16517 order[3] = GEN_INT (3);
16518 order[elt] = const0_rtx;
16520 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
16521 order[1], order[2], order[3]));
16523 ix86_expand_vector_set (false, target, val, 0);
16525 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
16526 order[1], order[2], order[3]));
16527 }
16528 else
16529 {
16530 /* For SSE1, we have to reuse the V4SF code. */
16531 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
16532 gen_lowpart (SFmode, val), elt);
16533 }
16534 return;
16536 case V8HImode:
16537 use_vec_merge = TARGET_SSE2;
16538 break;
16539 case V4HImode:
16540 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
16541 break;
16543 case V16QImode:
16544 case V8QImode:
16545 default:
16546 break;
16547 }
16549 if (use_vec_merge)
16550 {
16551 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
16552 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
16553 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
16554 }
16555 else
16556 {
16557 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
16559 emit_move_insn (mem, target);
16561 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
16562 emit_move_insn (tmp, val);
16564 emit_move_insn (target, mem);
16565 }
16566 }
16568 void
16569 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
16570 {
16571 enum machine_mode mode = GET_MODE (vec);
16572 enum machine_mode inner_mode = GET_MODE_INNER (mode);
16573 bool use_vec_extr = false;
16574 rtx tmp;
16576 switch (mode)
16577 {
16578 case V2SImode:
16579 case V2SFmode:
16580 if (!mmx_ok)
16581 break;
16582 /* FALLTHRU */
16584 case V2DFmode:
16585 case V2DImode:
16586 use_vec_extr = true;
16587 break;
16589 case V4SFmode:
16590 switch (elt)
16591 {
16592 case 0:
16593 tmp = vec;
16594 break;
16596 case 1:
16597 case 3:
16598 tmp = gen_reg_rtx (mode);
16599 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
16600 GEN_INT (elt), GEN_INT (elt),
16601 GEN_INT (elt+4), GEN_INT (elt+4)));
16602 break;
16604 case 2:
16605 tmp = gen_reg_rtx (mode);
16606 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
16607 break;
16609 default:
16610 gcc_unreachable ();
16611 }
16612 vec = tmp;
16613 use_vec_extr = true;
16614 elt = 0;
16615 break;
16617 case V4SImode:
16618 if (TARGET_SSE2)
16619 {
16620 switch (elt)
16621 {
16622 case 0:
16623 tmp = vec;
16624 break;
16626 case 1:
16627 case 3:
16628 tmp = gen_reg_rtx (mode);
16629 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
16630 GEN_INT (elt), GEN_INT (elt),
16631 GEN_INT (elt), GEN_INT (elt)));
16632 break;
16634 case 2:
16635 tmp = gen_reg_rtx (mode);
16636 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
16637 break;
16639 default:
16640 gcc_unreachable ();
16641 }
16642 vec = tmp;
16643 use_vec_extr = true;
16644 elt = 0;
16645 }
16646 else
16647 {
16648 /* For SSE1, we have to reuse the V4SF code. */
16649 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
16650 gen_lowpart (V4SFmode, vec), elt);
16651 return;
16652 }
16653 break;
16655 case V8HImode:
16656 use_vec_extr = TARGET_SSE2;
16657 break;
16658 case V4HImode:
16659 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
16660 break;
16662 case V16QImode:
16663 case V8QImode:
16664 /* ??? Could extract the appropriate HImode element and shift. */
16665 default:
16666 break;
16667 }
16669 if (use_vec_extr)
16670 {
16671 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
16672 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
16674 /* Let the rtl optimizers know about the zero extension performed. */
16675 if (inner_mode == HImode)
16676 {
16677 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
16678 target = gen_lowpart (SImode, target);
16679 }
16681 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
16682 }
16683 else
16684 {
16685 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
16687 emit_move_insn (mem, vec);
16689 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
16690 emit_move_insn (target, tmp);
16691 }
16692 }
16694 /* Implements target hook vector_mode_supported_p. */
16695 static bool
16696 ix86_vector_mode_supported_p (enum machine_mode mode)
16697 {
16698 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
16699 return true;
16700 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
16701 return true;
16702 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
16703 return true;
16704 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
16705 return true;
16706 return false;
16707 }
16709 /* Worker function for TARGET_MD_ASM_CLOBBERS.
16711 We do this in the new i386 backend to maintain source compatibility
16712 with the old cc0-based compiler. */
16714 static tree
16715 ix86_md_asm_clobbers (tree clobbers)
16716 {
16717 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
16718 clobbers);
16719 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
16720 clobbers);
16721 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
16722 clobbers);
16723 return clobbers;
16724 }
16726 /* Worker function for REVERSE_CONDITION. */
16728 enum rtx_code
16729 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
16730 {
16731 return (mode != CCFPmode && mode != CCFPUmode
16732 ? reverse_condition (code)
16733 : reverse_condition_maybe_unordered (code));
16734 }
16736 /* Output code to perform an x87 FP register move, from OPERANDS[1]
16737 to OPERANDS[0]. */
16739 const char *
16740 output_387_reg_move (rtx insn, rtx *operands)
16741 {
16742 if (REG_P (operands[1])
16743 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16744 {
16745 if (REGNO (operands[0]) == FIRST_STACK_REG
16746 && TARGET_USE_FFREEP)
16747 return "ffreep\t%y0";
16748 return "fstp\t%y0";
16749 }
16750 if (STACK_TOP_P (operands[0]))
16751 return "fld%z1\t%y1";
16752 return "fst\t%y0";
16753 }
16755 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
16756 the FP status register is set. */
16758 void
16759 ix86_emit_fp_unordered_jump (rtx label)
16760 {
16761 rtx reg = gen_reg_rtx (HImode);
16762 rtx temp;
16764 emit_insn (gen_x86_fnstsw_1 (reg));
16766 if (TARGET_USE_SAHF)
16767 {
16768 emit_insn (gen_x86_sahf_1 (reg));
16770 temp = gen_rtx_REG (CCmode, FLAGS_REG);
16771 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
16772 }
16773 else
16774 {
16775 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
16777 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16778 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
16779 }
16781 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
16782 gen_rtx_LABEL_REF (VOIDmode, label),
16783 pc_rtx);
16784 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
16785 emit_jump_insn (temp);
16786 }
16788 /* Output code to perform a log1p XFmode calculation. */
16790 void ix86_emit_i387_log1p (rtx op0, rtx op1)
16791 {
16792 rtx label1 = gen_label_rtx ();
16793 rtx label2 = gen_label_rtx ();
16795 rtx tmp = gen_reg_rtx (XFmode);
16796 rtx tmp2 = gen_reg_rtx (XFmode);
16798 emit_insn (gen_absxf2 (tmp, op1));
16799 emit_insn (gen_cmpxf (tmp,
16800 CONST_DOUBLE_FROM_REAL_VALUE (
16801 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
16802 XFmode)));
16803 emit_jump_insn (gen_bge (label1));
16805 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
16806 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
16807 emit_jump (label2);
16809 emit_label (label1);
16810 emit_move_insn (tmp, CONST1_RTX (XFmode));
16811 emit_insn (gen_addxf3 (tmp, op1, tmp));
16812 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
16813 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
16815 emit_label (label2);
16816 }
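/* The split above follows the classical fyl2xp1 accuracy rule:
   log1p(x) = ln(2) * log2(1 + x), and fyl2xp1 is only specified for
   |x| < 1 - sqrt(2)/2 ~= 0.29289..., the threshold tested above; outside
   that range the ordinary fyl2x applied to 1 + x is accurate enough,
   which is exactly what the label1 path computes.  */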
16818 /* Solaris named-section hook. Parameters are as for
16819 named_section_real. */
16821 static void
16822 i386_solaris_elf_named_section (const char *name, unsigned int flags,
16823 tree decl)
16824 {
16825 /* With Binutils 2.15, the "@unwind" marker must be specified on
16826 every occurrence of the ".eh_frame" section, not just the first
16827 one. */
16828 if (TARGET_64BIT
16829 && strcmp (name, ".eh_frame") == 0)
16830 {
16831 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
16832 flags & SECTION_WRITE ? "aw" : "a");
16833 return;
16834 }
16835 default_elf_asm_named_section (name, flags, decl);
16836 }
16838 #include "gt-i386.h"