1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #ifndef CHECK_STACK_LIMIT
53 #define CHECK_STACK_LIMIT (-1)
56 /* Return index of given mode in mult and division cost tables. */
57 #define MODE_INDEX(mode) \
58 ((mode) == QImode ? 0 \
59 : (mode) == HImode ? 1 \
60 : (mode) == SImode ? 2 \
61 : (mode) == DImode ? 3 \
64 /* Processor costs (relative to an add) */
66 struct processor_costs size_cost = { /* costs for tunning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
86 {2, 2, 2}, /* cost of loading integer registers */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
109 /* Processor costs (relative to an add) */
111 struct processor_costs i386_cost = { /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {8, 8, 8}, /* cost of loading integer registers */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
155 struct processor_costs i486_cost = { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of loading integer registers */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
199 struct processor_costs pentium_cost = {
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {4, 4, 6}, /* cost of loading integer registers */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
243 struct processor_costs pentiumpro_cost = {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
258 Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
263 {4, 4, 6}, /* cost of loading integer registers */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
287 struct processor_costs k6_cost = {
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 4}, /* cost of loading integer registers */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
331 struct processor_costs athlon_cost = {
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
345 in QImode, HImode and SImode.
346 Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
350 in SFmode, DFmode and XFmode */
351 {6, 6, 8}, /* cost of loading integer registers */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
354 in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
359 in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
361 in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
375 struct processor_costs k8_cost = {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {6, 6, 8}, /* cost of loading integer registers */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
419 struct processor_costs pentium4_cost = {
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
439 {4, 4, 6}, /* cost of loading integer registers */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
463 struct processor_costs nocona_cost = {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
483 {4, 4, 4}, /* cost of loading integer registers */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
/* Cost table in effect for the current compilation.  Defaults to the
   Pentium table; presumably re-pointed at the table matching -mtune
   during option processing — confirm against ix86 option handling.  */
506 const struct processor_costs *ix86_cost = &pentium_cost;
508 /* Processor feature/optimization bitmasks. */
/* One bit per PROCESSOR_* enumerator; these are OR-ed together to build
   the per-feature x86_* tuning masks below.  */
509 #define m_386 (1<<PROCESSOR_I386)
510 #define m_486 (1<<PROCESSOR_I486)
511 #define m_PENT (1<<PROCESSOR_PENTIUM)
512 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
513 #define m_K6 (1<<PROCESSOR_K6)
514 #define m_ATHLON (1<<PROCESSOR_ATHLON)
515 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
516 #define m_K8 (1<<PROCESSOR_K8)
/* Convenience mask covering both AMD Athlon (K7) and K8 cores.  */
517 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
518 #define m_NOCONA (1<<PROCESSOR_NOCONA)
/* Each x86_* constant below is a mask of the processors (built from the
   m_* bits above) for which the named tuning heuristic is enabled.  A
   complemented mask (~...) enables the feature everywhere EXCEPT the
   listed processors.  */
520 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
521 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
522 const int x86_zero_extend_with_and = m_486 | m_PENT;
/* The commented-out processors were presumably considered and rejected
   for movzx/movsx-style zero/sign extension.  */
523 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
524 const int x86_double_with_add = ~m_386;
525 const int x86_use_bit_test = m_386;
526 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
527 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
528 const int x86_3dnow_a = m_ATHLON_K8;
529 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
530 /* Branch hints were put in P4 based on simulation result. But
531 after P4 was made, no performance benefit was observed with
532 branch hints. It also increases the code size. As the result,
533 icc never generates branch hints. */
534 const int x86_branch_hints = 0;
535 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
536 const int x86_partial_reg_stall = m_PPRO;
537 const int x86_use_loop = m_K6;
538 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
539 const int x86_use_mov0 = m_K6;
540 const int x86_use_cltd = ~(m_PENT | m_K6);
541 const int x86_read_modify_write = ~m_PENT;
542 const int x86_read_modify = ~(m_PENT | m_PPRO);
543 const int x86_split_long_moves = m_PPRO;
544 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
545 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
546 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
/* QImode arithmetic is allowed on all processors ...  */
547 const int x86_qimode_math = ~(0);
548 const int x86_promote_qi_regs = 0;
/* ... but HImode arithmetic is avoided on PPro-class cores.  */
549 const int x86_himode_math = ~(m_PPRO);
550 const int x86_promote_hi_regs = m_PPRO;
551 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
552 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
553 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
554 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
555 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
556 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
557 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
558 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
559 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
560 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
561 const int x86_decompose_lea = m_PENT4 | m_NOCONA;
562 const int x86_shift1 = ~m_486;
563 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
564 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
565 /* Set for machines where the type and dependencies are resolved on SSE
566 register parts instead of whole registers, so we may maintain just
567 lower part of scalar values in proper format leaving the upper part
undefined.  */
/* More per-processor tuning masks; same encoding as the group above.  */
569 const int x86_sse_split_regs = m_ATHLON_K8;
570 const int x86_sse_typeless_stores = m_ATHLON_K8;
571 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
572 const int x86_use_ffreep = m_ATHLON_K8;
573 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
/* NOTE(review): complemented mask — inter-unit moves avoided only on
   Athlon/K8.  */
574 const int x86_inter_unit_moves = ~(m_ATHLON_K8)
575 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
576 /* Some CPU cores are not able to predict more than 4 branch instructions in
577 the 16 byte window. */
578 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
579 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
580 const int x86_use_bt = m_ATHLON_K8;
582 /* In case the average insn count for single function invocation is
583 lower than this constant, emit fast (but longer) prologue and
epilogue.  */
585 #define FAST_PROLOGUE_INSN_COUNT 20
587 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
/* The *_REGISTER_NAMES initializer macros come from the target headers.  */
588 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
589 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
590 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
592 /* Array of the smallest class containing reg number REGNO, indexed by
593 REGNO. Used by REGNO_REG_CLASS in i386.h. */
595 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
598 AREG, DREG, CREG, BREG,
600 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
602 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
603 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
606 /* flags, fpsr, dirflag, frame */
607 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
608 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
610 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
612 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
613 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
614 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
618 /* The "default" register map used in 32bit mode. */
620 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
622 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
623 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
624 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
625 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
626 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
627 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
628 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
631 static int const x86_64_int_parameter_registers[6] =
633 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
634 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
637 static int const x86_64_int_return_registers[4] =
639 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
642 /* The "default" register map used in 64bit mode. */
643 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
645 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
646 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
647 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
648 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
649 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
650 8,9,10,11,12,13,14,15, /* extended integer registers */
651 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
654 /* Define the register numbers to be used in Dwarf debugging information.
655 The SVR4 reference port C compiler uses the following register numbers
656 in its Dwarf output code:
657 0 for %eax (gcc regno = 0)
658 1 for %ecx (gcc regno = 2)
659 2 for %edx (gcc regno = 1)
660 3 for %ebx (gcc regno = 3)
661 4 for %esp (gcc regno = 7)
662 5 for %ebp (gcc regno = 6)
663 6 for %esi (gcc regno = 4)
664 7 for %edi (gcc regno = 5)
665 The following three DWARF register numbers are never generated by
666 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
667 believes these numbers have these meanings.
668 8 for %eip (no gcc equivalent)
669 9 for %eflags (gcc regno = 17)
670 10 for %trapno (no gcc equivalent)
671 It is not at all clear how we should number the FP stack registers
672 for the x86 architecture. If the version of SDB on x86/svr4 were
673 a bit less brain dead with respect to floating-point then we would
674 have a precedent to follow with respect to DWARF register numbers
675 for x86 FP registers, but the SDB on x86/svr4 is so completely
676 broken with respect to FP registers that it is hardly worth thinking
677 of it as something to strive for compatibility with.
678 The version of x86/svr4 SDB I have at the moment does (partially)
679 seem to believe that DWARF register number 11 is associated with
680 the x86 register %st(0), but that's about all. Higher DWARF
681 register numbers don't seem to be associated with anything in
682 particular, and even for DWARF regno 11, SDB only seems to under-
683 stand that it should say that a variable lives in %st(0) (when
684 asked via an `=' command) if we said it was in DWARF regno 11,
685 but SDB still prints garbage when asked for the value of the
686 variable in question (via a `/' command).
687 (Also note that the labels SDB prints for various FP stack regs
688 when doing an `x' command are all wrong.)
689 Note that these problems generally don't affect the native SVR4
690 C compiler because it doesn't allow the use of -O with -g and
691 because when it is *not* optimizing, it allocates a memory
692 location for each floating-point variable, and the memory
693 location is what gets described in the DWARF AT_location
694 attribute for the variable in question.
695 Regardless of the severe mental illness of the x86/svr4 SDB, we
696 do something sensible here and we use the following DWARF
697 register numbers. Note that these are all stack-top-relative
699 11 for %st(0) (gcc regno = 8)
700 12 for %st(1) (gcc regno = 9)
701 13 for %st(2) (gcc regno = 10)
702 14 for %st(3) (gcc regno = 11)
703 15 for %st(4) (gcc regno = 12)
704 16 for %st(5) (gcc regno = 13)
705 17 for %st(6) (gcc regno = 14)
706 18 for %st(7) (gcc regno = 15)
708 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
710 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
711 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
712 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
713 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
714 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
715 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
716 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
719 /* Test and compare insns in i386.md store the information needed to
720 generate branch and scc insns here. */
722 rtx ix86_compare_op0 = NULL_RTX;
723 rtx ix86_compare_op1 = NULL_RTX;
/* Maximum number of scratch stack slots tracked per function.  */
725 #define MAX_386_STACK_LOCALS 3
726 /* Size of the register save area. */
727 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
729 /* Define the structure for the machine field in struct function. */
731 struct stack_local_entry GTY(())
736 struct stack_local_entry *next;
739 /* Structure describing stack frame layout.
740 Stack grows downward:
746 saved frame pointer if frame_pointer_needed
747 <- HARD_FRAME_POINTER
753 > to_allocate <- FRAME_POINTER
765 int outgoing_arguments_size;
768 HOST_WIDE_INT to_allocate;
769 /* The offsets relative to ARG_POINTER. */
770 HOST_WIDE_INT frame_pointer_offset;
771 HOST_WIDE_INT hard_frame_pointer_offset;
772 HOST_WIDE_INT stack_pointer_offset;
774 /* When save_regs_using_mov is set, emit prologue using
775 move instead of push instructions. */
776 bool save_regs_using_mov;
779 /* Used to enable/disable debugging features. */
780 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
781 /* Code model option as passed by user. */
782 const char *ix86_cmodel_string;
/* Parsed value of ix86_cmodel_string.  */
784 enum cmodel ix86_cmodel;
/* Asm dialect option as passed by user, and its parsed value.  */
786 const char *ix86_asm_string;
787 enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect option as passed by user, and its parsed value.  */
789 const char *ix86_tls_dialect_string;
790 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
792 /* Which unit we are generating floating point math for. */
793 enum fpmath_unit ix86_fpmath;
795 /* Which cpu are we scheduling for. */
796 enum processor_type ix86_tune;
797 /* Which instruction set architecture to use. */
798 enum processor_type ix86_arch;
800 /* Strings to hold which cpu and instruction set architecture to use. */
801 const char *ix86_tune_string; /* for -mtune=<xxx> */
802 const char *ix86_arch_string; /* for -march=<xxx> */
803 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
805 /* # of registers to use to pass arguments. */
806 const char *ix86_regparm_string;
808 /* true if sse prefetch instruction is not NOOP. */
809 int x86_prefetch_sse;
811 /* ix86_regparm_string as a number */
814 /* Alignment to use for loops and jumps: */
816 /* Power of two alignment for loops. */
817 const char *ix86_align_loops_string;
819 /* Power of two alignment for non-loop jumps. */
820 const char *ix86_align_jumps_string;
822 /* Power of two alignment for stack boundary in bytes. */
823 const char *ix86_preferred_stack_boundary_string;
825 /* Preferred alignment for stack boundary in bits. */
826 unsigned int ix86_preferred_stack_boundary;
828 /* Values 1-5: see jump.c */
829 int ix86_branch_cost;
830 const char *ix86_branch_cost_string;
832 /* Power of two alignment for functions. */
833 const char *ix86_align_funcs_string;
835 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
836 char internal_label_prefix[16];
837 int internal_label_prefix_len;
/* Forward declarations for the static helper functions defined later in
   this file.  Kept in one place so the definitions below may appear in any
   order. */
839 static void output_pic_addr_const (FILE *, rtx, int);
840 static void put_condition_code (enum rtx_code, enum machine_mode,
842 static const char *get_some_local_dynamic_name (void);
843 static int get_some_local_dynamic_name_1 (rtx *, void *);
844 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
845 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
847 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
848 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
850 static rtx get_thread_pointer (int);
851 static rtx legitimize_tls_address (rtx, enum tls_model, int);
852 static void get_pc_thunk_name (char [32], unsigned int);
853 static rtx gen_push (rtx);
854 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
855 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
856 static struct machine_function * ix86_init_machine_status (void);
857 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
858 static int ix86_nsaved_regs (void);
/* Prologue/epilogue emission helpers. */
859 static void ix86_emit_save_regs (void);
860 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
861 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
862 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
863 static HOST_WIDE_INT ix86_GOT_alias_set (void);
864 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
865 static rtx ix86_expand_aligntest (rtx, int);
866 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
/* Scheduler hooks. */
867 static int ix86_issue_rate (void);
868 static int ix86_adjust_cost (rtx, rtx, rtx, int);
869 static int ia32_multipass_dfa_lookahead (void);
870 static bool ix86_misaligned_mem_ok (enum machine_mode);
871 static void ix86_init_mmx_sse_builtins (void);
872 static rtx x86_this_parameter (tree);
873 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
874 HOST_WIDE_INT, tree);
875 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
876 static void x86_file_start (void);
877 static void ix86_reorg (void);
878 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
879 static tree ix86_build_builtin_va_list (void);
880 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
882 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
883 static bool ix86_vector_mode_supported_p (enum machine_mode);
885 static int ix86_address_cost (rtx);
886 static bool ix86_cannot_force_const_mem (rtx);
887 static rtx ix86_delegitimize_address (rtx);
/* Builtin expansion helpers. */
889 struct builtin_description;
890 static rtx ix86_expand_sse_comi (const struct builtin_description *,
892 static rtx ix86_expand_sse_compare (const struct builtin_description *,
894 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
895 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
896 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
897 static rtx ix86_expand_store_builtin (enum insn_code, tree);
898 static rtx safe_vector_operand (rtx, enum machine_mode);
/* FP comparison strategy selection and costing. */
899 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
900 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
901 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
902 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
903 static int ix86_fp_comparison_cost (enum rtx_code code);
904 static unsigned int ix86_select_alt_pic_regnum (void);
905 static int ix86_save_reg (unsigned int, int);
906 static void ix86_compute_frame_layout (struct ix86_frame *);
907 static int ix86_comp_type_attributes (tree, tree);
908 static int ix86_function_regparm (tree, tree);
909 const struct attribute_spec ix86_attribute_table[];
910 static bool ix86_function_ok_for_sibcall (tree, tree);
911 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
912 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
913 static int ix86_value_regno (enum machine_mode);
914 static bool contains_128bit_aligned_vector_p (tree);
915 static rtx ix86_struct_value_rtx (tree, int);
916 static bool ix86_ms_bitfield_layout_p (tree);
917 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
918 static int extended_reg_mentioned_1 (rtx *, void *);
919 static bool ix86_rtx_costs (rtx, int, int, int *);
920 static int min_insn_size (rtx);
921 static tree ix86_md_asm_clobbers (tree clobbers);
922 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
923 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
926 /* This function is only used on Solaris. */
927 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
930 /* Register class used for passing given 64bit part of the argument.
931 These represent classes as documented by the PS ABI, with the exception
932 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
933 use SF or DFmode move instead of DImode to avoid reformatting penalties.
935 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
936 whenever possible (upper half does contain padding).
938 enum x86_64_reg_class
941 X86_64_INTEGER_CLASS,
942 X86_64_INTEGERSI_CLASS,
949 X86_64_COMPLEX_X87_CLASS,
/* Printable names for the classes above; must stay in the same order as
   the enumerators of x86_64_reg_class. */
952 static const char * const x86_64_reg_class_name[] = {
953 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
954 "sseup", "x87", "x87up", "cplx87", "no"
/* Maximum number of 64-bit chunks (classes) one argument may span.
   NOTE(review): presumably matches the psABI eightbyte limit -- confirm
   against the classification code that uses it. */
957 #define MAX_CLASSES 4
959 /* Table of constants used by fldpi, fldln2, etc.... */
960 static REAL_VALUE_TYPE ext_80387_constants_table [5];
/* Lazily-initialized guard for the table above; set by
   init_ext_80387_constants. */
961 static bool ext_80387_constants_init = 0;
962 static void init_ext_80387_constants (void);
964 /* Initialize the GCC target structure. */
/* Each hook is first #undef'd (default definitions come from target-def.h)
   and then redefined to the i386-specific implementation. */
965 #undef TARGET_ATTRIBUTE_TABLE
966 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
967 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
968 # undef TARGET_MERGE_DECL_ATTRIBUTES
969 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
972 #undef TARGET_COMP_TYPE_ATTRIBUTES
973 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
975 #undef TARGET_INIT_BUILTINS
976 #define TARGET_INIT_BUILTINS ix86_init_builtins
978 #undef TARGET_EXPAND_BUILTIN
979 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
981 #undef TARGET_ASM_FUNCTION_EPILOGUE
982 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
984 #undef TARGET_ASM_OPEN_PAREN
985 #define TARGET_ASM_OPEN_PAREN ""
986 #undef TARGET_ASM_CLOSE_PAREN
987 #define TARGET_ASM_CLOSE_PAREN ""
/* Assembler directives for emitting aligned integer data. */
989 #undef TARGET_ASM_ALIGNED_HI_OP
990 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
991 #undef TARGET_ASM_ALIGNED_SI_OP
992 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
994 #undef TARGET_ASM_ALIGNED_DI_OP
995 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
/* x86 has no alignment constraints on data access, so the unaligned ops
   reuse the aligned directives. */
998 #undef TARGET_ASM_UNALIGNED_HI_OP
999 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1000 #undef TARGET_ASM_UNALIGNED_SI_OP
1001 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1002 #undef TARGET_ASM_UNALIGNED_DI_OP
1003 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1005 #undef TARGET_SCHED_ADJUST_COST
1006 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1007 #undef TARGET_SCHED_ISSUE_RATE
1008 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1009 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1010 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1011 ia32_multipass_dfa_lookahead
1013 #undef TARGET_VECTORIZE_MISALIGNED_MEM_OK
1014 #define TARGET_VECTORIZE_MISALIGNED_MEM_OK ix86_misaligned_mem_ok
1016 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1017 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1020 #undef TARGET_HAVE_TLS
1021 #define TARGET_HAVE_TLS true
1023 #undef TARGET_CANNOT_FORCE_CONST_MEM
1024 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1026 #undef TARGET_DELEGITIMIZE_ADDRESS
1027 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1029 #undef TARGET_MS_BITFIELD_LAYOUT_P
1030 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1032 #undef TARGET_ASM_OUTPUT_MI_THUNK
1033 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1034 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1035 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1037 #undef TARGET_ASM_FILE_START
1038 #define TARGET_ASM_FILE_START x86_file_start
1040 #undef TARGET_RTX_COSTS
1041 #define TARGET_RTX_COSTS ix86_rtx_costs
1042 #undef TARGET_ADDRESS_COST
1043 #define TARGET_ADDRESS_COST ix86_address_cost
1045 #undef TARGET_FIXED_CONDITION_CODE_REGS
1046 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1047 #undef TARGET_CC_MODES_COMPATIBLE
1048 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1050 #undef TARGET_MACHINE_DEPENDENT_REORG
1051 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1053 #undef TARGET_BUILD_BUILTIN_VA_LIST
1054 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1056 #undef TARGET_MD_ASM_CLOBBERS
1057 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1059 #undef TARGET_PROMOTE_PROTOTYPES
1060 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1061 #undef TARGET_STRUCT_VALUE_RTX
1062 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1063 #undef TARGET_SETUP_INCOMING_VARARGS
1064 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1065 #undef TARGET_MUST_PASS_IN_STACK
1066 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1067 #undef TARGET_PASS_BY_REFERENCE
1068 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1070 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1071 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1073 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1074 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1076 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1077 #undef TARGET_INSERT_ATTRIBUTES
1078 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
/* The one-and-only target hook vector; TARGET_INITIALIZER expands using
   all of the TARGET_* macros defined above. */
1081 struct gcc_target targetm = TARGET_INITIALIZER;
1084 /* The svr4 ABI for the i386 says that records and unions are returned
1086 #ifndef DEFAULT_PCC_STRUCT_RETURN
1087 #define DEFAULT_PCC_STRUCT_RETURN 1
1090 /* Sometimes certain combinations of command options do not make
1091 sense on a particular target machine. You can define a macro
1092 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1093 defined, is executed once just after all the command options have
1096 Don't use this macro to turn on various extra optimizations for
1097 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1100 override_options (void)
1103 int ix86_tune_defaulted = 0;
1105 /* Comes from final.c -- no real reason to change it. */
1106 #define MAX_CODE_ALIGN 16
/* Per-processor tuning table: cost model, target-flag adjustments and
   default code alignments, indexed by enum processor_type. */
1110 const struct processor_costs *cost; /* Processor costs */
1111 const int target_enable; /* Target flags to enable. */
1112 const int target_disable; /* Target flags to disable. */
1113 const int align_loop; /* Default alignments. */
1114 const int align_loop_max_skip;
1115 const int align_jump;
1116 const int align_jump_max_skip;
1117 const int align_func;
1119 const processor_target_table[PROCESSOR_max] =
1121 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1122 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1123 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1124 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1125 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1126 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1127 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1128 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1129 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
1132 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
/* Maps -march=/-mtune= names to a processor and its ISA feature flags. */
1135 const char *const name; /* processor name or nickname. */
1136 const enum processor_type processor;
1137 const enum pta_flags
1143 PTA_PREFETCH_SSE = 16,
1149 const processor_alias_table[] =
1151 {"i386", PROCESSOR_I386, 0},
1152 {"i486", PROCESSOR_I486, 0},
1153 {"i586", PROCESSOR_PENTIUM, 0},
1154 {"pentium", PROCESSOR_PENTIUM, 0},
1155 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1156 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1157 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1158 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1159 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1160 {"i686", PROCESSOR_PENTIUMPRO, 0},
1161 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1162 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1163 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1164 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1165 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1166 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1167 | PTA_MMX | PTA_PREFETCH_SSE},
1168 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1169 | PTA_MMX | PTA_PREFETCH_SSE},
1170 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1171 | PTA_MMX | PTA_PREFETCH_SSE},
1172 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1173 | PTA_MMX | PTA_PREFETCH_SSE},
1174 {"k6", PROCESSOR_K6, PTA_MMX},
1175 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1176 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1177 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1179 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1180 | PTA_3DNOW | PTA_3DNOW_A},
1181 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1182 | PTA_3DNOW_A | PTA_SSE},
1183 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1184 | PTA_3DNOW_A | PTA_SSE},
1185 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1186 | PTA_3DNOW_A | PTA_SSE},
1187 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1188 | PTA_SSE | PTA_SSE2 },
1189 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1190 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1191 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1192 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1193 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1194 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1195 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1196 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1199 int const pta_size = ARRAY_SIZE (processor_alias_table);
1201 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1202 SUBTARGET_OVERRIDE_OPTIONS;
1205 /* Set the default values for switches whose default depends on TARGET_64BIT
1206 in case they weren't overwritten by command line options. */
/* The value 2 marks "not set on the command line" -- see
   optimization_options below, which plants that sentinel. */
1209 if (flag_omit_frame_pointer == 2)
1210 flag_omit_frame_pointer = 1;
1211 if (flag_asynchronous_unwind_tables == 2)
1212 flag_asynchronous_unwind_tables = 1;
1213 if (flag_pcc_struct_return == 2)
1214 flag_pcc_struct_return = 0;
1218 if (flag_omit_frame_pointer == 2)
1219 flag_omit_frame_pointer = 0;
1220 if (flag_asynchronous_unwind_tables == 2)
1221 flag_asynchronous_unwind_tables = 0;
1222 if (flag_pcc_struct_return == 2)
1223 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
/* Default -mtune from -march; failing that, from the configured default. */
1226 if (!ix86_tune_string && ix86_arch_string)
1227 ix86_tune_string = ix86_arch_string;
1228 if (!ix86_tune_string)
1230 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1231 ix86_tune_defaulted = 1;
1233 if (!ix86_arch_string)
1234 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
/* Parse -mcmodel=. */
1236 if (ix86_cmodel_string != 0)
1238 if (!strcmp (ix86_cmodel_string, "small"))
1239 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1241 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1242 else if (!strcmp (ix86_cmodel_string, "32"))
1243 ix86_cmodel = CM_32;
1244 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1245 ix86_cmodel = CM_KERNEL;
1246 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1247 ix86_cmodel = CM_MEDIUM;
1248 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1249 ix86_cmodel = CM_LARGE;
1251 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1255 ix86_cmodel = CM_32;
1257 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
/* Parse -masm=. */
1259 if (ix86_asm_string != 0)
1261 if (!strcmp (ix86_asm_string, "intel"))
1262 ix86_asm_dialect = ASM_INTEL;
1263 else if (!strcmp (ix86_asm_string, "att"))
1264 ix86_asm_dialect = ASM_ATT;
1266 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1268 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1269 error ("code model %qs not supported in the %s bit mode",
1270 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1271 if (ix86_cmodel == CM_LARGE)
1272 sorry ("code model %<large%> not supported yet");
1273 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1274 sorry ("%i-bit mode not compiled in",
1275 (target_flags & MASK_64BIT) ? 64 : 32);
/* Resolve -march= against the alias table and enable its ISA flags,
   unless the user set them explicitly on the command line. */
1277 for (i = 0; i < pta_size; i++)
1278 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1280 ix86_arch = processor_alias_table[i].processor;
1281 /* Default cpu tuning to the architecture. */
1282 ix86_tune = ix86_arch;
1283 if (processor_alias_table[i].flags & PTA_MMX
1284 && !(target_flags_explicit & MASK_MMX))
1285 target_flags |= MASK_MMX;
1286 if (processor_alias_table[i].flags & PTA_3DNOW
1287 && !(target_flags_explicit & MASK_3DNOW))
1288 target_flags |= MASK_3DNOW;
1289 if (processor_alias_table[i].flags & PTA_3DNOW_A
1290 && !(target_flags_explicit & MASK_3DNOW_A))
1291 target_flags |= MASK_3DNOW_A;
1292 if (processor_alias_table[i].flags & PTA_SSE
1293 && !(target_flags_explicit & MASK_SSE))
1294 target_flags |= MASK_SSE;
1295 if (processor_alias_table[i].flags & PTA_SSE2
1296 && !(target_flags_explicit & MASK_SSE2))
1297 target_flags |= MASK_SSE2;
1298 if (processor_alias_table[i].flags & PTA_SSE3
1299 && !(target_flags_explicit & MASK_SSE3))
1300 target_flags |= MASK_SSE3;
1301 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1302 x86_prefetch_sse = true;
1303 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1304 error ("CPU you selected does not support x86-64 "
1310 error ("bad value (%s) for -march= switch", ix86_arch_string);
/* Resolve -mtune= against the alias table. */
1312 for (i = 0; i < pta_size; i++)
1313 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1315 ix86_tune = processor_alias_table[i].processor;
1316 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1318 if (ix86_tune_defaulted)
/* The tuning CPU was only a default, not a user request, so fall
   back to x86-64 tuning rather than erroring out. */
1320 ix86_tune_string = "x86-64";
1321 for (i = 0; i < pta_size; i++)
1322 if (! strcmp (ix86_tune_string,
1323 processor_alias_table[i].name))
1325 ix86_tune = processor_alias_table[i].processor;
1328 error ("CPU you selected does not support x86-64 "
1331 /* Intel CPUs have always interpreted SSE prefetch instructions as
1332 NOPs; so, we can enable SSE prefetch instructions even when
1333 -mtune (rather than -march) points us to a processor that has them.
1334 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1335 higher processors. */
1336 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1337 x86_prefetch_sse = true;
1341 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
/* Pick the cost model: -Os uses size_cost, otherwise the tuned CPU's. */
1344 ix86_cost = &size_cost;
1346 ix86_cost = processor_target_table[ix86_tune].cost;
1347 target_flags |= processor_target_table[ix86_tune].target_enable;
1348 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1350 /* Arrange to set up i386_stack_locals for all functions. */
1351 init_machine_status = ix86_init_machine_status;
1353 /* Validate -mregparm= value. */
1354 if (ix86_regparm_string)
1356 i = atoi (ix86_regparm_string);
1357 if (i < 0 || i > REGPARM_MAX)
1358 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1364 ix86_regparm = REGPARM_MAX;
1366 /* If the user has provided any of the -malign-* options,
1367 warn and use that value only if -falign-* is not set.
1368 Remove this code in GCC 3.2 or later. */
1369 if (ix86_align_loops_string)
1371 warning ("-malign-loops is obsolete, use -falign-loops");
1372 if (align_loops == 0)
1374 i = atoi (ix86_align_loops_string);
1375 if (i < 0 || i > MAX_CODE_ALIGN)
1376 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1378 align_loops = 1 << i;
1382 if (ix86_align_jumps_string)
1384 warning ("-malign-jumps is obsolete, use -falign-jumps");
1385 if (align_jumps == 0)
1387 i = atoi (ix86_align_jumps_string);
1388 if (i < 0 || i > MAX_CODE_ALIGN)
/* NOTE(review): diagnostic says -malign-loops but this validates
   -malign-jumps -- copy/paste error in the message string. */
1389 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1391 align_jumps = 1 << i;
1395 if (ix86_align_funcs_string)
1397 warning ("-malign-functions is obsolete, use -falign-functions");
1398 if (align_functions == 0)
1400 i = atoi (ix86_align_funcs_string);
1401 if (i < 0 || i > MAX_CODE_ALIGN)
/* NOTE(review): same copy/paste error -- message should name
   -malign-functions, not -malign-loops. */
1402 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1404 align_functions = 1 << i;
1408 /* Default align_* from the processor table. */
1409 if (align_loops == 0)
1411 align_loops = processor_target_table[ix86_tune].align_loop;
1412 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1414 if (align_jumps == 0)
1416 align_jumps = processor_target_table[ix86_tune].align_jump;
1417 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1419 if (align_functions == 0)
1421 align_functions = processor_target_table[ix86_tune].align_func;
1424 /* Validate -mpreferred-stack-boundary= value, or provide default.
1425 The default of 128 bits is for Pentium III's SSE __m128, but we
1426 don't want additional code to keep the stack aligned when
1427 optimizing for code size. */
1428 ix86_preferred_stack_boundary = (optimize_size
1429 ? TARGET_64BIT ? 128 : 32
1431 if (ix86_preferred_stack_boundary_string)
1433 i = atoi (ix86_preferred_stack_boundary_string)
;
1434 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1435 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1436 TARGET_64BIT ? 4 : 2);
/* Option value is log2 of the boundary in bytes; convert to bits. */
1438 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1441 /* Validate -mbranch-cost= value, or provide default. */
1442 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1443 if (ix86_branch_cost_string)
1445 i = atoi (ix86_branch_cost_string);
1447 error ("-mbranch-cost=%d is not between 0 and 5", i);
1449 ix86_branch_cost = i;
/* Parse -mtls-dialect=. */
1452 if (ix86_tls_dialect_string)
1454 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1455 ix86_tls_dialect = TLS_DIALECT_GNU;
1456 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1457 ix86_tls_dialect = TLS_DIALECT_SUN;
1459 error ("bad value (%s) for -mtls-dialect= switch",
1460 ix86_tls_dialect_string);
1463 /* Keep nonleaf frame pointers. */
1464 if (flag_omit_frame_pointer)
1465 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1466 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1467 flag_omit_frame_pointer = 1;
1469 /* If we're doing fast math, we don't care about comparison order
1470 wrt NaNs. This lets us use a shorter comparison sequence. */
1471 if (flag_unsafe_math_optimizations)
1472 target_flags &= ~MASK_IEEE_FP;
1474 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1475 since the insns won't need emulation. */
1476 if (!(target_flags_explicit & MASK_NO_FANCY_MATH_387)
1477 && (x86_arch_always_fancy_math_387 & (1 << ix86_arch)))
1478 target_flags &= ~MASK_NO_FANCY_MATH_387;
1480 /* Likewise, if the target doesn't have a 387, or we've specified
1481 software floating point, don't use 387 inline instrinsics. */
1483 target_flags |= MASK_NO_FANCY_MATH_387;
1485 /* Turn on SSE2 builtins for -msse3. */
1487 target_flags |= MASK_SSE2;
1489 /* Turn on SSE builtins for -msse2. */
1491 target_flags |= MASK_SSE;
1493 /* Turn on MMX builtins for -msse. */
1496 target_flags |= MASK_MMX & ~target_flags_explicit;
1497 x86_prefetch_sse = true;
1500 /* Turn on MMX builtins for 3Dnow. */
1502 target_flags |= MASK_MMX;
1506 if (TARGET_ALIGN_DOUBLE)
1507 error ("-malign-double makes no sense in the 64bit mode");
1509 error ("-mrtd calling convention not supported in the 64bit mode");
1511 /* Enable by default the SSE and MMX builtins. Do allow the user to
1512 explicitly disable any of these. In particular, disabling SSE and
1513 MMX for kernel code is extremely useful. */
1515 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1516 & ~target_flags_explicit);
1519 ix86_fpmath = FPMATH_SSE;
1523 ix86_fpmath = FPMATH_387;
1524 /* i386 ABI does not specify red zone. It still makes sense to use it
1525 when the programmer takes care to keep the stack from being destroyed. */
1526 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1527 target_flags |= MASK_NO_RED_ZONE;
/* Parse -mfpmath=. */
1530 if (ix86_fpmath_string != 0)
1532 if (! strcmp (ix86_fpmath_string, "387"))
1533 ix86_fpmath = FPMATH_387;
1534 else if (! strcmp (ix86_fpmath_string, "sse"))
1538 warning ("SSE instruction set disabled, using 387 arithmetics");
1539 ix86_fpmath = FPMATH_387;
1542 ix86_fpmath = FPMATH_SSE;
1544 else if (! strcmp (ix86_fpmath_string, "387,sse")
1545 || ! strcmp (ix86_fpmath_string, "sse,387"))
1549 warning ("SSE instruction set disabled, using 387 arithmetics");
1550 ix86_fpmath = FPMATH_387;
1552 else if (!TARGET_80387)
1554 warning ("387 instruction set disabled, using SSE arithmetics");
1555 ix86_fpmath = FPMATH_SSE;
1558 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1561 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1564 /* If fpmath doesn't include 387, disable use of x87 intrinsics. */
1565 if (! (ix86_fpmath & FPMATH_387))
1566 target_flags |= MASK_NO_FANCY_MATH_387;
1568 if ((x86_accumulate_outgoing_args & TUNEMASK)
1569 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1571 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1573 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1576 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1577 p = strchr (internal_label_prefix, 'X');
1578 internal_label_prefix_len = p - internal_label_prefix;
1582 /* When scheduling description is not available, disable scheduler pass
1583 so it won't slow down the compilation and make x87 code slower. */
1584 if (!TARGET_SCHEDULE)
1585 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
1589 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1591 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1592 make the problem with not enough registers even worse. */
1593 #ifdef INSN_SCHEDULING
1595 flag_schedule_insns = 0;
1598 /* The default values of these switches depend on the TARGET_64BIT
1599 that is not known at this moment. Mark these values with 2 and
1600 let the user override these. In case there is no command line option
1601 specifying them, we will set the defaults in override_options. */
1603 flag_omit_frame_pointer = 2;
1604 flag_pcc_struct_return = 2;
1605 flag_asynchronous_unwind_tables = 2;
1606 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
1607 SUBTARGET_OPTIMIZATION_OPTIONS;
1611 /* Table of valid machine attributes. */
1612 const struct attribute_spec ix86_attribute_table[] =
1614 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1615 /* Stdcall attribute says callee is responsible for popping arguments
1616 if they are not variable. */
1617 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1618 /* Fastcall attribute says callee is responsible for popping arguments
1619 if they are not variable. */
1620 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1621 /* Cdecl attribute says the callee is a normal C declaration */
1622 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1623 /* Regparm attribute specifies how many integer arguments are to be
1624 passed in registers. */
1625 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1626 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1627 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
1628 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
1629 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1631 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1632 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1633 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1634 SUBTARGET_ATTRIBUTE_TABLE,
1636 /* Table terminator -- scanners stop at the NULL name. */
1636 { NULL, 0, 0, false, false, false, NULL }
1639 /* Decide whether we can make a sibling call to a function. DECL is the
1640 declaration of the function being targeted by the call and EXP is the
1641 CALL_EXPR representing the call. */
1644 ix86_function_ok_for_sibcall (tree decl, tree exp)
1646 /* If we are generating position-independent code, we cannot sibcall
1647 optimize any indirect call, or a direct call to a global function,
1648 as the PLT requires %ebx be live. */
1649 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1652 /* If we are returning floats on the 80387 register stack, we cannot
1653 make a sibcall from a function that doesn't return a float to a
1654 function that does or, conversely, from a function that does return
1655 a float to a function that doesn't; the necessary stack adjustment
1656 would not be executed. */
1657 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1658 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1661 /* If this call is indirect, we'll need to be able to use a call-clobbered
1662 register for the address of the target function. Make sure that all
1663 such registers are not used for passing parameters. */
1664 if (!decl && !TARGET_64BIT)
1668 /* We're looking at the CALL_EXPR, we need the type of the function. */
1669 type = TREE_OPERAND (exp, 0); /* pointer expression */
1670 type = TREE_TYPE (type); /* pointer type */
1671 type = TREE_TYPE (type); /* function type */
/* With regparm >= 3 every call-clobbered integer register carries an
   argument, leaving none free to hold the call target. */
1673 if (ix86_function_regparm (type, NULL) >= 3)
1675 /* ??? Need to count the actual number of registers to be used,
1676 not the possible number of registers. Fix later. */
1681 /* Otherwise okay. That also includes certain types of indirect calls. */
1685 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1686 arguments as in struct attribute_spec.handler. */
1688 ix86_handle_cdecl_attribute (tree *node, tree name,
1689 tree args ATTRIBUTE_UNUSED,
1690 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* Reject the attribute on anything that is not function-typed. */
1692 if (TREE_CODE (*node) != FUNCTION_TYPE
1693 && TREE_CODE (*node) != METHOD_TYPE
1694 && TREE_CODE (*node) != FIELD_DECL
1695 && TREE_CODE (*node) != TYPE_DECL)
1697 warning ("%qs attribute only applies to functions",
1698 IDENTIFIER_POINTER (name));
1699 *no_add_attrs = true;
/* The calling-convention attributes are mutually exclusive; diagnose
   conflicting combinations already present on the type. */
1703 if (is_attribute_p ("fastcall", name))
1705 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1707 error ("fastcall and stdcall attributes are not compatible");
1709 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1711 error ("fastcall and regparm attributes are not compatible");
1714 else if (is_attribute_p ("stdcall", name))
1716 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1718 error ("fastcall and stdcall attributes are not compatible");
1725 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
1726 *no_add_attrs = true;
1732 /* Handle a "regparm" attribute;
1733 arguments as in struct attribute_spec.handler. */
1735 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1736 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* Reject the attribute on anything that is not function-typed. */
1738 if (TREE_CODE (*node) != FUNCTION_TYPE
1739 && TREE_CODE (*node) != METHOD_TYPE
1740 && TREE_CODE (*node) != FIELD_DECL
1741 && TREE_CODE (*node) != TYPE_DECL)
1743 warning ("%qs attribute only applies to functions",
1744 IDENTIFIER_POINTER (name));
1745 *no_add_attrs = true;
/* Validate the single argument: an integer constant in [0, REGPARM_MAX]. */
1751 cst = TREE_VALUE (args);
1752 if (TREE_CODE (cst) != INTEGER_CST)
1754 warning ("%qs attribute requires an integer constant argument",
1755 IDENTIFIER_POINTER (name));
1756 *no_add_attrs = true;
1758 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1760 warning ("argument to %qs attribute larger than %d",
1761 IDENTIFIER_POINTER (name), REGPARM_MAX);
1762 *no_add_attrs = true;
/* regparm conflicts with fastcall, which dictates its own registers. */
1765 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1767 error ("fastcall and regparm attributes are not compatible");
1774 /* Return 0 if the attributes for two types are incompatible, 1 if they
1775 are compatible, and 2 if they are nearly compatible (which causes a
1776 warning to be generated). */
1779 ix86_comp_type_attributes (tree type1, tree type2)
1781 /* Check for mismatch of non-default calling convention. */
1782 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
/* Only function types carry the calling-convention attributes checked
   below. */
1784 if (TREE_CODE (type1) != FUNCTION_TYPE)
1787 /* Check for mismatched fastcall types */
1788 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1789 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1792 /* Check for mismatched return types (cdecl vs stdcall). */
1793 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1794 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
/* Differing regparm counts are also an incompatibility. */
1796 if (ix86_function_regparm (type1, NULL)
1797 != ix86_function_regparm (type2, NULL))
1802 /* Return the regparm value for a function with the indicated TYPE and DECL.
1803 DECL may be NULL when calling function indirectly
1804 or considering a libcall. */
1807 ix86_function_regparm (tree type, tree decl)
1810 int regparm = ix86_regparm;
/* Set when the user forced a convention via an attribute; blocks the
   automatic local-function optimization below. */
1811 bool user_convention = false;
/* An explicit regparm(N) attribute takes precedence. */
1815 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1818 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1819 user_convention = true;
1822 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1825 user_convention = true;
1828 /* Use register calling convention for local functions when possible. */
1829 if (!TARGET_64BIT && !user_convention && decl
1830 && flag_unit_at_a_time && !profile_flag)
1832 struct cgraph_local_info *i = cgraph_local_info (decl);
1835 /* We can't use regparm(3) for nested functions as these use
1836 static chain pointer in third argument. */
1837 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1847 /* Return true if EAX is live at the start of the function. Used by
1848 ix86_expand_prologue to determine if we need special help before
1849 calling allocate_stack_worker. */
1852 ix86_eax_live_at_start_p (void)
1854 /* Cheat. Don't bother working forward from ix86_function_regparm
1855 to the function type to whether an actual argument is located in
1856 eax. Instead just look at cfg info, which is still close enough
1857 to correct at this point. This gives false positives for broken
1858 functions that might use uninitialized data that happens to be
1859 allocated in eax, but who cares? */
/* Register 0 is %eax in the i386 register numbering. */
1860 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1863 /* Value is the number of bytes of arguments automatically
1864 popped when returning from a subroutine call.
1865 FUNDECL is the declaration node of the function (as a tree),
1866 FUNTYPE is the data type of the function (as a tree),
1867 or for a library call it is an identifier node for the subroutine name.
1868 SIZE is the number of bytes of arguments passed on the stack.
1870 On the 80386, the RTD insn may be used to pop them if the number
1871 of args is fixed, but if the number is variable then the caller
1872 must pop them all.  RTD can't be used for library calls now
1873 because the library is compiled with the Unix compiler.
1874 Use of RTD is a selectable option, since it is incompatible with
1875 standard Unix calling sequences.  If the option is not selected,
1876 the caller must always pop the args.
1878 The attribute stdcall is equivalent to RTD on a per module basis.  */
1881 ix86_return_pops_args (tree fundecl, tree funtype, int size)
 /* For libcalls FUNDECL is an IDENTIFIER_NODE, not a decl; -mrtd never
 applies to those.  */
1883 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1885 /* Cdecl functions override -mrtd, and never pop the stack.  */
1886 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1888 /* Stdcall and fastcall functions will pop the stack if not
 variable-argument (the callee-pop check below requires a fixed,
 void-terminated argument list).  */
1890 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1891 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1895 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1896 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1897 == void_type_node)))
1901 /* Lose any fake structure return argument if it is passed on the stack.  */
1902 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1904 && !KEEP_AGGREGATE_RETURN_POINTER)
1906 int nregs = ix86_function_regparm (funtype, fundecl);
 /* The hidden return-pointer slot is pointer-sized.  */
1909 return GET_MODE_SIZE (Pmode);
1915 /* Argument support functions.  */
1917 /* Return true when register REGNO may be used to pass function
 parameters (integer regparm registers, and SSE registers when
 available).  */
1919 ix86_function_arg_regno_p (int regno)
1923 return (regno < REGPARM_MAX
1924 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]))
1925 if (SSE_REGNO_P (regno) && TARGET_SSE)
1927 /* RAX is used as hidden argument to va_arg functions.  */
 /* 64-bit: scan the x86-64 integer parameter register list.  */
1930 for (i = 0; i < REGPARM_MAX; i++)
1931 if (regno == x86_64_int_parameter_registers[i])
1936 /* Return nonzero if we do not know how to pass TYPE solely in
 registers (target hook for TARGET_MUST_PASS_IN_STACK).  */
1939 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
 /* First defer to the generic variable-size / padding rule.  */
1941 if (must_pass_in_stack_var_size_or_pad (mode, type))
1944 /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
1945 The layout_type routine is crafty and tries to trick us into passing
1946 currently unsupported vector types on the stack by using TImode.  */
1947 return (!TARGET_64BIT && mode == TImode
1948 && type && TREE_CODE (type) != VECTOR_TYPE);
1951 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1952 for a call to a function whose data type is FNTYPE.
1953 For a library call, FNTYPE is 0.  */
1956 init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
1957 tree fntype,	/* tree ptr for function decl */
1958 rtx libname,	/* SYMBOL_REF of library name or 0 */
1961 static CUMULATIVE_ARGS zero_cum;
1962 tree param, next_param;
 /* Optional tracing of argument setup, enabled by -mdebug-arg.  */
1964 if (TARGET_DEBUG_ARG)
1966 fprintf (stderr, "\ninit_cumulative_args (");
1968 fprintf (stderr, "fntype code = %s, ret code = %s",
1969 tree_code_name[(int) TREE_CODE (fntype)],
1970 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1972 fprintf (stderr, "no fntype");
1975 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1980 /* Set up the number of registers to use for passing arguments.  */
1982 cum->nregs = ix86_function_regparm (fntype, fndecl);
1984 cum->nregs = ix86_regparm;
1986 cum->sse_nregs = SSE_REGPARM_MAX;
1988 cum->mmx_nregs = MMX_REGPARM_MAX;
1989 cum->warn_sse = true;
1990 cum->warn_mmx = true;
1991 cum->maybe_vaarg = false;
1993 /* Use ecx and edx registers if function has fastcall attribute.  */
1994 if (fntype && !TARGET_64BIT)
1996 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2003 /* Determine if this function has variable arguments.  This is
2004 indicated by the last argument being 'void_type_node' if there
2005 are no variable arguments.  If there are variable arguments, then
2006 we won't pass anything in registers in 32-bit mode.  */
2008 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2010 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2011 param != 0; param = next_param)
2013 next_param = TREE_CHAIN (param);
2014 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2025 cum->maybe_vaarg = true;
 /* An unprototyped function or libcall must be treated as possibly
 variadic as well.  */
2029 if ((!fntype && !libname)
2030 || (fntype && !TYPE_ARG_TYPES (fntype)))
2031 cum->maybe_vaarg = 1;
2033 if (TARGET_DEBUG_ARG)
2034 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2039 /* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
2040 But in the case of vector types, it is some vector mode.
2042 When we have only some of our vector isa extensions enabled, then there
2043 are some modes for which vector_mode_supported_p is false.  For these
2044 modes, the generic vector support in gcc will choose some non-vector mode
2045 in order to implement the type.  By computing the natural mode, we'll
2046 select the proper ABI location for the operand and not depend on whatever
2047 the middle-end decides to do with these vector types.  */
2049 static enum machine_mode
2050 type_natural_mode (tree type)
2052 enum machine_mode mode = TYPE_MODE (type);
 /* Only intervene when the middle-end picked a non-vector mode for a
 vector type.  */
2054 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2056 HOST_WIDE_INT size = int_size_in_bytes (type);
2057 if ((size == 8 || size == 16)
2058 /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
2059 && TYPE_VECTOR_SUBPARTS (type) > 1)
2061 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2063 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2064 mode = MIN_MODE_VECTOR_FLOAT;
2066 mode = MIN_MODE_VECTOR_INT;
2068 /* Get the mode which has this inner mode and number of units.  */
2069 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2070 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2071 && GET_MODE_INNER (mode) == innermode)
2081 /* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
2082 this may not agree with the mode that the type system has chosen for the
2083 register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
2084 go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */
2087 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2092 if (orig_mode != BLKmode)
2093 tmp = gen_rtx_REG (orig_mode, regno);
 /* BLKmode: wrap the register in a one-element PARALLEL at offset 0.  */
2096 tmp = gen_rtx_REG (mode, regno);
2097 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2098 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2104 /* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
2105 of this code is to classify each 8bytes of incoming argument by the register
2106 class and assign registers accordingly.  */
2108 /* Return the union class of CLASS1 and CLASS2.
2109 See the x86-64 PS ABI for details.  */
2111 static enum x86_64_reg_class
2112 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2114 /* Rule #1: If both classes are equal, this is the resulting class.  */
2115 if (class1 == class2)
2118 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
 the other class.  */
2120 if (class1 == X86_64_NO_CLASS)
2122 if (class2 == X86_64_NO_CLASS)
2125 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
2126 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2127 return X86_64_MEMORY_CLASS;
2129 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.
 The SI-sized variant is preserved when the other half is an SF,
 so sub-8-byte loads stay possible.  */
2130 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2131 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2132 return X86_64_INTEGERSI_CLASS;
2133 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2134 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2135 return X86_64_INTEGER_CLASS;
2137 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
 MEMORY is used.  */
2139 if (class1 == X86_64_X87_CLASS
2140 || class1 == X86_64_X87UP_CLASS
2141 || class1 == X86_64_COMPLEX_X87_CLASS
2142 || class2 == X86_64_X87_CLASS
2143 || class2 == X86_64_X87UP_CLASS
2144 || class2 == X86_64_COMPLEX_X87_CLASS)
2145 return X86_64_MEMORY_CLASS;
2147 /* Rule #6: Otherwise class SSE is used.  */
2148 return X86_64_SSE_CLASS;
2151 /* Classify the argument of type TYPE and mode MODE.
2152 CLASSES will be filled by the register class used to pass each word
2153 of the operand.  The number of words is returned.  In case the parameter
2154 should be passed in memory, 0 is returned.  As a special case for zero
2155 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2157 BIT_OFFSET is used internally for handling records and specifies offset
2158 of the offset in bits modulo 256 to avoid overflow cases.
2160 See the x86-64 PS ABI for details.  */
2164 classify_argument (enum machine_mode mode, tree type,
2165 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2167 HOST_WIDE_INT bytes =
2168 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2169 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2171 /* Variable sized entities are always passed/returned in memory.  */
2175 if (mode != VOIDmode
2176 && targetm.calls.must_pass_in_stack (mode, type))
2179 if (type && AGGREGATE_TYPE_P (type))
2183 enum x86_64_reg_class subclasses[MAX_CLASSES];
2185 /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
2189 for (i = 0; i < words; i++)
2190 classes[i] = X86_64_NO_CLASS;
2192 /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
2193 signalize memory class, so handle it as special case.  */
2196 classes[0] = X86_64_NO_CLASS;
2200 /* Classify each field of record and merge classes.  */
2201 if (TREE_CODE (type) == RECORD_TYPE)
2203 /* For classes first merge in the field of the subclasses.  */
2204 if (TYPE_BINFO (type))
2206 tree binfo, base_binfo;
2209 for (binfo = TYPE_BINFO (type), basenum = 0;
2210 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2213 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2214 tree type = BINFO_TYPE (base_binfo);
2216 num = classify_argument (TYPE_MODE (type),
2218 (offset + bit_offset) % 256);
2221 for (i = 0; i < num; i++)
 /* Fold each base-class eightbyte into the enclosing record's
 classification at its byte position.  */
2223 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2225 merge_classes (subclasses[i], classes[i + pos]);
2229 /* And now merge the fields of structure.  */
2230 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2232 if (TREE_CODE (field) == FIELD_DECL)
2236 /* Bitfields are always classified as integer.  Handle them
2237 early, since later code would consider them to be
2238 misaligned integers.  */
2239 if (DECL_BIT_FIELD (field))
2241 for (i = int_bit_position (field) / 8 / 8;
2242 i < (int_bit_position (field)
2243 + tree_low_cst (DECL_SIZE (field), 0)
2246 merge_classes (X86_64_INTEGER_CLASS,
2251 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2252 TREE_TYPE (field), subclasses,
2253 (int_bit_position (field)
2254 + bit_offset) % 256);
2257 for (i = 0; i < num; i++)
2260 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2262 merge_classes (subclasses[i], classes[i + pos]);
2268 /* Arrays are handled as small records.  */
2269 else if (TREE_CODE (type) == ARRAY_TYPE)
2272 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2273 TREE_TYPE (type), subclasses, bit_offset);
2277 /* The partial classes are now full classes.  */
2278 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2279 subclasses[0] = X86_64_SSE_CLASS;
2280 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2281 subclasses[0] = X86_64_INTEGER_CLASS;
 /* Replicate the element classification across all words of
 the array.  */
2283 for (i = 0; i < words; i++)
2284 classes[i] = subclasses[i % num];
2286 /* Unions are similar to RECORD_TYPE but offset is always 0.  */
2287 else if (TREE_CODE (type) == UNION_TYPE
2288 || TREE_CODE (type) == QUAL_UNION_TYPE)
2290 /* For classes first merge in the field of the subclasses.  */
2291 if (TYPE_BINFO (type))
2293 tree binfo, base_binfo;
2296 for (binfo = TYPE_BINFO (type), basenum = 0;
2297 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2300 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2301 tree type = BINFO_TYPE (base_binfo);
2303 num = classify_argument (TYPE_MODE (type),
2305 (offset + (bit_offset % 64)) % 256);
2308 for (i = 0; i < num; i++)
2310 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2312 merge_classes (subclasses[i], classes[i + pos]);
2316 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2318 if (TREE_CODE (field) == FIELD_DECL)
2321 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2322 TREE_TYPE (field), subclasses,
2326 for (i = 0; i < num; i++)
2327 classes[i] = merge_classes (subclasses[i], classes[i]);
2334 /* Final merger cleanup.  Enforce the psABI post-merge rules on the
 per-word classification.  */
2335 for (i = 0; i < words; i++)
2337 /* If one class is MEMORY, everything should be passed in
 memory.  */
2339 if (classes[i] == X86_64_MEMORY_CLASS)
2342 /* The X86_64_SSEUP_CLASS should be always preceded by
2343 X86_64_SSE_CLASS.  */
2344 if (classes[i] == X86_64_SSEUP_CLASS
2345 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2346 classes[i] = X86_64_SSE_CLASS;
2348 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
2349 if (classes[i] == X86_64_X87UP_CLASS
2350 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2351 classes[i] = X86_64_SSE_CLASS;
2356 /* Compute alignment needed.  We align all types to natural boundaries with
2357 exception of XFmode that is aligned to 64bits.  */
2358 if (mode != VOIDmode && mode != BLKmode)
2360 int mode_alignment = GET_MODE_BITSIZE (mode);
2363 mode_alignment = 128;
2364 else if (mode == XCmode)
2365 mode_alignment = 256;
 /* A complex value aligns to its component's boundary.  */
2366 if (COMPLEX_MODE_P (mode))
2367 mode_alignment /= 2;
2368 /* Misaligned fields are always returned in memory.  */
2369 if (bit_offset % mode_alignment)
2373 /* for V1xx modes, just use the base mode */
2374 if (VECTOR_MODE_P (mode)
2375 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2376 mode = GET_MODE_INNER (mode);
2378 /* Classification of atomic types.  */
 /* Values fitting entirely in the low 32 bits of an eightbyte get
 the SI-sized integer class.  */
2388 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2389 classes[0] = X86_64_INTEGERSI_CLASS;
2391 classes[0] = X86_64_INTEGER_CLASS;
2395 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2400 if (!(bit_offset % 64))
2401 classes[0] = X86_64_SSESF_CLASS;
2403 classes[0] = X86_64_SSE_CLASS;
2406 classes[0] = X86_64_SSEDF_CLASS;
2409 classes[0] = X86_64_X87_CLASS;
2410 classes[1] = X86_64_X87UP_CLASS;
2413 classes[0] = X86_64_SSE_CLASS;
2414 classes[1] = X86_64_SSEUP_CLASS;
2417 classes[0] = X86_64_SSE_CLASS;
2420 classes[0] = X86_64_SSEDF_CLASS;
2421 classes[1] = X86_64_SSEDF_CLASS;
2424 classes[0] = X86_64_COMPLEX_X87_CLASS;
2427 /* This mode is larger than 16 bytes.  */
2435 classes[0] = X86_64_SSE_CLASS;
2436 classes[1] = X86_64_SSEUP_CLASS;
2442 classes[0] = X86_64_SSE_CLASS;
2448 if (VECTOR_MODE_P (mode))
2452 if (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT)
2454 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2455 classes[0] = X86_64_INTEGERSI_CLASS;
2457 classes[0] = X86_64_INTEGER_CLASS;
2458 classes[1] = X86_64_INTEGER_CLASS;
2459 return 1 + (bytes > 8);
2466 /* Examine the argument and return the number of registers required in each
2467 class.  Return 0 iff parameter should be passed in memory.  */
2469 examine_argument (enum machine_mode mode, tree type, int in_return,
2470 int *int_nregs, int *sse_nregs)
2472 enum x86_64_reg_class class[MAX_CLASSES];
2473 int n = classify_argument (mode, type, class, 0);
 /* Count registers by walking the per-eightbyte classification.  */
2479 for (n--; n >= 0; n--)
2482 case X86_64_INTEGER_CLASS:
2483 case X86_64_INTEGERSI_CLASS:
2486 case X86_64_SSE_CLASS:
2487 case X86_64_SSESF_CLASS:
2488 case X86_64_SSEDF_CLASS:
2491 case X86_64_NO_CLASS:
2492 case X86_64_SSEUP_CLASS:
2494 case X86_64_X87_CLASS:
2495 case X86_64_X87UP_CLASS:
2499 case X86_64_COMPLEX_X87_CLASS:
 /* x87 classes are only usable for return values, never for
 arguments.  */
2500 return in_return ? 2 : 0;
2501 case X86_64_MEMORY_CLASS:
2507 /* Construct container for the argument used by GCC interface.  See
2508 FUNCTION_ARG for the detailed description.  Returns a REG, a
 PARALLEL describing the register split, or NULL for a zero-sized
 container.  */
2511 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
2512 tree type, int in_return, int nintregs, int nsseregs,
2513 const int *intreg, int sse_regno)
2515 enum machine_mode tmpmode;
2517 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2518 enum x86_64_reg_class class[MAX_CLASSES];
2522 int needed_sseregs, needed_intregs;
2523 rtx exp[MAX_CLASSES];
2526 n = classify_argument (mode, type, class, 0);
2527 if (TARGET_DEBUG_ARG)
2530 fprintf (stderr, "Memory class\n");
2533 fprintf (stderr, "Classes:");
2534 for (i = 0; i < n; i++)
2536 fprintf (stderr, "  %s", x86_64_reg_class_name[class[i]]);
2538 fprintf (stderr, "\n");
 /* Fall back to memory when classification fails or not enough
 registers remain.  */
2543 if (!examine_argument (mode, type, in_return, &needed_intregs,
2546 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2549 /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
2550 some less clueful developer tries to use floating-point anyway.  */
2551 if (needed_sseregs && !TARGET_SSE)
2553 static bool issued_error;
2556 issued_error = true;
2558 error ("SSE register return with SSE disabled");
2560 error ("SSE register argument with SSE disabled");
2565 /* First construct simple cases.  Avoid SCmode, since we want to use
2566 single register to pass this type.  */
2567 if (n == 1 && mode != SCmode)
2570 case X86_64_INTEGER_CLASS:
2571 case X86_64_INTEGERSI_CLASS:
2572 return gen_rtx_REG (mode, intreg[0]);
2573 case X86_64_SSE_CLASS:
2574 case X86_64_SSESF_CLASS:
2575 case X86_64_SSEDF_CLASS:
2576 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
2577 case X86_64_X87_CLASS:
2578 case X86_64_COMPLEX_X87_CLASS:
2579 return gen_rtx_REG (mode, FIRST_STACK_REG);
2580 case X86_64_NO_CLASS:
2581 /* Zero sized array, struct or class.  */
 /* Two-eightbyte special cases that fit a single hard register.  */
2586 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2588 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2590 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2591 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2592 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2593 && class[1] == X86_64_INTEGER_CLASS
2594 && (mode == CDImode || mode == TImode || mode == TFmode)
2595 && intreg[0] + 1 == intreg[1])
2596 return gen_rtx_REG (mode, intreg[0]);
2598 /* Otherwise figure out the entries of the PARALLEL.  */
2599 for (i = 0; i < n; i++)
2603 case X86_64_NO_CLASS:
2605 case X86_64_INTEGER_CLASS:
2606 case X86_64_INTEGERSI_CLASS:
2607 /* Merge TImodes on aligned occasions here too.  */
2608 if (i * 8 + 8 > bytes)
2609 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2610 else if (class[i] == X86_64_INTEGERSI_CLASS)
2614 /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
2615 if (tmpmode == BLKmode)
2617 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2618 gen_rtx_REG (tmpmode, *intreg),
2622 case X86_64_SSESF_CLASS:
2623 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2624 gen_rtx_REG (SFmode,
2625 SSE_REGNO (sse_regno)),
2629 case X86_64_SSEDF_CLASS:
2630 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2631 gen_rtx_REG (DFmode,
2632 SSE_REGNO (sse_regno)),
2636 case X86_64_SSE_CLASS:
 /* A following SSEUP eightbyte extends this entry to TImode.  */
2637 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2641 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2642 gen_rtx_REG (tmpmode,
2643 SSE_REGNO (sse_regno)),
2645 if (tmpmode == TImode)
 /* Assemble the final PARALLEL from the collected entries.  */
2653 ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2654 for (i = 0; i < nexps; i++)
2655 XVECEXP (ret, 0, i) = exp [i];
2659 /* Update the data in CUM to advance over an argument
2660 of mode MODE and data type TYPE.
2661 (TYPE is null for libcalls where that information may not be available.)  */
2664 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2665 tree type, int named)
2668 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2669 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2671 if (TARGET_DEBUG_ARG)
2672 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
2673 "mode=%s, named=%d)\n\n",
2674 words, cum->words, cum->nregs, cum->sse_nregs,
2675 GET_MODE_NAME (mode), named);
 /* 64-bit path: consume integer and SSE registers as classified;
 fall back to stack words when registers are exhausted.  */
2678 int int_nregs, sse_nregs;
2679 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2680 cum->words += words;
2681 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2683 cum->nregs -= int_nregs;
2684 cum->sse_nregs -= sse_nregs;
2685 cum->regno += int_nregs;
2686 cum->sse_regno += sse_nregs;
2689 cum->words += words;
 /* 32-bit path: vector arguments may go in SSE/MMX registers,
 but only non-aggregates.  */
2693 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2694 && (!type || !AGGREGATE_TYPE_P (type)))
2696 cum->sse_words += words;
2697 cum->sse_nregs -= 1;
2698 cum->sse_regno += 1;
2699 if (cum->sse_nregs <= 0)
2705 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2706 && (!type || !AGGREGATE_TYPE_P (type)))
2708 cum->mmx_words += words;
2709 cum->mmx_nregs -= 1;
2710 cum->mmx_regno += 1;
2711 if (cum->mmx_nregs <= 0)
2719 cum->words += words;
2720 cum->nregs -= words;
2721 cum->regno += words;
2723 if (cum->nregs <= 0)
2733 /* Define where to put the arguments to a function.
2734 Value is zero to push the argument on the stack,
2735 or a hard register in which to store the argument.
2737 MODE is the argument's machine mode.
2738 TYPE is the data type of the argument (as a tree).
2739 This is null for libcalls where that information may not be available.
2741 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2742 the preceding args and about the function being called.
2743 NAMED is nonzero if this argument is a named parameter
2744 (otherwise it is an extra parameter matching an ellipsis).  */
2747 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
2748 tree type, int named)
2750 enum machine_mode mode = orig_mode;
2753 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2754 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2755 static bool warnedsse, warnedmmx;
2757 /* To simplify the code below, represent vector types with a vector mode
2758 even if MMX/SSE are not active.  */
2759 if (type && TREE_CODE (type) == VECTOR_TYPE)
2760 mode = type_natural_mode (type);
2762 /* Handle a hidden AL argument containing number of registers for varargs
2763 x86-64 functions.  For i386 ABI just return constm1_rtx to avoid
 depending on call expand machinery.  */
2765 if (mode == VOIDmode)
2768 return GEN_INT (cum->maybe_vaarg
2769 ? (cum->sse_nregs < 0
 /* 64-bit: defer to the psABI classification.  */
2777 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
2779 &x86_64_int_parameter_registers [cum->regno],
2784 /* For now, pass fp/complex values on the stack.  */
2796 if (words <= cum->nregs)
2798 int regno = cum->regno;
2800 /* Fastcall allocates the first two DWORD (SImode) or
2801 smaller arguments to ECX and EDX.  */
2804 if (mode == BLKmode || mode == DImode)
2807 /* ECX not EAX is the first allocated register.  */
2811 ret = gen_rtx_REG (mode, regno);
 /* SSE/MMX vector arguments: warn once if the required ISA is
 disabled, since the argument then goes to the stack and the
 ABI changes.  */
2821 if (!type || !AGGREGATE_TYPE_P (type))
2823 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2826 warning ("SSE vector argument without SSE enabled "
2830 ret = gen_reg_or_parallel (mode, orig_mode,
2831 cum->sse_regno + FIRST_SSE_REG);
2838 if (!type || !AGGREGATE_TYPE_P (type))
2840 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2843 warning ("MMX vector argument without MMX enabled "
2847 ret = gen_reg_or_parallel (mode, orig_mode,
2848 cum->mmx_regno + FIRST_MMX_REG);
2853 if (TARGET_DEBUG_ARG)
2856 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2857 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2860 print_simple_rtl (stderr, ret);
2862 fprintf (stderr, ", stack");
2864 fprintf (stderr, " )\n");
2870 /* A C expression that indicates when an argument must be passed by
2871 reference.  If nonzero for an argument, a copy of that argument is
2872 made in memory and a pointer to the argument is passed instead of
2873 the argument itself.  The pointer is passed in whatever way is
2874 appropriate for passing a pointer to that type.  */
2877 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2878 enum machine_mode mode ATTRIBUTE_UNUSED,
2879 tree type, bool named ATTRIBUTE_UNUSED)
 /* int_size_in_bytes returns -1 for variable-sized types, which must
 be passed by reference.  */
2884 if (type && int_size_in_bytes (type) == -1)
2886 if (TARGET_DEBUG_ARG)
2887 fprintf (stderr, "function_arg_pass_by_reference\n");
2894 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2895 ABI.  Only called if TARGET_SSE.  Walks aggregates recursively looking
 for an SSE vector member.  */
2897 contains_128bit_aligned_vector_p (tree type)
2899 enum machine_mode mode = TYPE_MODE (type);
2900 if (SSE_REG_MODE_P (mode)
2901 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
 /* A type aligned below 128 bits cannot contain such a vector.  */
2903 if (TYPE_ALIGN (type) < 128)
2906 if (AGGREGATE_TYPE_P (type))
2908 /* Walk the aggregates recursively.  */
2909 if (TREE_CODE (type) == RECORD_TYPE
2910 || TREE_CODE (type) == UNION_TYPE
2911 || TREE_CODE (type) == QUAL_UNION_TYPE)
 /* First check C++ base classes, if any.  */
2915 if (TYPE_BINFO (type))
2917 tree binfo, base_binfo;
2920 for (binfo = TYPE_BINFO (type), i = 0;
2921 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2922 if (contains_128bit_aligned_vector_p (BINFO_TYPE (base_binfo)))
2925 /* And now merge the fields of structure.  */
2926 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2928 if (TREE_CODE (field) == FIELD_DECL
2929 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2933 /* Just for use if some languages pass arrays by value.  */
2934 else if (TREE_CODE (type) == ARRAY_TYPE)
2936 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2945 /* Gives the alignment boundary, in bits, of an argument with the
2946 specified mode and type.  */
2949 ix86_function_arg_boundary (enum machine_mode mode, tree type)
 /* Prefer the type's alignment; fall back to the mode's.  */
2953 align = TYPE_ALIGN (type);
2955 align = GET_MODE_ALIGNMENT (mode);
2956 if (align < PARM_BOUNDARY)
2957 align = PARM_BOUNDARY;
2960 /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
2961 make an exception for SSE modes since these require 128bit
 alignment.
2964 The handling here differs from field_alignment.  ICC aligns MMX
2965 arguments to 4 byte boundaries, while structure fields are aligned
2966 to 8 byte boundaries.  */
2968 align = PARM_BOUNDARY;
2971 if (!SSE_REG_MODE_P (mode))
2972 align = PARM_BOUNDARY;
 /* BLKmode aggregates get 128-bit alignment only when they actually
 contain an SSE vector.  */
2976 if (!contains_128bit_aligned_vector_p (type))
2977 align = PARM_BOUNDARY;
2985 /* Return true if REGNO is a possible hard register number for a
 function's return value (EAX/RAX, %st(0), or %xmm0 depending on
 target flags).  */
2987 ix86_function_value_regno_p (int regno)
2991 return ((regno) == 0
2992 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2993 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2995 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2996 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2997 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
3000 /* Define how to find the value returned by a function.
3001 VALTYPE is the data type of the value (as a tree).
3002 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3003 otherwise, FUNC is 0.  */
3005 ix86_function_value (tree valtype)
 /* 64-bit: let the psABI classifier pick the return registers.  */
3009 rtx ret = construct_container (type_natural_mode (valtype),
3010 TYPE_MODE (valtype), valtype,
3011 1, REGPARM_MAX, SSE_REGPARM_MAX,
3012 x86_64_int_return_registers, 0);
3013 /* For zero sized structures, construct_container return NULL, but we
3014 need to keep rest of compiler happy by returning meaningful value.  */
3016 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
 /* 32-bit: a single register chosen by mode.  */
3020 return gen_rtx_REG (TYPE_MODE (valtype),
3021 ix86_value_regno (TYPE_MODE (valtype)));
3024 /* Return false iff type is returned in memory. */
3026 ix86_return_in_memory (tree type)
3028 int needed_intregs, needed_sseregs, size;
3029 enum machine_mode mode = TYPE_MODE (type);
3032 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3034 if (mode == BLKmode)
3037 size = int_size_in_bytes (type);
3039 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3042 if (VECTOR_MODE_P (mode) || mode == TImode)
3044 /* User-created vectors small enough to fit in EAX. */
3048 /* MMX/3dNow values are returned on the stack, since we've
3049 got to EMMS/FEMMS before returning. */
3053 /* SSE values are returned in XMM0, except when it doesn't exist. */
3055 return (TARGET_SSE ? 0 : 1);
3066 /* When returning SSE vector types, we have a choice of either
3067 (1) being abi incompatible with a -march switch, or
3068 (2) generating an error.
3069 Given no good solution, I think the safest thing is one warning.
3070 The user won't be able to use -Werror, but....
3072 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3073 called in response to actually generating a caller or callee that
3074 uses such a type.  As opposed to RETURN_IN_MEMORY, which is called
3075 via aggregate_value_p for general type probing from tree-ssa.  */
3078 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
 /* Warn once per compilation.  */
3082 if (!TARGET_SSE && type && !warned)
3084 /* Look at the return type of the function, not the function type.  */
3085 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3088 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3091 warning ("SSE vector return without SSE enabled changes the ABI");
3098 /* Define how to find the value returned by a library function
3099 assuming the value has mode MODE.  Library calls have no type tree,
 so the choice is made purely from the mode.  */
3101 ix86_libcall_value (enum machine_mode mode)
3112 return gen_rtx_REG (mode, FIRST_SSE_REG);
3115 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
 /* Integer results in EAX/RAX (hard register 0).  */
3119 return gen_rtx_REG (mode, 0);
 /* 32-bit: share the regular return-register selection.  */
3123 return gen_rtx_REG (mode, ix86_value_regno (mode));
3126 /* Given a mode, return the register to use for a return value.  */
3129 ix86_value_regno (enum machine_mode mode)
3131 /* Floating point return values in %st(0).  */
3132 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
3133 return FIRST_FLOAT_REG;
3134 /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
3135 we prevent this case when sse is not available.  */
3136 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3137 return FIRST_SSE_REG;
3138 /* Everything else in %eax.  */
3142 /* Create the va_list data type.  On x86-64 this is the four-field
 __va_list_tag record required by the psABI; on i386 it is a plain
 char pointer into the argument area.  */
3145 ix86_build_builtin_va_list (void)
3147 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3149 /* For i386 we use plain pointer to argument area.  */
3151 return build_pointer_type (char_type_node);
3153 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3154 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3156 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3157 unsigned_type_node);
3158 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3159 unsigned_type_node);
3160 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3162 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3165 DECL_FIELD_CONTEXT (f_gpr) = record;
3166 DECL_FIELD_CONTEXT (f_fpr) = record;
3167 DECL_FIELD_CONTEXT (f_ovf) = record;
3168 DECL_FIELD_CONTEXT (f_sav) = record;
 /* Chain the fields into the record and lay it out.  */
3170 TREE_CHAIN (record) = type_decl;
3171 TYPE_NAME (record) = type_decl;
3172 TYPE_FIELDS (record) = f_gpr;
3173 TREE_CHAIN (f_gpr) = f_fpr;
3174 TREE_CHAIN (f_fpr) = f_ovf;
3175 TREE_CHAIN (f_ovf) = f_sav;
3177 layout_type (record);
3179 /* The correct type is an array type of one element.  */
3180 return build_array_type (record, build_index_type (size_zero_node));
3183 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
/* NOTE(review): fragmentary dump -- parameter list, several declarations,
   loop braces and guard conditions are elided (embedded numbers jump).
   Visible logic: save named-after GP argument registers into the
   register-save area, then emit the computed-jump SSE save sequence.  */
3186 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3187 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3190 CUMULATIVE_ARGS next_cum;
3191 rtx save_area = NULL_RTX, mem;
3204 /* Indicate to allocate space on the stack for varargs save area. */
3205 ix86_save_varrargs_registers = 1;
3207 cfun->stack_alignment_needed = 128;
3209 fntype = TREE_TYPE (current_function_decl);
3210 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3211 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3212 != void_type_node));
3214 /* For varargs, we do not want to skip the dummy va_dcl argument.
3215 For stdargs, we do want to skip the last named argument. */
3218 function_arg_advance (&next_cum, mode, type, 1);
3221 save_area = frame_pointer_rtx;
3223 set = get_varargs_alias_set ();
/* Spill the remaining unnamed integer parameter registers.  */
3225 for (i = next_cum.regno; i < ix86_regparm; i++)
3227 mem = gen_rtx_MEM (Pmode,
3228 plus_constant (save_area, i * UNITS_PER_WORD));
3229 set_mem_alias_set (mem, set);
3230 emit_move_insn (mem, gen_rtx_REG (Pmode,
3231 x86_64_int_parameter_registers[i]));
3234 if (next_cum.sse_nregs)
3236 /* Now emit code to save SSE registers. The AX parameter contains number
3237 of SSE parameter registers used to call this function. We use
3238 sse_prologue_save insn template that produces computed jump across
3239 SSE saves. We need some preparation work to get this working. */
3241 label = gen_label_rtx ();
3242 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3244 /* Compute address to jump to :
3245 label - 5*eax + nnamed_sse_arguments*5 */
3246 tmp_reg = gen_reg_rtx (Pmode);
3247 nsse_reg = gen_reg_rtx (Pmode);
/* %al holds the number of SSE registers actually used by the caller.  */
3248 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3249 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3250 gen_rtx_MULT (Pmode, nsse_reg,
3252 if (next_cum.sse_regno)
3255 gen_rtx_CONST (DImode,
3256 gen_rtx_PLUS (DImode,
3258 GEN_INT (next_cum.sse_regno * 4))));
3260 emit_move_insn (nsse_reg, label_ref);
3261 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3263 /* Compute address of memory block we save into. We always use pointer
3264 pointing 127 bytes after first byte to store - this is needed to keep
3265 instruction size limited by 4 bytes. */
3266 tmp_reg = gen_reg_rtx (Pmode);
3267 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3268 plus_constant (save_area,
3269 8 * REGPARM_MAX + 127)));
3270 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3271 set_mem_alias_set (mem, set)
3272 set_mem_align (mem, BITS_PER_WORD);
3274 /* And finally do the dirty job! */
3275 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3276 GEN_INT (next_cum.sse_regno), label));
3281 /* Implement va_start. */
/* NOTE(review): fragmentary dump -- the TARGET_64BIT guard around the
   std_expand fallback and some declarations are elided.  The 64-bit path
   initializes the four __va_list_tag fields built by
   ix86_build_builtin_va_list.  */
3284 ix86_va_start (tree valist, rtx nextarg)
3286 HOST_WIDE_INT words, n_gpr, n_fpr;
3287 tree f_gpr, f_fpr, f_ovf, f_sav;
3288 tree gpr, fpr, ovf, sav, t;
3290 /* Only 64bit target needs something special. */
3293 std_expand_builtin_va_start (valist, nextarg);
3297 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3298 f_fpr = TREE_CHAIN (f_gpr);
3299 f_ovf = TREE_CHAIN (f_fpr);
3300 f_sav = TREE_CHAIN (f_ovf);
3302 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3303 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3304 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3305 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3306 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3308 /* Count number of gp and fp argument registers used. */
3309 words = current_function_args_info.words;
3310 n_gpr = current_function_args_info.regno;
3311 n_fpr = current_function_args_info.sse_regno;
3313 if (TARGET_DEBUG_ARG)
3314 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3315 (int) words, (int) n_gpr, (int) n_fpr);
/* gp_offset: bytes already consumed in the GP save area (8 per reg).  */
3317 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3318 build_int_cst (NULL_TREE, n_gpr * 8));
3319 TREE_SIDE_EFFECTS (t) = 1;
3320 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset: SSE slots are 16 bytes and live after the GP area.  */
3322 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3323 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
3324 TREE_SIDE_EFFECTS (t) = 1;
3325 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3327 /* Find the overflow area. */
3328 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3330 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3331 build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
3332 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3333 TREE_SIDE_EFFECTS (t) = 1;
3334 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3336 /* Find the register save area.
3337 Prologue of the function save it right above stack frame. */
3338 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3339 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3340 TREE_SIDE_EFFECTS (t) = 1;
3341 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3344 /* Implement va_arg. */
/* NOTE(review): fragmentary dump -- many guards, braces and declarations
   are elided (embedded line numbers jump).  Visible structure: on 64-bit,
   decide register-area vs overflow-area fetch, emitting GIMPLE into
   *pre_p; the register path may need a temporary when the value is split
   across non-consecutive save-area slots.  Too order-sensitive to restyle
   from this fragment.  */
3347 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3349 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3350 tree f_gpr, f_fpr, f_ovf, f_sav;
3351 tree gpr, fpr, ovf, sav, t;
3353 tree lab_false, lab_over = NULL_TREE;
3359 /* Only 64bit target needs something special. */
3361 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3363 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3364 f_fpr = TREE_CHAIN (f_gpr);
3365 f_ovf = TREE_CHAIN (f_fpr);
3366 f_sav = TREE_CHAIN (f_ovf);
3368 valist = build_va_arg_indirect_ref (valist);
3369 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3370 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3371 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3372 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* Pass-by-reference arguments are fetched as a pointer to the value.  */
3374 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3376 type = build_pointer_type (type);
3377 size = int_size_in_bytes (type);
3378 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3380 container = construct_container (type_natural_mode (type), TYPE_MODE (type),
3381 type, 0, REGPARM_MAX, SSE_REGPARM_MAX,
3384 /* Pull the value out of the saved registers. */
3386 addr = create_tmp_var (ptr_type_node, "addr");
3387 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3391 int needed_intregs, needed_sseregs;
3393 tree int_addr, sse_addr;
3395 lab_false = create_artificial_label ();
3396 lab_over = create_artificial_label ();
3398 examine_argument (TYPE_MODE (type), type, 0,
3399 &needed_intregs, &needed_sseregs);
3401 need_temp = (!REG_P (container)
3402 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3403 || TYPE_ALIGN (type) > 128));
3405 /* In case we are passing structure, verify that it is consecutive block
3406 on the register save area. If not we need to do moves. */
3407 if (!need_temp && !REG_P (container))
3409 /* Verify that all registers are strictly consecutive */
3410 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3414 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3416 rtx slot = XVECEXP (container, 0, i);
3417 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3418 || INTVAL (XEXP (slot, 1)) != i * 16)
3426 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3428 rtx slot = XVECEXP (container, 0, i);
3429 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3430 || INTVAL (XEXP (slot, 1)) != i * 8)
3442 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3443 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3444 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3445 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3448 /* First ensure that we fit completely in registers. */
3451 t = build_int_cst (TREE_TYPE (gpr),
3452 (REGPARM_MAX - needed_intregs + 1) * 8);
3453 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3454 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3455 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3456 gimplify_and_add (t, pre_p);
3460 t = build_int_cst (TREE_TYPE (fpr),
3461 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3463 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3464 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3465 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3466 gimplify_and_add (t, pre_p);
3469 /* Compute index to start of area used for integer regs. */
3472 /* int_addr = gpr + sav; */
3473 t = fold_convert (ptr_type_node, gpr);
3474 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3475 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3476 gimplify_and_add (t, pre_p);
3480 /* sse_addr = fpr + sav; */
3481 t = fold_convert (ptr_type_node, fpr);
3482 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3483 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3484 gimplify_and_add (t, pre_p);
/* need_temp path: copy each save-area piece into a stack temporary.  */
3489 tree temp = create_tmp_var (type, "va_arg_tmp");
3492 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3493 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3494 gimplify_and_add (t, pre_p);
3496 for (i = 0; i < XVECLEN (container, 0); i++)
3498 rtx slot = XVECEXP (container, 0, i);
3499 rtx reg = XEXP (slot, 0);
3500 enum machine_mode mode = GET_MODE (reg);
3501 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3502 tree addr_type = build_pointer_type (piece_type);
3505 tree dest_addr, dest;
3507 if (SSE_REGNO_P (REGNO (reg)))
3509 src_addr = sse_addr;
3510 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3514 src_addr = int_addr;
3515 src_offset = REGNO (reg) * 8;
3517 src_addr = fold_convert (addr_type, src_addr);
3518 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3519 size_int (src_offset)));
3520 src = build_va_arg_indirect_ref (src_addr);
3522 dest_addr = fold_convert (addr_type, addr);
3523 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3524 size_int (INTVAL (XEXP (slot, 1)))));
3525 dest = build_va_arg_indirect_ref (dest_addr);
3527 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3528 gimplify_and_add (t, pre_p);
/* Advance gp_offset / fp_offset past the registers just consumed.  */
3534 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3535 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
3536 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3537 gimplify_and_add (t, pre_p);
3541 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3542 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
3543 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3544 gimplify_and_add (t, pre_p);
3547 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3548 gimplify_and_add (t, pre_p);
3550 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3551 append_to_statement_list (t, pre_p);
3554 /* ... otherwise out of the overflow area. */
3556 /* Care for on-stack alignment if needed. */
3557 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3561 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3562 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
3563 build_int_cst (TREE_TYPE (ovf), align - 1));
3564 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3565 build_int_cst (TREE_TYPE (t), -align));
3567 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3569 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3570 gimplify_and_add (t2, pre_p);
3572 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3573 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
3574 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3575 gimplify_and_add (t, pre_p);
3579 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3580 append_to_statement_list (t, pre_p);
3583 ptrtype = build_pointer_type (type);
3584 addr = fold_convert (ptrtype, addr);
/* For indirect_p, an extra dereference unwraps the pass-by-reference
   pointer (the guard condition is elided at 3586 in this dump).  */
3587 addr = build_va_arg_indirect_ref (addr);
3588 return build_va_arg_indirect_ref (addr);
3591 /* Return nonzero if OPNUM's MEM should be matched
3592 in movabs* patterns. */
/* NOTE(review): fragmentary dump -- the abort/early-return lines after
   the GET_CODE checks are elided.  Accepts the MEM unless it is volatile
   and volatile memory operands are disallowed.  */
3595 ix86_check_movabs (rtx insn, int opnum)
3599 set = PATTERN (insn);
3600 if (GET_CODE (set) == PARALLEL)
3601 set = XVECEXP (set, 0, 0);
3602 if (GET_CODE (set) != SET)
3604 mem = XEXP (set, opnum);
/* Look through paradoxical/normal subregs to the underlying MEM.  */
3605 while (GET_CODE (mem) == SUBREG)
3606 mem = SUBREG_REG (mem);
3607 if (GET_CODE (mem) != MEM)
3609 return (volatile_ok || !MEM_VOLATILE_P (mem));
3612 /* Initialize the table of extra 80387 mathematical constants. */
/* NOTE(review): fragmentary dump.  Fills ext_80387_constants_table with
   the five constants loadable by dedicated x87 load instructions
   (fldlg2, fldln2, fldl2e, fldl2t, fldpi) and sets the init flag.  */
3615 init_ext_80387_constants (void)
3617 static const char * cst[5] =
3619 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
3620 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
3621 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
3622 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
3623 "3.1415926535897932385128089594061862044", /* 4: fldpi */
3627 for (i = 0; i < 5; i++)
3629 real_from_string (&ext_80387_constants_table[i], cst[i]);
3630 /* Ensure each constant is rounded to XFmode precision. */
3631 real_convert (&ext_80387_constants_table[i],
3632 XFmode, &ext_80387_constants_table[i]);
3635 ext_80387_constants_init = 1;
3638 /* Return true if the constant is something that can be loaded with
3639 a special instruction. */
/* NOTE(review): fragmentary dump -- the return values after each test
   (presumably distinct small integers identifying fldz/fld1/fldlg2...)
   are elided along with the loop's return.  */
3642 standard_80387_constant_p (rtx x)
3644 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3647 if (x == CONST0_RTX (GET_MODE (x)))
3649 if (x == CONST1_RTX (GET_MODE (x)))
3652 /* For XFmode constants, try to find a special 80387 instruction when
3653 optimizing for size or on those CPUs that benefit from them. */
3654 if (GET_MODE (x) == XFmode
3655 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
3660 if (! ext_80387_constants_init)
3661 init_ext_80387_constants ();
3663 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3664 for (i = 0; i < 5; i++)
3665 if (real_identical (&r, &ext_80387_constants_table[i]))
3672 /* Return the opcode of the special instruction to be used to load
/* NOTE(review): fragmentary dump -- the switch cases (3679-3697) mapping
   standard_80387_constant_p's result to an opcode string are elided.  */
3676 standard_80387_constant_opcode (rtx x)
3678 switch (standard_80387_constant_p (x))
3698 /* Return the CONST_DOUBLE representing the 80387 constant that is
3699 loaded by the specified special instruction. The argument IDX
3700 matches the return value from standard_80387_constant_p. */
/* NOTE(review): fragmentary dump -- the switch translating IDX into the
   table index `i` (3710-3723) is elided.  */
3703 standard_80387_constant_rtx (int idx)
3707 if (! ext_80387_constants_init)
3708 init_ext_80387_constants ();
3724 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
3728 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
/* NOTE(review): fragmentary dump -- a line between the const0_rtx check
   and the final return is elided.  Only all-zero constants qualify
   (loadable via pxor/xorps).  */
3731 standard_sse_constant_p (rtx x)
3733 if (x == const0_rtx)
3735 return (x == CONST0_RTX (GET_MODE (x)));
3738 /* Returns 1 if OP contains a symbol reference */
/* NOTE(review): fragmentary dump -- declarations, the 'E' format check
   and the final "return 0" are elided.  Recursively scans OP's operands
   for any SYMBOL_REF or LABEL_REF.  */
3741 symbolic_reference_mentioned_p (rtx op)
3746 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3749 fmt = GET_RTX_FORMAT (GET_CODE (op));
3750 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3756 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3757 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3761 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3768 /* Return 1 if it is appropriate to emit `ret' instructions in the
3769 body of a function. Do this only if the epilogue is simple, needing a
3770 couple of insns. Prior to reloading, we can't tell how many registers
3771 must be saved, so return 0 then. Return 0 if there is no frame
3772 marker to de-allocate. */
/* NOTE(review): fragmentary dump -- the "return 0" bodies of the two
   guard conditions are elided.  */
3775 ix86_can_use_return_insn_p (void)
3777 struct ix86_frame frame;
3779 if (! reload_completed || frame_pointer_needed)
3782 /* Don't allow more than 32 pop, since that's all we can do
3783 with one instruction. */
3784 if (current_function_pops_args
3785 && current_function_args_size >= 32768)
/* A bare `ret' is only valid when nothing must be deallocated/restored.  */
3788 ix86_compute_frame_layout (&frame);
3789 return frame.to_allocate == 0 && frame.nregs == 0;
3792 /* Value should be nonzero if functions must have frame pointers.
3793 Zero means the frame pointer need not be set up (and parms may
3794 be accessed via the stack pointer) in functions that seem suitable. */
/* NOTE(review): fragmentary dump -- the "return 1"/"return 0" lines after
   each condition are elided.  */
3797 ix86_frame_pointer_required (void)
3799 /* If we accessed previous frames, then the generated code expects
3800 to be able to access the saved ebp value in our frame. */
3801 if (cfun->machine->accesses_prev_frame)
3804 /* Several x86 os'es need a frame pointer for other reasons,
3805 usually pertaining to setjmp. */
3806 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3809 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3810 the frame pointer by default. Turn it back on now if we've not
3811 got a leaf function. */
3812 if (TARGET_OMIT_LEAF_FRAME_POINTER
3813 && (!current_function_is_leaf))
3816 if (current_function_profile)
3822 /* Record that the current function accesses previous call frames.
   Setting this flag forces ix86_frame_pointer_required to return
   nonzero for this function.  */
3825 ix86_setup_frame_addresses (void)
3827 cfun->machine->accesses_prev_frame = 1;
3830 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3831 # define USE_HIDDEN_LINKONCE 1
3833 # define USE_HIDDEN_LINKONCE 0
3836 static int pic_labels_used;
3838 /* Fills in the label name that should be used for a pc thunk for
3839 the given register. */
/* With hidden-linkonce support the thunk gets a well-known shared name
   keyed by register; otherwise an internal "LPR" label is generated.  */
3842 get_pc_thunk_name (char name[32], unsigned int regno)
3844 if (USE_HIDDEN_LINKONCE)
3845 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3847 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3851 /* This function generates code for -fpic that loads %ebx with
3852 the return address of the caller and then returns. */
/* NOTE(review): fragmentary dump -- declarations, braces and a few
   directives are elided.  Emits, at end of file, one get_pc_thunk per
   register recorded in pic_labels_used: `movl (%esp), %reg ; ret'.  */
3855 ix86_file_end (void)
3860 for (regno = 0; regno < 8; ++regno)
3864 if (! ((pic_labels_used >> regno) & 1))
3867 get_pc_thunk_name (name, regno);
3869 if (USE_HIDDEN_LINKONCE)
3873 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3875 TREE_PUBLIC (decl) = 1;
3876 TREE_STATIC (decl) = 1;
3877 DECL_ONE_ONLY (decl) = 1;
3879 (*targetm.asm_out.unique_section) (decl, 0);
3880 named_section (decl, NULL, 0);
3882 (*targetm.asm_out.globalize_label) (asm_out_file, name);
3883 fputs ("\t.hidden\t", asm_out_file);
3884 assemble_name (asm_out_file, name);
3885 fputc ('\n', asm_out_file);
3886 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
3891 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: load return address from the stack top, then return.  */
3894 xops[0] = gen_rtx_REG (SImode, regno);
3895 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3896 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3897 output_asm_insn ("ret", xops);
3900 if (NEED_INDICATE_EXEC_STACK)
3901 file_end_indicate_exec_stack ();
3904 /* Emit code for the SET_GOT patterns. */
/* NOTE(review): fragmentary dump -- xops[0] setup, TARGET_MACHO guards
   and the trailing empty-string return are elided.  Two strategies: an
   inline call/pop to materialize the PC, or a call to the shared
   get_pc_thunk when deep branch prediction is preferred.  */
3907 output_set_got (rtx dest)
3912 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
3914 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
3916 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
3919 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3921 output_asm_insn ("call\t%a2", xops);
3924 /* Output the "canonical" label name ("Lxx$pb") here too. This
3925 is what will be referred to by the Mach-O PIC subsystem. */
3926 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
3928 (*targetm.asm_out.internal_label) (asm_out_file, "L",
3929 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
3932 output_asm_insn ("pop{l}\t%0", xops);
3937 get_pc_thunk_name (name, REGNO (dest));
/* Remember which thunk is referenced so ix86_file_end emits it.  */
3938 pic_labels_used |= 1 << REGNO (dest);
3940 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
3941 xops[2] = gen_rtx_MEM (QImode, xops[2]);
3942 output_asm_insn ("call\t%X2", xops);
3945 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
3946 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
3947 else if (!TARGET_MACHO)
3948 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
3953 /* Generate an "push" pattern for input ARG. */
/* NOTE(review): the function header line (and the MEM wrapping / source
   operand lines) are elided in this dump; the visible RTL stores through
   a pre-decremented stack pointer, i.e. a push.  */
3958 return gen_rtx_SET (VOIDmode,
3960 gen_rtx_PRE_DEC (Pmode,
3961 stack_pointer_rtx)),
3965 /* Return >= 0 if there is an unused call-clobbered register available
3966 for the entire function. */
/* NOTE(review): fragmentary dump -- declaration of `i` and the inner
   "return i" are elided.  Scans %ecx, %edx, %eax (2..0) for a register
   never live in this leaf, non-profiled function.  */
3969 ix86_select_alt_pic_regnum (void)
3971 if (current_function_is_leaf && !current_function_profile)
3974 for (i = 2; i >= 0; --i)
3975 if (!regs_ever_live[i])
3979 return INVALID_REGNUM;
3982 /* Return 1 if we need to save REGNO. */
/* NOTE(review): fragmentary dump -- some returns/braces in the PIC and
   eh_return branches are elided.  */
3984 ix86_save_reg (unsigned int regno, int maybe_eh_return)
/* The PIC register must be saved whenever it is actually used, unless
   an alternate unused call-clobbered register can hold it instead.  */
3986 if (pic_offset_table_rtx
3987 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
3988 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
3989 || current_function_profile
3990 || current_function_calls_eh_return
3991 || current_function_uses_const_pool))
3993 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
/* eh_return data registers must also be treated as saved.  */
3998 if (current_function_calls_eh_return && maybe_eh_return)
4003 unsigned test = EH_RETURN_DATA_REGNO (i);
4004 if (test == INVALID_REGNUM)
4011 return (regs_ever_live[regno]
4012 && !call_used_regs[regno]
4013 && !fixed_regs[regno]
4014 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4017 /* Return number of registers to be saved on the stack. */
/* NOTE(review): the counter declaration/increment and final return are
   elided in this dump; the loop counts registers ix86_save_reg says
   must be preserved.  */
4020 ix86_nsaved_regs (void)
4025 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4026 if (ix86_save_reg (regno, true))
4031 /* Return the offset between two registers, one to be eliminated, and the other
4032 its replacement, at the start of a routine. */
/* NOTE(review): fragmentary dump -- abort() calls for the invalid
   combinations appear to be elided after the sanity checks.  */
4035 ix86_initial_elimination_offset (int from, int to)
4037 struct ix86_frame frame;
4038 ix86_compute_frame_layout (&frame);
4040 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4041 return frame.hard_frame_pointer_offset;
4042 else if (from == FRAME_POINTER_REGNUM
4043 && to == HARD_FRAME_POINTER_REGNUM)
4044 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4047 if (to != STACK_POINTER_REGNUM)
4049 else if (from == ARG_POINTER_REGNUM)
4050 return frame.stack_pointer_offset;
4051 else if (from != FRAME_POINTER_REGNUM)
4054 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4058 /* Fill structure ix86_frame about frame of currently computed function. */
/* NOTE(review): fragmentary dump -- several guards (optimize_size check,
   abort()s, TARGET_DEBUG debug guard, 64-bit varargs guard) and braces
   are elided.  Layout order computed here: return address / saved %ebp,
   saved registers, varargs save area, padding1, locals, outgoing args,
   padding2; red zone is subtracted at the end.  */
4061 ix86_compute_frame_layout (struct ix86_frame *frame)
4063 HOST_WIDE_INT total_size;
4064 unsigned int stack_alignment_needed;
4065 HOST_WIDE_INT offset;
4066 unsigned int preferred_alignment;
4067 HOST_WIDE_INT size = get_frame_size ();
4069 frame->nregs = ix86_nsaved_regs ();
4072 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4073 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4075 /* During reload iteration the amount of registers saved can change.
4076 Recompute the value as needed. Do not recompute when amount of registers
4077 didn't change as reload does mutiple calls to the function and does not
4078 expect the decision to change within single iteration. */
4080 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4082 int count = frame->nregs;
4084 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4085 /* The fast prologue uses move instead of push to save registers. This
4086 is significantly longer, but also executes faster as modern hardware
4087 can execute the moves in parallel, but can't do that for push/pop.
4089 Be careful about choosing what prologue to emit: When function takes
4090 many instructions to execute we may use slow version as well as in
4091 case function is known to be outside hot spot (this is known with
4092 feedback only). Weight the size of function by number of registers
4093 to save as it is cheap to use one or two push instructions but very
4094 slow to use many of them. */
4096 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4097 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4098 || (flag_branch_probabilities
4099 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4100 cfun->machine->use_fast_prologue_epilogue = false;
4102 cfun->machine->use_fast_prologue_epilogue
4103 = !expensive_function_p (count);
4105 if (TARGET_PROLOGUE_USING_MOVE
4106 && cfun->machine->use_fast_prologue_epilogue)
4107 frame->save_regs_using_mov = true;
4109 frame->save_regs_using_mov = false;
4112 /* Skip return address and saved base pointer. */
4113 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4115 frame->hard_frame_pointer_offset = offset;
4117 /* Do some sanity checking of stack_alignment_needed and
4118 preferred_alignment, since i386 port is the only using those features
4119 that may break easily. */
4121 if (size && !stack_alignment_needed)
4123 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4125 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4127 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4130 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4131 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4133 /* Register save area */
4134 offset += frame->nregs * UNITS_PER_WORD;
4137 if (ix86_save_varrargs_registers)
4139 offset += X86_64_VARARGS_SIZE;
4140 frame->va_arg_size = X86_64_VARARGS_SIZE;
4143 frame->va_arg_size = 0;
4145 /* Align start of frame for local function. */
4146 frame->padding1 = ((offset + stack_alignment_needed - 1)
4147 & -stack_alignment_needed) - offset;
4149 offset += frame->padding1;
4151 /* Frame pointer points here. */
4152 frame->frame_pointer_offset = offset;
4156 /* Add outgoing arguments area. Can be skipped if we eliminated
4157 all the function calls as dead code.
4158 Skipping is however impossible when function calls alloca. Alloca
4159 expander assumes that last current_function_outgoing_args_size
4160 of stack frame are unused. */
4161 if (ACCUMULATE_OUTGOING_ARGS
4162 && (!current_function_is_leaf || current_function_calls_alloca))
4164 offset += current_function_outgoing_args_size;
4165 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4168 frame->outgoing_arguments_size = 0;
4170 /* Align stack boundary. Only needed if we're calling another function
4172 if (!current_function_is_leaf || current_function_calls_alloca)
4173 frame->padding2 = ((offset + preferred_alignment - 1)
4174 & -preferred_alignment) - offset;
4176 frame->padding2 = 0;
4178 offset += frame->padding2;
4180 /* We've reached end of stack frame. */
4181 frame->stack_pointer_offset = offset;
4183 /* Size prologue needs to allocate. */
4184 frame->to_allocate =
4185 (size + frame->padding1 + frame->padding2
4186 + frame->outgoing_arguments_size + frame->va_arg_size);
4188 if ((!frame->to_allocate && frame->nregs <= 1)
4189 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
4190 frame->save_regs_using_mov = false;
/* Leaf functions with an unchanging %esp may use the red zone instead
   of explicitly allocating (64-bit ABI feature).  */
4192 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
4193 && current_function_is_leaf)
4195 frame->red_zone_size = frame->to_allocate;
4196 if (frame->save_regs_using_mov)
4197 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
4198 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4199 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4202 frame->red_zone_size = 0;
4203 frame->to_allocate -= frame->red_zone_size;
4204 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout (guard elided in this fragment).  */
4206 fprintf (stderr, "nregs: %i\n", frame->nregs);
4207 fprintf (stderr, "size: %i\n", size);
4208 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4209 fprintf (stderr, "padding1: %i\n", frame->padding1);
4210 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4211 fprintf (stderr, "padding2: %i\n", frame->padding2);
4212 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4213 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4214 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4215 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4216 frame->hard_frame_pointer_offset);
4217 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4221 /* Emit code to save registers in the prologue.  Pushes each register
   that ix86_save_reg reports as needing preservation, marking each
   insn frame-related for DWARF unwind info.  */
4224 ix86_emit_save_regs (void)
4229 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4230 if (ix86_save_reg (regno, true))
4232 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4233 RTX_FRAME_RELATED_P (insn) = 1;
4237 /* Emit code to save registers using MOV insns. First register
4238 is restored from POINTER + OFFSET. */
/* NOTE(review): the offset argument of adjust_address is elided at 4249
   in this dump.  Saves each needed register with a move (instead of a
   push), advancing OFFSET by one word per register.  */
4240 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
4245 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4246 if (ix86_save_reg (regno, true))
4248 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4250 gen_rtx_REG (Pmode, regno));
4251 RTX_FRAME_RELATED_P (insn) = 1;
4252 offset += UNITS_PER_WORD;
4256 /* Expand prologue or epilogue stack adjustment.
4257 The pattern exist to put a dependency on all ebp-based memory accesses.
4258 STYLE should be negative if instructions should be marked as frame related,
4259 zero if %r11 register is live and cannot be freely used and positive
/* NOTE(review): fragmentary dump -- the TARGET_64BIT selection, abort()
   on style==0 with a huge offset, and surrounding braces are elided.
   Large 64-bit offsets that do not fit an immediate go through %r11.  */
4263 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
4268 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
4269 else if (x86_64_immediate_operand (offset, DImode))
4270 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
4274 /* r11 is used by indirect sibcall return as well, set before the
4275 epilogue and used after the epilogue. ATM indirect sibcall
4276 shouldn't be used together with huge frame sizes in one
4277 function because of the frame_size check in sibcall.c. */
4280 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4281 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
4283 RTX_FRAME_RELATED_P (insn) = 1;
4284 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
4288 RTX_FRAME_RELATED_P (insn) = 1;
4291 /* Expand the prologue into a bunch of separate insns. */
/* NOTE(review): fragmentary dump -- declarations, the allocate==0 case,
   stack-probe branches and several braces are elided.  Visible sequence:
   optionally set up %ebp, save registers (push or mov), allocate the
   frame (directly or via the stack-probe worker), then materialize the
   PIC register if it is used.  */
4294 ix86_expand_prologue (void)
4298 struct ix86_frame frame;
4299 HOST_WIDE_INT allocate;
4301 ix86_compute_frame_layout (&frame);
4303 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4304 slower on all targets. Also sdb doesn't like it. */
4306 if (frame_pointer_needed)
4308 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4309 RTX_FRAME_RELATED_P (insn) = 1;
4311 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4312 RTX_FRAME_RELATED_P (insn) = 1;
4315 allocate = frame.to_allocate;
4317 if (!frame.save_regs_using_mov)
4318 ix86_emit_save_regs ();
4320 allocate += frame.nregs * UNITS_PER_WORD;
4322 /* When using red zone we may start register saving before allocating
4323 the stack frame saving one cycle of the prologue. */
4324 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
4325 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
4326 : stack_pointer_rtx,
4327 -frame.nregs * UNITS_PER_WORD);
4331 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4332 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4333 GEN_INT (-allocate), -1);
4336 /* Only valid for Win32. */
4337 rtx eax = gen_rtx_REG (SImode, 0);
4338 bool eax_live = ix86_eax_live_at_start_p ();
/* %eax is clobbered by the probe worker; preserve it if live.  */
4345 emit_insn (gen_push (eax));
4349 insn = emit_move_insn (eax, GEN_INT (allocate));
4350 RTX_FRAME_RELATED_P (insn) = 1;
4352 insn = emit_insn (gen_allocate_stack_worker (eax));
4353 RTX_FRAME_RELATED_P (insn) = 1;
4358 if (frame_pointer_needed)
4359 t = plus_constant (hard_frame_pointer_rtx,
4362 - frame.nregs * UNITS_PER_WORD);
4364 t = plus_constant (stack_pointer_rtx, allocate);
4365 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
4369 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
4371 if (!frame_pointer_needed || !frame.to_allocate)
4372 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4374 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4375 -frame.nregs * UNITS_PER_WORD);
4378 pic_reg_used = false;
4379 if (pic_offset_table_rtx
4380 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4381 || current_function_profile))
4383 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4385 if (alt_pic_reg_used != INVALID_REGNUM)
4386 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4388 pic_reg_used = true;
4393 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4395 /* Even with accurate pre-reload life analysis, we can wind up
4396 deleting all references to the pic register after reload.
4397 Consider if cross-jumping unifies two sides of a branch
4398 controlled by a comparison vs the only read from a global.
4399 In which case, allow the set_got to be deleted, though we're
4400 too late to do anything about the ebx save in the prologue. */
4401 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4404 /* Prevent function calls from be scheduled before the call to mcount.
4405 In the pic_reg_used case, make sure that the got load isn't deleted. */
4406 if (current_function_profile)
4407 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4410 /* Emit code to restore saved registers using MOV insns. First register
4411 is restored from POINTER + OFFSET. */
/* MAYBE_EH_RETURN is forwarded to ix86_save_reg to decide which registers
 count as saved (eh_return paths save additional registers).  */
4413 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
4414 int maybe_eh_return)
4417 rtx base_address = gen_rtx_MEM (Pmode, pointer);
/* Walk all hard registers; each saved one is reloaded from the next
 word-sized save slot.  */
4419 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4420 if (ix86_save_reg (regno, maybe_eh_return))
4422 /* Ensure that adjust_address won't be forced to produce pointer
4423 out of range allowed by x86-64 instruction set. */
4424 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Offset doesn't fit in a signed 32-bit displacement: fold it into R11
 (used here as a scratch register) and address relative to R11 instead.  */
4428 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4429 emit_move_insn (r11, GEN_INT (offset));
4430 emit_insn (gen_adddi3 (r11, r11, pointer));
4431 base_address = gen_rtx_MEM (Pmode, r11);
4434 emit_move_insn (gen_rtx_REG (Pmode, regno),
4435 adjust_address (base_address, Pmode, offset));
/* Advance to the following save slot.  */
4436 offset += UNITS_PER_WORD;
4440 /* Restore function stack, frame, and registers. */
/* STYLE == 2 marks an eh_return epilogue (see the style != 2 checks
 below); other values distinguish normal vs. sibcall epilogues --
 NOTE(review): confirm exact encoding at the callers.  */
4443 ix86_expand_epilogue (int style)
/* SP is usable for addressing the save area unless a frame pointer is
 required and SP has been modified since the prologue.  */
4446 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4447 struct ix86_frame frame;
4448 HOST_WIDE_INT offset;
4450 ix86_compute_frame_layout (&frame);
4452 /* Calculate start of saved registers relative to ebp. Special care
4453 must be taken for the normal return case of a function using
4454 eh_return: the eax and edx registers are marked as saved, but not
4455 restored along this path. */
4456 offset = frame.nregs;
4457 if (current_function_calls_eh_return && style != 2)
/* Saved registers sit below the frame pointer, hence the negative
 word-scaled offset.  */
4459 offset *= -UNITS_PER_WORD;
4461 /* If we're only restoring one register and sp is not valid then
4462 using a move instruction to restore the register since it's
4463 less work than reloading sp and popping the register.
4465 The default code result in stack adjustment using add/lea instruction,
4466 while this code results in LEAVE instruction (or discrete equivalent),
4467 so it is profitable in some other cases as well. Especially when there
4468 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4469 and there is exactly one register to pop. This heuristic may need some
4470 tuning in future. */
4471 if ((!sp_valid && frame.nregs <= 1)
4472 || (TARGET_EPILOGUE_USING_MOVE
4473 && cfun->machine->use_fast_prologue_epilogue
4474 && (frame.nregs > 1 || frame.to_allocate))
4475 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4476 || (frame_pointer_needed && TARGET_USE_LEAVE
4477 && cfun->machine->use_fast_prologue_epilogue
4478 && frame.nregs == 1)
4479 || current_function_calls_eh_return)
4481 /* Restore registers. We can use ebp or esp to address the memory
4482 locations. If both are available, default to ebp, since offsets
4483 are known to be small. Only exception is esp pointing directly to the
4484 end of block of saved registers, where we may simplify addressing
4487 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4488 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4489 frame.to_allocate, style == 2)
4491 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4492 offset, style == 2);
4494 /* eh_return epilogues need %ecx added to the stack pointer. */
4497 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4499 if (frame_pointer_needed)
/* With a frame pointer: fold the stack adjustment into SA, reload the
 saved frame pointer, then move SP past the adjusted frame.  */
4501 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4502 tmp = plus_constant (tmp, UNITS_PER_WORD);
4503 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4505 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4506 emit_move_insn (hard_frame_pointer_rtx, tmp);
4508 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
/* Without a frame pointer: bump SP over the local area and register
 saves in one SET, including the eh stack adjustment SA.  */
4513 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4514 tmp = plus_constant (tmp, (frame.to_allocate
4515 + frame.nregs * UNITS_PER_WORD));
4516 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4519 else if (!frame_pointer_needed)
4520 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4521 GEN_INT (frame.to_allocate
4522 + frame.nregs * UNITS_PER_WORD),
4524 /* If not an i386, mov & pop is faster than "leave". */
4525 else if (TARGET_USE_LEAVE || optimize_size
4526 || !cfun->machine->use_fast_prologue_epilogue
4527 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
/* Discrete equivalent of LEAVE: point SP at the saved frame pointer,
 then pop it.  */
4530 pro_epilogue_adjust_stack (stack_pointer_rtx,
4531 hard_frame_pointer_rtx,
4534 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4536 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4541 /* First step is to deallocate the stack frame so that we can
4542 pop the registers. */
4545 if (!frame_pointer_needed)
4547 pro_epilogue_adjust_stack (stack_pointer_rtx,
4548 hard_frame_pointer_rtx,
4549 GEN_INT (offset), style);
4551 else if (frame.to_allocate)
4552 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4553 GEN_INT (frame.to_allocate), style);
/* Pop every register the prologue saved, in register-number order.  */
4555 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4556 if (ix86_save_reg (regno, false))
4559 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4561 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4563 if (frame_pointer_needed)
4565 /* Leave results in shorter dependency chains on CPUs that are
4566 able to grok it fast. */
4567 if (TARGET_USE_LEAVE)
4568 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4569 else if (TARGET_64BIT)
4570 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4572 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4576 /* Sibcall epilogues don't want a return instruction. */
/* Callee-pops conventions (e.g. stdcall): emit "ret $N", or for pops
 larger than RET's 16-bit immediate, an explicit add plus indirect jump
 through the saved return address in %ecx.  */
4580 if (current_function_pops_args && current_function_args_size)
4582 rtx popc = GEN_INT (current_function_pops_args);
4584 /* i386 can only pop 64K bytes. If asked to pop more, pop
4585 return address, do explicit add, and jump indirectly to the
4588 if (current_function_pops_args >= 65536)
4590 rtx ecx = gen_rtx_REG (SImode, 2);
4592 /* There is no "pascal" calling convention in 64bit ABI. */
4596 emit_insn (gen_popsi1 (ecx));
4597 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4598 emit_jump_insn (gen_return_indirect_internal (ecx));
4601 emit_jump_insn (gen_return_pop_internal (popc));
4604 emit_jump_insn (gen_return_internal ());
4607 /* Reset from the function's potential modifications. */
4610 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4611 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
/* The prologue may have renumbered pic_offset_table_rtx to an alternate
 register (ix86_select_alt_pic_regnum); restore the canonical PIC
 register number so the next function starts from a clean state.  */
4613 if (pic_offset_table_rtx)
4614 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4617 /* Extract the parts of an RTL expression that is a valid memory address
4618 for an instruction. Return 0 if the structure of the address is
4619 grossly off. Return -1 if the address contains ASHIFT, so it is not
4620 strictly valid, but still used for computing length of lea instruction. */
/* On success the parts (base, index, scale, disp, seg) are stored in *OUT.  */
4623 ix86_decompose_address (rtx addr, struct ix86_address *out)
4625 rtx base = NULL_RTX;
4626 rtx index = NULL_RTX;
4627 rtx disp = NULL_RTX;
4628 HOST_WIDE_INT scale = 1;
4629 rtx scale_rtx = NULL_RTX;
4631 enum ix86_address_seg seg = SEG_DEFAULT;
4633 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
/* A PLUS tree is flattened into an array of addends, then each addend
 is classified as base, index*scale, segment unspec, or displacement.  */
4635 else if (GET_CODE (addr) == PLUS)
4645 addends[n++] = XEXP (op, 1);
4648 while (GET_CODE (op) == PLUS);
4653 for (i = n; i >= 0; --i)
4656 switch (GET_CODE (op))
4661 index = XEXP (op, 0);
4662 scale_rtx = XEXP (op, 1);
/* A thread-pointer unspec becomes a %fs/%gs segment override when
 direct TLS segment references are enabled.  */
4666 if (XINT (op, 1) == UNSPEC_TP
4667 && TARGET_TLS_DIRECT_SEG_REFS
4668 && seg == SEG_DEFAULT)
4669 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
4698 else if (GET_CODE (addr) == MULT)
4700 index = XEXP (addr, 0); /* index*scale */
4701 scale_rtx = XEXP (addr, 1);
4703 else if (GET_CODE (addr) == ASHIFT)
4707 /* We're called for lea too, which implements ashift on occasion. */
4708 index = XEXP (addr, 0);
4709 tmp = XEXP (addr, 1);
4710 if (GET_CODE (tmp) != CONST_INT)
4712 scale = INTVAL (tmp);
/* Shift counts above 3 cannot be represented as a scale of 1/2/4/8.  */
4713 if ((unsigned HOST_WIDE_INT) scale > 3)
4719 disp = addr; /* displacement */
4721 /* Extract the integral value of scale. */
4724 if (GET_CODE (scale_rtx) != CONST_INT)
4726 scale = INTVAL (scale_rtx);
4729 /* Allow arg pointer and stack pointer as index if there is not scaling. */
4730 if (base && index && scale == 1
4731 && (index == arg_pointer_rtx
4732 || index == frame_pointer_rtx
4733 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
4740 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4741 if ((base == hard_frame_pointer_rtx
4742 || base == frame_pointer_rtx
4743 || base == arg_pointer_rtx) && !disp)
4746 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4747 Avoid this by transforming to [%esi+0]. */
4748 if (ix86_tune == PROCESSOR_K6 && !optimize_size
4749 && base && !index && !disp
4751 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4754 /* Special case: encode reg+reg instead of reg*2. */
4755 if (!base && index && scale && scale == 2)
4756 base = index, scale = 1;
4758 /* Special case: scaling cannot be encoded without base or displacement. */
4759 if (!base && !disp && index && scale != 1)
4771 /* Return cost of the memory address x.
4772 For i386, it is better to use a complex address than let gcc copy
4773 the address into a reg and make a new pseudo. But not if the address
4774 requires to two regs - that would mean more pseudos with longer
/* Lower return value = cheaper address; callers use this to bias
 address-mode selection.  */
4777 ix86_address_cost (rtx x)
4779 struct ix86_address parts;
4782 if (!ix86_decompose_address (x, &parts))
4785 /* More complex memory references are better. */
4786 if (parts.disp && parts.disp != const0_rtx)
4788 if (parts.seg != SEG_DEFAULT)
4791 /* Attempt to minimize number of registers in the address. */
/* Pseudo-register operands (REGNO >= FIRST_PSEUDO_REGISTER) are the
 ones that will tie up an allocatable register, so only they count.  */
4793 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4795 && (!REG_P (parts.index)
4796 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4800 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4802 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4803 && parts.base != parts.index)
4806 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
4807 since it's predecode logic can't detect the length of instructions
4808 and it degenerates to vector decoded. Increase cost of such
4809 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4810 to split such addresses or even refuse such addresses at all.
4812 Following addressing modes are affected:
4817 The first and last case may be avoidable by explicitly coding the zero in
4818 memory address, but I don't have AMD-K6 machine handy to check this
4822 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4823 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4824 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4830 /* If X is a machine specific address (i.e. a symbol or label being
4831 referenced as a displacement from the GOT implemented using an
4832 UNSPEC), then return the base term. Otherwise return X. */
4835 ix86_find_base_term (rtx x)
4841 if (GET_CODE (x) != CONST)
/* Strip an outer "symbol + constant" so we can look at the UNSPEC.  */
4844 if (GET_CODE (term) == PLUS
4845 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4846 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4847 term = XEXP (term, 0);
4848 if (GET_CODE (term) != UNSPEC
4849 || XINT (term, 1) != UNSPEC_GOTPCREL)
/* The symbol/label is the sole operand of the GOTPCREL unspec.  */
4852 term = XVECEXP (term, 0, 0);
4854 if (GET_CODE (term) != SYMBOL_REF
4855 && GET_CODE (term) != LABEL_REF)
/* Non-64-bit path (per elided #if): fall back to full
 delegitimization to recover the underlying symbol.  */
4861 term = ix86_delegitimize_address (x);
4863 if (GET_CODE (term) != SYMBOL_REF
4864 && GET_CODE (term) != LABEL_REF)
4870 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
4871 this is used for to form addresses to local data when -fPIC is in
/* Returns nonzero iff DISP is such a Mach-O pic-base difference.  */
4875 darwin_local_data_pic (rtx disp)
4877 if (GET_CODE (disp) == MINUS)
4879 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
4880 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
4881 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
/* The Mach-O pic base is identified purely by its magic name.  */
4883 const char *sym_name = XSTR (XEXP (disp, 1), 0);
4884 if (! strcmp (sym_name, "<pic base>"))
4892 /* Determine if a given RTX is a valid constant. We already know this
4893 satisfies CONSTANT_P. */
4896 legitimate_constant_p (rtx x)
4898 switch (GET_CODE (x))
/* CONST wraps an expression; only "something + integer" forms are
 examined further.  */
4903 if (GET_CODE (x) == PLUS)
4905 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
4910 if (TARGET_MACHO && darwin_local_data_pic (x))
4913 /* Only some unspecs are valid as "constants". */
4914 if (GET_CODE (x) == UNSPEC)
4915 switch (XINT (x, 1))
4919 return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
4921 return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
4926 /* We must have drilled down to a symbol. */
4927 if (!symbolic_operand (x, Pmode))
4932 /* TLS symbols are never valid. */
/* TLS addresses need runtime relocation machinery, so they cannot be
 treated as plain constants.  */
4933 if (tls_symbolic_operand (x, Pmode))
4941 /* Otherwise we handle everything else in the move patterns. */
4945 /* Determine if it's legal to put X into the constant pool. This
4946 is not possible for the address of thread-local symbols, which
4947 is checked above. */
/* Target hook: nonzero means X must NOT be forced into the constant pool.  */
4950 ix86_cannot_force_const_mem (rtx x)
4952 return !legitimate_constant_p (x);
4955 /* Determine if a given RTX is a valid constant address. */
/* Uses the strict (reload-complete) form of legitimate_address_p.  */
4958 constant_address_p (rtx x)
4960 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
4963 /* Nonzero if the constant value X is a legitimate general operand
4964 when generating PIC code. It is given that flag_pic is on and
4965 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4968 legitimate_pic_operand_p (rtx x)
4972 switch (GET_CODE (x))
4975 inner = XEXP (x, 0);
4977 /* Only some unspecs are valid as "constants". */
4978 if (GET_CODE (inner) == UNSPEC)
4979 switch (XINT (inner, 1))
4982 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
/* Symbols/labels are legitimate only if they form a valid PIC
 displacement (see legitimate_pic_address_disp_p).  */
4990 return legitimate_pic_address_disp_p (x);
4997 /* Determine if a given CONST RTX is a valid memory displacement
/* in PIC mode.  Returns nonzero iff DISP may appear as the displacement
 of a PIC memory reference.  */
5001 legitimate_pic_address_disp_p (rtx disp)
5005 /* In 64bit mode we can allow direct addresses of symbols and labels
5006 when they are not dynamic symbols. */
5009 /* TLS references should always be enclosed in UNSPEC. */
5010 if (tls_symbolic_operand (disp, GET_MODE (disp)))
/* RIP-relative: a local symbol or label is directly addressable in
 the small PIC code model.  */
5012 if (GET_CODE (disp) == SYMBOL_REF
5013 && ix86_cmodel == CM_SMALL_PIC
5014 && SYMBOL_REF_LOCAL_P (disp))
5016 if (GET_CODE (disp) == LABEL_REF)
5018 if (GET_CODE (disp) == CONST
5019 && GET_CODE (XEXP (disp, 0)) == PLUS)
5021 rtx op0 = XEXP (XEXP (disp, 0), 0);
5022 rtx op1 = XEXP (XEXP (disp, 0), 1);
5024 /* TLS references should always be enclosed in UNSPEC. */
5025 if (tls_symbolic_operand (op0, GET_MODE (op0)))
/* symbol+offset is OK when the offset stays within +/-16MB, keeping
 the total well inside the signed 32-bit displacement range.  */
5027 if (((GET_CODE (op0) == SYMBOL_REF
5028 && ix86_cmodel == CM_SMALL_PIC
5029 && SYMBOL_REF_LOCAL_P (op0))
5030 || GET_CODE (op0) == LABEL_REF)
5031 && GET_CODE (op1) == CONST_INT
5032 && INTVAL (op1) < 16*1024*1024
5033 && INTVAL (op1) >= -16*1024*1024)
5037 if (GET_CODE (disp) != CONST)
5039 disp = XEXP (disp, 0);
5043 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5044 of GOT tables. We should not need these anyway. */
5045 if (GET_CODE (disp) != UNSPEC
5046 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5049 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5050 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit path: strip "unspec + constant" then validate the unspec kind.  */
5056 if (GET_CODE (disp) == PLUS)
5058 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5060 disp = XEXP (disp, 0);
5064 if (TARGET_MACHO && darwin_local_data_pic (disp))
5067 if (GET_CODE (disp) != UNSPEC)
5070 switch (XINT (disp, 1))
5075 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5077 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5078 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5079 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5081 case UNSPEC_GOTTPOFF:
5082 case UNSPEC_GOTNTPOFF:
5083 case UNSPEC_INDNTPOFF:
/* TLS unspecs: each kind requires its matching class of symbol.  */
5086 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5088 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5090 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5096 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5097 memory address for an instruction. The MODE argument is the machine mode
5098 for the MEM expression that wants to use this address.
5100 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5101 convert common non-canonical forms to canonical form so that they will
/* STRICT selects the post-reload check where only hard registers of the
 proper class are acceptable as base/index.  */
5105 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5107 struct ix86_address parts;
5108 rtx base, index, disp;
5109 HOST_WIDE_INT scale;
5110 const char *reason = NULL;
5111 rtx reason_rtx = NULL_RTX;
5113 if (TARGET_DEBUG_ADDR)
5116 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5117 GET_MODE_NAME (mode), strict);
/* <= 0 covers both a failed decomposition (0) and the "lea only"
 ASHIFT form (-1), neither of which is a legitimate address.  */
5121 if (ix86_decompose_address (addr, &parts) <= 0)
5123 reason = "decomposition failed";
5128 index = parts.index;
5130 scale = parts.scale;
5132 /* Validate base register.
5134 Don't allow SUBREG's here, it can lead to spill failures when the base
5135 is one word out of a two word structure, which is represented internally
5142 if (GET_CODE (base) != REG)
5144 reason = "base is not a register";
5148 if (GET_MODE (base) != Pmode)
5150 reason = "base is not in Pmode";
5154 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
5155 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
5157 reason = "base is not valid";
5162 /* Validate index register.
5164 Don't allow SUBREG's here, it can lead to spill failures when the index
5165 is one word out of a two word structure, which is represented internally
5172 if (GET_CODE (index) != REG)
5174 reason = "index is not a register";
5178 if (GET_MODE (index) != Pmode)
5180 reason = "index is not in Pmode";
5184 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
5185 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
5187 reason = "index is not valid";
5192 /* Validate scale factor. */
5195 reason_rtx = GEN_INT (scale);
5198 reason = "scale without index";
/* Hardware SIB encoding only supports scales of 1, 2, 4, 8.  */
5202 if (scale != 2 && scale != 4 && scale != 8)
5204 reason = "scale is not a valid multiplier";
5209 /* Validate displacement. */
5214 if (GET_CODE (disp) == CONST
5215 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5216 switch (XINT (XEXP (disp, 0), 1))
5220 case UNSPEC_GOTPCREL:
5223 goto is_legitimate_pic;
5225 case UNSPEC_GOTTPOFF:
5226 case UNSPEC_GOTNTPOFF:
5227 case UNSPEC_INDNTPOFF:
5233 reason = "invalid address unspec";
5237 else if (flag_pic && (SYMBOLIC_CONST (disp)
5239 && !machopic_operand_p (disp)
5244 if (TARGET_64BIT && (index || base))
5246 /* foo@dtpoff(%rX) is ok. */
/* In 64-bit PIC, a symbolic displacement combined with registers is
 only allowed for the TLS dtpoff/ntpoff "unspec + constant" shape.  */
5247 if (GET_CODE (disp) != CONST
5248 || GET_CODE (XEXP (disp, 0)) != PLUS
5249 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5250 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5251 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5252 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5254 reason = "non-constant pic memory reference";
5258 else if (! legitimate_pic_address_disp_p (disp))
5260 reason = "displacement is an invalid pic construct";
5264 /* This code used to verify that a symbolic pic displacement
5265 includes the pic_offset_table_rtx register.
5267 While this is good idea, unfortunately these constructs may
5268 be created by "adds using lea" optimization for incorrect
5277 This code is nonsensical, but results in addressing
5278 GOT table with pic_offset_table_rtx base. We can't
5279 just refuse it easily, since it gets matched by
5280 "addsi3" pattern, that later gets split to lea in the
5281 case output register differs from input. While this
5282 can be handled by separate addsi pattern for this case
5283 that never results in lea, this seems to be easier and
5284 correct fix for crash to disable this test. */
5286 else if (GET_CODE (disp) != LABEL_REF
5287 && GET_CODE (disp) != CONST_INT
5288 && (GET_CODE (disp) != CONST
5289 || !legitimate_constant_p (disp))
5290 && (GET_CODE (disp) != SYMBOL_REF
5291 || !legitimate_constant_p (disp)))
5293 reason = "displacement is not constant";
5296 else if (TARGET_64BIT
5297 && !x86_64_immediate_operand (disp, VOIDmode))
5299 reason = "displacement is out of range";
5304 /* Everything looks valid. */
5305 if (TARGET_DEBUG_ADDR)
5306 fprintf (stderr, "Success.\n");
/* Rejection path (reached via gotos from the checks above): report why
 when address debugging is enabled.  */
5310 if (TARGET_DEBUG_ADDR)
5312 fprintf (stderr, "Error: %s\n", reason);
5313 debug_rtx (reason_rtx);
5318 /* Return an unique alias set for the GOT. */
5320 static HOST_WIDE_INT
5321 ix86_GOT_alias_set (void)
/* Lazily allocate the alias set once and cache it; -1 means "not yet
 allocated".  */
5323 static HOST_WIDE_INT set = -1;
5325 set = new_alias_set ();
5329 /* Return a legitimate reference for ORIG (an address) using the
5330 register REG. If REG is 0, a new pseudo is generated.
5332 There are two types of references that must be handled:
5334 1. Global data references must load the address from the GOT, via
5335 the PIC reg. An insn is emitted to do this load, and the reg is
5338 2. Static data references, constant pool addresses, and code labels
5339 compute the address as an offset from the GOT, whose base is in
5340 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5341 differentiate them from global data objects. The returned
5342 address is the PIC reg + an unspec constant.
5344 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5345 reg also appears in the address. */
5348 legitimize_pic_address (rtx orig, rtx reg)
5356 reg = gen_reg_rtx (Pmode);
5357 /* Use the generic Mach-O PIC machinery. */
5358 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5361 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5363 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5365 /* This symbol may be referenced via a displacement from the PIC
5366 base address (@GOTOFF). */
/* During reload no new liveness info is computed, so force the PIC
 register live by hand.  */
5368 if (reload_in_progress)
5369 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5370 if (GET_CODE (addr) == CONST)
5371 addr = XEXP (addr, 0);
5372 if (GET_CODE (addr) == PLUS)
5374 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
5375 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
5378 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5379 new = gen_rtx_CONST (Pmode, new);
5380 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5384 emit_move_insn (reg, new);
5388 else if (GET_CODE (addr) == SYMBOL_REF)
/* 64-bit: load the address RIP-relatively from the GOT (@GOTPCREL).  */
5392 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5393 new = gen_rtx_CONST (Pmode, new);
5394 new = gen_const_mem (Pmode, new);
5395 set_mem_alias_set (new, ix86_GOT_alias_set ());
5398 reg = gen_reg_rtx (Pmode);
5399 /* Use directly gen_movsi, otherwise the address is loaded
5400 into register for CSE. We don't want to CSE this addresses,
5401 instead we CSE addresses from the GOT table, so skip this. */
5402 emit_insn (gen_movsi (reg, new));
5407 /* This symbol must be referenced via a load from the
5408 Global Offset Table (@GOT). */
5410 if (reload_in_progress)
5411 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5412 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5413 new = gen_rtx_CONST (Pmode, new);
5414 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5415 new = gen_const_mem (Pmode, new);
5416 set_mem_alias_set (new, ix86_GOT_alias_set ());
5419 reg = gen_reg_rtx (Pmode);
5420 emit_move_insn (reg, new);
5426 if (GET_CODE (addr) == CONST)
5428 addr = XEXP (addr, 0);
5430 /* We must match stuff we generate before. Assume the only
5431 unspecs that can get here are ours. Not that we could do
5432 anything with them anyway.... */
5433 if (GET_CODE (addr) == UNSPEC
5434 || (GET_CODE (addr) == PLUS
5435 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5437 if (GET_CODE (addr) != PLUS)
5440 if (GET_CODE (addr) == PLUS)
5442 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5444 /* Check first to see if this is a constant offset from a @GOTOFF
5445 symbol reference. */
5446 if (local_symbolic_operand (op0, Pmode)
5447 && GET_CODE (op1) == CONST_INT)
5451 if (reload_in_progress)
5452 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5453 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5455 new = gen_rtx_PLUS (Pmode, new, op1);
5456 new = gen_rtx_CONST (Pmode, new);
5457 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5461 emit_move_insn (reg, new);
/* Offsets beyond +/-16MB can't ride along in the displacement; keep
 the symbol in a register and add the offset separately.  */
5467 if (INTVAL (op1) < -16*1024*1024
5468 || INTVAL (op1) >= 16*1024*1024)
5469 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* General PLUS: legitimize both halves recursively and recombine,
 folding a constant part back into the base when possible.  */
5474 base = legitimize_pic_address (XEXP (addr, 0), reg);
5475 new = legitimize_pic_address (XEXP (addr, 1),
5476 base == reg ? NULL_RTX : reg);
5478 if (GET_CODE (new) == CONST_INT)
5479 new = plus_constant (base, INTVAL (new));
5482 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5484 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5485 new = XEXP (new, 1);
5487 new = gen_rtx_PLUS (Pmode, base, new);
5495 /* Load the thread pointer. If TO_REG is true, force it into a register. */
5498 get_thread_pointer (int to_reg)
/* The thread pointer is modeled as an UNSPEC_TP wrapping const0_rtx.  */
5502 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5506 reg = gen_reg_rtx (Pmode);
5507 insn = gen_rtx_SET (VOIDmode, reg, tp);
5508 insn = emit_insn (insn);
5513 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
5514 false if we expect this to be used for a memory address and true if
5515 we expect to load the address into a register. */
/* X is the TLS symbol; MODEL selects the TLS access model.  Returns an
 rtx computing the symbol's address.  */
5518 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
5520 rtx dest, base, off, pic;
5525 case TLS_MODEL_GLOBAL_DYNAMIC:
5526 dest = gen_reg_rtx (Pmode);
/* 64-bit GD: call __tls_get_addr; result comes back in %rax and is
 wrapped in a libcall block so it can be CSEd on the symbol.  */
5529 rtx rax = gen_rtx_REG (Pmode, 0), insns;
5532 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5533 insns = get_insns ();
5536 emit_libcall_block (insns, dest, rax, x);
5539 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5542 case TLS_MODEL_LOCAL_DYNAMIC:
5543 base = gen_reg_rtx (Pmode);
5546 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5549 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5550 insns = get_insns ();
5553 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5554 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5555 emit_libcall_block (insns, base, rax, note);
5558 emit_insn (gen_tls_local_dynamic_base_32 (base));
/* LD: address = module base + @DTPOFF offset of the symbol.  */
5560 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5561 off = gen_rtx_CONST (Pmode, off);
5563 return gen_rtx_PLUS (Pmode, base, off);
5565 case TLS_MODEL_INITIAL_EXEC:
/* IE: load the TP offset from the GOT; the unspec kind depends on
 target bits and TLS dialect (GNU vs. SUN, per elided conditions).  */
5569 type = UNSPEC_GOTNTPOFF;
5573 if (reload_in_progress)
5574 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5575 pic = pic_offset_table_rtx;
5576 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
5578 else if (!TARGET_GNU_TLS)
5580 pic = gen_reg_rtx (Pmode);
5581 emit_insn (gen_set_got (pic));
5582 type = UNSPEC_GOTTPOFF;
5587 type = UNSPEC_INDNTPOFF;
5590 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
5591 off = gen_rtx_CONST (Pmode, off);
5593 off = gen_rtx_PLUS (Pmode, pic, off);
5594 off = gen_const_mem (Pmode, off);
5595 set_mem_alias_set (off, ix86_GOT_alias_set ());
5597 if (TARGET_64BIT || TARGET_GNU_TLS)
/* GNU TLS: address = thread pointer + offset.  */
5599 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5600 off = force_reg (Pmode, off);
5601 return gen_rtx_PLUS (Pmode, base, off);
/* Sun TLS: address = thread pointer - offset, computed with a sub.  */
5605 base = get_thread_pointer (true);
5606 dest = gen_reg_rtx (Pmode);
5607 emit_insn (gen_subsi3 (dest, base, off));
5611 case TLS_MODEL_LOCAL_EXEC:
/* LE: the offset is a link-time constant (@NTPOFF / @TPOFF).  */
5612 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5613 (TARGET_64BIT || TARGET_GNU_TLS)
5614 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5615 off = gen_rtx_CONST (Pmode, off);
5617 if (TARGET_64BIT || TARGET_GNU_TLS)
5619 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5620 return gen_rtx_PLUS (Pmode, base, off);
5624 base = get_thread_pointer (true);
5625 dest = gen_reg_rtx (Pmode);
5626 emit_insn (gen_subsi3 (dest, base, off));
5637 /* Try machine-dependent ways of modifying an illegitimate address
5638 to be legitimate. If we find one, return the new, valid address.
5639 This macro is used in only one place: `memory_address' in explow.c.
5641 OLDX is the address as it was before break_out_memory_refs was called.
5642 In some cases it is useful to look at this to decide what needs to be done.
5644 MODE and WIN are passed so that this macro can use
5645 GO_IF_LEGITIMATE_ADDRESS.
5647 It is always safe for this macro to do nothing. It exists to recognize
5648 opportunities to optimize the output.
5650 For the 80386, we handle X+REG by loading X into a register R and
5651 using R+REG. R will go in a general reg and indexing will be used.
5652 However, if REG is a broken-out memory address or multiplication,
5653 nothing needs to be done because REG can certainly go in a general reg.
5655 When -fpic is used, special handling is needed for symbolic references.
5656 See comments by legitimize_pic_address in i386.c for details. */
5659 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
5664 if (TARGET_DEBUG_ADDR)
5666 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5667 GET_MODE_NAME (mode));
/* TLS symbols get their own legitimization path before anything else.  */
5671 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
5673 return legitimize_tls_address (x, log, false);
5674 if (GET_CODE (x) == CONST
5675 && GET_CODE (XEXP (x, 0)) == PLUS
5676 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
5677 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
5679 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
5680 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
5683 if (flag_pic && SYMBOLIC_CONST (x))
5684 return legitimize_pic_address (x, 0);
5686 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5687 if (GET_CODE (x) == ASHIFT
5688 && GET_CODE (XEXP (x, 1)) == CONST_INT
5689 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5692 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5693 GEN_INT (1 << log));
5696 if (GET_CODE (x) == PLUS)
5698 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5700 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5701 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5702 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5705 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5706 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5707 GEN_INT (1 << log));
5710 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5711 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5712 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5715 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5716 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5717 GEN_INT (1 << log));
5720 /* Put multiply first if it isn't already. */
5721 if (GET_CODE (XEXP (x, 1)) == MULT)
5723 rtx tmp = XEXP (x, 0);
5724 XEXP (x, 0) = XEXP (x, 1);
5729 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5730 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5731 created by virtual register instantiation, register elimination, and
5732 similar optimizations. */
5733 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5736 x = gen_rtx_PLUS (Pmode,
5737 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5738 XEXP (XEXP (x, 1), 0)),
5739 XEXP (XEXP (x, 1), 1));
5743 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5744 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5745 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5746 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5747 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5748 && CONSTANT_P (XEXP (x, 1)))
5751 rtx other = NULL_RTX;
/* Identify which of the two constants can be folded; the other operand
 ("other") keeps the non-constant part.  */
5753 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5755 constant = XEXP (x, 1);
5756 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5758 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5760 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5761 other = XEXP (x, 1);
5769 x = gen_rtx_PLUS (Pmode,
5770 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5771 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5772 plus_constant (other, INTVAL (constant)));
/* After each canonicalization round, stop as soon as the address has
 become legitimate.  */
5776 if (changed && legitimate_address_p (mode, x, FALSE))
5779 if (GET_CODE (XEXP (x, 0)) == MULT)
5782 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5785 if (GET_CODE (XEXP (x, 1)) == MULT)
5788 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5792 && GET_CODE (XEXP (x, 1)) == REG
5793 && GET_CODE (XEXP (x, 0)) == REG)
5796 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5799 x = legitimize_pic_address (x, 0);
5802 if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: force one side into a fresh register so the result is
 the canonical reg+reg (or reg+disp) form.  */
5805 if (GET_CODE (XEXP (x, 0)) == REG)
5807 rtx temp = gen_reg_rtx (Pmode);
5808 rtx val = force_operand (XEXP (x, 1), temp);
5810 emit_move_insn (temp, val);
5816 else if (GET_CODE (XEXP (x, 1)) == REG)
5818 rtx temp = gen_reg_rtx (Pmode);
5819 rtx val = force_operand (XEXP (x, 0), temp);
5821 emit_move_insn (temp, val);
/* NOTE(review): numbered, subsampled listing — the leading integers are
   original i386.c line numbers and intervening source lines are missing,
   so this fragment is not compilable as-is.  Comments describe only the
   visible lines.  */
5831 /* Print an integer constant expression in assembler syntax. Addition
5832 and subtraction are the only arithmetic that may appear in these
5833 expressions. FILE is the stdio stream to write to, X is the rtx, and
5834 CODE is the operand print code from the output string. */
5837 output_pic_addr_const (FILE *file, rtx x, int code)
5841 switch (GET_CODE (x))
/* SYMBOL_REF arm (enclosing case label not visible here).  */
5851 /* Mark the decl as referenced so that cgraph will output the function. */
5852 if (SYMBOL_REF_DECL (x))
5853 mark_decl_referenced (SYMBOL_REF_DECL (x));
5855 assemble_name (file, XSTR (x, 0));
/* 'P' print code requests a @PLT suffix for non-local symbols.  */
5856 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
5857 fputs ("@PLT", file);
5864 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5865 assemble_name (asm_out_file, buf);
5869 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5873 /* This used to output parentheses around the expression,
5874 but that does not work on the 386 (either ATT or BSD assembler). */
5875 output_pic_addr_const (file, XEXP (x, 0), code);
/* CONST_DOUBLE handling: only integral (VOIDmode) doubles are printable.  */
5879 if (GET_MODE (x) == VOIDmode)
5881 /* We can use %d if the number is <32 bits and positive. */
5882 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5883 fprintf (file, "0x%lx%08lx",
5884 (unsigned long) CONST_DOUBLE_HIGH (x),
5885 (unsigned long) CONST_DOUBLE_LOW (x))
5887 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5890 /* We can't handle floating point constants;
5891 PRINT_OPERAND must handle them. */
5892 output_operand_lossage ("floating constant misused");
/* PLUS: recurse on both operands, constant term first when present.  */
5896 /* Some assemblers need integer constants to appear first. */
5897 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5899 output_pic_addr_const (file, XEXP (x, 0), code);
5901 output_pic_addr_const (file, XEXP (x, 1), code);
5903 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5905 output_pic_addr_const (file, XEXP (x, 1), code);
5907 output_pic_addr_const (file, XEXP (x, 0), code);
/* MINUS (presumably): bracketed as (a)-(b) / [a]-[b] per dialect.  */
5915 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5916 output_pic_addr_const (file, XEXP (x, 0), code);
5918 output_pic_addr_const (file, XEXP (x, 1), code);
5920 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC wrappers map to assembler relocation suffixes.  */
5924 if (XVECLEN (x, 0) != 1)
5926 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5927 switch (XINT (x, 1))
5930 fputs ("@GOT", file);
5933 fputs ("@GOTOFF", file);
5935 case UNSPEC_GOTPCREL:
5936 fputs ("@GOTPCREL(%rip)", file);
5938 case UNSPEC_GOTTPOFF:
5939 /* FIXME: This might be @TPOFF in Sun ld too. */
5940 fputs ("@GOTTPOFF", file);
5943 fputs ("@TPOFF", file);
5947 fputs ("@TPOFF", file);
5949 fputs ("@NTPOFF", file);
5952 fputs ("@DTPOFF", file);
5954 case UNSPEC_GOTNTPOFF:
5956 fputs ("@GOTTPOFF(%rip)", file);
5958 fputs ("@GOTNTPOFF", file);
5960 case UNSPEC_INDNTPOFF:
5961 fputs ("@INDNTPOFF", file);
5964 output_operand_lossage ("invalid UNSPEC as operand");
5970 output_operand_lossage ("invalid expression as operand");
/* NOTE(review): fragmentary numbered listing; leading integers are
   original line numbers and interior lines are missing.  */
5974 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
5975 We need to emit DTP-relative relocations. */
5978 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
/* Emits "<ASM_LONG> sym@DTPOFF"; the size switch that presumably selects
   the directive is not visible in this excerpt — TODO confirm.  */
5980 fputs (ASM_LONG, file);
5981 output_addr_const (file, x);
5982 fputs ("@DTPOFF", file);
5988 fputs (", 0", file);
/* NOTE(review): fragmentary numbered listing; leading integers are
   original line numbers and interior lines are missing.  */
5995 /* In the name of slightly smaller debug output, and to cater to
5996 general assembler losage, recognize PIC+GOTOFF and turn it back
5997 into a direct symbol reference. */
6000 ix86_delegitimize_address (rtx orig_x)
6004 if (GET_CODE (x) == MEM)
/* 64-bit case (presumably): a GOTPCREL unspec loaded through a MEM is
   replaced by the underlying symbol.  */
6009 if (GET_CODE (x) != CONST
6010 || GET_CODE (XEXP (x, 0)) != UNSPEC
6011 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6012 || GET_CODE (orig_x) != MEM)
6014 return XVECEXP (XEXP (x, 0), 0, 0);
6017 if (GET_CODE (x) != PLUS
6018 || GET_CODE (XEXP (x, 1)) != CONST)
/* Identify the PIC-register part (y) of the address.  */
6021 if (GET_CODE (XEXP (x, 0)) == REG
6022 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6023 /* %ebx + GOT/GOTOFF */
6025 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6027 /* %ebx + %reg * scale + GOT/GOTOFF */
6029 if (GET_CODE (y, 0)) == REG
6030 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6032 else if (GET_CODE (XEXP (y, 1)) == REG
6033 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6037 if (GET_CODE (y) != REG
6038 && GET_CODE (y) != MULT
6039 && GET_CODE (y) != ASHIFT)
/* Strip the GOT/GOTOFF unspec; keep any leftover index part y.  */
6045 x = XEXP (XEXP (x, 1), 0);
6046 if (GET_CODE (x) == UNSPEC
6047 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6048 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6051 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6052 return XVECEXP (x, 0, 0);
/* Same, with an additional constant offset folded into the result.  */
6055 if (GET_CODE (x) == PLUS
6056 && GET_CODE (XEXP (x, 0)) == UNSPEC
6057 && GET_CODE (XEXP (x, 1)) == CONST_INT
6058 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6059 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6060 && GET_CODE (orig_x) != MEM)))
6062 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6064 return gen_rtx_PLUS (Pmode, y, x);
/* NOTE(review): fragmentary numbered listing; leading integers are
   original line numbers and interior lines are missing.  Appears to write
   the condition-code suffix (e.g. "a", "ae", "p") for a comparison to
   FILE — confirm against the full source.  */
6072 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6077 if (mode == CCFPmode || mode == CCFPUmode)
6079 enum rtx_code second_code, bypass_code;
6080 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6081 if (bypass_code != UNKNOWN || second_code != UNKNOWN)
6083 code = ix86_fp_compare_code_to_integer (code);
6087 code = reverse_condition (code);
6098 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6103 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6104 Those same assemblers have the same but opposite losage on cmov. */
6107 suffix = fp ? "nbe" : "a";
6110 if (mode == CCNOmode || mode == CCGOCmode)
6112 else if (mode == CCmode || mode == CCGCmode)
6123 if (mode == CCNOmode || mode == CCGOCmode)
6125 else if (mode == CCmode || mode == CCGCmode)
6134 suffix = fp ? "nb" : "ae";
6137 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6147 suffix = fp ? "u" : "p";
6150 suffix = fp ? "nu" : "np";
6155 fputs (suffix, file);
/* NOTE(review): fragmentary numbered listing; leading integers are
   original line numbers and interior lines are missing.  */
6158 /* Print the name of register X to FILE based on its machine mode and number.
6159 If CODE is 'w', pretend the mode is HImode.
6160 If CODE is 'b', pretend the mode is QImode.
6161 If CODE is 'k', pretend the mode is SImode.
6162 If CODE is 'q', pretend the mode is DImode.
6163 If CODE is 'h', pretend the reg is the `high' byte register.
6164 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
6167 print_reg (rtx x, int code, FILE *file)
6169 if (REGNO (x) == ARG_POINTER_REGNUM
6170 || REGNO (x) == FRAME_POINTER_REGNUM
6171 || REGNO (x) == FLAGS_REG
6172 || REGNO (x) == FPSR_REG)
6175 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
/* Translate the print code into an operand size in bytes.  */
6178 if (code == 'w' || MMX_REG_P (x))
6180 else if (code == 'b')
6182 else if (code == 'k')
6184 else if (code == 'q')
6186 else if (code == 'y')
6188 else if (code == 'h')
6191 code = GET_MODE_SIZE (GET_MODE (x));
6193 /* Irritatingly, AMD extended registers use different naming convention
6194 from the normal registers. */
6195 if (REX_INT_REG_P (x))
6202 error ("extended registers have no high halves");
6205 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6208 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6211 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6214 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6217 error ("unsupported operand size for extended register");
6225 if (STACK_TOP_P (x))
6227 fputs ("st(0)", file);
/* Size 4/8: non-FP registers get an 'e' (or 'r' on 64-bit) prefix.  */
6234 if (! ANY_FP_REG_P (x))
6235 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6240 fputs (hi_reg_name[REGNO (x)], file);
6243 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
6245 fputs (qi_reg_name[REGNO (x)], file);
6248 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
6250 fputs (qi_high_reg_name[REGNO (x)], file);
/* NOTE(review): fragmentary numbered listing; leading integers are
   original line numbers and interior lines are missing.  */
6257 /* Locate some local-dynamic symbol still in use by this function
6258 so that we can print its name in some tls_local_dynamic_base
6262 get_some_local_dynamic_name (void)
/* Cached result is reused across calls within the same function.  */
6266 if (cfun->machine->some_ld_name)
6267 return cfun->machine->some_ld_name;
/* Otherwise scan every insn pattern via the for_each_rtx callback.  */
6269 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6271 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6272 return cfun->machine->some_ld_name;
/* NOTE(review): fragmentary numbered listing.  for_each_rtx callback:
   records the first local-dynamic TLS SYMBOL_REF it sees into
   cfun->machine->some_ld_name.  */
6278 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
6282 if (GET_CODE (x) == SYMBOL_REF
6283 && local_dynamic_symbolic_operand (x, Pmode))
6285 cfun->machine->some_ld_name = XSTR (x, 0);
/* NOTE(review): fragmentary numbered listing — the leading integers are
   original i386.c line numbers and intervening source lines (including
   the comment opener and many case labels) are missing.  */
6293 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6294 C -- print opcode suffix for set/cmov insn.
6295 c -- like C, but print reversed condition
6296 F,f -- likewise, but for floating-point.
6297 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6299 R -- print the prefix for register names.
6300 z -- print the opcode suffix for the size of the current operand.
6301 * -- print a star (in certain assembler syntax)
6302 A -- print an absolute memory reference.
6303 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6304 s -- print a shift double count, followed by the assemblers argument
6306 b -- print the QImode name of the register for the indicated operand.
6307 %b0 would print %al if operands[0] is reg 0.
6308 w -- likewise, print the HImode name of the register.
6309 k -- likewise, print the SImode name of the register.
6310 q -- likewise, print the DImode name of the register.
6311 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6312 y -- print "st(0)" instead of "st" as a register.
6313 D -- print condition for SSE cmp instruction.
6314 P -- if PIC, print an @PLT suffix.
6315 X -- don't print any sort of PIC '@' suffix for a symbol.
6316 & -- print some in-use local-dynamic symbol name.
6320 print_operand (FILE *file, rtx x, int code)
6327 if (ASSEMBLER_DIALECT == ASM_ATT)
6332 assemble_name (file, get_some_local_dynamic_name ());
6336 if (ASSEMBLER_DIALECT == ASM_ATT)
6338 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6340 /* Intel syntax. For absolute addresses, registers should not
6341 be surrounded by braces. */
6342 if (GET_CODE (x) != REG)
6345 PRINT_OPERAND (file, x, 0);
6353 PRINT_OPERAND (file, x, 0);
/* Size-suffix codes: AT&T dialect emits a letter, Intel emits nothing
   (the intervening putc calls are not visible in this excerpt).  */
6358 if (ASSEMBLER_DIALECT == ASM_ATT)
6363 if (ASSEMBLER_DIALECT == ASM_ATT)
6368 if (ASSEMBLER_DIALECT == ASM_ATT)
6373 if (ASSEMBLER_DIALECT == ASM_ATT)
6378 if (ASSEMBLER_DIALECT == ASM_ATT)
6383 if (ASSEMBLER_DIALECT == ASM_ATT)
6388 /* 387 opcodes don't get size suffixes if the operands are
6390 if (STACK_REG_P (x))
6393 /* Likewise if using Intel opcodes. */
6394 if (ASSEMBLER_DIALECT == ASM_INTEL)
6397 /* This is the size of op from size of operand. */
6398 switch (GET_MODE_SIZE (GET_MODE (x)))
6401 #ifdef HAVE_GAS_FILDS_FISTS
6407 if (GET_MODE (x) == SFmode)
6422 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6424 #ifdef GAS_MNEMONICS
6450 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6452 PRINT_OPERAND (file, x, 0);
6458 /* Little bit of braindamage here. The SSE compare instructions
6459 does use completely different names for the comparisons that the
6460 fp conditional moves. */
6461 switch (GET_CODE (x))
6476 fputs ("unord", file);
6480 fputs ("neq", file);
6484 fputs ("nlt", file);
6488 fputs ("nle", file);
6491 fputs ("ord", file);
6499 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6500 if (ASSEMBLER_DIALECT == ASM_ATT)
6502 switch (GET_MODE (x))
6504 case HImode: putc ('w', file); break;
6506 case SFmode: putc ('l', file); break;
6508 case DFmode: putc ('q', file); break;
6516 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6519 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6520 if (ASSEMBLER_DIALECT == ASM_ATT)
6523 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6526 /* Like above, but reverse condition */
6528 /* Check to see if argument to %c is really a constant
6529 and not a condition code which needs to be reversed. */
6530 if (!COMPARISON_P (x))
6532 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6535 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6538 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6539 if (ASSEMBLER_DIALECT == ASM_ATT)
6542 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* Branch-prediction hint prefixes (ds/cs) for conditional jumps.  */
6548 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6551 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6554 int pred_val = INTVAL (XEXP (x, 0));
6556 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6557 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6559 int taken = pred_val > REG_BR_PROB_BASE / 2;
6560 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6562 /* Emit hints only in the case default branch prediction
6563 heuristics would fail. */
6564 if (taken != cputaken)
6566 /* We use 3e (DS) prefix for taken branches and
6567 2e (CS) prefix for not taken branches. */
6569 fputs ("ds ; ", file);
6571 fputs ("cs ; ", file);
6578 output_operand_lossage ("invalid operand code '%c'", code);
/* After code dispatch: print the operand itself by its rtx class.  */
6582 if (GET_CODE (x) == REG)
6583 print_reg (x, code, file);
6585 else if (GET_CODE (x) == MEM)
6587 /* No `byte ptr' prefix for call instructions. */
6588 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6591 switch (GET_MODE_SIZE (GET_MODE (x)))
6593 case 1: size = "BYTE"; break;
6594 case 2: size = "WORD"; break;
6595 case 4: size = "DWORD"; break;
6596 case 8: size = "QWORD"; break;
6597 case 12: size = "XWORD"; break;
6598 case 16: size = "XMMWORD"; break;
6603 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6606 else if (code == 'w')
6608 else if (code == 'k')
6612 fputs (" PTR ", file);
6616 /* Avoid (%rip) for call operands. */
6617 if (CONSTANT_ADDRESS_P (x) && code == 'P'
6618 && GET_CODE (x) != CONST_INT)
6619 output_addr_const (file, x);
6620 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6621 output_operand_lossage ("invalid constraints for operand");
6626 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6631 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6632 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6634 if (ASSEMBLER_DIALECT == ASM_ATT)
6636 fprintf (file, "0x%08lx", l);
6639 /* These float cases don't actually occur as immediate operands. */
6640 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6644 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6645 fprintf (file, "%s", dstr);
6648 else if (GET_CODE (x) == CONST_DOUBLE
6649 && GET_MODE (x) == XFmode)
6653 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6654 fprintf (file, "%s", dstr);
/* Immediates: '$' prefix (AT&T) or OFFSET FLAT: (Intel) as needed.  */
6661 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6663 if (ASSEMBLER_DIALECT == ASM_ATT)
6666 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6667 || GET_CODE (x) == LABEL_REF)
6669 if (ASSEMBLER_DIALECT == ASM_ATT)
6672 fputs ("OFFSET FLAT:", file);
6675 if (GET_CODE (x) == CONST_INT)
6676 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6678 output_pic_addr_const (file, x, code);
6680 output_addr_const (file, x);
/* NOTE(review): fragmentary numbered listing; leading integers are
   original line numbers and interior lines are missing.  */
6684 /* Print a memory operand whose address is ADDR. */
6687 print_operand_address (FILE *file, rtx addr)
6689 struct ix86_address parts;
6690 rtx base, index, disp;
6693 if (! ix86_decompose_address (addr, &parts))
6697 index = parts.index;
6699 scale = parts.scale;
/* Segment override prefix (fs:/gs:) when the address carries one.  */
6707 if (USER_LABEL_PREFIX[0] == 0)
6709 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
6715 if (!base && !index)
6717 /* Displacement only requires special attention. */
6719 if (GET_CODE (disp) == CONST_INT)
6721 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
6723 if (USER_LABEL_PREFIX[0] == 0)
6725 fputs ("ds:", file);
6727 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
6730 output_pic_addr_const (file, disp, 0);
6732 output_addr_const (file, disp);
6734 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6736 && ((GET_CODE (disp) == SYMBOL_REF
6737 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
6738 || GET_CODE (disp) == LABEL_REF
6739 || (GET_CODE (disp) == CONST
6740 && GET_CODE (XEXP (disp, 0)) == PLUS
6741 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
6742 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
6743 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
6744 fputs ("(%rip)", file);
/* AT&T dialect: disp(base,index,scale) form.  */
6748 if (ASSEMBLER_DIALECT == ASM_ATT)
6753 output_pic_addr_const (file, disp, 0);
6754 else if (GET_CODE (disp) == LABEL_REF)
6755 output_asm_label (disp);
6757 output_addr_const (file, disp);
6762 print_reg (base, 0, file);
6766 print_reg (index, 0, file);
6768 fprintf (file, ",%d", scale);
/* Intel dialect: [base+index*scale+disp] form.  */
6774 rtx offset = NULL_RTX;
6778 /* Pull out the offset of a symbol; print any symbol itself. */
6779 if (GET_CODE (disp) == CONST
6780 && GET_CODE (XEXP (disp, 0)) == PLUS
6781 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6783 offset = XEXP (XEXP (disp, 0), 1);
6784 disp = gen_rtx_CONST (VOIDmode,
6785 XEXP (XEXP (disp, 0), 0));
6789 output_pic_addr_const (file, disp, 0);
6790 else if (GET_CODE (disp) == LABEL_REF)
6791 output_asm_label (disp);
6792 else if (GET_CODE (disp) == CONST_INT)
6795 output_addr_const (file, disp);
6801 print_reg (base, 0, file);
6804 if (INTVAL (offset) >= 0)
6806 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6810 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6817 print_reg (index, 0, file);
6819 fprintf (file, "*%d", scale);
/* NOTE(review): fragmentary numbered listing.  Handles the target-specific
   UNSPEC wrappers output_addr_const cannot: prints the wrapped operand
   followed by the matching TLS relocation suffix.  */
6827 output_addr_const_extra (FILE *file, rtx x)
6831 if (GET_CODE (x) != UNSPEC)
6834 op = XVECEXP (x, 0, 0);
6835 switch (XINT (x, 1))
6837 case UNSPEC_GOTTPOFF:
6838 output_addr_const (file, op);
6839 /* FIXME: This might be @TPOFF in Sun ld. */
6840 fputs ("@GOTTPOFF", file);
6843 output_addr_const (file, op);
6844 fputs ("@TPOFF", file);
6847 output_addr_const (file, op);
6849 fputs ("@TPOFF", file);
6851 fputs ("@NTPOFF", file);
6854 output_addr_const (file, op);
6855 fputs ("@DTPOFF", file);
6857 case UNSPEC_GOTNTPOFF:
6858 output_addr_const (file, op);
6860 fputs ("@GOTTPOFF(%rip)", file);
6862 fputs ("@GOTNTPOFF", file);
6864 case UNSPEC_INDNTPOFF:
6865 output_addr_const (file, op);
6866 fputs ("@INDNTPOFF", file);
/* NOTE(review): fragmentary numbered listing; leading integers are
   original line numbers and interior lines are missing.  */
6876 /* Split one or more DImode RTL references into pairs of SImode
6877 references. The RTL can be REG, offsettable MEM, integer constant, or
6878 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6879 split and "num" is its length. lo_half and hi_half are output arrays
6880 that parallel "operands". */
6883 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
6887 rtx op = operands[num];
6889 /* simplify_subreg refuse to split volatile memory addresses,
6890 but we still have to handle it. */
6891 if (GET_CODE (op) == MEM)
6893 lo_half[num] = adjust_address (op, SImode, 0);
6894 hi_half[num] = adjust_address (op, SImode, 4);
/* Non-MEM: use subregs; VOIDmode constants are treated as DImode.  */
6898 lo_half[num] = simplify_gen_subreg (SImode, op,
6899 GET_MODE (op) == VOIDmode
6900 ? DImode : GET_MODE (op), 0);
6901 hi_half[num] = simplify_gen_subreg (SImode, op,
6902 GET_MODE (op) == VOIDmode
6903 ? DImode : GET_MODE (op), 4);
/* NOTE(review): fragmentary numbered listing; leading integers are
   original line numbers and interior lines are missing.  */
6907 /* Split one or more TImode RTL references into pairs of SImode
6908 references. The RTL can be REG, offsettable MEM, integer constant, or
6909 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6910 split and "num" is its length. lo_half and hi_half are output arrays
6911 that parallel "operands". */
6914 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
6918 rtx op = operands[num];
6920 /* simplify_subreg refuse to split volatile memory addresses, but we
6921 still have to handle it. */
6922 if (GET_CODE (op) == MEM)
6924 lo_half[num] = adjust_address (op, DImode, 0);
6925 hi_half[num] = adjust_address (op, DImode, 8);
6929 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6930 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
/* NOTE(review): fragmentary numbered listing — the leading integers are
   original i386.c line numbers and intervening source lines are missing,
   so this fragment is not compilable as-is.  */
6935 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6936 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6937 is the expression of the binary operation. The output may either be
6938 emitted here, or returned to the caller, like all output_* functions.
6940 There is no guarantee that the operands are the same mode, as they
6941 might be within FLOAT or FLOAT_EXTEND expressions. */
6943 #ifndef SYSV386_COMPAT
6944 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6945 wants to fix the assemblers because that causes incompatibility
6946 with gcc. No-one wants to fix gcc because that causes
6947 incompatibility with assemblers... You can use the option of
6948 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6949 #define SYSV386_COMPAT 1
6953 output_387_binary_op (rtx insn, rtx *operands)
6955 static char buf[30];
6958 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
6960 #ifdef ENABLE_CHECKING
6961 /* Even if we do not want to check the inputs, this documents input
6962 constraints. Which helps in understanding the following code. */
6963 if (STACK_REG_P (operands[0])
6964 && ((REG_P (operands[1])
6965 && REGNO (operands[0]) == REGNO (operands[1])
6966 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6967 || (REG_P (operands[2])
6968 && REGNO (operands[0]) == REGNO (operands[2])
6969 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6970 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
/* Select the base mnemonic from the operation code; integer-mode
   operands select the fi* variants (mnemonics not visible here).  */
6976 switch (GET_CODE (operands[3]))
6979 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6980 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6988 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6989 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6997 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6998 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7006 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7007 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
/* SSE path: scalar single/double suffix.  */
7021 if (GET_MODE (operands[0]) == SFmode)
7022 strcat (buf, "ss\t{%2, %0|%0, %2}");
7024 strcat (buf, "sd\t{%2, %0|%0, %2}");
7029 switch (GET_CODE (operands[3]))
/* Commutative ops (PLUS/MULT presumably): canonicalize so that
   operands[0] == operands[1].  */
7033 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7035 rtx temp = operands[2];
7036 operands[2] = operands[1];
7040 /* know operands[0] == operands[1]. */
7042 if (GET_CODE (operands[2]) == MEM)
7048 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7050 if (STACK_TOP_P (operands[0]))
7051 /* How is it that we are storing to a dead operand[2]?
7052 Well, presumably operands[1] is dead too. We can't
7053 store the result to st(0) as st(0) gets popped on this
7054 instruction. Instead store to operands[2] (which I
7055 think has to be st(1)). st(1) will be popped later.
7056 gcc <= 2.8.1 didn't have this check and generated
7057 assembly code that the Unixware assembler rejected. */
7058 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7060 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7064 if (STACK_TOP_P (operands[0]))
7065 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7067 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (MINUS/DIV presumably).  */
7072 if (GET_CODE (operands[1]) == MEM)
7078 if (GET_CODE (operands[2]) == MEM)
7084 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7087 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7088 derived assemblers, confusingly reverse the direction of
7089 the operation for fsub{r} and fdiv{r} when the
7090 destination register is not st(0). The Intel assembler
7091 doesn't have this brain damage. Read !SYSV386_COMPAT to
7092 figure out what the hardware really does. */
7093 if (STACK_TOP_P (operands[0]))
7094 p = "{p\t%0, %2|rp\t%2, %0}";
7096 p = "{rp\t%2, %0|p\t%0, %2}";
7098 if (STACK_TOP_P (operands[0]))
7099 /* As above for fmul/fadd, we can't store to st(0). */
7100 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7102 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7107 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7110 if (STACK_TOP_P (operands[0]))
7111 p = "{rp\t%0, %1|p\t%1, %0}";
7113 p = "{p\t%1, %0|rp\t%0, %1}";
7115 if (STACK_TOP_P (operands[0]))
7116 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7118 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7123 if (STACK_TOP_P (operands[0]))
7125 if (STACK_TOP_P (operands[1]))
7126 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7128 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7131 else if (STACK_TOP_P (operands[1]))
7134 p = "{\t%1, %0|r\t%0, %1}";
7136 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7142 p = "{r\t%2, %0|\t%0, %2}";
7144 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* NOTE(review): fragmentary numbered listing; leading integers are
   original line numbers and interior lines are missing.  */
7157 /* Output code to initialize control word copies used by trunc?f?i and
7158 rounding patterns. CURRENT_MODE is set to current control word,
7159 while NEW_MODE is set to new control word. */
7162 emit_i387_cw_initialization (rtx current_mode, rtx new_mode, int mode)
7164 rtx reg = gen_reg_rtx (HImode);
7166 emit_insn (gen_x86_fnstcw_1 (current_mode));
7167 emit_move_insn (reg, current_mode);
/* Fast path (no partial-register stall): set rounding bits with insv.  */
7169 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7175 /* round down toward -oo */
7176 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
7180 /* round up toward +oo */
7181 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
7185 /* round toward zero (truncate) */
7186 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7189 case I387_CW_MASK_PM:
7190 /* mask precision exception for nearbyint() */
7191 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
/* Fallback path: clear then set the 0x0c00 rounding-control field.  */
7203 /* round down toward -oo */
7204 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7205 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
7209 /* round up toward +oo */
7210 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7211 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
7215 /* round toward zero (truncate) */
7216 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
7219 case I387_CW_MASK_PM:
7220 /* mask precision exception for nearbyint() */
7221 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7229 emit_move_insn (new_mode, reg);
/* NOTE(review): fragmentary numbered listing; leading integers are
   original line numbers and interior lines are missing.  */
7232 /* Output code for INSN to convert a float to a signed int. OPERANDS
7233 are the insn operands. The output may be [HSD]Imode and the input
7234 operand may be [SDX]Fmode. */
7237 output_fix_trunc (rtx insn, rtx *operands)
7239 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7240 int dimode_p = GET_MODE (operands[0]) == DImode;
7242 /* Jump through a hoop or two for DImode, since the hardware has no
7243 non-popping instruction. We used to do this a different way, but
7244 that was somewhat fragile and broke with post-reload splitters. */
7245 if (dimode_p && !stack_top_dies)
7246 output_asm_insn ("fld\t%y1", operands);
7248 if (!STACK_TOP_P (operands[1]))
7251 if (GET_CODE (operands[0]) != MEM)
/* Switch to truncating control word, store, then restore.  */
7254 output_asm_insn ("fldcw\t%3", operands);
7255 if (stack_top_dies || dimode_p)
7256 output_asm_insn ("fistp%z0\t%0", operands);
7258 output_asm_insn ("fist%z0\t%0", operands);
7259 output_asm_insn ("fldcw\t%2", operands);
/* NOTE(review): fragmentary numbered listing; leading integers are
   original line numbers and interior lines are missing.  */
7264 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7265 should be used. UNORDERED_P is true when fucom should be used. */
7268 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
7271 rtx cmp_op0, cmp_op1;
7272 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
7276 cmp_op0 = operands[0];
7277 cmp_op1 = operands[1];
7281 cmp_op0 = operands[1];
7282 cmp_op1 = operands[2];
/* SSE compares use [u]comiss/[u]comisd directly.  */
7287 if (GET_MODE (operands[0]) == SFmode)
7289 return "ucomiss\t{%1, %0|%0, %1}";
7291 return "comiss\t{%1, %0|%0, %1}";
7294 return "ucomisd\t{%1, %0|%0, %1}";
7296 return "comisd\t{%1, %0|%0, %1}";
7299 if (! STACK_TOP_P (cmp_op0))
7302 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against zero: ftst, popping st(0) afterwards if it dies.  */
7304 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
7308 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
7309 return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
7312 return "ftst\n\tfnstsw\t%0";
7315 if (STACK_REG_P (cmp_op1)
7317 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7318 && REGNO (cmp_op1) != FIRST_STACK_REG)
7320 /* If both the top of the 387 stack dies, and the other operand
7321 is also a stack register that dies, then this must be a
7322 `fcompp' float compare */
7326 /* There is no double popping fcomi variant. Fortunately,
7327 eflags is immune from the fstp's cc clobbering. */
7329 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7331 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7332 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
7337 return "fucompp\n\tfnstsw\t%0";
7339 return "fcompp\n\tfnstsw\t%0";
7344 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7346 static const char * const alt[16] =
7348 "fcom%z2\t%y2\n\tfnstsw\t%0",
7349 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7350 "fucom%z2\t%y2\n\tfnstsw\t%0",
7351 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7353 "ficom%z2\t%y2\n\tfnstsw\t%0",
7354 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7358 "fcomi\t{%y1, %0|%0, %y1}",
7359 "fcomip\t{%y1, %0|%0, %y1}",
7360 "fucomi\t{%y1, %0|%0, %y1}",
7361 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit index documented at line 7344 above.  */
7372 mask = eflags_p << 3;
7373 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
7374 mask |= unordered_p << 1;
7375 mask |= stack_top_dies;
/* NOTE(review): fragmentary numbered listing.  Emits one jump-table
   entry "<directive>L<value>"; uses ASM_QUAD under a condition not
   visible in this excerpt (presumably 64-bit) — TODO confirm.  */
7388 ix86_output_addr_vec_elt (FILE *file, int value)
7390 const char *directive = ASM_LONG;
7395 directive = ASM_QUAD;
7401 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* NOTE(review): fragmentary numbered listing.  Emits one relative
   jump-table entry; format depends on GOTOFF support, Mach-O, or the
   generic GOT-symbol fallback.  */
7405 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
7408 fprintf (file, "%s%s%d-%s%d\n",
7409 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7410 else if (HAVE_AS_GOTOFF_IN_DATA)
7411 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7413 else if (TARGET_MACHO)
7415 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
7416 machopic_output_function_base_name (file);
7417 fprintf(file, "\n");
7421 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7422 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
/* NOTE(review): fragmentary numbered listing; leading integers are
   original line numbers and interior lines are missing.  */
7425 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7429 ix86_expand_clear (rtx dest)
7433 /* We play register width games, which are only valid after reload. */
7434 if (!reload_completed)
7437 /* Avoid HImode and its attendant prefix byte. */
7438 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7439 dest = gen_rtx_REG (SImode, REGNO (dest));
7441 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7443 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7444 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
/* xor clobbers the flags register (hard reg 17), so wrap in PARALLEL.  */
7446 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7447 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
/* NOTE(review): fragmentary numbered listing; interior lines missing.  */
7453 /* X is an unchanging MEM. If it is a constant pool reference, return
7454 the constant pool rtx, else NULL. */
7457 maybe_get_pool_constant (rtx x)
/* Undo PIC legitimization first so the raw SYMBOL_REF is visible.  */
7459 x = ix86_delegitimize_address (XEXP (x, 0));
7461 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7462 return get_pool_constant (x);
/* NOTE(review): fragmentary numbered listing — the leading integers are
   original i386.c line numbers and intervening source lines are missing,
   so this fragment is not compilable as-is.  */
7468 ix86_expand_move (enum machine_mode mode, rtx operands[])
7470 int strict = (reload_in_progress || reload_completed)
7472 enum tls_model model;
/* TLS symbols must be legitimized per their access model.  */
7477 model = GET_CODE (op1) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (op1) : 0;
7480 op1 = legitimize_tls_address (op1, model, true);
7481 op1 = force_operand (op1, op0);
/* PIC: symbolic sources need indirection (Mach-O path visible here).  */
7486 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7491 rtx temp = ((reload_in_progress
7492 || ((op0 && GET_CODE (op0) == REG)
7494 ? op0 : gen_reg_rtx (Pmode));
7495 op1 = machopic_indirect_data_reference (op1, temp);
7496 op1 = machopic_legitimize_pic_address (op1, mode,
7497 temp == op1 ? 0 : temp);
7499 else if (MACHOPIC_INDIRECT)
7500 op1 = machopic_indirect_data_reference (op1, 0);
7504 if (GET_CODE (op0) == MEM)
7505 op1 = force_reg (Pmode, op1);
7507 op1 = legitimize_address (op1, op1, Pmode);
7508 #endif /* TARGET_MACHO */
/* No mem-to-mem moves on x86 (except push); force source to a reg.  */
7512 if (GET_CODE (op0) == MEM
7513 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7514 || !push_operand (op0, mode))
7515 && GET_CODE (op1) == MEM)
7516 op1 = force_reg (mode, op1);
7518 if (push_operand (op0, mode)
7519 && ! general_no_elim_operand (op1, mode))
7520 op1 = copy_to_mode_reg (mode, op1);
7522 /* Force large constants in 64bit compilation into register
7523 to get them CSEed. */
7524 if (TARGET_64BIT && mode == DImode
7525 && immediate_operand (op1, mode)
7526 && !x86_64_zext_immediate_operand (op1, VOIDmode)
7527 && !register_operand (op0, mode)
7528 && optimize && !reload_completed && !reload_in_progress)
7529 op1 = copy_to_mode_reg (mode, op1);
7531 if (FLOAT_MODE_P (mode))
7533 /* If we are loading a floating point constant to a register,
7534 force the value to memory now, since we'll get better code
7535 out the back end. */
7539 else if (GET_CODE (op1) == CONST_DOUBLE)
7541 op1 = validize_mem (force_const_mem (mode, op1));
7542 if (!register_operand (op0, mode))
7544 rtx temp = gen_reg_rtx (mode);
7545 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
7546 emit_move_insn (op0, temp);
7553 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7557 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
/* Expand a vector-mode move, forcing awkward constants to the constant
   pool and avoiding memory-to-memory moves.  */
7559 /* Force constants other than zero into memory. We do not know how
7560 the instructions used to build constants modify the upper 64 bits
7561 of the register, once we have that information we may be able
7562 to handle some of them more efficiently. */
7563 if ((reload_in_progress | reload_completed) == 0
7564 && register_operand (operands[0], mode)
7565 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
7566 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
7568 /* Make operand1 a register if it isn't already. */
/* NOTE(review): the guard's first condition is on an elided line;
   visibly this triggers when neither operand is a register.  */
7570 && !register_operand (operands[0], mode)
7571 && !register_operand (operands[1], mode))
7573 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
7574 emit_move_insn (operands[0], temp);
7578 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
7581 /* Attempt to expand a binary operator. Make the expansion closer to the
7582 actual machine, then just general_operand, which will allow 3 separate
7583 memory references (one output, two input) in a single insn. */
7586 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
/* operands[0] = operands[1] CODE operands[2]; rewrite the sources so the
   two-address x86 constraints (dst matches src1, at most one memory
   operand) can be satisfied, then emit the SET.  */
7589 int matching_memory;
7590 rtx src1, src2, dst, op, clob;
7596 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7597 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7598 && (rtx_equal_p (dst, src2)
7599 || immediate_operand (src1, mode)))
7606 /* If the destination is memory, and we do not have matching source
7607 operands, do things in registers. */
/* matching_memory: 0 = none, 1 = dst matches src1, 2 = dst matches src2
   (the latter only valid for commutative codes).  */
7608 matching_memory = 0;
7609 if (GET_CODE (dst) == MEM)
7611 if (rtx_equal_p (dst, src1))
7612 matching_memory = 1;
7613 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7614 && rtx_equal_p (dst, src2))
7615 matching_memory = 2;
7617 dst = gen_reg_rtx (mode);
7620 /* Both source operands cannot be in memory. */
7621 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
/* Keep whichever source matches the memory destination; force the
   other into a register.  */
7623 if (matching_memory != 2)
7624 src2 = force_reg (mode, src2);
7626 src1 = force_reg (mode, src1);
7629 /* If the operation is not commutable, source 1 cannot be a constant
7630 or non-matching memory. */
7631 if ((CONSTANT_P (src1)
7632 || (!matching_memory && GET_CODE (src1) == MEM))
7633 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7634 src1 = force_reg (mode, src1);
7636 /* If optimizing, copy to regs to improve CSE */
7637 if (optimize && ! no_new_pseudos)
7639 if (GET_CODE (dst) == MEM)
7640 dst = gen_reg_rtx (mode);
7641 if (GET_CODE (src1) == MEM)
7642 src1 = force_reg (mode, src1);
7643 if (GET_CODE (src2) == MEM)
7644 src2 = force_reg (mode, src2);
7647 /* Emit the instruction. */
7649 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7650 if (reload_in_progress)
7652 /* Reload doesn't know about the flags register, and doesn't know that
7653 it doesn't want to clobber it. We can only do this with PLUS. */
/* Normal case: arithmetic insns clobber the flags, so pair the SET
   with a CLOBBER of FLAGS_REG in a PARALLEL.  */
7660 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7661 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7664 /* Fix up the destination if needed. */
7665 if (dst != operands[0])
7666 emit_move_insn (operands[0], dst);
7669 /* Return TRUE or FALSE depending on whether the binary operator meets the
7670 appropriate constraints. */
7673 ix86_binary_operator_ok (enum rtx_code code,
7674 enum machine_mode mode ATTRIBUTE_UNUSED,
/* Checks mirror the fixups in ix86_expand_binary_operator: an operand
   combination is OK only if no rewriting would be required.  */
7677 /* Both source operands cannot be in memory. */
7678 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7680 /* If the operation is not commutable, source 1 cannot be a constant. */
7681 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7683 /* If the destination is memory, we must have a matching source operand. */
7684 if (GET_CODE (operands[0]) == MEM
7685 && ! (rtx_equal_p (operands[0], operands[1])
7686 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7687 && rtx_equal_p (operands[0], operands[2]))))
7689 /* If the operation is not commutable and the source 1 is memory, we must
7690 have a matching destination. */
7691 if (GET_CODE (operands[1]) == MEM
7692 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
7693 && ! rtx_equal_p (operands[0], operands[1]))
7698 /* Attempt to expand a unary operator. Make the expansion closer to the
7699 actual machine, then just general_operand, which will allow 2 separate
7700 memory references (one output, one input) in a single insn. */
7703 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
/* operands[0] = CODE operands[1]; same two-address massaging as the
   binary expander, but with a single source.  */
7706 int matching_memory;
7707 rtx src, dst, op, clob;
7712 /* If the destination is memory, and we do not have matching source
7713 operands, do things in registers. */
7714 matching_memory = 0;
7717 if (rtx_equal_p (dst, src))
7718 matching_memory = 1;
7720 dst = gen_reg_rtx (mode);
7723 /* When source operand is memory, destination must match. */
7724 if (MEM_P (src) && !matching_memory)
7725 src = force_reg (mode, src);
7727 /* If optimizing, copy to regs to improve CSE. */
7728 if (optimize && ! no_new_pseudos)
7730 if (GET_CODE (dst) == MEM)
7731 dst = gen_reg_rtx (mode);
7732 if (GET_CODE (src) == MEM)
7733 src = force_reg (mode, src);
7736 /* Emit the instruction. */
7738 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
/* NOT does not clobber flags on x86, hence it (and any insn emitted
   during reload) is emitted without the flags CLOBBER.  */
7739 if (reload_in_progress || code == NOT)
7741 /* Reload doesn't know about the flags register, and doesn't know that
7742 it doesn't want to clobber it. */
7749 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7750 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7753 /* Fix up the destination if needed. */
7754 if (dst != operands[0])
7755 emit_move_insn (operands[0], dst);
7758 /* Return TRUE or FALSE depending on whether the unary operator meets the
7759 appropriate constraints. */
7762 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
7763 enum machine_mode mode ATTRIBUTE_UNUSED,
7764 rtx operands[2] ATTRIBUTE_UNUSED)
/* Two-address form: a memory operand is only allowed when source and
   destination are the same location.  */
7766 /* If one of operands is memory, source and destination must match. */
7767 if ((GET_CODE (operands[0]) == MEM
7768 || GET_CODE (operands[1]) == MEM)
7769 && ! rtx_equal_p (operands[0], operands[1]))
7774 /* Generate code for floating point ABS or NEG. */
7777 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
/* Emit operands[0] = ABS/NEG operands[1].  With SSE math the operation
   becomes a bitwise AND/XOR against a sign-bit mask; with x87 the mask
   is unused but the same PARALLEL shape is kept for the splitter.  */
7780 rtx mask, set, use, clob, dst, src;
7781 bool matching_memory;
7782 bool use_sse = false;
7784 if (TARGET_SSE_MATH)
7788 else if (mode == DFmode && TARGET_SSE2)
7792 /* NEG and ABS performed with SSE use bitwise mask operations.
7793 Create the appropriate mask now. */
7796 HOST_WIDE_INT hi, lo;
7799 /* Find the sign bit, sign extended to 2*HWI. */
/* Three cases: 32-bit sign bit fits in lo; 64-bit sign bit fits in lo
   when HOST_WIDE_INT is 64-bit; otherwise it lands in hi.  */
7801 lo = 0x80000000, hi = lo < 0;
7802 else if (HOST_BITS_PER_WIDE_INT >= 64)
7803 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
7805 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
7807 /* If we're looking for the absolute value, then we want
7812 /* Force this value into the low part of a fp vector constant. */
7813 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
7814 mask = gen_lowpart (mode, mask);
/* Build a full-width vector constant with the mask in element 0 and
   zeros elsewhere, then load it into a register.  */
7816 mask = gen_rtx_CONST_VECTOR (V4SFmode,
7817 gen_rtvec (4, mask, CONST0_RTX (SFmode),
7818 CONST0_RTX (SFmode),
7819 CONST0_RTX (SFmode)));
7821 mask = gen_rtx_CONST_VECTOR (V2DFmode,
7822 gen_rtvec (2, mask, CONST0_RTX (DFmode)));
7823 mask = force_reg (GET_MODE (mask), mask);
7827 /* When not using SSE, we don't use the mask, but prefer to keep the
7828 same general form of the insn pattern to reduce duplication when
7829 it comes time to split. */
7836 /* If the destination is memory, and we don't have matching source
7837 operands, do things in registers. */
7838 matching_memory = false;
7841 if (rtx_equal_p (dst, src) && (!optimize || no_new_pseudos))
7842 matching_memory = true;
7844 dst = gen_reg_rtx (mode);
7846 if (MEM_P (src) && !matching_memory)
7847 src = force_reg (mode, src);
/* Emit (parallel [(set dst (code src)) (use mask) (clobber flags)]).  */
7849 set = gen_rtx_fmt_e (code, mode, src);
7850 set = gen_rtx_SET (VOIDmode, dst, set);
7851 use = gen_rtx_USE (VOIDmode, mask);
7852 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7853 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
7855 if (dst != operands[0])
7856 emit_move_insn (operands[0], dst);
7859 /* Return TRUE or FALSE depending on whether the first SET in INSN
7860 has source and destination with matching CC modes, and that the
7861 CC mode is at least as constrained as REQ_MODE. */
7864 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
7867 enum machine_mode set_mode;
/* Dig the first SET out of a possible PARALLEL; it must be a COMPARE
   feeding a CC register.  */
7869 set = PATTERN (insn);
7870 if (GET_CODE (set) == PARALLEL)
7871 set = XVECEXP (set, 0, 0);
7872 if (GET_CODE (set) != SET)
7874 if (GET_CODE (SET_SRC (set)) != COMPARE)
7877 set_mode = GET_MODE (SET_DEST (set));
/* The cases below accept SET_MODE when it is at least as strict as
   REQ_MODE (e.g. a CCNOmode requirement is met by CCmode against 0).  */
7881 if (req_mode != CCNOmode
7882 && (req_mode != CCmode
7883 || XEXP (SET_SRC (set), 1) != const0_rtx))
7887 if (req_mode == CCGCmode)
7891 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7895 if (req_mode == CCZmode)
7905 return (GET_MODE (SET_SRC (set)) == set_mode);
7908 /* Generate insn patterns to do an integer compare of OPERANDS. */
7911 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
7913 enum machine_mode cmpmode;
/* Pick the least-constrained CC mode for this comparison, emit the
   compare into the flags register, and hand back the condition.  */
7916 cmpmode = SELECT_CC_MODE (code, op0, op1);
7917 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7919 /* This is very simple, but making the interface the same as in the
7920 FP case makes the rest of the code easier. */
7921 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7922 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7924 /* Return the test that should be put into the flags user, i.e.
7925 the bcc, scc, or cmov instruction. */
7926 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7929 /* Figure out whether to use ordered or unordered fp comparisons.
7930 Return the appropriate mode to use. */
7933 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
7935 /* ??? In order to make all comparisons reversible, we do all comparisons
7936 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7937 all forms trapping and nontrapping comparisons, we can make inequality
7938 comparisons trapping again, since it results in better code when using
7939 FCOM based compares. */
/* CCFPUmode = unordered (non-trapping on NaN); CCFPmode = ordered.  */
7940 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7944 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
/* Select the least-constrained condition-code mode that can represent
   comparison CODE of OP0 with OP1; FP operands defer to
   ix86_fp_compare_mode.  Less-constrained CC modes give the optimizer
   more freedom to combine flag-setting instructions.  */
7946 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7947 return ix86_fp_compare_mode (code);
7950 /* Only zero flag is needed. */
7952 case NE: /* ZF!=0 */
7954 /* Codes needing carry flag. */
7955 case GEU: /* CF=0 */
7956 case GTU: /* CF=0 & ZF=0 */
7957 case LTU: /* CF=1 */
7958 case LEU: /* CF=1 | ZF=1 */
7960 /* Codes possibly doable only with sign flag when
7961 comparing against zero. */
7962 case GE: /* SF=OF or SF=0 */
7963 case LT: /* SF<>OF or SF=1 */
7964 if (op1 == const0_rtx)
7967 /* For other cases Carry flag is not required. */
7969 /* Codes doable only with sign flag when comparing
7970 against zero, but we miss jump instruction for it
7971 so we need to use relational tests against overflow
7972 that thus needs to be zero. */
7973 case GT: /* ZF=0 & SF=OF */
7974 case LE: /* ZF=1 | SF<>OF */
7975 if (op1 == const0_rtx)
7979 /* strcmp pattern do (use flags) and combine may ask us for proper
7988 /* Return the fixed registers used for condition codes. */
7991 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
7998 /* If two condition code modes are compatible, return a condition code
7999 mode which is compatible with both. Otherwise, return
8002 static enum machine_mode
8003 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
/* Non-CC modes have no common CC mode; trivially-equal modes are
   presumably handled on an elided line above.  */
8008 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
/* CCGCmode and CCGOCmode are mutually compatible (GOC is the weaker).  */
8011 if ((m1 == CCGCmode && m2 == CCGOCmode)
8012 || (m1 == CCGOCmode && m2 == CCGCmode))
8040 /* These are only compatible with themselves, which we already
8046 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8049 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
/* fcomi wins when it is the cheapest way to do either the comparison
   or its swapped form (the operands may be swapped later).  */
8051 enum rtx_code swapped_code = swap_condition (code);
8052 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8053 || (ix86_fp_comparison_cost (swapped_code)
8054 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8057 /* Swap, force into registers, or otherwise massage the two operands
8058 to a fp comparison. The operands are updated in place; the new
8059 comparison code is returned. */
8061 static enum rtx_code
8062 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8064 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8065 rtx op0 = *pop0, op1 = *pop1;
8066 enum machine_mode op_mode = GET_MODE (op0);
8067 int is_sse = SSE_REG_P (op0) || SSE_REG_P (op1);
8069 /* All of the unordered compare instructions only work on registers.
8070 The same is true of the fcomi compare instructions. The same is
8071 true of the XFmode compare instructions if not comparing with
8072 zero (ftst insn is used in this case). */
8075 && (fpcmp_mode == CCFPUmode
8076 || (op_mode == XFmode
8077 && ! (standard_80387_constant_p (op0) == 1
8078 || standard_80387_constant_p (op1) == 1))
8079 || ix86_use_fcomi_compare (code)))
8081 op0 = force_reg (op_mode, op0);
8082 op1 = force_reg (op_mode, op1);
8086 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8087 things around if they appear profitable, otherwise force op0
/* standard_80387_constant_p == 0 means "not a loadable x87 constant";
   swap so the awkward operand ends up as op1.  */
8090 if (standard_80387_constant_p (op0) == 0
8091 || (GET_CODE (op0) == MEM
8092 && ! (standard_80387_constant_p (op1) == 0
8093 || GET_CODE (op1) == MEM)))
8096 tmp = op0, op0 = op1, op1 = tmp;
8097 code = swap_condition (code);
8100 if (GET_CODE (op0) != REG)
8101 op0 = force_reg (op_mode, op0);
8103 if (CONSTANT_P (op1))
/* Constants that x87 cannot load directly go to the constant pool.  */
8105 int tmp = standard_80387_constant_p (op1);
8107 op1 = validize_mem (force_const_mem (op_mode, op1));
8111 op1 = force_reg (op_mode, op1);
8114 op1 = force_reg (op_mode, op1);
8118 /* Try to rearrange the comparison to make it cheaper. */
8119 if (ix86_fp_comparison_cost (code)
8120 > ix86_fp_comparison_cost (swap_condition (code))
8121 && (GET_CODE (op1) == REG || !no_new_pseudos))
8124 tmp = op0, op0 = op1, op1 = tmp;
8125 code = swap_condition (code);
8126 if (GET_CODE (op0) != REG)
8127 op0 = force_reg (op_mode, op0);
8135 /* Convert comparison codes we use to represent FP comparison to integer
8136 code that will result in proper branch. Return UNKNOWN if no such code
8140 ix86_fp_compare_code_to_integer (enum rtx_code code)
8169 /* Split comparison code CODE into comparisons we can do using branch
8170 instructions. BYPASS_CODE is comparison code for branch that will
8171 branch around FIRST_CODE and SECOND_CODE. If some of branches
8172 is not required, set value to UNKNOWN.
8173 We never require more than two branches. */
8176 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8177 enum rtx_code *first_code,
8178 enum rtx_code *second_code)
8181 *bypass_code = UNKNOWN;
8182 *second_code = UNKNOWN;
8184 /* The fcomi comparison sets flags as follows:
/* Codes in this first group map directly onto a single fcomi-style
   flag test and need no extra branch.  */
8194 case GT: /* GTU - CF=0 & ZF=0 */
8195 case GE: /* GEU - CF=0 */
8196 case ORDERED: /* PF=0 */
8197 case UNORDERED: /* PF=1 */
8198 case UNEQ: /* EQ - ZF=1 */
8199 case UNLT: /* LTU - CF=1 */
8200 case UNLE: /* LEU - CF=1 | ZF=1 */
8201 case LTGT: /* EQ - ZF=0 */
/* The ordered codes below mis-fire on unordered (NaN) inputs, so an
   extra UNORDERED test either bypasses or supplements the main test.  */
8203 case LT: /* LTU - CF=1 - fails on unordered */
8205 *bypass_code = UNORDERED;
8207 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8209 *bypass_code = UNORDERED;
8211 case EQ: /* EQ - ZF=1 - fails on unordered */
8213 *bypass_code = UNORDERED;
8215 case NE: /* NE - ZF=0 - fails on unordered */
8217 *second_code = UNORDERED;
8219 case UNGE: /* GEU - CF=0 - fails on unordered */
8221 *second_code = UNORDERED;
8223 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8225 *second_code = UNORDERED;
/* Without IEEE conformance NaNs need not be honored, so the extra
   branches are dropped.  */
8230 if (!TARGET_IEEE_FP)
8232 *second_code = UNKNOWN;
8233 *bypass_code = UNKNOWN;
8237 /* Return cost of comparison done fcom + arithmetics operations on AX.
8238 All following functions do use number of instructions as a cost metrics.
8239 In future this should be tweaked to compute bytes for optimize_size and
8240 take into account performance of various instructions on various CPUs. */
8242 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
/* Non-IEEE mode needs no NaN handling, so the sequence is shorter
   (value returned on an elided line).  */
8244 if (!TARGET_IEEE_FP)
8246 /* The cost of code output by ix86_expand_fp_compare. */
8274 /* Return cost of comparison done using fcomi operation.
8275 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8277 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
8279 enum rtx_code bypass_code, first_code, second_code;
8280 /* Return arbitrarily high cost when instruction is not supported - this
8281 prevents gcc from using it. */
/* Base cost 2 (fcomi + jump), plus 1 if an extra bypass/second branch
   is required for NaN handling.  */
8284 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8285 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
8288 /* Return cost of comparison done using sahf operation.
8289 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8291 ix86_fp_comparison_sahf_cost (enum rtx_code code)
8293 enum rtx_code bypass_code, first_code, second_code;
8294 /* Return arbitrarily high cost when instruction is not preferred - this
8295 avoids gcc from using it. */
8296 if (!TARGET_USE_SAHF && !optimize_size)
/* Base cost 3 (fnstsw + sahf + jump), plus 1 for any extra NaN branch.  */
8298 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8299 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
8302 /* Compute cost of the comparison done using any method.
8303 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8305 ix86_fp_comparison_cost (enum rtx_code code)
/* Minimum over the three available strategies: fcomi, fnstsw+sahf,
   and fnstsw+arithmetic on AX.  */
8307 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8310 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8311 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8313 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8314 if (min > sahf_cost)
8316 if (min > fcomi_cost)
8321 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8324 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8325 rtx *second_test, rtx *bypass_test)
/* Emit the cheapest FP compare sequence (fcomi, fnstsw+sahf, or
   fnstsw + arithmetic on AH) and return the flags-user condition.
   *SECOND_TEST / *BYPASS_TEST receive auxiliary conditions when NaN
   handling needs a second or a bypass branch.  */
8327 enum machine_mode fpcmp_mode, intcmp_mode;
8329 int cost = ix86_fp_comparison_cost (code);
8330 enum rtx_code bypass_code, first_code, second_code;
8332 fpcmp_mode = ix86_fp_compare_mode (code);
8333 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8336 *second_test = NULL_RTX;
8338 *bypass_test = NULL_RTX;
8340 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8342 /* Do fcomi/sahf based test when profitable. */
/* Only usable when the caller supplied out-parameters for any extra
   tests this strategy needs.  */
8343 if ((bypass_code == UNKNOWN || bypass_test)
8344 && (second_code == UNKNOWN || second_test)
8345 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8349 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8350 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* sahf path: fnstsw into a HImode scratch, then sahf copies AH into
   the flags.  */
8356 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8357 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8359 scratch = gen_reg_rtx (HImode);
8360 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8361 emit_insn (gen_x86_sahf_1 (scratch));
8364 /* The FP codes work out to act like unsigned. */
8365 intcmp_mode = fpcmp_mode;
8367 if (bypass_code != UNKNOWN)
8368 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8369 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8371 if (second_code != UNKNOWN)
8372 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8373 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8378 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8379 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8380 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8382 scratch = gen_reg_rtx (HImode);
8383 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8385 /* In the unordered case, we have to check C2 for NaN's, which
8386 doesn't happen to work out to anything nice combination-wise.
8387 So do some bit twiddling on the value we've got in AH to come
8388 up with an appropriate set of condition codes. */
/* The masks below select x87 status bits in AH: 0x45 = C0|C2|C3,
   0x05 = C0|C2, 0x44 = C2|C3, 0x40 = C3, 0x04 = C2, 0x01 = C0.  */
8390 intcmp_mode = CCNOmode;
8395 if (code == GT || !TARGET_IEEE_FP)
8397 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8402 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8403 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8404 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8405 intcmp_mode = CCmode;
8411 if (code == LT && TARGET_IEEE_FP)
8413 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8414 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8415 intcmp_mode = CCmode;
8420 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8426 if (code == GE || !TARGET_IEEE_FP)
8428 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8433 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8434 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8441 if (code == LE && TARGET_IEEE_FP)
8443 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8444 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8445 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8446 intcmp_mode = CCmode;
8451 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8457 if (code == EQ && TARGET_IEEE_FP)
8459 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8460 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8461 intcmp_mode = CCmode;
8466 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8473 if (code == NE && TARGET_IEEE_FP)
8475 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8476 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8482 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8488 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8492 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8501 /* Return the test that should be put into the flags user, i.e.
8502 the bcc, scc, or cmov instruction. */
8503 return gen_rtx_fmt_ee (code, VOIDmode,
8504 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8509 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
/* Expand a comparison of the global ix86_compare_op0/op1 pair and
   return the flags-user condition; dispatches to the FP or integer
   expander based on the operand mode.  */
8512 op0 = ix86_compare_op0;
8513 op1 = ix86_compare_op1;
8516 *second_test = NULL_RTX;
8518 *bypass_test = NULL_RTX;
8520 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8521 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8522 second_test, bypass_test);
8524 ret = ix86_expand_int_compare (code, op0, op1);
8529 /* Return true if the CODE will result in nontrivial jump sequence. */
8531 ix86_fp_jump_nontrivial_p (enum rtx_code code)
8533 enum rtx_code bypass_code, first_code, second_code;
/* Nontrivial means the comparison needs a bypass or second branch.  */
8536 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8537 return bypass_code != UNKNOWN || second_code != UNKNOWN;
8541 ix86_expand_branch (enum rtx_code code, rtx label)
/* Emit a conditional branch to LABEL testing CODE over the global
   ix86_compare_op0/op1.  Integer modes go straight through; FP modes
   may need compound patterns; DImode on 32-bit is split into word
   compares with up to three branches.  */
8545 switch (GET_MODE (ix86_compare_op0))
/* Simple integer case: one compare, one conditional jump.  */
8551 tmp = ix86_expand_compare (code, NULL, NULL);
8552 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8553 gen_rtx_LABEL_REF (VOIDmode, label),
8555 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8564 enum rtx_code bypass_code, first_code, second_code;
8566 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8569 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8571 /* Check whether we will use the natural sequence with one jump. If
8572 so, we can expand jump early. Otherwise delay expansion by
8573 creating compound insn to not confuse optimizers. */
8574 if (bypass_code == UNKNOWN && second_code == UNKNOWN
8577 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8578 gen_rtx_LABEL_REF (VOIDmode, label),
8579 pc_rtx, NULL_RTX, NULL_RTX);
/* Multi-branch FP case: wrap the whole conditional jump in a PARALLEL
   clobbering FP status (reg 18), flags (reg 17) and, without fcomi, a
   HImode scratch for fnstsw; a later splitter expands it.  */
8583 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8584 ix86_compare_op0, ix86_compare_op1);
8585 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8586 gen_rtx_LABEL_REF (VOIDmode, label),
8588 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8590 use_fcomi = ix86_use_fcomi_compare (code);
8591 vec = rtvec_alloc (3 + !use_fcomi);
8592 RTVEC_ELT (vec, 0) = tmp;
8594 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8596 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8599 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8601 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8609 /* Expand DImode branch into multiple compare+branch. */
8611 rtx lo[2], hi[2], label2;
8612 enum rtx_code code1, code2, code3;
/* Canonicalize so any constant is the second operand.  */
8614 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8616 tmp = ix86_compare_op0;
8617 ix86_compare_op0 = ix86_compare_op1;
8618 ix86_compare_op1 = tmp;
8619 code = swap_condition (code);
8621 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8622 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8624 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8625 avoid two branches. This costs one extra insn, so disable when
8626 optimizing for size. */
8628 if ((code == EQ || code == NE)
8630 || hi[1] == const0_rtx || lo[1] == const0_rtx))
/* XOR against nonzero halves only; OR the results and recurse on a
   single SImode compare against zero.  */
8635 if (hi[1] != const0_rtx)
8636 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8637 NULL_RTX, 0, OPTAB_WIDEN);
8640 if (lo[1] != const0_rtx)
8641 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8642 NULL_RTX, 0, OPTAB_WIDEN);
8644 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8645 NULL_RTX, 0, OPTAB_WIDEN);
8647 ix86_compare_op0 = tmp;
8648 ix86_compare_op1 = const0_rtx;
8649 ix86_expand_branch (code, label);
8653 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8654 op1 is a constant and the low word is zero, then we can just
8655 examine the high word. */
8657 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8660 case LT: case LTU: case GE: case GEU:
8661 ix86_compare_op0 = hi[0];
8662 ix86_compare_op1 = hi[1];
8663 ix86_expand_branch (code, label);
8669 /* Otherwise, we need two or three jumps. */
8671 label2 = gen_label_rtx ();
8674 code2 = swap_condition (code);
8675 code3 = unsigned_condition (code);
/* code1 branches to LABEL on the high words, code2 branches to the
   fallthrough label2, code3 decides on the low (unsigned) words.  */
8679 case LT: case GT: case LTU: case GTU:
8682 case LE: code1 = LT; code2 = GT; break;
8683 case GE: code1 = GT; code2 = LT; break;
8684 case LEU: code1 = LTU; code2 = GTU; break;
8685 case GEU: code1 = GTU; code2 = LTU; break;
8687 case EQ: code1 = UNKNOWN; code2 = NE; break;
8688 case NE: code2 = UNKNOWN; break;
8696 * if (hi(a) < hi(b)) goto true;
8697 * if (hi(a) > hi(b)) goto false;
8698 * if (lo(a) < lo(b)) goto true;
8702 ix86_compare_op0 = hi[0];
8703 ix86_compare_op1 = hi[1];
8705 if (code1 != UNKNOWN)
8706 ix86_expand_branch (code1, label);
8707 if (code2 != UNKNOWN)
8708 ix86_expand_branch (code2, label2);
8710 ix86_compare_op0 = lo[0];
8711 ix86_compare_op1 = lo[1];
8712 ix86_expand_branch (code3, label);
8714 if (code2 != UNKNOWN)
8715 emit_label (label2);
8724 /* Split branch based on floating point condition. */
8726 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
8727 rtx target1, rtx target2, rtx tmp, rtx pushed)
/* Emit the (up to three) jump insns for an FP comparison of OP1/OP2,
   branching to TARGET1 when true and TARGET2 when false, attaching
   REG_BR_PROB notes when a split probability is known.  PUSHED, if
   set, is a stack temporary to release afterwards.  */
8730 rtx label = NULL_RTX;
8732 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so the false target is pc_rtx (fallthrough), reversing
   the condition if needed.  */
8735 if (target2 != pc_rtx)
8738 code = reverse_condition_maybe_unordered (code);
8743 condition = ix86_expand_fp_compare (code, op1, op2,
8744 tmp, &second, &bypass);
8746 /* Remove pushed operand from stack. */
8748 ix86_free_from_memory (GET_MODE (pushed));
8750 if (split_branch_probability >= 0)
8752 /* Distribute the probabilities across the jumps.
8753 Assume the BYPASS and SECOND to be always test
8755 probability = split_branch_probability;
8757 /* Value of 1 is low enough to make no need for probability
8758 to be updated. Later we may run some experiments and see
8759 if unordered values are more frequent in practice. */
8761 bypass_probability = 1;
8763 second_probability = 1;
/* Bypass jump: skip over the main test to a local label (taken on
   unordered inputs for which the main test would mis-fire).  */
8765 if (bypass != NULL_RTX)
8767 label = gen_label_rtx ();
8768 i = emit_jump_insn (gen_rtx_SET
8770 gen_rtx_IF_THEN_ELSE (VOIDmode,
8772 gen_rtx_LABEL_REF (VOIDmode,
8775 if (bypass_probability >= 0)
8777 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8778 GEN_INT (bypass_probability),
/* Main conditional jump.  */
8781 i = emit_jump_insn (gen_rtx_SET
8783 gen_rtx_IF_THEN_ELSE (VOIDmode,
8784 condition, target1, target2)));
8785 if (probability >= 0)
8787 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8788 GEN_INT (probability),
/* Supplementary jump for codes needing a second test (e.g. NE with
   IEEE NaN handling).  */
8790 if (second != NULL_RTX)
8792 i = emit_jump_insn (gen_rtx_SET
8794 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8796 if (second_probability >= 0)
8798 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8799 GEN_INT (second_probability),
8802 if (label != NULL_RTX)
8807 ix86_expand_setcc (enum rtx_code code, rtx dest)
/* Expand a setcc of CODE over ix86_compare_op0/op1 into QImode DEST.
   Returns 1 on success, 0 to make the caller FAIL the pattern.  */
8809 rtx ret, tmp, tmpreg, equiv;
8810 rtx second_test, bypass_test;
/* DImode compares on 32-bit have no single flags result to setcc.  */
8812 if (GET_MODE (ix86_compare_op0) == DImode
8814 return 0; /* FAIL */
8816 if (GET_MODE (dest) != QImode)
8819 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8820 PUT_MODE (ret, QImode);
8825 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
/* With an auxiliary NaN test, setcc both conditions and combine them:
   AND for a bypass test (reversed), OR for a second test.  */
8826 if (bypass_test || second_test)
8828 rtx test = second_test;
8830 rtx tmp2 = gen_reg_rtx (QImode);
8837 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8839 PUT_MODE (test, QImode);
8840 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8843 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8845 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8848 /* Attach a REG_EQUAL note describing the comparison result. */
8849 equiv = simplify_gen_relational (code, QImode,
8850 GET_MODE (ix86_compare_op0),
8851 ix86_compare_op0, ix86_compare_op1);
8852 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
8854 return 1; /* DONE */
8857 /* Expand comparison setting or clearing carry flag. Return true when
8858 successful and set pop for the operation. */
8860 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
8862 enum machine_mode mode =
8863 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
8865 /* Do not handle DImode compares that go trought special path. Also we can't
8866 deal with FP compares yet. This is possible to add. */
8867 if ((mode == DImode && !TARGET_64BIT))
8869 if (FLOAT_MODE_P (mode))
8871 rtx second_test = NULL, bypass_test = NULL;
8872 rtx compare_op, compare_seq;
8874 /* Shortcut: following common codes never translate into carry flag compares. */
8875 if (code == EQ || code == NE || code == UNEQ || code == LTGT
8876 || code == ORDERED || code == UNORDERED)
8879 /* These comparisons require zero flag; swap operands so they won't. */
8880 if ((code == GT || code == UNLE || code == LE || code == UNGT)
8886 code = swap_condition (code);
8889 /* Try to expand the comparison and verify that we end up with carry flag
8890 based comparison. This is fails to be true only when we decide to expand
8891 comparison using arithmetic that is not too common scenario. */
8893 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8894 &second_test, &bypass_test);
8895 compare_seq = get_insns ();
/* Reject any expansion needing auxiliary tests or a non-carry code.  */
8898 if (second_test || bypass_test)
8900 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
8901 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
8902 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
8904 code = GET_CODE (compare_op);
8905 if (code != LTU && code != GEU)
8907 emit_insn (compare_seq);
8911 if (!INTEGRAL_MODE_P (mode))
/* The integer rewrites below turn signed/equality codes into the
   carry-based LTU/GEU forms by adjusting the constant operand.  */
8919 /* Convert a==0 into (unsigned)a<1. */
8922 if (op1 != const0_rtx)
8925 code = (code == EQ ? LTU : GEU);
8928 /* Convert a>b into b<a or a>=b-1. */
8931 if (GET_CODE (op1) == CONST_INT)
8933 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
8934 /* Bail out on overflow. We still can swap operands but that
8935 would force loading of the constant into register. */
8936 if (op1 == const0_rtx
8937 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
8939 code = (code == GTU ? GEU : LTU);
8946 code = (code == GTU ? LTU : GEU);
8950 /* Convert a>=0 into (unsigned)a<0x80000000. */
8953 if (mode == DImode || op1 != const0_rtx)
8955 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
8956 code = (code == LT ? GEU : LTU);
8960 if (mode == DImode || op1 != constm1_rtx)
8962 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
8963 code = (code == LE ? GEU : LTU);
8969 /* Swapping operands may cause constant to appear as first operand. */
8970 if (!nonimmediate_operand (op0, VOIDmode))
8974 op0 = force_reg (mode, op0);
8976 ix86_compare_op0 = op0;
8977 ix86_compare_op1 = op1;
8978 *pop = ix86_expand_compare (code, NULL, NULL);
8979 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
/* Expand an integer conditional move.  Returns 1 ("DONE") when the whole
   expansion was emitted here, 0 ("FAIL") when the caller must fall back
   to generic code.  Reads the global comparison operands
   ix86_compare_op0/ix86_compare_op1.  Strategies visible below: sbb-based
   constant selection, setcc+lea, setcc+and/add, constant+mask recursion,
   and finally a real cmov (IF_THEN_ELSE).  (Excerpt: interior lines are
   elided, so some branches are documented on visible evidence only.)  */
8985 ix86_expand_int_movcc (rtx operands[])
8987 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8988 rtx compare_seq, compare_op;
8989 rtx second_test, bypass_test;
8990 enum machine_mode mode = GET_MODE (operands[0]);
/* NOTE(review): stray double semicolon below -- harmless but worth
   cleaning up at the next code change.  */
8991 bool sign_bit_compare_p = false;;
8994 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8995 compare_seq = get_insns ();
8998 compare_code = GET_CODE (compare_op);
/* x >= 0 / x < 0 (and the constm1 GT/LE forms) test only the sign bit.  */
9000 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9001 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9002 sign_bit_compare_p = true;
9004 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9005 HImode insns, we'd be swallowed in word prefix ops. */
9007 if ((mode != HImode || TARGET_FAST_PREFIX)
9008 && (mode != DImode || TARGET_64BIT)
9009 && GET_CODE (operands[2]) == CONST_INT
9010 && GET_CODE (operands[3]) == CONST_INT)
9012 rtx out = operands[0];
9013 HOST_WIDE_INT ct = INTVAL (operands[2]);
9014 HOST_WIDE_INT cf = INTVAL (operands[3]);
9018 /* Sign bit compares are better done using shifts than we do by using
9020 if (sign_bit_compare_p
9021 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9022 ix86_compare_op1, &compare_op))
9024 /* Detect overlap between destination and compare sources. */
9027 if (!sign_bit_compare_p)
9031 compare_code = GET_CODE (compare_op);
9033 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9034 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9037 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9040 /* To simplify rest of code, restrict to the GEU case. */
9041 if (compare_code == LTU)
9043 HOST_WIDE_INT tmp = ct;
9046 compare_code = reverse_condition (compare_code);
9047 code = reverse_condition (code);
/* Mutating compare_op in place: unordered-aware reversal for FP CC
   modes, plain reversal otherwise.  */
9052 PUT_CODE (compare_op,
9053 reverse_condition_maybe_unordered
9054 (GET_CODE (compare_op)));
9056 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9060 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9061 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9062 tmp = gen_reg_rtx (mode);
/* Materialize 0/-1 from the carry flag (sbb reg,reg idiom).  */
9065 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9067 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9071 if (code == GT || code == GE)
9072 code = reverse_condition (code);
9075 HOST_WIDE_INT tmp = ct;
9080 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9081 ix86_compare_op1, VOIDmode, 0, -1);
9094 tmp = expand_simple_binop (mode, PLUS,
9096 copy_rtx (tmp), 1, OPTAB_DIRECT);
9107 tmp = expand_simple_binop (mode, IOR,
9109 copy_rtx (tmp), 1, OPTAB_DIRECT);
9111 else if (diff == -1 && ct)
9121 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9123 tmp = expand_simple_binop (mode, PLUS,
9124 copy_rtx (tmp), GEN_INT (cf),
9125 copy_rtx (tmp), 1, OPTAB_DIRECT);
9133 * andl cf - ct, dest
/* General two-constant case: mask = (cond ? -1 : 0), then
   dest = (mask & (cf - ct)) + ct.  */
9143 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9146 tmp = expand_simple_binop (mode, AND,
9148 gen_int_mode (cf - ct, mode),
9149 copy_rtx (tmp), 1, OPTAB_DIRECT);
9151 tmp = expand_simple_binop (mode, PLUS,
9152 copy_rtx (tmp), GEN_INT (ct),
9153 copy_rtx (tmp), 1, OPTAB_DIRECT);
9156 if (!rtx_equal_p (tmp, out))
9157 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9159 return 1; /* DONE */
9165 tmp = ct, ct = cf, cf = tmp;
9167 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9169 /* We may be reversing unordered compare to normal compare, that
9170 is not valid in general (we may convert non-trapping condition
9171 to trapping one), however on i386 we currently emit all
9172 comparisons unordered. */
9173 compare_code = reverse_condition_maybe_unordered (compare_code);
9174 code = reverse_condition_maybe_unordered (code);
9178 compare_code = reverse_condition (compare_code);
9179 code = reverse_condition (code);
9183 compare_code = UNKNOWN;
9184 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9185 && GET_CODE (ix86_compare_op1) == CONST_INT)
9187 if (ix86_compare_op1 == const0_rtx
9188 && (code == LT || code == GE))
9189 compare_code = code;
9190 else if (ix86_compare_op1 == constm1_rtx)
9194 else if (code == GT)
9199 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9200 if (compare_code != UNKNOWN
9201 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9202 && (cf == -1 || ct == -1))
9204 /* If lea code below could be used, only optimize
9205 if it results in a 2 insn sequence. */
9207 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9208 || diff == 3 || diff == 5 || diff == 9)
9209 || (compare_code == LT && ct == -1)
9210 || (compare_code == GE && cf == -1))
9213 * notl op1 (if necessary)
9221 code = reverse_condition (code);
9224 out = emit_store_flag (out, code, ix86_compare_op0,
9225 ix86_compare_op1, VOIDmode, 0, -1);
9227 out = expand_simple_binop (mode, IOR,
9229 out, 1, OPTAB_DIRECT);
9230 if (out != operands[0])
9231 emit_move_insn (operands[0], out);
9233 return 1; /* DONE */
/* setcc + lea path: diff = ct - cf must match a lea scale/index form.  */
9238 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9239 || diff == 3 || diff == 5 || diff == 9)
9240 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9242 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
9248 * lea cf(dest*(ct-cf)),dest
9252 * This also catches the degenerate setcc-only case.
9258 out = emit_store_flag (out, code, ix86_compare_op0,
9259 ix86_compare_op1, VOIDmode, 0, 1);
9262 /* On x86_64 the lea instruction operates on Pmode, so we need
9263 to get arithmetics done in proper mode to match. */
9265 tmp = copy_rtx (out);
9269 out1 = copy_rtx (out);
9270 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9274 tmp = gen_rtx_PLUS (mode, tmp, out1);
9280 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9283 if (!rtx_equal_p (tmp, out))
9286 out = force_operand (tmp, copy_rtx (out));
9288 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9290 if (!rtx_equal_p (out, operands[0]))
9291 emit_move_insn (operands[0], copy_rtx (out));
9293 return 1; /* DONE */
9297 * General case: Jumpful:
9298 * xorl dest,dest cmpl op1, op2
9299 * cmpl op1, op2 movl ct, dest
9301 * decl dest movl cf, dest
9302 * andl (cf-ct),dest 1:
9307 * This is reasonably steep, but branch mispredict costs are
9308 * high on modern cpus, so consider failing only if optimizing
9312 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9313 && BRANCH_COST >= 2)
9319 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9320 /* We may be reversing unordered compare to normal compare,
9321 that is not valid in general (we may convert non-trapping
9322 condition to trapping one), however on i386 we currently
9323 emit all comparisons unordered. */
9324 code = reverse_condition_maybe_unordered (code);
9327 code = reverse_condition (code);
9328 if (compare_code != UNKNOWN)
9329 compare_code = reverse_condition (compare_code);
9333 if (compare_code != UNKNOWN)
9335 /* notl op1 (if needed)
9340 For x < 0 (resp. x <= -1) there will be no notl,
9341 so if possible swap the constants to get rid of the
9343 True/false will be -1/0 while code below (store flag
9344 followed by decrement) is 0/-1, so the constants need
9345 to be exchanged once more. */
9347 if (compare_code == GE || !cf)
9349 code = reverse_condition (code);
9354 HOST_WIDE_INT tmp = cf;
9359 out = emit_store_flag (out, code, ix86_compare_op0,
9360 ix86_compare_op1, VOIDmode, 0, -1);
9364 out = emit_store_flag (out, code, ix86_compare_op0,
9365 ix86_compare_op1, VOIDmode, 0, 1);
9367 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9368 copy_rtx (out), 1, OPTAB_DIRECT);
9371 out = expand_simple_binop (mode, AND, copy_rtx (out),
9372 gen_int_mode (cf - ct, mode),
9373 copy_rtx (out), 1, OPTAB_DIRECT);
9375 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9376 copy_rtx (out), 1, OPTAB_DIRECT);
9377 if (!rtx_equal_p (out, operands[0]))
9378 emit_move_insn (operands[0], copy_rtx (out));
9380 return 1; /* DONE */
9384 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9386 /* Try a few things more with specific constants and a variable. */
9389 rtx var, orig_out, out, tmp;
9391 if (BRANCH_COST <= 2)
9392 return 0; /* FAIL */
9394 /* If one of the two operands is an interesting constant, load a
9395 constant with the above and mask it in with a logical operation. */
9397 if (GET_CODE (operands[2]) == CONST_INT)
9400 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9401 operands[3] = constm1_rtx, op = and_optab;
9402 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9403 operands[3] = const0_rtx, op = ior_optab;
9405 return 0; /* FAIL */
9407 else if (GET_CODE (operands[3]) == CONST_INT)
9410 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9411 operands[2] = constm1_rtx, op = and_optab;
/* NOTE(review): the guard below tests operands[3] != const0_rtx but the
   statement assigns operands[2]; by symmetry with the CONST_INT
   operands[2] branch above it should almost certainly test
   operands[2] != const0_rtx (this asymmetry was fixed in later GCC) --
   verify against upstream history before changing.  */
9412 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
9413 operands[2] = const0_rtx, op = ior_optab;
9415 return 0; /* FAIL */
9418 return 0; /* FAIL */
9420 orig_out = operands[0];
9421 tmp = gen_reg_rtx (mode);
9424 /* Recurse to get the constant loaded. */
9425 if (ix86_expand_int_movcc (operands) == 0)
9426 return 0; /* FAIL */
9428 /* Mask in the interesting variable. */
9429 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9431 if (!rtx_equal_p (out, orig_out))
9432 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9434 return 1; /* DONE */
9438 * For comparison with above,
/* True cmov path: legitimize operands, break destination/source
   overlap when extra tests are required, then emit IF_THEN_ELSE sets
   (one per test when second_test/bypass_test exist).  */
9448 if (! nonimmediate_operand (operands[2], mode))
9449 operands[2] = force_reg (mode, operands[2]);
9450 if (! nonimmediate_operand (operands[3], mode))
9451 operands[3] = force_reg (mode, operands[3]);
9453 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9455 rtx tmp = gen_reg_rtx (mode);
9456 emit_move_insn (tmp, operands[3]);
9459 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9461 rtx tmp = gen_reg_rtx (mode);
9462 emit_move_insn (tmp, operands[2]);
9466 if (! register_operand (operands[2], VOIDmode)
9468 || ! register_operand (operands[3], VOIDmode)))
9469 operands[2] = force_reg (mode, operands[2]);
9472 && ! register_operand (operands[3], VOIDmode))
9473 operands[3] = force_reg (mode, operands[3]);
9475 emit_insn (compare_seq);
9476 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9477 gen_rtx_IF_THEN_ELSE (mode,
9478 compare_op, operands[2],
9481 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9482 gen_rtx_IF_THEN_ELSE (mode,
9484 copy_rtx (operands[3]),
9485 copy_rtx (operands[0]))));
9487 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9488 gen_rtx_IF_THEN_ELSE (mode,
9490 copy_rtx (operands[2]),
9491 copy_rtx (operands[0]))));
9493 return 1; /* DONE */
/* Expand a floating-point conditional move.  Prefers SSE min/max or the
   sse_movsfcc/sse_movdfcc patterns when the comparison and destination
   modes line up; otherwise synthesizes an fcmov, first flattening the
   condition through an integer setcc when fcmov cannot represent it
   directly.  Reads ix86_compare_op0/ix86_compare_op1.  (Excerpt: some
   interior lines are elided.)  */
9497 ix86_expand_fp_movcc (rtx operands[])
9501 rtx compare_op, second_test, bypass_test;
9503 /* For SF/DFmode conditional moves based on comparisons
9504 in same mode, we may want to use SSE min/max instructions. */
9505 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9506 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9507 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9508 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
9510 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9511 /* We may be called from the post-reload splitter. */
9512 && (!REG_P (operands[0])
9513 || SSE_REG_P (operands[0])
9514 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9516 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9517 code = GET_CODE (operands[1]);
9519 /* See if we have (cross) match between comparison operands and
9520 conditional move operands. */
9521 if (rtx_equal_p (operands[2], op1))
9526 code = reverse_condition_maybe_unordered (code);
9528 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9530 /* Check for min operation. */
9531 if (code == LT || code == UNLE)
9539 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9540 if (memory_operand (op0, VOIDmode))
9541 op0 = force_reg (GET_MODE (operands[0]), op0);
9542 if (GET_MODE (operands[0]) == SFmode)
9543 emit_insn (gen_minsf3 (operands[0], op0, op1));
9545 emit_insn (gen_mindf3 (operands[0], op0, op1));
9548 /* Check for max operation. */
9549 if (code == GT || code == UNGE)
9557 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9558 if (memory_operand (op0, VOIDmode))
9559 op0 = force_reg (GET_MODE (operands[0]), op0);
9560 if (GET_MODE (operands[0]) == SFmode)
9561 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9563 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9567 /* Manage condition to be sse_comparison_operator. In case we are
9568 in non-ieee mode, try to canonicalize the destination operand
9569 to be first in the comparison - this helps reload to avoid extra
9571 if (!sse_comparison_operator (operands[1], VOIDmode)
9572 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
/* Swap the global compare operands and the condition together so the
   overall comparison keeps its meaning.  */
9574 rtx tmp = ix86_compare_op0;
9575 ix86_compare_op0 = ix86_compare_op1;
9576 ix86_compare_op1 = tmp;
9577 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9578 VOIDmode, ix86_compare_op0,
9581 /* Similarly try to manage result to be first operand of conditional
9582 move. We also don't support the NE comparison on SSE, so try to
9584 if ((rtx_equal_p (operands[0], operands[3])
9585 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9586 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9588 rtx tmp = operands[2];
9589 operands[2] = operands[3];
9591 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9592 (GET_CODE (operands[1])),
9593 VOIDmode, ix86_compare_op0,
9596 if (GET_MODE (operands[0]) == SFmode)
9597 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9598 operands[2], operands[3],
9599 ix86_compare_op0, ix86_compare_op1));
9601 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9602 operands[2], operands[3],
9603 ix86_compare_op0, ix86_compare_op1));
9607 /* The floating point conditional move instructions don't directly
9608 support conditions resulting from a signed integer comparison. */
9610 code = GET_CODE (operands[1]);
9611 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9613 /* The floating point conditional move instructions don't directly
9614 support signed integer comparisons. */
9616 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9618 if (second_test != NULL || bypass_test != NULL)
/* Collapse the condition into a QImode 0/1 value and compare that
   against zero, which fcmov can handle.  */
9620 tmp = gen_reg_rtx (QImode);
9621 ix86_expand_setcc (code, tmp);
9623 ix86_compare_op0 = tmp;
9624 ix86_compare_op1 = const0_rtx;
9625 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9627 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9629 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9630 emit_move_insn (tmp, operands[3]);
9633 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9635 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9636 emit_move_insn (tmp, operands[2]);
9640 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9641 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9646 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9647 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9652 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9653 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9661 /* Expand conditional increment or decrement using adb/sbb instructions.
9662 The default case using setcc followed by the conditional move can be
9663 done by generic code. */
/* NOTE(review): "adb" above is a typo for "adc" (add-with-carry).  */
9665 ix86_expand_int_addcc (rtx operands[])
9667 enum rtx_code code = GET_CODE (operands[1]);
9669 rtx val = const0_rtx;
9671 enum machine_mode mode = GET_MODE (operands[0]);
/* Only +/-1 increments can be folded into adc/sbb with a zero addend.  */
9673 if (operands[3] != const1_rtx
9674 && operands[3] != constm1_rtx)
9676 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9677 ix86_compare_op1, &compare_op))
9679 code = GET_CODE (compare_op);
9681 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9682 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9685 code = ix86_fp_compare_code_to_integer (code);
/* In-place condition reversal: unordered-aware for FP CC modes.  */
9692 PUT_CODE (compare_op,
9693 reverse_condition_maybe_unordered
9694 (GET_CODE (compare_op)));
9696 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9698 PUT_MODE (compare_op, mode);
9700 /* Construct either adc or sbb insn. */
9701 if ((code == LTU) == (operands[3] == constm1_rtx))
9703 switch (GET_MODE (operands[0]))
9706 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
9709 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
9712 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
9715 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
9723 switch (GET_MODE (operands[0]))
9726 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
9729 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
9732 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
9735 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
9741 return 1; /* DONE */
9745 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9746 works for floating pointer parameters and nonoffsetable memories.
9747 For pushes, it returns just stack offsets; the values will be saved
9748 in the right order. Maximally three parts are generated. */
9751 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
/* Word count: 32-bit target splits into 4-byte parts (XFmode is forced
   to 3), 64-bit target into 8-byte parts.  */
9756 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
9758 size = (GET_MODE_SIZE (mode) + 4) / 8;
9760 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9762 if (size < 2 || size > 3)
9765 /* Optimize constant pool reference to immediates. This is used by fp
9766 moves, that force all constants to memory to allow combining. */
9767 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
9769 rtx tmp = maybe_get_pool_constant (operand);
9774 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9776 /* The only non-offsetable memories we handle are pushes. */
9777 if (! push_operand (operand, VOIDmode))
9780 operand = copy_rtx (operand);
9781 PUT_MODE (operand, Pmode);
9782 parts[0] = parts[1] = parts[2] = operand;
9784 else if (!TARGET_64BIT)
9787 split_di (&operand, 1, &parts[0], &parts[1]);
9790 if (REG_P (operand))
/* Hard registers are split into consecutive regnos; only valid
   before reload completes, per the check below.  */
9792 if (!reload_completed)
9794 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9795 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9797 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9799 else if (offsettable_memref_p (operand))
9801 operand = adjust_address (operand, SImode, 0);
9803 parts[1] = adjust_address (operand, SImode, 4);
9805 parts[2] = adjust_address (operand, SImode, 8);
9807 else if (GET_CODE (operand) == CONST_DOUBLE)
9812 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9816 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9817 parts[2] = gen_int_mode (l[2], SImode);
9820 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9825 parts[1] = gen_int_mode (l[1], SImode);
9826 parts[0] = gen_int_mode (l[0], SImode);
9835 split_ti (&operand, 1, &parts[0], &parts[1]);
9836 if (mode == XFmode || mode == TFmode)
9838 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
9839 if (REG_P (operand))
9841 if (!reload_completed)
9843 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9844 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
9846 else if (offsettable_memref_p (operand))
9848 operand = adjust_address (operand, DImode, 0);
9850 parts[1] = adjust_address (operand, upper_mode, 8);
9852 else if (GET_CODE (operand) == CONST_DOUBLE)
9857 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9858 real_to_target (l, &r, mode);
9860 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9861 if (HOST_BITS_PER_WIDE_INT >= 64)
/* "(2 << 31) - 1" is the low-32-bit mask; "(x << 31) << 1" is a
   shift by 32 written to dodge the shift-count warning noted above.  */
9864 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9865 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9868 parts[0] = immed_double_const (l[0], l[1], DImode);
9870 if (upper_mode == SImode)
9871 parts[1] = gen_int_mode (l[2], SImode);
9872 else if (HOST_BITS_PER_WIDE_INT >= 64)
9875 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
9876 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
9879 parts[1] = immed_double_const (l[2], l[3], DImode);
9889 /* Emit insns to perform a move or push of DI, DF, and XF values.
9890 Return false when normal moves are needed; true when all required
9891 insns have been emitted. Operands 2-4 contain the input values
9892 int the correct order; operands 5-7 contain the output values. */
9895 ix86_split_long_move (rtx operands[])
9901 enum machine_mode mode = GET_MODE (operands[0]);
9903 /* The DFmode expanders may ask us to move double.
9904 For 64bit target this is single move. By hiding the fact
9905 here we simplify i386.md splitters. */
9906 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9908 /* Optimize constant pool reference to immediates. This is used by
9909 fp moves, that force all constants to memory to allow combining. */
9911 if (GET_CODE (operands[1]) == MEM
9912 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9913 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9914 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9915 if (push_operand (operands[0], VOIDmode))
9917 operands[0] = copy_rtx (operands[0]);
9918 PUT_MODE (operands[0], Pmode);
9921 operands[0] = gen_lowpart (DImode, operands[0]);
9922 operands[1] = gen_lowpart (DImode, operands[1]);
9923 emit_move_insn (operands[0], operands[1]);
9927 /* The only non-offsettable memory we handle is push. */
9928 if (push_operand (operands[0], VOIDmode))
9930 else if (GET_CODE (operands[0]) == MEM
9931 && ! offsettable_memref_p (operands[0]))
9934 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9935 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9937 /* When emitting push, take care for source operands on the stack. */
9938 if (push && GET_CODE (operands[1]) == MEM
9939 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
/* Each pushed part moves the stack pointer, so rebase later source
   parts on the address of the part pushed after them.  */
9942 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9943 XEXP (part[1][2], 0));
9944 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9945 XEXP (part[1][1], 0));
9948 /* We need to do copy in the right order in case an address register
9949 of the source overlaps the destination. */
9950 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9952 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9954 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9957 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9960 /* Collision in the middle part can be handled by reordering. */
9961 if (collisions == 1 && nparts == 3
9962 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9965 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9966 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9969 /* If there are more collisions, we can't handle it by reordering.
9970 Do an lea to the last part and use only one colliding move. */
9971 else if (collisions > 1)
9977 base = part[0][nparts - 1];
9979 /* Handle the case when the last part isn't valid for lea.
9980 Happens in 64-bit mode storing the 12-byte XFmode. */
9981 if (GET_MODE (base) != Pmode)
9982 base = gen_rtx_REG (Pmode, REGNO (base));
9984 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
9985 part[1][0] = replace_equiv_address (part[1][0], base);
9986 part[1][1] = replace_equiv_address (part[1][1],
9987 plus_constant (base, UNITS_PER_WORD));
9989 part[1][2] = replace_equiv_address (part[1][2],
9990 plus_constant (base, 8));
/* Push path: XFmode occupies 12 bytes but is pushed in 16, so pad
   the stack by 4 before the highest part goes out.  */
10000 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10001 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10002 emit_move_insn (part[0][2], part[1][2]);
10007 /* In 64bit mode we don't have 32bit push available. In case this is
10008 register, it is OK - we will just use larger counterpart. We also
10009 retype memory - these comes from attempt to avoid REX prefix on
10010 moving of second half of TFmode value. */
10011 if (GET_MODE (part[1][1]) == SImode)
10013 if (GET_CODE (part[1][1]) == MEM)
10014 part[1][1] = adjust_address (part[1][1], DImode, 0);
10015 else if (REG_P (part[1][1]))
10016 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10019 if (GET_MODE (part[1][0]) == SImode)
10020 part[1][0] = part[1][1];
10023 emit_move_insn (part[0][1], part[1][1]);
10024 emit_move_insn (part[0][0], part[1][0]);
10028 /* Choose correct order to not overwrite the source before it is copied. */
10029 if ((REG_P (part[0][0])
10030 && REG_P (part[1][1])
10031 && (REGNO (part[0][0]) == REGNO (part[1][1])
10033 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10035 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Copy high part first (operands 2..4 / 5..7 are consumed by the
   i386.md splitter in this order).  */
10039 operands[2] = part[0][2];
10040 operands[3] = part[0][1];
10041 operands[4] = part[0][0];
10042 operands[5] = part[1][2];
10043 operands[6] = part[1][1];
10044 operands[7] = part[1][0];
10048 operands[2] = part[0][1];
10049 operands[3] = part[0][0];
10050 operands[5] = part[1][1];
10051 operands[6] = part[1][0];
10058 operands[2] = part[0][0];
10059 operands[3] = part[0][1];
10060 operands[4] = part[0][2];
10061 operands[5] = part[1][0];
10062 operands[6] = part[1][1];
10063 operands[7] = part[1][2];
10067 operands[2] = part[0][0];
10068 operands[3] = part[0][1];
10069 operands[5] = part[1][0];
10070 operands[6] = part[1][1];
10074 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
10077 if (GET_CODE (operands[5]) == CONST_INT
10078 && operands[5] != const0_rtx
10079 && REG_P (operands[2]))
10081 if (GET_CODE (operands[6]) == CONST_INT
10082 && INTVAL (operands[6]) == INTVAL (operands[5]))
10083 operands[6] = operands[2]
10086 && GET_CODE (operands[7]) == CONST_INT
10087 && INTVAL (operands[7]) == INTVAL (operands[5]))
10088 operands[7] = operands[2];
10092 && GET_CODE (operands[6]) == CONST_INT
10093 && operands[6] != const0_rtx
10094 && REG_P (operands[3])
10095 && GET_CODE (operands[7]) == CONST_INT
10096 && INTVAL (operands[7]) == INTVAL (operands[6]))
10097 operands[7] = operands[3];
10100 emit_move_insn (operands[2], operands[5]);
10101 emit_move_insn (operands[3], operands[6]);
10103 emit_move_insn (operands[4], operands[7]);
10108 /* Helper function of ix86_split_ashldi used to generate an SImode
10109 left shift by a constant, either using a single shift or
10110 a sequence of add instructions. */
10113 ix86_expand_ashlsi3_const (rtx operand, int count)
/* (Excerpt: the guard for the single-add case is elided here;
   presumably count == 1 -- confirm against the full source.)  */
10116 emit_insn (gen_addsi3 (operand, operand, operand));
10117 else if (!optimize_size
/* A chain of adds (each doubling) is used when its total cost beats
   one shift-by-constant per the active cost table.  */
10118 && count * ix86_cost->add <= ix86_cost->shift_const)
10121 for (i=0; i<count; i++)
10122 emit_insn (gen_addsi3 (operand, operand, operand));
10125 emit_insn (gen_ashlsi3 (operand, operand, GEN_INT (count)));
/* Split a DImode left shift into SImode operations.  operands[2] is the
   shift count (constant counts are handled specially; variable counts
   use shld plus a runtime >=32 adjustment); SCRATCH is an optional
   spare register used with cmov for the adjustment.  */
10129 ix86_split_ashldi (rtx *operands, rtx scratch)
10131 rtx low[2], high[2];
10134 if (GET_CODE (operands[2]) == CONST_INT)
10136 split_di (operands, 2, low, high);
/* Only the low 6 bits of the count are significant for a 64-bit shift.  */
10137 count = INTVAL (operands[2]) & 63;
/* count >= 32: low word shifts entirely into the high word.  */
10141 emit_move_insn (high[0], low[1]);
10142 emit_move_insn (low[0], const0_rtx);
10145 ix86_expand_ashlsi3_const (high[0], count - 32);
/* count < 32: shld feeds low bits into high, then shift low.  */
10149 if (!rtx_equal_p (operands[0], operands[1]))
10150 emit_move_insn (operands[0], operands[1]);
10151 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10152 ix86_expand_ashlsi3_const (low[0], count);
10157 split_di (operands, 1, low, high);
10159 if (operands[1] == const1_rtx)
10161 /* Assuming we've chosen a QImode capable registers, then 1LL << N
10162 can be done with two 32-bit shifts, no branches, no cmoves. */
10163 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
10165 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
10167 ix86_expand_clear (low[0]);
10168 ix86_expand_clear (high[0]);
10169 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
/* setcc bit 5 of the count into exactly one of low/high, giving
   0/1 in the right half before the final variable shifts.  */
10171 d = gen_lowpart (QImode, low[0]);
10172 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10173 s = gen_rtx_EQ (QImode, flags, const0_rtx);
10174 emit_insn (gen_rtx_SET (VOIDmode, d, s));
10176 d = gen_lowpart (QImode, high[0]);
10177 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10178 s = gen_rtx_NE (QImode, flags, const0_rtx);
10179 emit_insn (gen_rtx_SET (VOIDmode, d, s));
10182 /* Otherwise, we can get the same results by manually performing
10183 a bit extract operation on bit 5, and then performing the two
10184 shifts. The two methods of getting 0/1 into low/high are exactly
10185 the same size. Avoiding the shift in the bit extract case helps
10186 pentium4 a bit; no one else seems to care much either way. */
10191 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
10192 x = gen_rtx_ZERO_EXTEND (SImode, operands[2]);
10194 x = gen_lowpart (SImode, operands[2]);
10195 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
10197 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (5)));
10198 emit_insn (gen_andsi3 (high[0], high[0], GEN_INT (1)));
10199 emit_move_insn (low[0], high[0]);
10200 emit_insn (gen_xorsi3 (low[0], low[0], GEN_INT (1)));
10203 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10204 emit_insn (gen_ashlsi3 (high[0], high[0], operands[2]));
10208 if (operands[1] == constm1_rtx)
10210 /* For -1LL << N, we can avoid the shld instruction, because we
10211 know that we're shifting 0...31 ones into a -1. */
10212 emit_move_insn (low[0], constm1_rtx)
10214 emit_move_insn (high[0], low[0]);
10216 emit_move_insn (high[0], constm1_rtx);
/* General variable-count path: shld + shift, then fix up for
   counts >= 32 (cmov via scratch, or the branchy adj pattern).  */
10220 if (!rtx_equal_p (operands[0], operands[1]))
10221 emit_move_insn (operands[0], operands[1]);
10223 split_di (operands, 1, low, high);
10224 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10227 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10229 if (TARGET_CMOVE && scratch)
10231 ix86_expand_clear (scratch);
10232 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
10235 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a DImode arithmetic right shift into SImode operations.
   operands[2] is the shift count; the high word is sign-filled with an
   ashr by 31 where needed.  SCRATCH is an optional spare register used
   with cmov for the variable-count >=32 adjustment.  */
10239 ix86_split_ashrdi (rtx *operands, rtx scratch)
10241 rtx low[2], high[2];
10244 if (GET_CODE (operands[2]) == CONST_INT)
10246 split_di (operands, 2, low, high);
/* Only the low 6 bits of the count matter for a 64-bit shift.  */
10247 count = INTVAL (operands[2]) & 63;
/* (Excerpt: the count == 63 special case guard is elided.)  Both
   halves become pure sign masks.  */
10251 emit_move_insn (high[0], high[1]);
10252 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10253 emit_move_insn (low[0], high[0]);
10256 else if (count >= 32)
/* Low result is the old high word shifted; high result is the sign.  */
10258 emit_move_insn (low[0], high[1]);
10259 emit_move_insn (high[0], low[0]);
10260 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10262 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
/* count < 32: shrd feeds high bits into low, then ashr the high word.  */
10266 if (!rtx_equal_p (operands[0], operands[1]))
10267 emit_move_insn (operands[0], operands[1]);
10268 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10269 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd + ashr, then runtime fixup for counts >= 32.  */
10274 if (!rtx_equal_p (operands[0], operands[1]))
10275 emit_move_insn (operands[0], operands[1]);
10277 split_di (operands, 1, low, high);
10279 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10280 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10282 if (TARGET_CMOVE && scratch)
/* scratch holds the sign mask to cmov into high when count >= 32.  */
10284 emit_move_insn (scratch, high[0]);
10285 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10286 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10290 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Expand a DImode logical right shift on 32-bit x86 as a pair of
   SImode operations.  OPERANDS[0]/[1] are the destination/source,
   OPERANDS[2] is the shift count; SCRATCH, if non-null, enables the
   branch-free cmove adjustment.
   NOTE(review): interleaved lines are elided in this chunk (embedded
   line numbers jump); some guards/braces are not visible.  */
10295 ix86_split_lshrdi (rtx *operands, rtx scratch)
10297 rtx low[2], high[2];
/* Compile-time constant count.  */
10300 if (GET_CODE (operands[2]) == CONST_INT)
10302 split_di (operands, 2, low, high);
/* Only the low 6 bits of a 64-bit shift count matter.  */
10303 count = INTVAL (operands[2]) & 63;
/* Counts >= 32 (guard elided): low half takes the old high half,
   high half becomes zero.  */
10307 emit_move_insn (low[0], high[1]);
10308 ix86_expand_clear (high[0]);
/* Remaining (count - 32) bits, skipped when it would be zero.  */
10311 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
/* Counts 1..31: shrd feeds high-half bits into the low half.  */
10315 if (!rtx_equal_p (operands[0], operands[1]))
10316 emit_move_insn (operands[0], operands[1]);
10317 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10318 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count: emit shrd/shr, then adjust at run time for
   counts >= 32.  */
10323 if (!rtx_equal_p (operands[0], operands[1]))
10324 emit_move_insn (operands[0], operands[1]);
10326 split_di (operands, 1, low, high);
10328 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10329 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10331 /* Heh. By reversing the arguments, we can reuse this pattern. */
10332 if (TARGET_CMOVE && scratch)
10334 ix86_expand_clear (scratch);
10335 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
/* Fallback (else branch elided): branching adjustment pattern.  */
10339 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10343 /* Helper function for the string operations below. Dest VARIABLE whether
10344 it is aligned to VALUE bytes. If true, jump to the label. */
/* (Clarified: tests VARIABLE's low bits against VALUE with an AND in
   VARIABLE's own mode; emits a conditional jump to a fresh label taken
   when the masked bits are zero, and returns that label so the caller
   can place it after the unaligned-case code.)  */
10346 ix86_expand_aligntest (rtx variable, int value)
10348 rtx label = gen_label_rtx ();
10349 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
/* Pick the AND pattern matching VARIABLE's mode (DImode on x86-64).  */
10350 if (GET_MODE (variable) == DImode)
10351 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10353 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10354 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10359 /* Adjust COUNTER by the VALUE. */
/* (Emits COUNTREG -= VALUE in COUNTREG's own mode; used by the string
   expanders to consume bytes already copied/cleared.)  */
10361 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10363 if (GET_MODE (countreg) == DImode)
10364 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10366 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10369 /* Zero extend possibly SImode EXP to Pmode register. */
10371 ix86_zero_extend_to_Pmode (rtx exp)
/* VOIDmode means EXP is a constant; just force it into a Pmode reg.  */
10374 if (GET_MODE (exp) == VOIDmode)
10375 return force_reg (Pmode, exp);
/* Already word-sized: copy into a fresh pseudo.  */
10376 if (GET_MODE (exp) == Pmode)
10377 return copy_to_mode_reg (Pmode, exp);
/* Otherwise EXP is SImode on a 64-bit target: zero-extend it.  */
10378 r = gen_reg_rtx (Pmode);
10379 emit_insn (gen_zero_extendsidi2 (r, exp));
10383 /* Expand string move (memcpy) operation. Use i386 string operations when
10384 profitable. expand_clrmem contains similar code. */
/* DST/SRC are BLKmode MEMs, COUNT_EXP the byte count, ALIGN_EXP the
   known alignment.  Returns nonzero on success, zero to fall back to a
   library call (return statements elided in this chunk).
   NOTE(review): embedded line numbers jump — braces, else-arms and some
   conditions are missing from this view; structural comments below are
   partly inferred.  */
10386 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
10388 rtx srcreg, destreg, countreg, srcexp, destexp;
10389 enum machine_mode counter_mode;
10390 HOST_WIDE_INT align = 0;
10391 unsigned HOST_WIDE_INT count = 0;
10393 if (GET_CODE (align_exp) == CONST_INT)
10394 align = INTVAL (align_exp);
10396 /* Can't use any of this if the user has appropriated esi or edi. */
10397 if (global_regs[4] || global_regs[5])
10400 /* This simple hack avoids all inlining code and simplifies code below. */
10401 if (!TARGET_ALIGN_STRINGOPS)
10404 if (GET_CODE (count_exp) == CONST_INT)
10406 count = INTVAL (count_exp);
10407 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10411 /* Figure out proper mode for counter. For 32bits it is always SImode,
10412 for 64bits use SImode when possible, otherwise DImode.
10413 Set count to number of bytes copied when known at compile time. */
10415 || GET_MODE (count_exp) == SImode
10416 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
10417 counter_mode = SImode;
10419 counter_mode = DImode;
10421 if (counter_mode != SImode && counter_mode != DImode)
/* Force both addresses into registers so rep-string patterns can use
   them, and keep the MEMs' alias info via replace_equiv_address_nv.  */
10424 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10425 if (destreg != XEXP (dst, 0))
10426 dst = replace_equiv_address_nv (dst, destreg);
10427 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10428 if (srcreg != XEXP (src, 0))
10429 src = replace_equiv_address_nv (src, srcreg);
10431 /* When optimizing for size emit simple rep ; movsb instruction for
10432 counts not divisible by 4. */
10434 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10436 emit_insn (gen_cld ())
;
10437 countreg = ix86_zero_extend_to_Pmode (count_exp);
/* destexp/srcexp describe the final pointer values for the rep pattern
   (pointer + byte count).  */
10438 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
10439 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
10440 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
10444 /* For constant aligned (or small unaligned) copies use rep movsl
10445 followed by code copying the rest. For PentiumPro ensure 8 byte
10446 alignment to allow rep movsl acceleration. */
10448 else if (count != 0
10450 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10451 || optimize_size || count < (unsigned int) 64)
)
/* Word size: 8 bytes on 64-bit (unless -Os), else 4.  */
10453 unsigned HOST_WIDE_INT offset = 0;
10454 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10455 rtx srcmem, dstmem;
10457 emit_insn (gen_cld ());
/* Bulk part: rep mov of count / size words.  */
10458 if (count & ~(size - 1))
10460 countreg = copy_to_mode_reg (counter_mode,
10461 GEN_INT ((count >> (size == 4 ? 2 : 3))
10462 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10463 countreg = ix86_zero_extend_to_Pmode (countreg);
10465 destexp = gen_rtx_ASHIFT (Pmode, countreg,
10466 GEN_INT (size == 4 ? 2 : 3));
10467 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
10468 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10470 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
10471 countreg, destexp, srcexp));
10472 offset = count & ~(size - 1);
/* Tail: copy the remaining 4/2/1-byte pieces with single movs
   (guards for the 2- and 1-byte pieces are elided here).  */
10474 if (size == 8 && (count & 0x04))
10476 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
10478 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
10480 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10485 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
10487 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
10489 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10494 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
10496 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
10498 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10501 /* The generic code based on the glibc implementation:
10502 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10503 allowing accelerated copying there)
10504 - copy the data using rep movsl
10505 - copy the rest. */
10510 rtx srcmem, dstmem;
10511 int desired_alignment = (TARGET_PENTIUMPRO
10512 && (count == 0 || count >= (unsigned int) 260)
10513 ? 8 : UNITS_PER_WORD);
10514 /* Get rid of MEM_OFFSETs, they won't be accurate. */
10515 dst = change_address (dst, BLKmode, destreg);
10516 src = change_address (src, BLKmode, srcreg);
10518 /* In case we don't know anything about the alignment, default to
10519 library version, since it is usually equally fast and result in
10522 Also emit call when we know that the count is large and call overhead
10523 will not be important. */
10524 if (!TARGET_INLINE_ALL_STRINGOPS
10525 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10528 if (TARGET_SINGLE_STRINGOP)
10529 emit_insn (gen_cld ());
10531 countreg2 = gen_reg_rtx (Pmode);
10532 countreg = copy_to_mode_reg (counter_mode, count_exp);
10534 /* We don't use loops to align destination and to copy parts smaller
10535 than 4 bytes, because gcc is able to optimize such code better (in
10536 the case the destination or the count really is aligned, gcc is often
10537 able to predict the branches) and also it is friendlier to the
10538 hardware branch prediction.
10540 Using loops is beneficial for generic case, because we can
10541 handle small counts using the loops. Many CPUs (such as Athlon)
10542 have large REP prefix setup costs.
10544 This is quite costly. Maybe we can revisit this decision later or
10545 add some customizability to this code. */
/* If the runtime count may be smaller than the alignment prologue
   handles, skip the whole prologue via LABEL.  */
10547 if (count == 0 && align < desired_alignment)
10549 label = gen_label_rtx ();
10550 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10551 LEU, 0, counter_mode, 1, label);
/* Alignment prologue: copy 1, then 2, then 4 bytes as needed so the
   destination reaches desired_alignment (guards partly elided).  */
10555 rtx label = ix86_expand_aligntest (destreg, 1);
10556 srcmem = change_address (src, QImode, srcreg);
10557 dstmem = change_address (dst, QImode, destreg);
10558 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10559 ix86_adjust_counter (countreg, 1);
10560 emit_label (label);
10561 LABEL_NUSES (label) = 1;
10565 rtx label = ix86_expand_aligntest (destreg, 2);
10566 srcmem = change_address (src, HImode, srcreg);
10567 dstmem = change_address (dst, HImode, destreg);
10568 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10569 ix86_adjust_counter (countreg, 2);
10570 emit_label (label);
10571 LABEL_NUSES (label) = 1;
10573 if (align <= 4 && desired_alignment > 4)
10575 rtx label = ix86_expand_aligntest (destreg, 4);
10576 srcmem = change_address (src, SImode, srcreg);
10577 dstmem = change_address (dst, SImode, destreg);
10578 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10579 ix86_adjust_counter (countreg, 4);
10580 emit_label (label);
10581 LABEL_NUSES (label) = 1;
10584 if (label && desired_alignment > 4 && !TARGET_64BIT)
10586 emit_label (label);
10587 LABEL_NUSES (label) = 1;
10590 if (!TARGET_SINGLE_STRINGOP)
10591 emit_insn (gen_cld ());
/* Main body: rep mov with the count divided by the word size
   (64-bit path shifts by 3, 32-bit path by 2; the if/else lines are
   elided here).  */
10594 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10596 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
10600 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
10601 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
10603 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
10604 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10605 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
10606 countreg2, destexp, srcexp));
/* Landing point for the small-count skip around the prologue.  */
10610 emit_label (label);
10611 LABEL_NUSES (label) = 1;
/* Epilogue: copy any leftover 4/2/1 bytes, either unconditionally
   when the constant count says so, or behind runtime align tests.  */
10613 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10615 srcmem = change_address (src, SImode, srcreg);
10616 dstmem = change_address (dst, SImode, destreg);
10617 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10619 if ((align <= 4 || count == 0) && TARGET_64BIT)
10621 rtx label = ix86_expand_aligntest (countreg, 4);
10622 srcmem = change_address (src, SImode, srcreg);
10623 dstmem = change_address (dst, SImode, destreg);
10624 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10625 emit_label (label);
10626 LABEL_NUSES (label) = 1;
10628 if (align > 2 && count != 0 && (count & 2))
10630 srcmem = change_address (src, HImode, srcreg);
10631 dstmem = change_address (dst, HImode, destreg);
10632 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10634 if (align <= 2 || count == 0)
10636 rtx label = ix86_expand_aligntest (countreg, 2);
10637 srcmem = change_address (src, HImode, srcreg);
10638 dstmem = change_address (dst, HImode, destreg);
10639 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10640 emit_label (label);
10641 LABEL_NUSES (label) = 1;
10643 if (align > 1 && count != 0 && (count & 1))
10645 srcmem = change_address (src, QImode, srcreg);
10646 dstmem = change_address (dst, QImode, destreg);
10647 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10649 if (align <= 1 || count == 0)
10651 rtx label = ix86_expand_aligntest (countreg, 1);
10652 srcmem = change_address (src, QImode, srcreg);
10653 dstmem = change_address (dst, QImode, destreg);
10654 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10655 emit_label (label);
10656 LABEL_NUSES (label) = 1;
10663 /* Expand string clear operation (bzero). Use i386 string operations when
10664 profitable. expand_movmem contains similar code. */
/* DST is a BLKmode MEM, COUNT_EXP the byte count, ALIGN_EXP the known
   alignment.  Mirrors ix86_expand_movmem but stores zeros (rep stos)
   instead of copying.  Returns nonzero on success (returns elided in
   this chunk).
   NOTE(review): embedded line numbers jump — braces/else-arms and some
   guards are missing from this view; structural comments are partly
   inferred.  */
10666 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
10668 rtx destreg, zeroreg, countreg, destexp;
10669 enum machine_mode counter_mode;
10670 HOST_WIDE_INT align = 0;
10671 unsigned HOST_WIDE_INT count = 0;
10673 if (GET_CODE (align_exp) == CONST_INT)
10674 align = INTVAL (align_exp);
10676 /* Can't use any of this if the user has appropriated esi. */
10677 if (global_regs[4])
10680 /* This simple hack avoids all inlining code and simplifies code below. */
10681 if (!TARGET_ALIGN_STRINGOPS)
10684 if (GET_CODE (count_exp) == CONST_INT)
10686 count = INTVAL (count_exp);
10687 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10690 /* Figure out proper mode for counter. For 32bits it is always SImode,
10691 for 64bits use SImode when possible, otherwise DImode.
10692 Set count to number of bytes copied when known at compile time. */
10694 || GET_MODE (count_exp) == SImode
10695 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
10696 counter_mode = SImode;
10698 counter_mode = DImode;
10700 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10701 if (destreg != XEXP (dst, 0))
10702 dst = replace_equiv_address_nv (dst, destreg);
10705 /* When optimizing for size emit simple rep ; movsb instruction for
10706 counts not divisible by 4. The movl $N, %ecx; rep; stosb
10707 sequence is 7 bytes long, so if optimizing for size and count is
10708 small enough that some stosl, stosw and stosb instructions without
10709 rep are shorter, fall back into the next if. */
10711 if ((!optimize || optimize_size)
10714 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
10716 emit_insn (gen_cld ());
10718 countreg = ix86_zero_extend_to_Pmode (count_exp);
/* rep stosb needs the fill byte in AL (QImode zero here).  */
10719 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10720 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
10721 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
/* Constant-count path: rep stosl/stosq for the bulk, explicit stores
   for the tail.  */
10723 else if (count != 0
10725 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10726 || optimize_size || count < (unsigned int) 64))
10728 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10729 unsigned HOST_WIDE_INT offset = 0;
10731 emit_insn (gen_cld ());
10733 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10734 if (count & ~(size - 1))
10736 unsigned HOST_WIDE_INT repcount;
10737 unsigned int max_nonrep;
10739 repcount = count >> (size == 4 ? 2 : 3);
10741 repcount &= 0x3fffffff;
10743 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
10744 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
10745 bytes. In both cases the latter seems to be faster for small
10747 max_nonrep = size == 4 ? 7 : 4;
/* Per-CPU tuning of the rep-vs-unrolled threshold (switch body
   partly elided).  */
10748 if (!optimize_size)
10751 case PROCESSOR_PENTIUM4:
10752 case PROCESSOR_NOCONA:
/* Small constant counts: emit repcount individual stos insns.  */
10759 if (repcount <= max_nonrep)
10760 while (repcount-- > 0)
10762 rtx mem = adjust_automodify_address_nv (dst,
10763 GET_MODE (zeroreg),
10765 emit_insn (gen_strset (destreg, mem, zeroreg));
/* Otherwise use rep stos with the word count.  */
10770 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
10771 countreg = ix86_zero_extend_to_Pmode (countreg);
10772 destexp = gen_rtx_ASHIFT (Pmode, countreg,
10773 GEN_INT (size == 4 ? 2 : 3));
10774 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10775 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
10777 offset = count & ~(size - 1);
/* Tail stores of 4/2/1 bytes via SUBREGs of the zero register
   (guards for the 2- and 1-byte stores elided here).  */
10780 if (size == 8 && (count & 0x04))
10782 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
10784 emit_insn (gen_strset (destreg, mem,
10785 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10790 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
10792 emit_insn (gen_strset (destreg, mem,
10793 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10798 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
10800 emit_insn (gen_strset (destreg, mem,
10801 gen_rtx_SUBREG (QImode, zeroreg, 0)));
/* Generic path: align the destination, rep stos the bulk, then clear
   the remainder — same shape as the movmem generic path.  */
10808 /* Compute desired alignment of the string operation. */
10809 int desired_alignment = (TARGET_PENTIUMPRO
10810 && (count == 0 || count >= (unsigned int) 260)
10811 ? 8 : UNITS_PER_WORD);
10813 /* In case we don't know anything about the alignment, default to
10814 library version, since it is usually equally fast and result in
10817 Also emit call when we know that the count is large and call overhead
10818 will not be important. */
10819 if (!TARGET_INLINE_ALL_STRINGOPS
10820 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10823 if (TARGET_SINGLE_STRINGOP)
10824 emit_insn (gen_cld ());
10826 countreg2 = gen_reg_rtx (Pmode);
10827 countreg = copy_to_mode_reg (counter_mode, count_exp);
10828 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10829 /* Get rid of MEM_OFFSET, it won't be accurate. */
10830 dst = change_address (dst, BLKmode, destreg);
/* Skip the alignment prologue entirely for small runtime counts.  */
10832 if (count == 0 && align < desired_alignment)
10834 label = gen_label_rtx ();
10835 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10836 LEU, 0, counter_mode, 1, label);
/* Alignment prologue: store 1, 2, then 4 zero bytes as needed
   (some guards elided).  */
10840 rtx label = ix86_expand_aligntest (destreg, 1);
10841 emit_insn (gen_strset (destreg, dst,
10842 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10843 ix86_adjust_counter (countreg, 1);
10844 emit_label (label);
10845 LABEL_NUSES (label) = 1;
10849 rtx label = ix86_expand_aligntest (destreg, 2);
10850 emit_insn (gen_strset (destreg, dst,
10851 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10852 ix86_adjust_counter (countreg, 2);
10853 emit_label (label);
10854 LABEL_NUSES (label) = 1;
10856 if (align <= 4 && desired_alignment > 4)
10858 rtx label = ix86_expand_aligntest (destreg, 4);
10859 emit_insn (gen_strset (destreg, dst,
10861 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10863 ix86_adjust_counter (countreg, 4);
10864 emit_label (label);
10865 LABEL_NUSES (label) = 1;
10868 if (label && desired_alignment > 4 && !TARGET_64BIT)
10870 emit_label (label);
10871 LABEL_NUSES (label) = 1;
10875 if (!TARGET_SINGLE_STRINGOP)
10876 emit_insn (gen_cld ());
/* Main body: rep stos of count / word-size words (64-bit shifts by 3,
   32-bit by 2; the if/else lines are elided).  */
10879 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10881 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
10885 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
10886 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
10888 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10889 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
10893 emit_label (label);
10894 LABEL_NUSES (label) = 1;
/* Epilogue: clear the leftover 4/2/1 bytes, unconditionally when the
   constant count says so, otherwise behind runtime tests.  */
10897 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10898 emit_insn (gen_strset (destreg, dst,
10899 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10900 if (TARGET_64BIT && (align <= 4 || count == 0))
10902 rtx label = ix86_expand_aligntest (countreg, 4);
10903 emit_insn (gen_strset (destreg, dst,
10904 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10905 emit_label (label);
10906 LABEL_NUSES (label) = 1;
10908 if (align > 2 && count != 0 && (count & 2))
10909 emit_insn (gen_strset (destreg, dst,
10910 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10911 if (align <= 2 || count == 0)
10913 rtx label = ix86_expand_aligntest (countreg, 2);
10914 emit_insn (gen_strset (destreg, dst,
10915 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10916 emit_label (label);
10917 LABEL_NUSES (label) = 1;
10919 if (align > 1 && count != 0 && (count & 1))
10920 emit_insn (gen_strset (destreg, dst,
10921 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10922 if (align <= 1 || count == 0)
10924 rtx label = ix86_expand_aligntest (countreg, 1);
10925 emit_insn (gen_strset (destreg, dst,
10926 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10927 emit_label (label);
10928 LABEL_NUSES (label) = 1;
10934 /* Expand strlen. */
/* OUT receives the length; SRC is the string MEM; EOSCHAR is the
   terminator (const0_rtx for plain strlen); ALIGN is the known
   alignment.  Two strategies: the unrolled SImode scanner
   (ix86_expand_strlensi_unroll_1) when profitable, otherwise
   repnz scasb via the UNSPEC_SCAS pattern.
   NOTE(review): interleaved lines are elided here (line numbers jump);
   some guards/else-arms are not visible.  */
10936 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
10938 rtx addr, scratch1, scratch2, scratch3, scratch4;
10940 /* The generic case of strlen expander is long. Avoid it's
10941 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
10943 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10944 && !TARGET_INLINE_ALL_STRINGOPS
10946 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10949 addr = force_reg (Pmode, XEXP (src, 0));
10950 scratch1 = gen_reg_rtx (Pmode);
/* Unrolled path: only for NUL terminator at -O2+.  */
10952 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10955 /* Well it seems that some optimizer does not combine a call like
10956 foo(strlen(bar), strlen(bar));
10957 when the move and the subtraction is done here. It does calculate
10958 the length just once when these instructions are done inside of
10959 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10960 often used and I use one fewer register for the lifetime of
10961 output_strlen_unroll() this is better. */
10963 emit_move_insn (out, addr);
10965 ix86_expand_strlensi_unroll_1 (out, src, align);
10967 /* strlensi_unroll_1 returns the address of the zero at the end of
10968 the string, like memchr(), so compute the length by subtracting
10969 the start address. */
10971 emit_insn (gen_subdi3 (out, out, addr));
10973 emit_insn (gen_subsi3 (out, out, addr));
/* scasb path: scratch4 = -1 is the max count for repnz.  */
10978 scratch2 = gen_reg_rtx (Pmode);
10979 scratch3 = gen_reg_rtx (Pmode);
10980 scratch4 = force_reg (Pmode, constm1_rtx);
10982 emit_move_insn (scratch3, addr);
10983 eoschar = force_reg (QImode, eoschar);
10985 emit_insn (gen_cld ());
10986 src = replace_equiv_address_nv (src, scratch3);
10988 /* If .md starts supporting :P, this can be done in .md. */
10989 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
10990 scratch4), UNSPEC_SCAS);
10991 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
/* Length = ~count - 1 (scasb leaves the negated count+1);
   DImode on 64-bit, SImode otherwise (mode guard elided).  */
10994 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10995 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10999 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11000 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11006 /* Expand the appropriate insns for doing strlen if not just doing
11009 out = result, initialized with the start address
11010 align_rtx = alignment of the address.
11011 scratch = scratch register, initialized with the startaddress when
11012 not aligned, otherwise undefined
11014 This is just the body. It needs the initializations mentioned above and
11015 some address computing at the end. These things are done in i386.md. */
/* Scans the string 4 bytes at a time using the classic
   (x - 0x01010101) & ~x & 0x80808080 zero-byte trick, after a
   byte-at-a-time prologue that reaches 4-byte alignment.  On exit OUT
   points at the terminating zero byte.
   NOTE(review): this chunk is elided (line numbers jump); several
   guards and else-arms are not visible.  */
11018 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11022 rtx align_2_label = NULL_RTX;
11023 rtx align_3_label = NULL_RTX;
11024 rtx align_4_label = gen_label_rtx ();
11025 rtx end_0_label = gen_label_rtx ();
11027 rtx tmpreg = gen_reg_rtx (SImode);
11028 rtx scratch = gen_reg_rtx (SImode);
11032 if (GET_CODE (align_rtx) == CONST_INT)
11033 align = INTVAL (align_rtx);
11035 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11037 /* Is there a known alignment and is it less than 4? */
11040 rtx scratch1 = gen_reg_rtx (Pmode);
11041 emit_move_insn (scratch1, out);
11042 /* Is there a known alignment and is it not 2? */
11045 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11046 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11048 /* Leave just the 3 lower bits. */
11049 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11050 NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on (addr & 3): 0 -> aligned, 2 -> two bytes to check,
   3 (GTU 2) -> one byte, fall through -> three bytes.  */
11052 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11053 Pmode, 1, align_4_label);
11054 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
11055 Pmode, 1, align_2_label);
11056 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
11057 Pmode, 1, align_3_label);
11061 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11062 check if is aligned to 4 - byte. */
11064 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
11065 NULL_RTX, 0, OPTAB_WIDEN);
11067 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11068 Pmode, 1, align_4_label);
11071 mem = change_address (src, QImode, out);
11073 /* Now compare the bytes. */
11075 /* Compare the first n unaligned byte on a byte per byte basis. */
11076 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11077 QImode, 1, end_0_label);
11079 /* Increment the address. */
11081 emit_insn (gen_adddi3 (out, out, const1_rtx));
11083 emit_insn (gen_addsi3 (out, out, const1_rtx));
11085 /* Not needed with an alignment of 2 */
11088 emit_label (align_2_label);
11090 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11094 emit_insn (gen_adddi3 (out, out, const1_rtx));
11096 emit_insn (gen_addsi3 (out, out, const1_rtx));
11098 emit_label (align_3_label);
11101 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11105 emit_insn (gen_adddi3 (out, out, const1_rtx));
11107 emit_insn (gen_addsi3 (out, out, const1_rtx));
11110 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11111 align this loop. It gives only huge programs, but does not help to
11113 emit_label (align_4_label);
11115 mem = change_address (src, SImode, out);
11116 emit_move_insn (scratch, mem);
11118 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11120 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11122 /* This formula yields a nonzero result iff one of the bytes is zero.
11123 This saves three branches inside loop and many cycles. */
11125 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11126 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11127 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11128 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11129 gen_int_mode (0x80808080, SImode)));
11130 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* A zero byte was found: locate which of the 4 bytes it is.
   cmove-capable path (guard elided) does it branch-free.  */
11135 rtx reg = gen_reg_rtx (SImode);
11136 rtx reg2 = gen_reg_rtx (Pmode);
11137 emit_move_insn (reg, tmpreg);
11138 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11140 /* If zero is not in the first two bytes, move two bytes forward. */
11141 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11142 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11143 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11144 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11145 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11148 /* Emit lea manually to avoid clobbering of flags. */
/* NOTE(review): gen_rtx_SET is passed SImode here while the sibling
   SETs in this function use VOIDmode — looks suspicious; confirm
   against the full file before changing.  */
11149 emit_insn (gen_rtx_SET (SImode, reg2,
11150 gen_rtx_PLUS (Pmode, out, const2_rtx)));
11152 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11153 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11154 emit_insn (gen_rtx_SET (VOIDmode, out,
11155 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* Non-cmove path: explicit test and branch.  */
11162 rtx end_2_label = gen_label_rtx ();
11163 /* Is zero in the first two bytes? */
11165 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11166 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11167 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11168 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11169 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11171 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11172 JUMP_LABEL (tmp) = end_2_label;
11174 /* Not in the first two. Move two bytes forward. */
11175 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11177 emit_insn (gen_adddi3 (out, out, const2_rtx));
11179 emit_insn (gen_addsi3 (out, out, const2_rtx));
11181 emit_label (end_2_label);
11185 /* Avoid branch in fixing the byte. */
11186 tmpreg = gen_lowpart (QImode, tmpreg);
11187 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11188 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11190 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11192 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11194 emit_label (end_0_label);
/* Emit a call to FNADDR returning RETVAL (or NULL for void calls).
   CALLARG1 is the argument-size rtx, POP the number of bytes the
   callee pops (callee-pop conventions), SIBCALL nonzero for tail
   calls.  Builds the CALL/SET/PARALLEL rtl and attaches register
   uses (PIC register, AL for 64-bit varargs).
   NOTE(review): this chunk is elided (line numbers jump); abort/brace
   lines and some #if TARGET_MACHO guards are not visible.  */
11198 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11199 rtx callarg2 ATTRIBUTE_UNUSED,
11200 rtx pop, int sibcall)
11202 rtx use = NULL, call;
11204 if (pop == const0_rtx)
11206 if (TARGET_64BIT && pop)
/* Darwin indirection for PIC calls (the #if TARGET_MACHO guard is in
   the elided lines; see the #endif below).  */
11210 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11211 fnaddr = machopic_indirect_call_target (fnaddr);
11213 /* Static functions and indirect calls don't need the pic register. */
11214 if (! TARGET_64BIT && flag_pic
11215 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11216 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11217 use_reg (&use, pic_offset_table_rtx)
;
/* x86-64 varargs: AL carries the number of SSE registers used.  */
11219 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11221 rtx al = gen_rtx_REG (QImode, 0);
11222 emit_move_insn (al, callarg2);
11223 use_reg (&use, al);
11225 #endif /* TARGET_MACHO */
/* Force the address into a register when it is not directly callable.  */
11227 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11229 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11230 fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* 64-bit sibcalls through a register must use R11: it is the only
   call-clobbered register not used for argument passing.  */
11232 if (sibcall && TARGET_64BIT
11233 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11236 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11237 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11238 emit_move_insn (fnaddr, addr);
11239 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11242 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11244 call = gen_rtx_SET (VOIDmode, retval, call);
/* Callee-pop: record the stack-pointer adjustment in a PARALLEL.  */
11247 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11248 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11249 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11252 call = emit_call_insn (call);
11254 CALL_INSN_FUNCTION_USAGE (call) = use;
11258 /* Clear stack slot assignments remembered from previous functions.
11259 This is called from INIT_EXPANDERS once before RTL is emitted for each
11262 static struct machine_function *
11263 ix86_init_machine_status (void)
11265 struct machine_function *f;
/* ggc_alloc_cleared zero-fills, so all other fields start at 0/NULL.  */
11267 f = ggc_alloc_cleared (sizeof (struct machine_function));
/* -1 marks "not yet computed" for the fast prologue/epilogue choice.  */
11268 f->use_fast_prologue_epilogue_nregs = -1;
11273 /* Return a MEM corresponding to a stack slot with mode MODE.
11274 Allocate a new slot if necessary.
11276 The RTL for a function can have several slots available: N is
11277 which slot to use. */
11280 assign_386_stack_local (enum machine_mode mode, int n)
11282 struct stack_local_entry *s;
/* Out-of-range slot numbers are a compiler bug (handler elided).  */
11284 if (n < 0 || n >= MAX_386_STACK_LOCALS)
/* Reuse an already-allocated (mode, n) slot if one exists.  */
11287 for (s = ix86_stack_locals; s; s = s->next)
11288 if (s->mode == mode && s->n == n)
/* Otherwise allocate a new entry and prepend it to the per-function
   list (GC-allocated; lives as long as the function's RTL).  */
11291 s = (struct stack_local_entry *)
11292 ggc_alloc (sizeof (struct stack_local_entry));
11295 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11297 s->next = ix86_stack_locals;
11298 ix86_stack_locals = s;
11302 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11304 static GTY(()) rtx ix86_tls_symbol;
11306 ix86_tls_get_addr (void)
/* Lazily create and cache the symbol (GTY keeps it across GC).  */
11309 if (!ix86_tls_symbol)
/* 32-bit GNU TLS uses the triple-underscore entry point
   ___tls_get_addr; everything else uses __tls_get_addr.  */
11311 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11312 (TARGET_GNU_TLS && !TARGET_64BIT)
11313 ? "___tls_get_addr"
11314 : "__tls_get_addr");
11317 return ix86_tls_symbol;
11320 /* Calculate the length of the memory address in the instruction
11321 encoding. Does not include the one-byte modrm, opcode, or prefix. */
/* Returns the number of extra bytes (SIB + displacement) the address
   ADDR needs; used by the insn-length attributes.
   NOTE(review): several return/len statements are elided in this
   chunk (line numbers jump).  */
11324 memory_address_length (rtx addr)
11326 struct ix86_address parts;
11327 rtx base, index, disp;
/* Auto-modified addresses (push/pop style) need no extra bytes.  */
11330 if (GET_CODE (addr) == PRE_DEC
11331 || GET_CODE (addr) == POST_INC
11332 || GET_CODE (addr) == PRE_MODIFY
11333 || GET_CODE (addr) == POST_MODIFY)
/* Any address a recognized insn contains must decompose.  */
11336 if (! ix86_decompose_address (addr, &parts))
11340 index = parts.index;
11345 - esp as the base always wants an index,
11346 - ebp as the base always wants a displacement. */
11348 /* Register Indirect. */
11349 if (base && !index && !disp)
11351 /* esp (for its index) and ebp (for its displacement) need
11352 the two-byte modrm form. */
11353 if (addr == stack_pointer_rtx
11354 || addr == arg_pointer_rtx
11355 || addr == frame_pointer_rtx
11356 || addr == hard_frame_pointer_rtx)
11360 /* Direct Addressing. */
11361 else if (disp && !base && !index)
11366 /* Find the length of the displacement constant. */
/* 'K' means the constant fits in a signed 8-bit displacement.  */
11369 if (GET_CODE (disp) == CONST_INT
11370 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11376 /* ebp always wants a displacement. */
11377 else if (base == hard_frame_pointer_rtx)
11380 /* An index requires the two-byte modrm form.... */
11382 /* ...like esp, which always wants an index. */
11383 || base == stack_pointer_rtx
11384 || base == arg_pointer_rtx
11385 || base == frame_pointer_rtx)
11392 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11393 is set, expect that insn have 8bit immediate alternative. */
/* Scans INSN's constant operands and returns the encoded immediate
   size in bytes based on the insn's mode attribute.
   NOTE(review): the return statements per mode case are elided in
   this chunk.  */
11395 ix86_attr_length_immediate_default (rtx insn, int shortform)
11399 extract_insn_cached (insn);
11400 for (i = recog_data.n_operands - 1; i >= 0; --i)
11401 if (CONSTANT_P (recog_data.operand[i]))
/* 'K' constraint = value fits a sign-extended 8-bit immediate.  */
11406 && GET_CODE (recog_data.operand[i]) == CONST_INT
11407 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11411 switch (get_attr_mode (insn))
11422 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11427 fatal_insn ("unknown insn mode", insn);
11433 /* Compute default value for "length_address" attribute.
   For LEA the address is the SET_SRC of the pattern; otherwise the first
   MEM operand found (scanning backwards) supplies the address.  */
11435 ix86_attr_length_address_default (rtx insn)
11439   if (get_attr_type (insn) == TYPE_LEA)
11441       rtx set = PATTERN (insn);
11442       if (GET_CODE (set) == SET)
      /* An LEA may be wrapped in a PARALLEL (e.g. with a clobber);
	 the SET is then the first vector element.  */
11443       else if (GET_CODE (set) == PARALLEL
11445 	       && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11446 	set = XVECEXP (set, 0, 0);
11449 #ifdef ENABLE_CHECKING
11455       return memory_address_length (SET_SRC (set));
11458   extract_insn_cached (insn);
11459   for (i = recog_data.n_operands - 1; i >= 0; --i)
11460     if (GET_CODE (recog_data.operand[i]) == MEM)
11462 	return memory_address_length (XEXP (recog_data.operand[i], 0));
11468 /* Return the maximum number of instructions a cpu can issue.
   Used by the scheduler's issue_rate hook; dispatches on ix86_tune.
   NOTE(review): the per-case return values are elided in this excerpt.  */
11471 ix86_issue_rate (void)
11475     case PROCESSOR_PENTIUM:
11479     case PROCESSOR_PENTIUMPRO:
11480     case PROCESSOR_PENTIUM4:
11481     case PROCESSOR_ATHLON:
11483     case PROCESSOR_NOCONA:
11491 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11492 by DEP_INSN and nothing set by DEP_INSN. */
11495 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11499 /* Simplify the test for uninteresting insns. */
11500 if (insn_type != TYPE_SETCC
11501 && insn_type != TYPE_ICMOV
11502 && insn_type != TYPE_FCMOV
11503 && insn_type != TYPE_IBR)
11506 if ((set = single_set (dep_insn)) != 0)
11508 set = SET_DEST (set);
11511 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11512 && XVECLEN (PATTERN (dep_insn), 0) == 2
11513 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11514 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11516 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11517 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11522 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11525 /* This test is true if the dependent insn reads the flags but
11526 not any other potentially set register. */
11527 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11530 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11536 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11537    address with operands set by DEP_INSN.
   For LEA the "address" is the SET_SRC of the pattern; for other insns the
   first MEM operand's address is used.  */
11540 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11544   if (insn_type == TYPE_LEA
11547       addr = PATTERN (insn);
11548       if (GET_CODE (addr) == SET)
      /* LEA may be wrapped in a PARALLEL; pick out the SET.  */
11550       else if (GET_CODE (addr) == PARALLEL
11551 	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11552 	addr = XVECEXP (addr, 0, 0);
11555       addr = SET_SRC (addr);
11560       extract_insn_cached (insn);
11561       for (i = recog_data.n_operands - 1; i >= 0; --i)
11562 	if (GET_CODE (recog_data.operand[i]) == MEM)
11564 	    addr = XEXP (recog_data.operand[i], 0);
  /* True iff DEP_INSN writes any register mentioned in the address.  */
11571   return modified_in_p (addr, dep_insn);
/* Scheduler hook (TARGET_SCHED_ADJUST_COST): adjust the dependence latency
   COST between DEP_INSN and INSN according to the tuned processor model.
   LINK is the dependence REG_NOTE (0 for true dependence).  */
11575 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
11577   enum attr_type insn_type, dep_insn_type;
11578   enum attr_memory memory;
11580   int dep_insn_code_number;
11582   /* Anti and output dependencies have zero cost on all CPUs.  */
11583   if (REG_NOTE_KIND (link) != 0)
11586   dep_insn_code_number = recog_memoized (dep_insn);
11588   /* If we can't recognize the insns, we can't really do anything.  */
11589   if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11592   insn_type = get_attr_type (insn);
11593   dep_insn_type = get_attr_type (dep_insn);
11597     case PROCESSOR_PENTIUM:
11598       /* Address Generation Interlock adds a cycle of latency.  */
11599       if (ix86_agi_dependant (insn, dep_insn, insn_type))
11602       /* ??? Compares pair with jump/setcc.  */
11603       if (ix86_flags_dependant (insn, dep_insn, insn_type))
11606       /* Floating point stores require value to be ready one cycle earlier.  */
11607       if (insn_type == TYPE_FMOV
11608 	  && get_attr_memory (insn) == MEMORY_STORE
11609 	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
11613     case PROCESSOR_PENTIUMPRO:
11614       memory = get_attr_memory (insn);
11616       /* INT->FP conversion is expensive.  */
11617       if (get_attr_fp_int_src (dep_insn))
11620       /* There is one cycle extra latency between an FP op and a store.  */
11621       if (insn_type == TYPE_FMOV
11622 	  && (set = single_set (dep_insn)) != NULL_RTX
11623 	  && (set2 = single_set (insn)) != NULL_RTX
11624 	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11625 	  && GET_CODE (SET_DEST (set2)) == MEM)
11628       /* Show ability of reorder buffer to hide latency of load by executing
11629 	 in parallel with previous instruction in case
11630 	 previous instruction is not needed to compute the address.  */
11631       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11632 	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
11634 	  /* Claim moves to take one cycle, as core can issue one load
11635 	     at time and the next load can start cycle later.  */
11636 	  if (dep_insn_type == TYPE_IMOV
11637 	      || dep_insn_type == TYPE_FMOV)
11645       memory = get_attr_memory (insn);
11647       /* The esp dependency is resolved before the instruction is really
11649       if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11650 	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11653       /* INT->FP conversion is expensive.  */
11654       if (get_attr_fp_int_src (dep_insn))
11657       /* Show ability of reorder buffer to hide latency of load by executing
11658 	 in parallel with previous instruction in case
11659 	 previous instruction is not needed to compute the address.  */
11660       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11661 	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
11663 	  /* Claim moves to take one cycle, as core can issue one load
11664 	     at time and the next load can start cycle later.  */
11665 	  if (dep_insn_type == TYPE_IMOV
11666 	      || dep_insn_type == TYPE_FMOV)
11675     case PROCESSOR_ATHLON:
11677       memory = get_attr_memory (insn);
11679       /* Show ability of reorder buffer to hide latency of load by executing
11680 	 in parallel with previous instruction in case
11681 	 previous instruction is not needed to compute the address.  */
11682       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11683 	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
11685 	  enum attr_unit unit = get_attr_unit (insn);
11688 	  /* Because of the difference between the length of integer and
11689 	     floating unit pipeline preparation stages, the memory operands
11690 	     for floating point are cheaper.
11692 	     ??? For Athlon the difference is most probably 2.  */
11693 	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
11696 	    loadcost = TARGET_ATHLON ? 2 : 0;
11698 	  if (cost >= loadcost)
11711 /* How many alternative schedules to try.  This should be as wide as the
11712    scheduling freedom in the DFA, but no wider.  Making this value too
11713    large results extra work for the scheduler.
   Implements TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD; the
   per-processor return values are elided in this excerpt.  */
11716 ia32_multipass_dfa_lookahead (void)
11718   if (ix86_tune == PROCESSOR_PENTIUM)
11721   if (ix86_tune == PROCESSOR_PENTIUMPRO
11722       || ix86_tune == PROCESSOR_K6)
11730 /* Implement the target hook targetm.vectorize.misaligned_mem_ok.
   True when MODE is an MMX vector mode and MMX is enabled; the remaining
   cases are elided in this excerpt.  */
11733 ix86_misaligned_mem_ok (enum machine_mode mode)
11735   if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
11741 /* Compute the alignment given to a constant that is being placed in memory.
11742    EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
11744    The value of this function is used instead of that alignment to align
   the object (CONSTANT_ALIGNMENT).  */
11748 ix86_constant_alignment (tree exp, int align)
11750   if (TREE_CODE (exp) == REAL_CST)
      /* Doubles get at least 64-bit alignment; 128-bit modes get 128.  */
11752       if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11754       else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
  /* Long string constants are word-aligned when not optimizing for size,
     which speeds up block copies of them.  */
11757   else if (!optimize_size && TREE_CODE (exp) == STRING_CST
11758 	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
11759     return BITS_PER_WORD;
11764 /* Compute the alignment for a static variable.
11765    TYPE is the data type, and ALIGN is the alignment that
11766    the object would ordinarily have.  The value of this function is used
11767    instead of that alignment to align the object (DATA_ALIGNMENT).  */
11770 ix86_data_alignment (tree type, int align)
  /* Large aggregates (>= 256 bits, or with a high word in their size)
     are aligned to 256 bits.  */
11772   if (AGGREGATE_TYPE_P (type)
11773       && TYPE_SIZE (type)
11774       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11775       && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11776 	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11779   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11780      to 16byte boundary.  */
11783   if (AGGREGATE_TYPE_P (type)
11784       && TYPE_SIZE (type)
11785       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11786       && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11787 	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
  /* Promote alignment for arrays/complex/record/scalar types whose
     (element) mode benefits from 64- or 128-bit alignment.  */
11791   if (TREE_CODE (type) == ARRAY_TYPE)
11793       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11795       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11798   else if (TREE_CODE (type) == COMPLEX_TYPE)
11801       if (TYPE_MODE (type) == DCmode && align < 64)
11803       if (TYPE_MODE (type) == XCmode && align < 128)
11806   else if ((TREE_CODE (type) == RECORD_TYPE
11807 	    || TREE_CODE (type) == UNION_TYPE
11808 	    || TREE_CODE (type) == QUAL_UNION_TYPE)
11809 	   && TYPE_FIELDS (type))
      /* Only the first field's mode is inspected here.  */
11811       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11813       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11816   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11817 	   || TREE_CODE (type) == INTEGER_TYPE)
11819       if (TYPE_MODE (type) == DFmode && align < 64)
11821       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11828 /* Compute the alignment for a local variable.
11829    TYPE is the data type, and ALIGN is the alignment that
11830    the object would ordinarily have.  The value of this macro is used
11831    instead of that alignment to align the object (LOCAL_ALIGNMENT).
   Mirrors ix86_data_alignment but with the smaller 16-byte aggregate
   threshold used for stack objects.  */
11834 ix86_local_alignment (tree type, int align)
11836   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11837      to 16byte boundary.  */
11840   if (AGGREGATE_TYPE_P (type)
11841       && TYPE_SIZE (type)
11842       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11843       && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11844 	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11847   if (TREE_CODE (type) == ARRAY_TYPE)
11849       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11851       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11854   else if (TREE_CODE (type) == COMPLEX_TYPE)
11856       if (TYPE_MODE (type) == DCmode && align < 64)
11858       if (TYPE_MODE (type) == XCmode && align < 128)
11861   else if ((TREE_CODE (type) == RECORD_TYPE
11862 	    || TREE_CODE (type) == UNION_TYPE
11863 	    || TREE_CODE (type) == QUAL_UNION_TYPE)
11864 	   && TYPE_FIELDS (type))
      /* Only the first field's mode is inspected here.  */
11866       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11868       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11871   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11872 	   || TREE_CODE (type) == INTEGER_TYPE)
11875       if (TYPE_MODE (type) == DFmode && align < 64)
11877       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11883 /* Emit RTL insns to initialize the variable parts of a trampoline.
11884    FNADDR is an RTX for the address of the function's pure code.
11885    CXT is an RTX for the static chain value for the function.
   The 32-bit path emits "mov $cxt,%ecx; jmp fnaddr"; the 64-bit path
   loads fnaddr into r11, the static chain into r10, and jumps via r11.  */
11887 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
11891       /* Compute offset from the end of the jmp to the target function.  */
11892       rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11893 			       plus_constant (tramp, 10),
11894 			       NULL_RTX, 1, OPTAB_DIRECT);
      /* 0xb9 is "movl $imm32, %ecx" -- loads the static chain.  */
11895       emit_move_insn (gen_rtx_MEM (QImode, tramp),
11896 		      gen_int_mode (0xb9, QImode));
11897       emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      /* 0xe9 is "jmp rel32" -- DISP was computed relative to its end.  */
11898       emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
11899 		      gen_int_mode (0xe9, QImode));
11900       emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11905       /* Try to load address using shorter movl instead of movabs.
11906 	 We may want to support movq for kernel mode, but kernel does not use
11907 	 trampolines at the moment.  */
11908       if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
11910 	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
	  /* 0x41 0xbb is "movl $imm32, %r11d" (zero-extends into r11).  */
11911 	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11912 			  gen_int_mode (0xbb41, HImode));
11913 	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11914 			  gen_lowpart (SImode, fnaddr));
      /* 0x49 0xbb is "movabs $imm64, %r11".  */
11919 	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11920 			  gen_int_mode (0xbb49, HImode));
11921 	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11925       /* Load static chain using movabs to r10.  */
11926       emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11927 		      gen_int_mode (0xba49, HImode));
11928       emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11931       /* Jump to the r11 (0x49 0xff 0xe3 is "jmp *%r11").  */
11932       emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11933 		      gen_int_mode (0xff49, HImode));
11934       emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
11935 		      gen_int_mode (0xe3, QImode));
  /* Sanity-check that the emitted code fits in the trampoline.  */
11937       if (offset > TRAMPOLINE_SIZE)
11941 #ifdef ENABLE_EXECUTE_STACK
  /* Some targets require runtime marking of the stack as executable.  */
11942   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
11943 		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
/* Register a machine-specific builtin NAME of type TYPE with code CODE,
   but only when the ISA bits in MASK are enabled -- and, for builtins
   tagged MASK_64BIT, only when compiling for 64-bit.  */
11947 #define def_builtin(MASK, NAME, TYPE, CODE)			\
11949   if ((MASK) & target_flags					\
11950       && (!((MASK) & MASK_64BIT) || TARGET_64BIT))		\
11951     lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
11952 				 NULL, NULL_TREE);		\
/* Table entry describing one machine builtin: the ISA mask that enables
   it, the insn pattern implementing it, its user-visible name (0 for
   builtins registered elsewhere), its IX86_BUILTIN_* code, and for
   comparisons the rtx comparison code plus a swap-operands flag.  */
11955 struct builtin_description
11957   const unsigned int mask;
11958   const enum insn_code icode;
11959   const char *const name;
11960   const enum ix86_builtins code;
11961   const enum rtx_code comparison;
11962   const unsigned int flag;
/* Descriptions of the SSE/SSE2 (u)comiss/(u)comisd comparison builtins.
   The rtx codes use the unordered forms (UNEQ/UNLT/UNLE/LTGT) where the
   hardware comparison does not trap on NaNs.  */
11965 static const struct builtin_description bdesc_comi[] =
11967   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
11968   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
11969   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
11970   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
11971   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
11972   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
11973   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
11974   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
11975   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
11976   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
11977   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
11978   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
11979   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
11980   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
11981   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
11982   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
11983   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
11984   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
11985   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
11986   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
11987   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
11988   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
11989   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
11990   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
11993 static const struct builtin_description bdesc_2arg[] =
11996 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11997 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11998 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11999 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12000 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12001 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12002 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12003 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12005 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12006 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12007 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12008 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12009 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12010 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12011 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12012 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12013 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12014 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12015 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12016 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12017 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12018 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12019 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12020 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12021 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12022 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12023 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12024 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12026 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12027 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12028 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12029 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12031 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12032 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12033 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12034 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12036 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12037 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12038 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12039 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12040 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12043 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12044 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12045 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12046 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12047 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12048 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12049 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12050 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12052 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12053 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12054 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12055 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12056 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12057 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12058 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12059 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12061 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12062 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12063 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12065 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12066 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12067 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12068 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12070 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12071 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12073 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12074 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12075 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12076 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12077 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12078 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12080 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12081 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12082 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12083 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12085 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12086 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12087 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12088 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12089 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12090 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12093 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12094 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12095 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12097 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12098 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12099 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12101 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12102 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12103 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12104 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12105 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12106 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12108 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12109 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12110 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12111 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12112 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12113 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12115 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12116 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12117 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12118 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12120 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12121 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12124 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12125 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12126 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12127 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12128 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12129 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12130 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12131 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12133 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12134 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12135 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12136 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12137 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12138 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12139 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12140 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12141 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12142 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12143 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12144 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12145 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12146 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12147 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12148 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12149 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12150 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12151 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12152 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12154 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12155 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12156 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12157 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12159 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12160 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12161 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12162 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12164 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12165 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12166 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12169 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12170 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12171 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12172 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12173 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12174 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12175 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12176 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12178 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12179 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12180 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12181 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12182 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12183 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12184 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12185 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12187 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12188 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12190 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12191 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12192 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12193 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12195 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12196 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12198 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12199 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12200 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12201 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12202 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12203 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12205 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12206 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12207 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12208 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12210 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12211 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12212 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12213 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12214 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12215 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12216 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12217 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12219 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12220 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12221 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12223 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12224 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12226 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
12227 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
12229 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12230 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12231 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12232 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12233 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12234 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12236 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12237 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12238 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12239 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12240 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12241 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12243 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12244 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12245 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12246 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12248 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12250 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12251 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12252 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12253 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12256 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12257 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12258 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12259 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12260 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12261 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
/* Builtins taking a single vector operand.  A zero in the name slot
   means the builtin is registered explicitly (with its type) later in
   ix86_init_mmx_sse_builtins rather than from this table.  */
12264 static const struct builtin_description bdesc_1arg[] =
  /* SSE (and 3DNow! Athlon extensions).  */
12266 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12267 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12269 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12270 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12271 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
  /* SSE float <-> integer conversions; the *q variants are 64-bit only.  */
12273 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12274 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12275 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12276 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12277 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12278 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
  /* SSE2.  */
12280 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12281 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12282 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12283 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12285 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
  /* SSE2 packed conversions (double/float/integer).  */
12287 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12288 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12290 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12291 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12292 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12293 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12294 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12296 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
  /* SSE2 scalar double <-> integer conversions.  */
12298 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12299 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12300 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12301 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
12303 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12304 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12305 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12307 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
  /* SSE3.  */
12310 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
12311 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
12312 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
12316 ix86_init_builtins (void)
12319 ix86_init_mmx_sse_builtins ();
12322 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12323 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12326 ix86_init_mmx_sse_builtins (void)
12328 const struct builtin_description * d;
12331 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
12332 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
12333 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
12334 tree V2DI_type_node
12335 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
12336 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
12337 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
12338 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
12339 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
12340 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
12341 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
12343 tree pchar_type_node = build_pointer_type (char_type_node);
12344 tree pcchar_type_node = build_pointer_type (
12345 build_type_variant (char_type_node, 1, 0));
12346 tree pfloat_type_node = build_pointer_type (float_type_node);
12347 tree pcfloat_type_node = build_pointer_type (
12348 build_type_variant (float_type_node, 1, 0));
12349 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12350 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12351 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12354 tree int_ftype_v4sf_v4sf
12355 = build_function_type_list (integer_type_node,
12356 V4SF_type_node, V4SF_type_node, NULL_TREE);
12357 tree v4si_ftype_v4sf_v4sf
12358 = build_function_type_list (V4SI_type_node,
12359 V4SF_type_node, V4SF_type_node, NULL_TREE);
12360 /* MMX/SSE/integer conversions. */
12361 tree int_ftype_v4sf
12362 = build_function_type_list (integer_type_node,
12363 V4SF_type_node, NULL_TREE);
12364 tree int64_ftype_v4sf
12365 = build_function_type_list (long_long_integer_type_node,
12366 V4SF_type_node, NULL_TREE);
12367 tree int_ftype_v8qi
12368 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12369 tree v4sf_ftype_v4sf_int
12370 = build_function_type_list (V4SF_type_node,
12371 V4SF_type_node, integer_type_node, NULL_TREE);
12372 tree v4sf_ftype_v4sf_int64
12373 = build_function_type_list (V4SF_type_node,
12374 V4SF_type_node, long_long_integer_type_node,
12376 tree v4sf_ftype_v4sf_v2si
12377 = build_function_type_list (V4SF_type_node,
12378 V4SF_type_node, V2SI_type_node, NULL_TREE);
12379 tree int_ftype_v4hi_int
12380 = build_function_type_list (integer_type_node,
12381 V4HI_type_node, integer_type_node, NULL_TREE);
12382 tree v4hi_ftype_v4hi_int_int
12383 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12384 integer_type_node, integer_type_node,
12386 /* Miscellaneous. */
12387 tree v8qi_ftype_v4hi_v4hi
12388 = build_function_type_list (V8QI_type_node,
12389 V4HI_type_node, V4HI_type_node, NULL_TREE);
12390 tree v4hi_ftype_v2si_v2si
12391 = build_function_type_list (V4HI_type_node,
12392 V2SI_type_node, V2SI_type_node, NULL_TREE);
12393 tree v4sf_ftype_v4sf_v4sf_int
12394 = build_function_type_list (V4SF_type_node,
12395 V4SF_type_node, V4SF_type_node,
12396 integer_type_node, NULL_TREE);
12397 tree v2si_ftype_v4hi_v4hi
12398 = build_function_type_list (V2SI_type_node,
12399 V4HI_type_node, V4HI_type_node, NULL_TREE);
12400 tree v4hi_ftype_v4hi_int
12401 = build_function_type_list (V4HI_type_node,
12402 V4HI_type_node, integer_type_node, NULL_TREE);
12403 tree v4hi_ftype_v4hi_di
12404 = build_function_type_list (V4HI_type_node,
12405 V4HI_type_node, long_long_unsigned_type_node,
12407 tree v2si_ftype_v2si_di
12408 = build_function_type_list (V2SI_type_node,
12409 V2SI_type_node, long_long_unsigned_type_node,
12411 tree void_ftype_void
12412 = build_function_type (void_type_node, void_list_node);
12413 tree void_ftype_unsigned
12414 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12415 tree void_ftype_unsigned_unsigned
12416 = build_function_type_list (void_type_node, unsigned_type_node,
12417 unsigned_type_node, NULL_TREE);
12418 tree void_ftype_pcvoid_unsigned_unsigned
12419 = build_function_type_list (void_type_node, const_ptr_type_node,
12420 unsigned_type_node, unsigned_type_node,
12422 tree unsigned_ftype_void
12423 = build_function_type (unsigned_type_node, void_list_node);
12425 = build_function_type (long_long_unsigned_type_node, void_list_node);
12426 tree v4sf_ftype_void
12427 = build_function_type (V4SF_type_node, void_list_node);
12428 tree v2si_ftype_v4sf
12429 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12430 /* Loads/stores. */
12431 tree void_ftype_v8qi_v8qi_pchar
12432 = build_function_type_list (void_type_node,
12433 V8QI_type_node, V8QI_type_node,
12434 pchar_type_node, NULL_TREE);
12435 tree v4sf_ftype_pcfloat
12436 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
12437 /* @@@ the type is bogus */
12438 tree v4sf_ftype_v4sf_pv2si
12439 = build_function_type_list (V4SF_type_node,
12440 V4SF_type_node, pv2si_type_node, NULL_TREE);
12441 tree void_ftype_pv2si_v4sf
12442 = build_function_type_list (void_type_node,
12443 pv2si_type_node, V4SF_type_node, NULL_TREE);
12444 tree void_ftype_pfloat_v4sf
12445 = build_function_type_list (void_type_node,
12446 pfloat_type_node, V4SF_type_node, NULL_TREE);
12447 tree void_ftype_pdi_di
12448 = build_function_type_list (void_type_node,
12449 pdi_type_node, long_long_unsigned_type_node,
12451 tree void_ftype_pv2di_v2di
12452 = build_function_type_list (void_type_node,
12453 pv2di_type_node, V2DI_type_node, NULL_TREE);
12454 /* Normal vector unops. */
12455 tree v4sf_ftype_v4sf
12456 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12458 /* Normal vector binops. */
12459 tree v4sf_ftype_v4sf_v4sf
12460 = build_function_type_list (V4SF_type_node,
12461 V4SF_type_node, V4SF_type_node, NULL_TREE);
12462 tree v8qi_ftype_v8qi_v8qi
12463 = build_function_type_list (V8QI_type_node,
12464 V8QI_type_node, V8QI_type_node, NULL_TREE);
12465 tree v4hi_ftype_v4hi_v4hi
12466 = build_function_type_list (V4HI_type_node,
12467 V4HI_type_node, V4HI_type_node, NULL_TREE);
12468 tree v2si_ftype_v2si_v2si
12469 = build_function_type_list (V2SI_type_node,
12470 V2SI_type_node, V2SI_type_node, NULL_TREE);
12471 tree di_ftype_di_di
12472 = build_function_type_list (long_long_unsigned_type_node,
12473 long_long_unsigned_type_node,
12474 long_long_unsigned_type_node, NULL_TREE);
12476 tree v2si_ftype_v2sf
12477 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12478 tree v2sf_ftype_v2si
12479 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12480 tree v2si_ftype_v2si
12481 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12482 tree v2sf_ftype_v2sf
12483 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12484 tree v2sf_ftype_v2sf_v2sf
12485 = build_function_type_list (V2SF_type_node,
12486 V2SF_type_node, V2SF_type_node, NULL_TREE);
12487 tree v2si_ftype_v2sf_v2sf
12488 = build_function_type_list (V2SI_type_node,
12489 V2SF_type_node, V2SF_type_node, NULL_TREE);
12490 tree pint_type_node = build_pointer_type (integer_type_node);
12491 tree pcint_type_node = build_pointer_type (
12492 build_type_variant (integer_type_node, 1, 0));
12493 tree pdouble_type_node = build_pointer_type (double_type_node);
12494 tree pcdouble_type_node = build_pointer_type (
12495 build_type_variant (double_type_node, 1, 0));
12496 tree int_ftype_v2df_v2df
12497 = build_function_type_list (integer_type_node,
12498 V2DF_type_node, V2DF_type_node, NULL_TREE);
12501 = build_function_type (intTI_type_node, void_list_node);
12502 tree v2di_ftype_void
12503 = build_function_type (V2DI_type_node, void_list_node);
12504 tree ti_ftype_ti_ti
12505 = build_function_type_list (intTI_type_node,
12506 intTI_type_node, intTI_type_node, NULL_TREE);
12507 tree void_ftype_pcvoid
12508 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
12510 = build_function_type_list (V2DI_type_node,
12511 long_long_unsigned_type_node, NULL_TREE);
12513 = build_function_type_list (long_long_unsigned_type_node,
12514 V2DI_type_node, NULL_TREE);
12515 tree v4sf_ftype_v4si
12516 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12517 tree v4si_ftype_v4sf
12518 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12519 tree v2df_ftype_v4si
12520 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12521 tree v4si_ftype_v2df
12522 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12523 tree v2si_ftype_v2df
12524 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12525 tree v4sf_ftype_v2df
12526 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12527 tree v2df_ftype_v2si
12528 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12529 tree v2df_ftype_v4sf
12530 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12531 tree int_ftype_v2df
12532 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12533 tree int64_ftype_v2df
12534 = build_function_type_list (long_long_integer_type_node,
12535 V2DF_type_node, NULL_TREE);
12536 tree v2df_ftype_v2df_int
12537 = build_function_type_list (V2DF_type_node,
12538 V2DF_type_node, integer_type_node, NULL_TREE);
12539 tree v2df_ftype_v2df_int64
12540 = build_function_type_list (V2DF_type_node,
12541 V2DF_type_node, long_long_integer_type_node,
12543 tree v4sf_ftype_v4sf_v2df
12544 = build_function_type_list (V4SF_type_node,
12545 V4SF_type_node, V2DF_type_node, NULL_TREE);
12546 tree v2df_ftype_v2df_v4sf
12547 = build_function_type_list (V2DF_type_node,
12548 V2DF_type_node, V4SF_type_node, NULL_TREE);
12549 tree v2df_ftype_v2df_v2df_int
12550 = build_function_type_list (V2DF_type_node,
12551 V2DF_type_node, V2DF_type_node,
12554 tree v2df_ftype_v2df_pcdouble
12555 = build_function_type_list (V2DF_type_node,
12556 V2DF_type_node, pcdouble_type_node, NULL_TREE);
12557 tree void_ftype_pdouble_v2df
12558 = build_function_type_list (void_type_node,
12559 pdouble_type_node, V2DF_type_node, NULL_TREE);
12560 tree void_ftype_pint_int
12561 = build_function_type_list (void_type_node,
12562 pint_type_node, integer_type_node, NULL_TREE);
12563 tree void_ftype_v16qi_v16qi_pchar
12564 = build_function_type_list (void_type_node,
12565 V16QI_type_node, V16QI_type_node,
12566 pchar_type_node, NULL_TREE);
12567 tree v2df_ftype_pcdouble
12568 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
12569 tree v2df_ftype_v2df_v2df
12570 = build_function_type_list (V2DF_type_node,
12571 V2DF_type_node, V2DF_type_node, NULL_TREE);
12572 tree v16qi_ftype_v16qi_v16qi
12573 = build_function_type_list (V16QI_type_node,
12574 V16QI_type_node, V16QI_type_node, NULL_TREE);
12575 tree v8hi_ftype_v8hi_v8hi
12576 = build_function_type_list (V8HI_type_node,
12577 V8HI_type_node, V8HI_type_node, NULL_TREE);
12578 tree v4si_ftype_v4si_v4si
12579 = build_function_type_list (V4SI_type_node,
12580 V4SI_type_node, V4SI_type_node, NULL_TREE);
12581 tree v2di_ftype_v2di_v2di
12582 = build_function_type_list (V2DI_type_node,
12583 V2DI_type_node, V2DI_type_node, NULL_TREE);
12584 tree v2di_ftype_v2df_v2df
12585 = build_function_type_list (V2DI_type_node,
12586 V2DF_type_node, V2DF_type_node, NULL_TREE);
12587 tree v2df_ftype_v2df
12588 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12589 tree v2df_ftype_double
12590 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12591 tree v2df_ftype_double_double
12592 = build_function_type_list (V2DF_type_node,
12593 double_type_node, double_type_node, NULL_TREE);
12594 tree int_ftype_v8hi_int
12595 = build_function_type_list (integer_type_node,
12596 V8HI_type_node, integer_type_node, NULL_TREE);
12597 tree v8hi_ftype_v8hi_int_int
12598 = build_function_type_list (V8HI_type_node,
12599 V8HI_type_node, integer_type_node,
12600 integer_type_node, NULL_TREE);
12601 tree v2di_ftype_v2di_int
12602 = build_function_type_list (V2DI_type_node,
12603 V2DI_type_node, integer_type_node, NULL_TREE);
12604 tree v4si_ftype_v4si_int
12605 = build_function_type_list (V4SI_type_node,
12606 V4SI_type_node, integer_type_node, NULL_TREE);
12607 tree v8hi_ftype_v8hi_int
12608 = build_function_type_list (V8HI_type_node,
12609 V8HI_type_node, integer_type_node, NULL_TREE);
12610 tree v8hi_ftype_v8hi_v2di
12611 = build_function_type_list (V8HI_type_node,
12612 V8HI_type_node, V2DI_type_node, NULL_TREE);
12613 tree v4si_ftype_v4si_v2di
12614 = build_function_type_list (V4SI_type_node,
12615 V4SI_type_node, V2DI_type_node, NULL_TREE);
12616 tree v4si_ftype_v8hi_v8hi
12617 = build_function_type_list (V4SI_type_node,
12618 V8HI_type_node, V8HI_type_node, NULL_TREE);
12619 tree di_ftype_v8qi_v8qi
12620 = build_function_type_list (long_long_unsigned_type_node,
12621 V8QI_type_node, V8QI_type_node, NULL_TREE);
12622 tree di_ftype_v2si_v2si
12623 = build_function_type_list (long_long_unsigned_type_node,
12624 V2SI_type_node, V2SI_type_node, NULL_TREE);
12625 tree v2di_ftype_v16qi_v16qi
12626 = build_function_type_list (V2DI_type_node,
12627 V16QI_type_node, V16QI_type_node, NULL_TREE);
12628 tree v2di_ftype_v4si_v4si
12629 = build_function_type_list (V2DI_type_node,
12630 V4SI_type_node, V4SI_type_node, NULL_TREE);
12631 tree int_ftype_v16qi
12632 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12633 tree v16qi_ftype_pcchar
12634 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
12635 tree void_ftype_pchar_v16qi
12636 = build_function_type_list (void_type_node,
12637 pchar_type_node, V16QI_type_node, NULL_TREE);
12638 tree v4si_ftype_pcint
12639 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
12640 tree void_ftype_pcint_v4si
12641 = build_function_type_list (void_type_node,
12642 pcint_type_node, V4SI_type_node, NULL_TREE);
12643 tree v2di_ftype_v2di
12644 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
12647 tree float128_type;
12649 /* The __float80 type. */
12650 if (TYPE_MODE (long_double_type_node) == XFmode)
12651 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
12655 /* The __float80 type. */
12656 float80_type = make_node (REAL_TYPE);
12657 TYPE_PRECISION (float80_type) = 80;
12658 layout_type (float80_type);
12659 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
12662 float128_type = make_node (REAL_TYPE);
12663 TYPE_PRECISION (float128_type) = 128;
12664 layout_type (float128_type);
12665 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
12667 /* Add all builtins that are more or less simple operations on two
12669 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12671 /* Use one of the operands; the target can have a different mode for
12672 mask-generating compares. */
12673 enum machine_mode mode;
12678 mode = insn_data[d->icode].operand[1].mode;
12683 type = v16qi_ftype_v16qi_v16qi;
12686 type = v8hi_ftype_v8hi_v8hi;
12689 type = v4si_ftype_v4si_v4si;
12692 type = v2di_ftype_v2di_v2di;
12695 type = v2df_ftype_v2df_v2df;
12698 type = ti_ftype_ti_ti;
12701 type = v4sf_ftype_v4sf_v4sf;
12704 type = v8qi_ftype_v8qi_v8qi;
12707 type = v4hi_ftype_v4hi_v4hi;
12710 type = v2si_ftype_v2si_v2si;
12713 type = di_ftype_di_di;
12720 /* Override for comparisons. */
12721 if (d->icode == CODE_FOR_maskcmpv4sf3
12722 || d->icode == CODE_FOR_maskncmpv4sf3
12723 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12724 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12725 type = v4si_ftype_v4sf_v4sf;
12727 if (d->icode == CODE_FOR_maskcmpv2df3
12728 || d->icode == CODE_FOR_maskncmpv2df3
12729 || d->icode == CODE_FOR_vmmaskcmpv2df3
12730 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12731 type = v2di_ftype_v2df_v2df;
12733 def_builtin (d->mask, d->name, type, d->code);
12736 /* Add the remaining MMX insns with somewhat more complicated types. */
12737 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12738 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12739 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12740 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12741 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12743 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12744 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12745 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12747 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12748 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12750 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12751 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12753 /* comi/ucomi insns. */
12754 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12755 if (d->mask == MASK_SSE2)
12756 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12758 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12760 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12761 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12762 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12764 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12765 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12766 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12767 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12768 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12769 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
12770 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12771 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
12772 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12773 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12774 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
12776 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12777 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12779 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12781 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
12782 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
12783 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
12784 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12785 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12786 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12788 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12789 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12790 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12791 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12793 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12794 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12795 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12796 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12798 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12800 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12802 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12803 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12804 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12805 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12806 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12807 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12809 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12811 /* Original 3DNow! */
12812 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12813 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12814 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12815 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12816 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12817 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12818 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12819 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12820 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12821 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12822 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12823 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12824 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12825 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12826 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12827 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12828 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12829 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12830 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12831 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12833 /* 3DNow! extension as used in the Athlon CPU. */
12834 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12835 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12836 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12837 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12838 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12839 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12841 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12844 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12845 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12847 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12848 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12849 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
12851 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
12852 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
12853 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
12854 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12855 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12856 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12858 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
12859 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
12860 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREHPD);
12861 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORELPD);
12863 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12864 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12865 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12866 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12867 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12869 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12870 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12871 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12872 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12874 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12875 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12877 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12879 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12880 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12882 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12883 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12884 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12885 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12886 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12888 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12890 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12891 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12892 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
12893 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
12895 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12896 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12897 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12899 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12900 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
12901 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12902 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12904 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12905 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12906 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12907 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
12908 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
12909 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12910 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12912 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
12913 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12914 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
12916 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
12917 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
12918 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
12919 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
12920 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
12921 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
12922 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
12924 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
12926 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
12927 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
12929 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12930 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12931 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12933 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12934 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12935 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12937 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12938 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12940 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
12941 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12942 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12943 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12945 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
12946 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12947 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12948 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12950 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12951 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12953 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
12955 /* Prescott New Instructions. */
12956 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
12957 void_ftype_pcvoid_unsigned_unsigned,
12958 IX86_BUILTIN_MONITOR);
12959 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
12960 void_ftype_unsigned_unsigned,
12961 IX86_BUILTIN_MWAIT);
12962 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
12964 IX86_BUILTIN_MOVSHDUP);
12965 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
12967 IX86_BUILTIN_MOVSLDUP);
12968 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
12969 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
12970 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
12971 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
12972 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
12973 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
12976 /* Errors in the source file can cause expand_expr to return const0_rtx
12977 where we expect a vector. To avoid crashing, use one of the vector
12978 clear instructions. */
/* NOTE(review): this excerpt is elided -- the `static rtx` return-type line,
   the body of the early-return branch (presumably `return x;` when X is
   already a valid vector operand), the `else` keyword before the SSE case,
   and the final `return x;` are missing here; confirm against the full
   source before editing. */
12980 safe_vector_operand (rtx x, enum machine_mode mode)
12982 if (x != const0_rtx)
/* X was const0_rtx: materialize a fresh pseudo of the requested MODE and
   emit an explicit clear of it so downstream code sees a real vector. */
12984 x = gen_reg_rtx (mode);
/* MMX/3DNow! modes are cleared with the 64-bit mmx_clrdi pattern; other
   modes go through the SSE sse_clrv4sf pattern.  A SUBREG adapts X when
   MODE is not the pattern's natural mode. */
12986 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
12987 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12988 : gen_rtx_SUBREG (DImode, x, 0)));
12990 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12991 : gen_rtx_SUBREG (V4SFmode, x, 0),
12992 CONST0_RTX (V4SFmode)));
12996 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* NOTE(review): several lines are elided in this excerpt (the `static rtx`
   line, `rtx pat;`, the `target == 0` half of the target test, braces,
   the `abort ()` under the mode-mismatch check, and the trailing
   `if (! pat) return 0; emit_insn (pat); return target;`).  Confirm
   against the full source. */
12999 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
/* Pull the two arguments out of the builtin's TREE_LIST and expand them
   to RTL, then fetch the insn's result/operand modes from insn_data. */
13002 tree arg0 = TREE_VALUE (arglist);
13003 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13004 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13005 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13006 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13007 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13008 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against expand_expr returning const0_rtx for a vector operand
   (see safe_vector_operand above). */
13010 if (VECTOR_MODE_P (mode0))
13011 op0 = safe_vector_operand (op0, mode0);
13012 if (VECTOR_MODE_P (mode1))
13013 op1 = safe_vector_operand (op1, mode1);
/* Reuse TARGET only if it already has the right mode and satisfies the
   insn's output predicate; otherwise allocate a fresh pseudo. */
13016 || GET_MODE (target) != tmode
13017 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13018 target = gen_reg_rtx (tmode);
/* A SImode shift count destined for a TImode operand is widened by
   loading it into a V4SImode register and taking the TImode lowpart. */
13020 if (GET_MODE (op1) == SImode && mode1 == TImode)
13022 rtx x = gen_reg_rtx (V4SImode);
13023 emit_insn (gen_sse2_loadd (x, op1));
13024 op1 = gen_lowpart (TImode, x);
13027 /* In case the insn wants input operands in modes different from
13028 the result, abort. */
13029 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13030 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
/* Force operands that fail the insn predicates into registers. */
13033 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13034 op0 = copy_to_mode_reg (mode0, op0);
13035 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13036 op1 = copy_to_mode_reg (mode1, op1);
13038 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13039 yet one of the two must not be a memory. This is normally enforced
13040 by expanders, but we didn't bother to create one here. */
13041 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13042 op0 = copy_to_mode_reg (mode0, op0);
13044 pat = GEN_FCN (icode) (target, op0, op1);
13051 /* Subroutine of ix86_expand_builtin to take care of stores. */
/* NOTE(review): the `static rtx` line, `rtx pat;`, braces, and the
   trailing `if (! pat) return 0; emit_insn (pat); return 0;` lines
   appear elided in this excerpt -- confirm against the full source. */
13054 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
/* arg0 is the destination address, arg1 the value to store. */
13057 tree arg0 = TREE_VALUE (arglist);
13058 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13059 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13060 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13061 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13062 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
/* Guard against a bogus const0_rtx vector operand (see
   safe_vector_operand above). */
13064 if (VECTOR_MODE_P (mode1))
13065 op1 = safe_vector_operand (op1, mode1);
/* Wrap the address in a MEM of the insn's destination mode, forcing the
   pointer into a Pmode register first; the stored value goes in a
   register of the source-operand mode. */
13067 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13068 op1 = copy_to_mode_reg (mode1, op1);
13070 pat = GEN_FCN (icode) (op0, op1);
13076 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
/* NOTE(review): elided in this excerpt -- the `static rtx` line,
   `rtx pat;`, the `target == 0` half of the target test, and the
   `if (do_load)` / `else` scaffolding that selects between the MEM-load
   path (line 13093) and the predicate path (lines 13096+), plus the
   trailing pat check/emit/return.  Confirm against the full source. */
13079 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13080 rtx target, int do_load)
13083 tree arg0 = TREE_VALUE (arglist);
13084 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13085 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13086 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Reuse TARGET only when mode and output predicate allow it. */
13089 || GET_MODE (target) != tmode
13090 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13091 target = gen_reg_rtx (tmode);
/* do_load path: treat OP0 as an address and dereference it. */
13093 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13096 if (VECTOR_MODE_P (mode0))
13097 op0 = safe_vector_operand (op0, mode0);
13099 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13100 op0 = copy_to_mode_reg (mode0, op0);
13103 pat = GEN_FCN (icode) (target, op0);
13110 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13111 sqrtss, rsqrtss, rcpss. */
/* NOTE(review): elided in this excerpt -- the `static rtx` line,
   `rtx pat;`, the `target == 0` half of the target test, the line that
   sets OP1 (presumably `op1 = op0;`, since these vm-patterns take the
   same source twice), and the trailing pat check/emit/return.  Confirm
   against the full source. */
13114 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13117 tree arg0 = TREE_VALUE (arglist);
13118 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13119 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13120 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Reuse TARGET only when mode and output predicate allow it. */
13123 || GET_MODE (target) != tmode
13124 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13125 target = gen_reg_rtx (tmode);
13127 if (VECTOR_MODE_P (mode0))
13128 op0 = safe_vector_operand (op0, mode0);
13130 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13131 op0 = copy_to_mode_reg (mode0, op0);
/* Note: OP1 is checked against mode0 -- both inputs share the insn's
   source mode. */
13134 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13135 op1 = copy_to_mode_reg (mode0, op1);
13137 pat = GEN_FCN (icode) (target, op0, op1);
13144 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* NOTE(review): elided in this excerpt -- the `static rtx` line, the
   final `rtx target` parameter line, `rtx pat, op2;` (OP2 is used at
   13186 but not declared here), the body of the operand-swap branch
   (only its register copy is visible at 13170-13171), the `target == 0`
   half of the target test, and the trailing pat check/emit/return.
   Confirm against the full source. */
13147 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13151 tree arg0 = TREE_VALUE (arglist);
13152 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13153 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13154 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
/* Modes and the comparison code come from the builtin_description
   entry rather than being passed separately. */
13156 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13157 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13158 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13159 enum rtx_code comparison = d->comparison;
13161 if (VECTOR_MODE_P (mode0))
13162 op0 = safe_vector_operand (op0, mode0);
13163 if (VECTOR_MODE_P (mode1))
13164 op1 = safe_vector_operand (op1, mode1);
13166 /* Swap operands if we have a comparison that isn't available in
/* (rest of this comment and the swap itself elided; a copy of OP1 into
   a fresh register is the visible part of that path.) */
13170 rtx tmp = gen_reg_rtx (mode1);
13171 emit_move_insn (tmp, op1);
/* Reuse TARGET only when mode and output predicate allow it. */
13177 || GET_MODE (target) != tmode
13178 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13179 target = gen_reg_rtx (tmode);
13181 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13182 op0 = copy_to_mode_reg (mode0, op0);
13183 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13184 op1 = copy_to_mode_reg (mode1, op1);
/* The comparison rtx itself is passed as a fourth operand so the
   pattern knows which condition to emit. */
13186 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13187 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13194 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* NOTE(review): elided in this excerpt -- the `static rtx` line, the
   final `rtx target` parameter line, `rtx pat, op2;`, the body of the
   operand-swap branch (its comment starts at 13215 but the code is
   missing), the pat check/emit between 13234 and 13238, and the use of
   OP2 built at 13233.  Confirm against the full source. */
13197 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13201 tree arg0 = TREE_VALUE (arglist);
13202 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13203 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13204 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
/* For comi insns both inputs are insn operands 0 and 1; the result is
   materialized separately via the flags (see below). */
13206 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13207 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13208 enum rtx_code comparison = d->comparison;
13210 if (VECTOR_MODE_P (mode0))
13211 op0 = safe_vector_operand (op0, mode0);
13212 if (VECTOR_MODE_P (mode1))
13213 op1 = safe_vector_operand (op1, mode1);
13215 /* Swap operands if we have a comparison that isn't available in
/* Build the SImode result register, clear it, and work on its QImode
   lowpart so a setcc-style write of the low byte yields a 0/1 int. */
13224 target = gen_reg_rtx (SImode);
13225 emit_move_insn (target, const0_rtx);
13226 target = gen_rtx_SUBREG (QImode, target, 0);
13228 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13229 op0 = copy_to_mode_reg (mode0, op0);
13230 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13231 op1 = copy_to_mode_reg (mode1, op1);
13233 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13234 pat = GEN_FCN (d->icode) (op0, op1);
/* Store the comparison outcome into the low byte of TARGET via a
   STRICT_LOW_PART set, then return the enclosing SImode register. */
13238 emit_insn (gen_rtx_SET (VOIDmode,
13239 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13240 gen_rtx_fmt_ee (comparison, QImode,
13244 return SUBREG_REG (target);
13247 /* Expand an expression EXP that calls a built-in function,
13248 with result going to TARGET if that's convenient
13249 (and in mode MODE if that's convenient).
13250 SUBTARGET may be used as the target for computing one of EXP's operands.
13251 IGNORE is nonzero if the value is to be ignored. */
13254 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13255 enum machine_mode mode ATTRIBUTE_UNUSED,
13256 int ignore ATTRIBUTE_UNUSED)
13258 const struct builtin_description *d;
13260 enum insn_code icode;
13261 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13262 tree arglist = TREE_OPERAND (exp, 1);
13263 tree arg0, arg1, arg2;
13264 rtx op0, op1, op2, pat;
13265 enum machine_mode tmode, mode0, mode1, mode2;
13266 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13270 case IX86_BUILTIN_EMMS:
13271 emit_insn (gen_emms ());
13274 case IX86_BUILTIN_SFENCE:
13275 emit_insn (gen_sfence ());
13278 case IX86_BUILTIN_PEXTRW:
13279 case IX86_BUILTIN_PEXTRW128:
13280 icode = (fcode == IX86_BUILTIN_PEXTRW
13281 ? CODE_FOR_mmx_pextrw
13282 : CODE_FOR_sse2_pextrw);
13283 arg0 = TREE_VALUE (arglist);
13284 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13285 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13286 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13287 tmode = insn_data[icode].operand[0].mode;
13288 mode0 = insn_data[icode].operand[1].mode;
13289 mode1 = insn_data[icode].operand[2].mode;
13291 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13292 op0 = copy_to_mode_reg (mode0, op0);
13293 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13295 error ("selector must be an integer constant in the range 0..%i",
13296 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
13297 return gen_reg_rtx (tmode);
13300 || GET_MODE (target) != tmode
13301 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13302 target = gen_reg_rtx (tmode);
13303 pat = GEN_FCN (icode) (target, op0, op1);
13309 case IX86_BUILTIN_PINSRW:
13310 case IX86_BUILTIN_PINSRW128:
13311 icode = (fcode == IX86_BUILTIN_PINSRW
13312 ? CODE_FOR_mmx_pinsrw
13313 : CODE_FOR_sse2_pinsrw);
13314 arg0 = TREE_VALUE (arglist);
13315 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13316 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13317 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13318 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13319 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13320 tmode = insn_data[icode].operand[0].mode;
13321 mode0 = insn_data[icode].operand[1].mode;
13322 mode1 = insn_data[icode].operand[2].mode;
13323 mode2 = insn_data[icode].operand[3].mode;
13325 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13326 op0 = copy_to_mode_reg (mode0, op0);
13327 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13328 op1 = copy_to_mode_reg (mode1, op1);
13329 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13331 error ("selector must be an integer constant in the range 0..%i",
13332 fcode == IX86_BUILTIN_PINSRW ? 15:255);
13336 || GET_MODE (target) != tmode
13337 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13338 target = gen_reg_rtx (tmode);
13339 pat = GEN_FCN (icode) (target, op0, op1, op2);
13345 case IX86_BUILTIN_MASKMOVQ:
13346 case IX86_BUILTIN_MASKMOVDQU:
13347 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13348 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13349 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13350 : CODE_FOR_sse2_maskmovdqu));
13351 /* Note the arg order is different from the operand order. */
13352 arg1 = TREE_VALUE (arglist);
13353 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13354 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13355 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13356 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13357 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13358 mode0 = insn_data[icode].operand[0].mode;
13359 mode1 = insn_data[icode].operand[1].mode;
13360 mode2 = insn_data[icode].operand[2].mode;
13362 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13363 op0 = copy_to_mode_reg (mode0, op0);
13364 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13365 op1 = copy_to_mode_reg (mode1, op1);
13366 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13367 op2 = copy_to_mode_reg (mode2, op2);
13368 pat = GEN_FCN (icode) (op0, op1, op2);
13374 case IX86_BUILTIN_SQRTSS:
13375 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13376 case IX86_BUILTIN_RSQRTSS:
13377 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13378 case IX86_BUILTIN_RCPSS:
13379 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13381 case IX86_BUILTIN_LOADAPS:
13382 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13384 case IX86_BUILTIN_LOADUPS:
13385 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13387 case IX86_BUILTIN_STOREAPS:
13388 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13390 case IX86_BUILTIN_STOREUPS:
13391 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13393 case IX86_BUILTIN_LOADSS:
13394 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13396 case IX86_BUILTIN_STORESS:
13397 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13399 case IX86_BUILTIN_LOADHPS:
13400 case IX86_BUILTIN_LOADLPS:
13401 case IX86_BUILTIN_LOADHPD:
13402 case IX86_BUILTIN_LOADLPD:
13403 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13404 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13405 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
13406 : CODE_FOR_sse2_loadlpd);
13407 arg0 = TREE_VALUE (arglist);
13408 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13409 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13410 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13411 tmode = insn_data[icode].operand[0].mode;
13412 mode0 = insn_data[icode].operand[1].mode;
13413 mode1 = insn_data[icode].operand[2].mode;
13415 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13416 op0 = copy_to_mode_reg (mode0, op0);
13417 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13419 || GET_MODE (target) != tmode
13420 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13421 target = gen_reg_rtx (tmode);
13422 pat = GEN_FCN (icode) (target, op0, op1);
13428 case IX86_BUILTIN_STOREHPS:
13429 case IX86_BUILTIN_STORELPS:
13430 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13431 : CODE_FOR_sse_movlps);
13432 arg0 = TREE_VALUE (arglist);
13433 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13434 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13435 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13436 mode0 = insn_data[icode].operand[1].mode;
13437 mode1 = insn_data[icode].operand[2].mode;
13439 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13440 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13441 op1 = copy_to_mode_reg (mode1, op1);
13443 pat = GEN_FCN (icode) (op0, op0, op1);
13449 case IX86_BUILTIN_STOREHPD:
13450 case IX86_BUILTIN_STORELPD:
13451 icode = (fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_storehpd
13452 : CODE_FOR_sse2_storelpd);
13453 arg0 = TREE_VALUE (arglist);
13454 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13455 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13456 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13457 mode0 = insn_data[icode].operand[0].mode;
13458 mode1 = insn_data[icode].operand[1].mode;
13460 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13461 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13462 op1 = copy_to_mode_reg (mode1, op1);
13464 pat = GEN_FCN (icode) (op0, op1);
13470 case IX86_BUILTIN_MOVNTPS:
13471 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13472 case IX86_BUILTIN_MOVNTQ:
13473 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13475 case IX86_BUILTIN_LDMXCSR:
13476 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13477 target = assign_386_stack_local (SImode, 0);
13478 emit_move_insn (target, op0);
13479 emit_insn (gen_ldmxcsr (target));
13482 case IX86_BUILTIN_STMXCSR:
13483 target = assign_386_stack_local (SImode, 0);
13484 emit_insn (gen_stmxcsr (target));
13485 return copy_to_mode_reg (SImode, target);
13487 case IX86_BUILTIN_SHUFPS:
13488 case IX86_BUILTIN_SHUFPD:
13489 icode = (fcode == IX86_BUILTIN_SHUFPS
13490 ? CODE_FOR_sse_shufps
13491 : CODE_FOR_sse2_shufpd);
13492 arg0 = TREE_VALUE (arglist);
13493 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13494 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13495 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13496 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13497 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13498 tmode = insn_data[icode].operand[0].mode;
13499 mode0 = insn_data[icode].operand[1].mode;
13500 mode1 = insn_data[icode].operand[2].mode;
13501 mode2 = insn_data[icode].operand[3].mode;
13503 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13504 op0 = copy_to_mode_reg (mode0, op0);
13505 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13506 op1 = copy_to_mode_reg (mode1, op1);
13507 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13509 /* @@@ better error message */
13510 error ("mask must be an immediate");
13511 return gen_reg_rtx (tmode);
13514 || GET_MODE (target) != tmode
13515 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13516 target = gen_reg_rtx (tmode);
13517 pat = GEN_FCN (icode) (target, op0, op1, op2);
13523 case IX86_BUILTIN_PSHUFW:
13524 case IX86_BUILTIN_PSHUFD:
13525 case IX86_BUILTIN_PSHUFHW:
13526 case IX86_BUILTIN_PSHUFLW:
13527 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13528 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13529 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13530 : CODE_FOR_mmx_pshufw);
13531 arg0 = TREE_VALUE (arglist);
13532 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13533 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13534 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13535 tmode = insn_data[icode].operand[0].mode;
13536 mode1 = insn_data[icode].operand[1].mode;
13537 mode2 = insn_data[icode].operand[2].mode;
13539 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13540 op0 = copy_to_mode_reg (mode1, op0);
13541 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13543 /* @@@ better error message */
13544 error ("mask must be an immediate");
13548 || GET_MODE (target) != tmode
13549 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13550 target = gen_reg_rtx (tmode);
13551 pat = GEN_FCN (icode) (target, op0, op1);
13557 case IX86_BUILTIN_PSLLDQI128:
13558 case IX86_BUILTIN_PSRLDQI128:
13559 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13560 : CODE_FOR_sse2_lshrti3);
13561 arg0 = TREE_VALUE (arglist);
13562 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13563 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13564 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13565 tmode = insn_data[icode].operand[0].mode;
13566 mode1 = insn_data[icode].operand[1].mode;
13567 mode2 = insn_data[icode].operand[2].mode;
13569 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13571 op0 = copy_to_reg (op0);
13572 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13574 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13576 error ("shift must be an immediate");
13579 target = gen_reg_rtx (V2DImode);
13580 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
13586 case IX86_BUILTIN_FEMMS:
13587 emit_insn (gen_femms ());
13590 case IX86_BUILTIN_PAVGUSB:
13591 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13593 case IX86_BUILTIN_PF2ID:
13594 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13596 case IX86_BUILTIN_PFACC:
13597 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13599 case IX86_BUILTIN_PFADD:
13600 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13602 case IX86_BUILTIN_PFCMPEQ:
13603 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13605 case IX86_BUILTIN_PFCMPGE:
13606 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13608 case IX86_BUILTIN_PFCMPGT:
13609 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13611 case IX86_BUILTIN_PFMAX:
13612 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13614 case IX86_BUILTIN_PFMIN:
13615 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13617 case IX86_BUILTIN_PFMUL:
13618 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13620 case IX86_BUILTIN_PFRCP:
13621 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13623 case IX86_BUILTIN_PFRCPIT1:
13624 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13626 case IX86_BUILTIN_PFRCPIT2:
13627 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13629 case IX86_BUILTIN_PFRSQIT1:
13630 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13632 case IX86_BUILTIN_PFRSQRT:
13633 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13635 case IX86_BUILTIN_PFSUB:
13636 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13638 case IX86_BUILTIN_PFSUBR:
13639 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13641 case IX86_BUILTIN_PI2FD:
13642 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13644 case IX86_BUILTIN_PMULHRW:
13645 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13647 case IX86_BUILTIN_PF2IW:
13648 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13650 case IX86_BUILTIN_PFNACC:
13651 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13653 case IX86_BUILTIN_PFPNACC:
13654 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13656 case IX86_BUILTIN_PI2FW:
13657 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13659 case IX86_BUILTIN_PSWAPDSI:
13660 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13662 case IX86_BUILTIN_PSWAPDSF:
13663 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13665 case IX86_BUILTIN_SSE_ZERO:
13666 target = gen_reg_rtx (V4SFmode);
13667 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
13670 case IX86_BUILTIN_MMX_ZERO:
13671 target = gen_reg_rtx (DImode);
13672 emit_insn (gen_mmx_clrdi (target));
13675 case IX86_BUILTIN_CLRTI:
13676 target = gen_reg_rtx (V2DImode);
13677 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
13681 case IX86_BUILTIN_SQRTSD:
13682 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13683 case IX86_BUILTIN_LOADAPD:
13684 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13685 case IX86_BUILTIN_LOADUPD:
13686 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13688 case IX86_BUILTIN_STOREAPD:
13689 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13690 case IX86_BUILTIN_STOREUPD:
13691 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13693 case IX86_BUILTIN_LOADSD:
13694 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13696 case IX86_BUILTIN_STORESD:
13697 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13699 case IX86_BUILTIN_SETPD1:
13700 target = assign_386_stack_local (DFmode, 0);
13701 arg0 = TREE_VALUE (arglist);
13702 emit_move_insn (adjust_address (target, DFmode, 0),
13703 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13704 op0 = gen_reg_rtx (V2DFmode);
13705 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13706 emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
13709 case IX86_BUILTIN_SETPD:
13710 target = assign_386_stack_local (V2DFmode, 0);
13711 arg0 = TREE_VALUE (arglist);
13712 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13713 emit_move_insn (adjust_address (target, DFmode, 0),
13714 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13715 emit_move_insn (adjust_address (target, DFmode, 8),
13716 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13717 op0 = gen_reg_rtx (V2DFmode);
13718 emit_insn (gen_sse2_movapd (op0, target));
13721 case IX86_BUILTIN_LOADRPD:
13722 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13723 gen_reg_rtx (V2DFmode), 1);
13724 emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
13727 case IX86_BUILTIN_LOADPD1:
13728 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13729 gen_reg_rtx (V2DFmode), 1);
13730 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13733 case IX86_BUILTIN_STOREPD1:
13734 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13735 case IX86_BUILTIN_STORERPD:
13736 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13738 case IX86_BUILTIN_CLRPD:
13739 target = gen_reg_rtx (V2DFmode);
13740 emit_insn (gen_sse_clrv2df (target));
13743 case IX86_BUILTIN_MFENCE:
13744 emit_insn (gen_sse2_mfence ());
13746 case IX86_BUILTIN_LFENCE:
13747 emit_insn (gen_sse2_lfence ());
13750 case IX86_BUILTIN_CLFLUSH:
13751 arg0 = TREE_VALUE (arglist);
13752 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13753 icode = CODE_FOR_sse2_clflush;
13754 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
13755 op0 = copy_to_mode_reg (Pmode, op0);
13757 emit_insn (gen_sse2_clflush (op0));
13760 case IX86_BUILTIN_MOVNTPD:
13761 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13762 case IX86_BUILTIN_MOVNTDQ:
13763 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13764 case IX86_BUILTIN_MOVNTI:
13765 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13767 case IX86_BUILTIN_LOADDQA:
13768 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
13769 case IX86_BUILTIN_LOADDQU:
13770 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
13771 case IX86_BUILTIN_LOADD:
13772 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
13774 case IX86_BUILTIN_STOREDQA:
13775 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
13776 case IX86_BUILTIN_STOREDQU:
13777 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
13778 case IX86_BUILTIN_STORED:
13779 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
13781 case IX86_BUILTIN_MONITOR:
13782 arg0 = TREE_VALUE (arglist);
13783 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13784 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13785 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13786 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13787 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13789 op0 = copy_to_mode_reg (SImode, op0);
13791 op1 = copy_to_mode_reg (SImode, op1);
13793 op2 = copy_to_mode_reg (SImode, op2);
13794 emit_insn (gen_monitor (op0, op1, op2));
13797 case IX86_BUILTIN_MWAIT:
13798 arg0 = TREE_VALUE (arglist);
13799 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13800 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13801 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13803 op0 = copy_to_mode_reg (SImode, op0);
13805 op1 = copy_to_mode_reg (SImode, op1);
13806 emit_insn (gen_mwait (op0, op1));
13809 case IX86_BUILTIN_LOADDDUP:
13810 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
13812 case IX86_BUILTIN_LDDQU:
13813 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
13820 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13821 if (d->code == fcode)
13823 /* Compares are treated specially. */
13824 if (d->icode == CODE_FOR_maskcmpv4sf3
13825 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13826 || d->icode == CODE_FOR_maskncmpv4sf3
13827 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13828 || d->icode == CODE_FOR_maskcmpv2df3
13829 || d->icode == CODE_FOR_vmmaskcmpv2df3
13830 || d->icode == CODE_FOR_maskncmpv2df3
13831 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13832 return ix86_expand_sse_compare (d, arglist, target);
13834 return ix86_expand_binop_builtin (d->icode, arglist, target);
13837 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13838 if (d->code == fcode)
13839 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13841 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13842 if (d->code == fcode)
13843 return ix86_expand_sse_comi (d, arglist, target);
13845 /* @@@ Should really do something sensible here. */
13849 /* Store OPERAND to the memory after reload is completed. This means
13850 that we can't easily use assign_stack_local. */
/* NOTE(review): this listing is non-contiguous (embedded source line numbers
   skip values), so declarations, braces and the final return between the
   visible lines are absent from this view.  Comments below describe only
   the code that is visible.  Returns (per the visible assignments) a MEM
   rtx through which the pushed OPERAND can be read back.  */
13852 ix86_force_to_memory (enum machine_mode mode, rtx operand)
13855 if (!reload_completed)
/* Red zone available (x86-64 ABI): store just below the stack pointer
   without adjusting it.  */
13857 if (TARGET_RED_ZONE)
13859 result = gen_rtx_MEM (mode,
13860 gen_rtx_PLUS (Pmode,
13862 GEN_INT (-RED_ZONE_SIZE)));
13863 emit_move_insn (result, operand);
/* 64-bit without a red zone: push the value as DImode via a PRE_DEC
   of the stack pointer.  */
13865 else if (!TARGET_RED_ZONE && TARGET_64BIT)
13871 operand = gen_lowpart (DImode, operand);
13875 gen_rtx_SET (VOIDmode,
13876 gen_rtx_MEM (DImode,
13877 gen_rtx_PRE_DEC (DImode,
13878 stack_pointer_rtx)),
13884 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit DImode: split into two SImode halves and push each half.  */
13893 split_di (&operand, 1, operands, operands + 1);
13895 gen_rtx_SET (VOIDmode,
13896 gen_rtx_MEM (SImode,
13897 gen_rtx_PRE_DEC (Pmode,
13898 stack_pointer_rtx)),
13901 gen_rtx_SET (VOIDmode,
13902 gen_rtx_MEM (SImode,
13903 gen_rtx_PRE_DEC (Pmode,
13904 stack_pointer_rtx)),
13909 /* It is better to store HImodes as SImodes. */
13910 if (!TARGET_PARTIAL_REG_STALL)
13911 operand = gen_lowpart (SImode, operand);
13915 gen_rtx_SET (VOIDmode,
13916 gen_rtx_MEM (GET_MODE (operand),
13917 gen_rtx_PRE_DEC (SImode,
13918 stack_pointer_rtx)),
13924 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13929 /* Free operand from the memory. */
/* Counterpart of ix86_force_to_memory: when no red zone was used, the
   slot was pushed onto the stack, so deallocate it by bumping the stack
   pointer (presumably by the pushed size — the size computation lines
   are not visible in this listing).  */
13931 ix86_free_from_memory (enum machine_mode mode)
13933 if (!TARGET_RED_ZONE)
13937 if (mode == DImode || TARGET_64BIT)
13939 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13943 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13944 to pop or add instruction if registers are available. */
13945 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13946 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13951 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13952 QImode must go into class Q_REGS.
13953 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
13954 movdf to do mem-to-mem moves through integer regs. */
/* Implements PREFERRED_RELOAD_CLASS: given value X and candidate class
   CLASS, return the class reload should actually use (the early-return
   values on several branches are not visible in this listing).  */
13956 ix86_preferred_reload_class (rtx x, enum reg_class class)
13958 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
13960 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13962 /* SSE can't load any constant directly yet. */
13963 if (SSE_CLASS_P (class))
13965 /* Floats can load 0 and 1. */
13966 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13968 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13969 if (MAYBE_SSE_CLASS_P (class))
13970 return (reg_class_subset_p (class, GENERAL_REGS)
13971 ? GENERAL_REGS : FLOAT_REGS);
13975 /* General regs can load everything. */
13976 if (reg_class_subset_p (class, GENERAL_REGS))
13977 return GENERAL_REGS;
13978 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13979 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13982 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13984 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13989 /* If we are copying between general and FP registers, we need a memory
13990 location. The same is true for SSE and MMX registers.
13992 The macro can't work reliably when one of the CLASSES is class containing
13993 registers from multiple units (SSE, MMX, integer). We avoid this by never
13994 combining those units in single alternative in the machine description.
13995 Ensure that this constraint holds to avoid unexpected surprises.
13997 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13998 enforce these sanity checks. */
13999 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14001 enum machine_mode mode, int strict)
/* Sanity check: each class must be unambiguously FP, SSE or MMX (or none);
   mixed classes are rejected (the action taken on mismatch is not visible
   in this listing).  */
14003 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14004 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14005 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14006 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14007 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14008 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
/* Memory is needed for x87<->integer moves always, and for SSE/MMX unit
   crossings unless the mode fits an integer register and inter-unit moves
   are enabled.  */
14015 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14016 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14017 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14018 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14019 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14021 /* Return the cost of moving data from a register in class CLASS1 to
14022 one in class CLASS2.
14024 It is not required that the cost always equal 2 when FROM is the same as TO;
14025 on some machines it is expensive to move between registers if they are not
14026 general registers. */
14028 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14029 enum reg_class class2)
14031 /* In case we require secondary memory, compute cost of the store followed
14032 by load. In order to avoid bad register allocation choices, we need
14033 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
/* strict==0 here, since REGISTER_MOVE_COST may legitimately see mixed
   classes (see ix86_secondary_memory_needed's header comment).  */
14035 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14039 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14040 MEMORY_MOVE_COST (mode, class1, 1));
14041 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14042 MEMORY_MOVE_COST (mode, class2, 1));
14044 /* In case of copying from general_purpose_register we may emit multiple
14045 stores followed by single load causing memory size mismatch stall.
14046 Count this as arbitrarily high cost of 20. */
14047 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14050 /* In the case of FP/MMX moves, the registers actually overlap, and we
14051 have to switch modes in order to treat them differently. */
14052 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14053 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14059 /* Moves between SSE/MMX and integer unit are expensive. */
14060 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14061 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14062 return ix86_cost->mmxsse_to_integer;
14063 if (MAYBE_FLOAT_CLASS_P (class1))
14064 return ix86_cost->fp_move;
14065 if (MAYBE_SSE_CLASS_P (class1))
14066 return ix86_cost->sse_move;
14067 if (MAYBE_MMX_CLASS_P (class1))
14068 return ix86_cost->mmx_move;
14072 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14074 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14076 /* Flags and only flags can only hold CCmode values. */
14077 if (CC_REGNO_P (regno))
14078 return GET_MODE_CLASS (mode) == MODE_CC;
/* Non-flag registers never hold CC/RANDOM/PARTIAL_INT modes (return value
   on this branch is not visible in this listing).  */
14079 if (GET_MODE_CLASS (mode) == MODE_CC
14080 || GET_MODE_CLASS (mode) == MODE_RANDOM
14081 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14083 if (FP_REGNO_P (regno))
14084 return VALID_FP_MODE_P (mode);
14085 if (SSE_REGNO_P (regno))
14087 /* We implement the move patterns for all vector modes into and
14088 out of SSE registers, even when no operation instructions
14090 return (VALID_SSE_REG_MODE (mode)
14091 || VALID_SSE2_REG_MODE (mode)
14092 || VALID_MMX_REG_MODE (mode)
14093 || VALID_MMX_REG_MODE_3DNOW (mode));
14095 if (MMX_REGNO_P (regno))
14097 /* We implement the move patterns for 3DNOW modes even in MMX mode,
14098 so if the register is available at all, then we can move data of
14099 the given mode into or out of it. */
14100 return (VALID_MMX_REG_MODE (mode)
14101 || VALID_MMX_REG_MODE_3DNOW (mode));
14103 /* We handle both integer and floats in the general purpose registers.
14104 In future we should be able to handle vector modes as well. */
14105 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14107 /* Take care for QImode values - they can be in non-QI regs, but then
14108 they do cause partial register stalls. */
14109 if (regno < 4 || mode != QImode || TARGET_64BIT)
/* Non-QI regs may hold QImode only when partial-register stalls are not
   a concern (or during/after reload, when we have no choice).  */
14111 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14114 /* Return the cost of moving data of mode M between a
14115 register and memory. A value of 2 is the default; this cost is
14116 relative to those in `REGISTER_MOVE_COST'.
14118 If moving between registers and memory is more expensive than
14119 between two registers, you should define this macro to express the
14122 Model also increased moving costs of QImode registers in non
14126 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
/* IN nonzero means a load (memory -> register), zero means a store.
   The `index` selection per GET_MODE_SIZE is mostly not visible in this
   listing.  */
14128 if (FLOAT_CLASS_P (class))
14145 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14147 if (SSE_CLASS_P (class))
14150 switch (GET_MODE_SIZE (mode))
14164 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14166 if (MMX_CLASS_P (class))
14169 switch (GET_MODE_SIZE (mode))
14180 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* General-purpose registers: cost depends on operand size; byte stores
   into non-Q classes are penalized.  */
14182 switch (GET_MODE_SIZE (mode))
14186 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14187 : ix86_cost->movzbl_load);
14189 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14190 : ix86_cost->int_store[0] + 4);
14193 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14195 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14196 if (mode == TFmode)
14198 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14199 * (((int) GET_MODE_SIZE (mode)
14200 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14204 /* Compute a (partial) cost for rtx X. Return true if the complete
14205 cost has been computed, and false if subexpressions should be
14206 scanned. In either case, *TOTAL contains the cost result. */
14209 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14211 enum machine_mode mode = GET_MODE (x);
/* Constants: immediates that don't fit the 64-bit encodings cost more.  */
14219 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
14221 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
/* NOTE(review): `!GET_CODE (x) != LABEL_REF` looks wrong — `!` applied to
   the code value makes this subterm nearly always true; it presumably
   should read `GET_CODE (x) != LABEL_REF`.  Flagged only; code left
   untouched since this listing is incomplete.  */
14223 else if (flag_pic && SYMBOLIC_CONST (x)
14225 || (!GET_CODE (x) != LABEL_REF
14226 && (GET_CODE (x) != SYMBOL_REF
14227 || !SYMBOL_REF_LOCAL_P (x)))))
14234 if (mode == VOIDmode)
/* FP constants: 0.0/1.0 (and friends) can be materialized by the 387.  */
14237 switch (standard_80387_constant_p (x))
14242 default: /* Other constants */
14247 /* Start with (MEM (SYMBOL_REF)), since that's where
14248 it'll probably end up. Add a penalty for size. */
14249 *total = (COSTS_N_INSNS (1)
14250 + (flag_pic != 0 && !TARGET_64BIT)
14251 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14257 /* The zero extensions is often completely free on x86_64, so make
14258 it as cheap as possible. */
14259 if (TARGET_64BIT && mode == DImode
14260 && GET_MODE (XEXP (x, 0)) == SImode)
14262 else if (TARGET_ZERO_EXTEND_WITH_AND)
14263 *total = COSTS_N_INSNS (ix86_cost->add);
14265 *total = COSTS_N_INSNS (ix86_cost->movzx);
14269 *total = COSTS_N_INSNS (ix86_cost->movsx);
/* Shifts: small left shifts may be done with LEA when that's cheaper.  */
14273 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14274 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14276 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14279 *total = COSTS_N_INSNS (ix86_cost->add);
14282 if ((value == 2 || value == 3)
14283 && ix86_cost->lea <= ix86_cost->shift_const)
14285 *total = COSTS_N_INSNS (ix86_cost->lea);
/* 32-bit DImode shifts need a multi-insn sequence.  */
14295 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14297 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14299 if (INTVAL (XEXP (x, 1)) > 32)
14300 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14302 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14306 if (GET_CODE (XEXP (x, 1)) == AND)
14307 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14309 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14314 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14315 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14317 *total = COSTS_N_INSNS (ix86_cost->shift_var);
/* MULT: FP multiply has a flat cost; integer multiply cost depends on
   the number of set bits in a constant operand and on widening.  */
14322 if (FLOAT_MODE_P (mode))
14324 *total = COSTS_N_INSNS (ix86_cost->fmul);
14329 rtx op0 = XEXP (x, 0);
14330 rtx op1 = XEXP (x, 1);
14332 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14334 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14335 for (nbits = 0; value != 0; value &= value - 1)
14339 /* This is arbitrary. */
14342 /* Compute costs correctly for widening multiplication. */
/* NOTE(review): the condition tests op0 for SIGN_EXTEND but op1 for
   ZERO_EXTEND; the rest of this branch treats op0 as the extended
   operand, so this presumably should test op0 for both codes —
   confirm against the upstream source.  */
14343 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
14344 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
14345 == GET_MODE_SIZE (mode))
14347 int is_mulwiden = 0;
14348 enum machine_mode inner_mode = GET_MODE (op0);
14350 if (GET_CODE (op0) == GET_CODE (op1))
14351 is_mulwiden = 1, op1 = XEXP (op1, 0);
14352 else if (GET_CODE (op1) == CONST_INT)
14354 if (GET_CODE (op0) == SIGN_EXTEND)
14355 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
14358 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
14362 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
14365 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14366 + nbits * ix86_cost->mult_bit)
14367 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
14376 if (FLOAT_MODE_P (mode))
14377 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14379 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
/* PLUS: recognize LEA-shaped address arithmetic and price it as one LEA
   plus the cost of the subexpressions.  */
14383 if (FLOAT_MODE_P (mode))
14384 *total = COSTS_N_INSNS (ix86_cost->fadd);
14385 else if (GET_MODE_CLASS (mode) == MODE_INT
14386 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
14388 if (GET_CODE (XEXP (x, 0)) == PLUS
14389 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14390 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
14391 && CONSTANT_P (XEXP (x, 1)))
14393 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
14394 if (val == 2 || val == 4 || val == 8)
14396 *total = COSTS_N_INSNS (ix86_cost->lea);
14397 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14398 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
14400 *total += rtx_cost (XEXP (x, 1), outer_code);
14404 else if (GET_CODE (XEXP (x, 0)) == MULT
14405 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
14407 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
14408 if (val == 2 || val == 4 || val == 8)
14410 *total = COSTS_N_INSNS (ix86_cost->lea);
14411 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14412 *total += rtx_cost (XEXP (x, 1), outer_code);
14416 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14418 *total = COSTS_N_INSNS (ix86_cost->lea);
14419 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14420 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14421 *total += rtx_cost (XEXP (x, 1), outer_code);
14428 if (FLOAT_MODE_P (mode))
14430 *total = COSTS_N_INSNS (ix86_cost->fadd);
/* 32-bit DImode add/sub takes two instructions; SUBREG'd operands of a
   narrower mode cost double.  */
14438 if (!TARGET_64BIT && mode == DImode)
14440 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
14441 + (rtx_cost (XEXP (x, 0), outer_code)
14442 << (GET_MODE (XEXP (x, 0)) != DImode))
14443 + (rtx_cost (XEXP (x, 1), outer_code)
14444 << (GET_MODE (XEXP (x, 1)) != DImode)));
14450 if (FLOAT_MODE_P (mode))
14452 *total = COSTS_N_INSNS (ix86_cost->fchs);
14458 if (!TARGET_64BIT && mode == DImode)
14459 *total = COSTS_N_INSNS (ix86_cost->add * 2);
14461 *total = COSTS_N_INSNS (ix86_cost->add);
14465 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
14466 && XEXP (XEXP (x, 0), 1) == const1_rtx
14467 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
14468 && XEXP (x, 1) == const0_rtx)
14470 /* This kind of construct is implemented using test[bwl].
14471 Treat it as if we had an AND. */
14472 *total = (COSTS_N_INSNS (ix86_cost->add)
14473 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
14474 + rtx_cost (const1_rtx, outer_code));
14480 if (!TARGET_SSE_MATH
14482 || (mode == DFmode && !TARGET_SSE2))
14487 if (FLOAT_MODE_P (mode))
14488 *total = COSTS_N_INSNS (ix86_cost->fabs);
14492 if (FLOAT_MODE_P (mode))
14493 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
14497 if (XINT (x, 1) == UNSPEC_TP)
14508 static int current_machopic_label_num;
14510 /* Given a symbol name and its associated stub, write out the
14511 definition of the stub. */
/* Darwin (Mach-O) only: emits the lazy-binding stub, the binder entry
   and the lazy pointer for SYMB into FILE.  The PIC/non-PIC section
   selection branches are partly missing from this listing.  */
14514 machopic_output_stub (FILE *file, const char *symb, const char *stub)
14516 unsigned int length;
14517 char *binder_name, *symbol_name, lazy_ptr_name[32];
14518 int label = ++current_machopic_label_num;
14520 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
14521 symb = (*targetm.strip_name_encoding) (symb);
14523 length = strlen (stub);
14524 binder_name = alloca (length + 32);
14525 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
14527 length = strlen (symb);
14528 symbol_name = alloca (length + 32);
14529 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
14531 sprintf (lazy_ptr_name, "L%d$lz", label);
14534 machopic_picsymbol_stub_section ();
14536 machopic_symbol_stub_section ();
14538 fprintf (file, "%s:\n", stub);
14539 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC stub: compute our own address with call/pop, then jump through
   the lazy pointer.  */
14543 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14544 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14545 fprintf (file, "\tjmp %%edx\n");
14548 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
/* Binder entry: push the lazy pointer's address and enter dyld.  */
14550 fprintf (file, "%s:\n", binder_name);
14554 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14555 fprintf (file, "\tpushl %%eax\n");
14558 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
14560 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
14562 machopic_lazy_symbol_ptr_section ();
14563 fprintf (file, "%s:\n", lazy_ptr_name);
14564 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14565 fprintf (file, "\t.long %s\n", binder_name);
14567 #endif /* TARGET_MACHO */
14569 /* Order the registers for register allocator. */
/* Fills reg_alloc_order[]: call-clobbered GPRs first, then call-saved
   GPRs, then x87 or SSE registers (x87 first unless SSE math is the
   default), then MMX, padding the remainder with 0.  */
14572 x86_order_regs_for_local_alloc (void)
14577 /* First allocate the local general purpose registers. */
14578 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14579 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14580 reg_alloc_order [pos++] = i;
14582 /* Global general purpose registers. */
14583 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14584 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14585 reg_alloc_order [pos++] = i;
14587 /* x87 registers come first in case we are doing FP math
14589 if (!TARGET_SSE_MATH)
14590 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14591 reg_alloc_order [pos++] = i;
14593 /* SSE registers. */
14594 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14595 reg_alloc_order [pos++] = i;
14596 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14597 reg_alloc_order [pos++] = i;
14599 /* x87 registers. */
14600 if (TARGET_SSE_MATH)
14601 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14602 reg_alloc_order [pos++] = i;
14604 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14605 reg_alloc_order [pos++] = i;
14607 /* Initialize the rest of array as we do not allocate some registers
14609 while (pos < FIRST_PSEUDO_REGISTER)
14610 reg_alloc_order [pos++] = 0;
14613 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
14614 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
14617 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
14618 struct attribute_spec.handler. */
/* Rejects the attribute (setting *no_add_attrs and warning) when the
   target is not a RECORD/UNION type, or when the opposite layout
   attribute is already present.  */
14620 ix86_handle_struct_attribute (tree *node, tree name,
14621 tree args ATTRIBUTE_UNUSED,
14622 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
14625 if (DECL_P (*node))
14627 if (TREE_CODE (*node) == TYPE_DECL)
14628 type = &TREE_TYPE (*node);
14633 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
14634 || TREE_CODE (*type) == UNION_TYPE)))
14636 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
14637 *no_add_attrs = true;
14640 else if ((is_attribute_p ("ms_struct", name)
14641 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
14642 || ((is_attribute_p ("gcc_struct", name)
14643 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
14645 warning ("%qs incompatible attribute ignored",
14646 IDENTIFIER_POINTER (name));
14647 *no_add_attrs = true;
/* True if RECORD_TYPE should use MS bit-field layout: either the target
   default says so and "gcc_struct" doesn't override it, or the type
   carries an explicit "ms_struct" attribute.  */
14654 ix86_ms_bitfield_layout_p (tree record_type)
14656 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
14657 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
14658 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
14661 /* Returns an expression indicating where the this parameter is
14662 located on entry to the FUNCTION. */
14665 x86_this_parameter (tree function)
14667 tree type = TREE_TYPE (function);
/* 64-bit: `this` is in the first (or second, when the return value is
   passed by invisible reference) integer argument register.  */
14671 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
14672 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
/* 32-bit regparm/fastcall: `this` arrives in a register.  */
14675 if (ix86_function_regparm (type, function) > 0)
14679 parm = TYPE_ARG_TYPES (type);
14680 /* Figure out whether or not the function has a variable number of
14682 for (; parm; parm = TREE_CHAIN (parm))
14683 if (TREE_VALUE (parm) == void_type_node)
14685 /* If not, the this parameter is in the first argument. */
14689 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
14691 return gen_rtx_REG (SImode, regno);
/* Otherwise `this` is on the stack, after the return address (and after
   the hidden aggregate-return pointer if there is one).  */
14695 if (aggregate_value_p (TREE_TYPE (type), type))
14696 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
14698 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
14701 /* Determine whether x86_output_mi_thunk can succeed. */
14704 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
14705 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
14706 HOST_WIDE_INT vcall_offset, tree function)
14708 /* 64-bit can handle anything. */
14712 /* For 32-bit, everything's fine if we have one free register. */
14713 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
14716 /* Need a free register for vcall_offset. */
14720 /* Need a free register for GOT references. */
14721 if (flag_pic && !(*targetm.binds_local_p) (function))
14724 /* Otherwise ok. */
14728 /* Output the assembler code for a thunk function. THUNK_DECL is the
14729 declaration for the thunk function itself, FUNCTION is the decl for
14730 the target function. DELTA is an immediate constant offset to be
14731 added to THIS. If VCALL_OFFSET is nonzero, the word at
14732 *(*this + vcall_offset) should be added to THIS. */
14735 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
14736 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
14737 HOST_WIDE_INT vcall_offset, tree function)
14740 rtx this = x86_this_parameter (function);
14743 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14744 pull it in now and let DELTA benefit. */
14747 else if (vcall_offset)
14749 /* Put the this parameter into %eax. */
14751 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
14752 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14755 this_reg = NULL_RTX;
14757 /* Adjust the this parameter by a fixed constant. */
14760 xops[0] = GEN_INT (delta);
14761 xops[1] = this_reg ? this_reg : this;
/* On 64-bit, a delta outside the immediate range must be loaded into
   R10 (free at function entry) first.  */
14764 if (!x86_64_general_operand (xops[0], DImode))
14766 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14768 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
14772 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14775 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14778 /* Adjust the this parameter by a value stored in the vtable. */
14782 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
/* 32-bit scratch: ECX normally, EAX for fastcall (ECX carries `this`).  */
14785 int tmp_regno = 2 /* ECX */;
14786 if (lookup_attribute ("fastcall",
14787 TYPE_ATTRIBUTES (TREE_TYPE (function))))
14788 tmp_regno = 0 /* EAX */;
14789 tmp = gen_rtx_REG (SImode, tmp_regno);
14792 xops[0] = gen_rtx_MEM (Pmode, this_reg);
14795 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14797 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14799 /* Adjust the this parameter. */
14800 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
14801 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
14803 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
14804 xops[0] = GEN_INT (vcall_offset);
14806 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14807 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
14809 xops[1] = this_reg;
14811 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14813 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14816 /* If necessary, drop THIS back to its stack slot. */
14817 if (this_reg && this_reg != this)
14819 xops[0] = this_reg;
14821 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
/* Finally, tail-jump to the real function — directly when it binds
   locally, otherwise via the GOT (or a Mach-O stub on Darwin).  */
14824 xops[0] = XEXP (DECL_RTL (function), 0);
14827 if (!flag_pic || (*targetm.binds_local_p) (function))
14828 output_asm_insn ("jmp\t%P0", xops);
14831 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
14832 tmp = gen_rtx_CONST (Pmode, tmp);
14833 tmp = gen_rtx_MEM (QImode, tmp);
14835 output_asm_insn ("jmp\t%A0", xops);
14840 if (!flag_pic || (*targetm.binds_local_p) (function))
14841 output_asm_insn ("jmp\t%P0", xops);
14846 rtx sym_ref = XEXP (DECL_RTL (function), 0);
14847 tmp = (gen_rtx_SYMBOL_REF
14849 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
14850 tmp = gen_rtx_MEM (QImode, tmp);
14852 output_asm_insn ("jmp\t%0", xops);
14855 #endif /* TARGET_MACHO */
14857 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14858 output_set_got (tmp);
14861 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
14862 output_asm_insn ("jmp\t{*}%1", xops);
/* TARGET_ASM_FILE_START hook: emit the standard prologue plus the
   target-conditional .version/.global __fltused/.intel_syntax
   directives.  */
14868 x86_file_start (void)
14870 default_file_start ();
14871 if (X86_FILE_START_VERSION_DIRECTIVE)
14872 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
14873 if (X86_FILE_START_FLTUSED)
14874 fputs ("\t.global\t__fltused\n", asm_out_file);
14875 if (ix86_asm_dialect == ASM_INTEL)
14876 fputs ("\t.intel_syntax\n", asm_out_file);
/* ADJUST_FIELD_ALIGN worker: on 32-bit without -malign-double, cap the
   alignment of double/_Complex double and integer-class fields at 32
   bits (the i386 psABI packs doubles on 4-byte boundaries).  */
14880 x86_field_alignment (tree field, int computed)
14882 enum machine_mode mode;
14883 tree type = TREE_TYPE (field);
14885 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
14887 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
14888 ? get_inner_array_type (type) : type);
14889 if (mode == DFmode || mode == DCmode
14890 || GET_MODE_CLASS (mode) == MODE_INT
14891 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
14892 return MIN (32, computed);
14896 /* Output assembler code to FILE to increment profiler label # LABELNO
14897 for profiling a function entry. */
/* Emits the mcount call sequence; the four fprintf groups correspond
   (per the visible mnemonics) to 64-bit PIC, 64-bit non-PIC, 32-bit
   PIC and 32-bit non-PIC — the selecting conditionals are not visible
   in this listing.  */
14899 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
14904 #ifndef NO_PROFILE_COUNTERS
14905 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
14907 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
14911 #ifndef NO_PROFILE_COUNTERS
14912 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
14914 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14918 #ifndef NO_PROFILE_COUNTERS
14919 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
14920 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
14922 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
14926 #ifndef NO_PROFILE_COUNTERS
14927 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
14928 PROFILE_COUNT_REGISTER);
14930 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14934 /* We don't have exact information about the insn sizes, but we may assume
14935 quite safely that we are informed about all 1 byte insns and memory
14936 address sizes. This is enough to eliminate unnecessary padding in
/* Return a conservative lower bound (in bytes) on the encoded size of
   INSN, used by the K8 jump-mispredict padding pass below.
   NOTE(review): interior lines (several returns, the final fallback)
   are elided in this excerpt.  */
14940 min_insn_size (rtx insn)
      /* Non-insns and inactive insns occupy no bytes.  */
14944 if (!INSN_P (insn) || !active_insn_p (insn))
14947 /* Discard alignments we've emit and jump instructions. */
14948 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
14949 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
      /* Jump tables are data, not executed code.  */
14951 if (GET_CODE (insn) == JUMP_INSN
14952 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
14953 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
14956 /* Important case - calls are always 5 bytes.
14957 It is common to have many calls in the row. */
14958 if (GET_CODE (insn) == CALL_INSN
14959 && symbolic_reference_mentioned_p (PATTERN (insn))
14960 && !SIBLING_CALL_P (insn))
14962 if (get_attr_length (insn) <= 1)
14965 /* For normal instructions we may rely on the sizes of addresses
14966 and the presence of symbol to require 4 bytes of encoding.
14967 This is not the case for jumps where references are PC relative. */
14968 if (GET_CODE (insn) != JUMP_INSN)
14970 l = get_attr_length_address (insn);
14971 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
14980 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Pass over the insn stream inserting p2align padding so that no 16-byte
   window contains four or more branches (jumps/calls), which the K8
   branch predictor cannot track.  Uses min_insn_size () above as a
   conservative size estimate.  NOTE(review): the local declarations of
   `isjump` and several loop braces are elided in this excerpt.  */
14984 ix86_avoid_jump_misspredicts (void)
14986 rtx insn, start = get_insns ();
14987 int nbytes = 0, njumps = 0;
14990 /* Look for all minimal intervals of instructions containing 4 jumps.
14991 The intervals are bounded by START and INSN. NBYTES is the total
14992 size of instructions in the interval including INSN and not including
14993 START. When the NBYTES is smaller than 16 bytes, it is possible
14994 that the end of START and INSN ends up in the same 16byte page.
14996 The smallest offset in the page INSN can start is the case where START
14997 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
14998 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15000 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15003 nbytes += min_insn_size (insn);
15005 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
15006 INSN_UID (insn), min_insn_size (insn));
      /* Real (non-table) jumps and calls count toward the 4-branch limit.  */
15007 if ((GET_CODE (insn) == JUMP_INSN
15008 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15009 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15010 || GET_CODE (insn) == CALL_INSN)
      /* Shrink the window from the front until at most 3 jumps remain.  */
15017 start = NEXT_INSN (start);
15018 if ((GET_CODE (start) == JUMP_INSN
15019 && GET_CODE (PATTERN (start)) != ADDR_VEC
15020 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15021 || GET_CODE (start) == CALL_INSN)
15022 njumps--, isjump = 1;
15025 nbytes -= min_insn_size (start);
15030 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
15031 INSN_UID (start), INSN_UID (insn), nbytes);
      /* Four jumps could share one 16-byte line: pad INSN out of it.  */
15033 if (njumps == 3 && isjump && nbytes < 16)
15035 int padsize = 15 - nbytes + min_insn_size (insn);
15038 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
15039 INSN_UID (insn), padsize);
15040 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15045 /* AMD Athlon works faster
15046 when RET is not destination of conditional jump or directly preceded
15047 by other jump instruction. We avoid the penalty by inserting NOP just
15048 before the RET instructions in such cases. */
/* Walk every predecessor edge of the exit block and, where the block's
   final RET is reachable straight from a branch (or the function body is
   empty), replace it with the "long" return pattern.
   NOTE(review): declarations of `e`/`ei`/`prev` and several control-flow
   lines (the `continue;`s, `replace = true;` assignments, and the final
   `if (replace)` guard) are elided from this excerpt.  */
15050 ix86_pad_returns (void)
15055 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
15057 basic_block bb = e->src;
15058 rtx ret = BB_END (bb);
15060 bool replace = false;
      /* Only plain RETURN jumps in hot blocks are worth padding.  */
15062 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15063 || !maybe_hot_bb_p (bb))
      /* Find the nearest active insn or label before the RET.  */
15065 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15066 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
      /* A label directly before RET: some predecessor jumps here.  */
15068 if (prev && GET_CODE (prev) == CODE_LABEL)
15073 FOR_EACH_EDGE (e, ei, bb->preds)
15074 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15075 && !(e->flags & EDGE_FALLTHRU))
      /* RET directly preceded by a conditional jump or a call.  */
15080 prev = prev_active_insn (ret);
15082 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15083 || GET_CODE (prev) == CALL_INSN))
15085 /* Empty functions get branch mispredict even when the jump destination
15086 is not visible to us. */
15087 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15092 emit_insn_before (gen_return_internal_long (), ret);
15098 /* Implement machine specific optimizations. We implement padding of returns
15099 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* Machine-dependent reorg hook body: both fixups only run when
   optimizing and not optimizing for size.
   NOTE(review): the "static void ix86_reorg (void)" header and braces
   are elided from this excerpt.  */
15103 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
15104 ix86_pad_returns ();
15105 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
15106 ix86_avoid_jump_misspredicts ();
15109 /* Return nonzero when QImode register that must be represented via REX prefix
/* Scans INSN's extracted operands for a QImode hard register with
   number >= 4; in 64-bit mode such registers (spl/bpl/sil/dil and
   r8b-r15b) require a REX prefix.
   NOTE(review): the QImode check on each operand and the returns are
   in elided lines — confirm against the full file.  */
15112 x86_extended_QIreg_mentioned_p (rtx insn)
15115 extract_insn_cached (insn);
15116 for (i = 0; i < recog_data.n_operands; i++)
15117 if (REG_P (recog_data.operand[i])
15118 && REGNO (recog_data.operand[i]) >= 4)
15123 /* Return nonzero when P points to register encoded via REX prefix.
15124 Called via for_each_rtx. */
/* for_each_rtx callback: nonzero iff *P is a REX-only register
   (r8-r15 or xmm8-xmm15).  NOTE(review): the REG_P guard on *P sits
   on an elided line.  */
15126 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15128 unsigned int regno;
15131 regno = REGNO (*p);
15132 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15135 /* Return true when INSN mentions register that must be encoded using REX
/* Walks the whole pattern of INSN with extended_reg_mentioned_1 above;
   for_each_rtx returns nonzero as soon as any subexpression matches.  */
15138 x86_extended_reg_mentioned_p (rtx insn)
15140 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15143 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15144 optabs would emit if we didn't have TFmode patterns. */
/* operands[0] is the FP destination, operands[1] the unsigned integer
   source.  Fast path: if the value is non-negative when viewed as
   signed, a plain signed FLOAT is correct.  Slow path: halve the value
   (preserving the low bit via OR so rounding stays correct), convert,
   then double the result with f0 + f0.  */
15147 x86_emit_floatuns (rtx operands[2])
15149 rtx neglab, donelab, i0, i1, f0, in, out;
15150 enum machine_mode mode, inmode;
15152 inmode = GET_MODE (operands[1]);
      /* Only SImode and DImode sources are supported (elided line
	 presumably aborts otherwise — confirm in full file).  */
15153 if (inmode != SImode
15154 && inmode != DImode)
15158 in = force_reg (inmode, operands[1]);
15159 mode = GET_MODE (out);
15160 neglab = gen_label_rtx ();
15161 donelab = gen_label_rtx ();
15162 i1 = gen_reg_rtx (Pmode);
15163 f0 = gen_reg_rtx (mode);
      /* Branch to the slow path when the sign bit is set.  */
15165 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15167 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15168 emit_jump_insn (gen_jump (donelab));
15171 emit_label (neglab);
      /* i0 = (in >> 1) | (in & 1); convert; out = f0 + f0.  */
15173 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15174 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15175 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15176 expand_float (f0, i0, 0);
15177 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15179 emit_label (donelab);
15182 /* Initialize vector TARGET via VALS. */
/* Strategy, as visible below: (1) all-constant vectors are loaded from
   the constant pool; (2) vectors constant except for element 0 are
   loaded from the pool and then have element 0 overwritten (loadlpd /
   movss); (3) otherwise elements are combined with unpcklpd/unpcklps.
   NOTE(review): many guard conditions, `break`s, switch labels and the
   V4SF variable declarations sit on elided lines.  */
15184 ix86_expand_vector_init (rtx target, rtx vals)
15186 enum machine_mode mode = GET_MODE (target);
15187 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15188 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
      /* Find the lowest-index non-constant element, if any.  */
15191 for (i = n_elts - 1; i >= 0; i--)
15192 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
15193 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
15196 /* Few special cases first...
15197 ... constants are best loaded from constant pool. */
15200 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15204 /* ... values where only first field is non-constant are best loaded
15205 from the pool and overwritten via move later. */
15208 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
15209 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15211 switch (GET_MODE (target))
      /* V2DF: overwrite the low double in place.  */
15214 emit_insn (gen_sse2_loadlpd (target, target, XVECEXP (vals, 0, 0)));
15219 /* ??? We can represent this better. */
15220 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
15221 GET_MODE_INNER (mode), 0);
15222 op = force_reg (mode, op);
15223 emit_insn (gen_sse_movss (target, target, op));
15233 /* And the busy sequence doing rotations. */
15234 switch (GET_MODE (target))
      /* V2DF: interleave the two scalars with unpcklpd.  */
15239 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
15241 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
15243 vecop0 = force_reg (V2DFmode, vecop0);
15244 vecop1 = force_reg (V2DFmode, vecop1);
15245 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
      /* V4SF: two unpcklps rounds merge the four scalars.  */
15251 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
15253 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
15255 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
15257 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
15258 rtx tmp1 = gen_reg_rtx (V4SFmode);
15259 rtx tmp2 = gen_reg_rtx (V4SFmode);
15261 vecop0 = force_reg (V4SFmode, vecop0);
15262 vecop1 = force_reg (V4SFmode, vecop1);
15263 vecop2 = force_reg (V4SFmode, vecop2);
15264 vecop3 = force_reg (V4SFmode, vecop3);
15265 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
15266 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
15267 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
15275 /* Implements target hook vector_mode_supported_p. */
/* A vector mode is supported when the matching ISA extension is
   enabled.  NOTE(review): the `return true;` lines after each test and
   the final `return false;` are elided from this excerpt.  */
15277 ix86_vector_mode_supported_p (enum machine_mode mode)
15279 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
15281 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
15283 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
15285 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
15290 /* Worker function for TARGET_MD_ASM_CLOBBERS.
15292 We do this in the new i386 backend to maintain source compatibility
15293 with the old cc0-based compiler. */
/* Prepends "flags", "fpsr" and "dirflag" to the clobber list of every
   asm statement, so asms are always assumed to clobber them.
   NOTE(review): the tail argument of each tree_cons and the final
   return sit on elided lines.  */
15296 ix86_md_asm_clobbers (tree clobbers)
15298 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
15300 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
15302 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
15307 /* Worker function for REVERSE_CONDITION. */
/* Floating-point compare modes must use the unordered-aware reversal
   (NaN operands make plain reversal incorrect); everything else uses
   the ordinary reverse_condition.  */
15310 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
15312 return (mode != CCFPmode && mode != CCFPUmode
15313 ? reverse_condition (code)
15314 : reverse_condition_maybe_unordered (code));
15317 /* Output code to perform an x87 FP register move, from OPERANDS[1]
/* Returns the assembler template for an x87 reg-reg move.  If the
   source register dies in this insn we can pop it: ffreep when the
   destination is the stack top (cheaper on CPUs with TARGET_USE_FFREEP),
   otherwise fstp.  NOTE(review): the final non-stack-top template is on
   an elided line.  */
15321 output_387_reg_move (rtx insn, rtx *operands)
15323 if (REG_P (operands[1])
15324 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15326 if (REGNO (operands[0]) == FIRST_STACK_REG
15327 && TARGET_USE_FFREEP)
15328 return "ffreep\t%y0";
15329 return "fstp\t%y0";
15331 if (STACK_TOP_P (operands[0]))
15332 return "fld%z1\t%y1";
15336 /* Output code to perform a conditional jump to LABEL, if C2 flag in
15337 FP status register is set. */
/* Reads the x87 status word with fnstsw, then either transfers it to
   EFLAGS via sahf (when TARGET_USE_SAHF) and tests UNORDERED, or tests
   the C2 bit (0x04 in the high byte) directly and branches on NE.
   Finally emits the conditional jump to LABEL.  */
15340 ix86_emit_fp_unordered_jump (rtx label)
15342 rtx reg = gen_reg_rtx (HImode);
15345 emit_insn (gen_x86_fnstsw_1 (reg));
15347 if (TARGET_USE_SAHF)
15349 emit_insn (gen_x86_sahf_1 (reg));
15351 temp = gen_rtx_REG (CCmode, FLAGS_REG);
15352 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
      /* No sahf: test bit 0x04 (C2) of the status word's high byte.  */
15356 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
15358 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15359 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
15362 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
15363 gen_rtx_LABEL_REF (VOIDmode, label),
15365 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
15366 emit_jump_insn (temp);
15369 /* Output code to perform a log1p XFmode calculation. */
/* Computes op0 = log1p (op1) on the x87.  fyl2xp1 is only accurate for
   |x| below roughly 1 - sqrt(2)/2 (the 0.2928... threshold compared
   against below); for larger magnitudes, fall back to
   fldln2 * log2 (1 + x) via fyl2x.  Both paths multiply by the ln(2)
   constant (standard_80387_constant_rtx (4), i.e. fldln2) to convert
   the base-2 logarithm to a natural logarithm.  */
15371 void ix86_emit_i387_log1p (rtx op0, rtx op1)
15373 rtx label1 = gen_label_rtx ();
15374 rtx label2 = gen_label_rtx ();
15376 rtx tmp = gen_reg_rtx (XFmode);
15377 rtx tmp2 = gen_reg_rtx (XFmode);
      /* Branch to label1 when |op1| >= the accuracy threshold.  */
15379 emit_insn (gen_absxf2 (tmp, op1));
15380 emit_insn (gen_cmpxf (tmp,
15381 CONST_DOUBLE_FROM_REAL_VALUE (
15382 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
15384 emit_jump_insn (gen_bge (label1));
      /* Small |x|: direct fyl2xp1.  */
15386 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15387 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
15388 emit_jump (label2);
      /* Large |x|: compute 1 + x explicitly, then fyl2x.  */
15390 emit_label (label1);
15391 emit_move_insn (tmp, CONST1_RTX (XFmode));
15392 emit_insn (gen_addxf3 (tmp, op1, tmp));
15393 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15394 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
15396 emit_label (label2);
15399 /* Solaris named-section hook. Parameters are as for
15400 named_section_real. */
/* Emits the .section directive directly for ".eh_frame" so the
   "@unwind" section type required by Binutils >= 2.15 is always
   present; all other sections defer to the default ELF handler.
   NOTE(review): the condition guarding the strcmp (presumably an
   assembler-capability check) and an early return are on elided
   lines.  */
15403 i386_solaris_elf_named_section (const char *name, unsigned int flags,
15406 /* With Binutils 2.15, the "@unwind" marker must be specified on
15407 every occurrence of the ".eh_frame" section, not just the first
15410 && strcmp (name, ".eh_frame") == 0)
15412 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
15413 flags & SECTION_WRITE ? "aw" : "a");
15416 default_elf_asm_named_section (name, flags, decl);
15419 #include "gt-i386.h"