1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #ifndef CHECK_STACK_LIMIT
53 #define CHECK_STACK_LIMIT (-1)
56 /* Return index of given mode in mult and division cost tables. */
57 #define MODE_INDEX(mode) \
58 ((mode) == QImode ? 0 \
59 : (mode) == HImode ? 1 \
60 : (mode) == SImode ? 2 \
61 : (mode) == DImode ? 3 \
64 /* Processor costs (relative to an add) */
66 struct processor_costs size_cost = { /* costs for tunning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
86 {2, 2, 2}, /* cost of loading integer registers */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
109 /* Processor costs (relative to an add) */
111 struct processor_costs i386_cost = { /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {8, 8, 8}, /* cost of loading integer registers */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
155 struct processor_costs i486_cost = { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of loading integer registers */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
199 struct processor_costs pentium_cost = {
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {4, 4, 6}, /* cost of loading integer registers */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
243 struct processor_costs pentiumpro_cost = {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
258 Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
263 {4, 4, 6}, /* cost of loading integer registers */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
287 struct processor_costs k6_cost = {
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 4}, /* cost of loading integer registers */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
331 struct processor_costs athlon_cost = {
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
345 in QImode, HImode and SImode.
346 Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
350 in SFmode, DFmode and XFmode */
351 {6, 6, 8}, /* cost of loading integer registers */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
354 in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
359 in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
361 in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
375 struct processor_costs k8_cost = {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {6, 6, 8}, /* cost of loading integer registers */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
419 struct processor_costs pentium4_cost = {
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
439 {4, 4, 6}, /* cost of loading integer registers */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
463 struct processor_costs nocona_cost = {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
483 {4, 4, 4}, /* cost of loading integer registers */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
506 const struct processor_costs *ix86_cost = &pentium_cost;
508 /* Processor feature/optimization bitmasks. */
509 #define m_386 (1<<PROCESSOR_I386)
510 #define m_486 (1<<PROCESSOR_I486)
511 #define m_PENT (1<<PROCESSOR_PENTIUM)
512 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
513 #define m_K6 (1<<PROCESSOR_K6)
514 #define m_ATHLON (1<<PROCESSOR_ATHLON)
515 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
516 #define m_K8 (1<<PROCESSOR_K8)
517 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
518 #define m_NOCONA (1<<PROCESSOR_NOCONA)
520 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
521 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
522 const int x86_zero_extend_with_and = m_486 | m_PENT;
523 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
524 const int x86_double_with_add = ~m_386;
525 const int x86_use_bit_test = m_386;
526 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
527 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
528 const int x86_3dnow_a = m_ATHLON_K8;
529 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
530 const int x86_branch_hints = m_PENT4 | m_NOCONA;
531 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
532 const int x86_partial_reg_stall = m_PPRO;
533 const int x86_use_loop = m_K6;
534 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
535 const int x86_use_mov0 = m_K6;
536 const int x86_use_cltd = ~(m_PENT | m_K6);
537 const int x86_read_modify_write = ~m_PENT;
538 const int x86_read_modify = ~(m_PENT | m_PPRO);
539 const int x86_split_long_moves = m_PPRO;
540 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
541 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
542 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
543 const int x86_qimode_math = ~(0);
544 const int x86_promote_qi_regs = 0;
545 const int x86_himode_math = ~(m_PPRO);
546 const int x86_promote_hi_regs = m_PPRO;
547 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
548 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
549 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
550 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
551 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
552 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
553 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
554 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
555 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
556 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
557 const int x86_decompose_lea = m_PENT4 | m_NOCONA;
558 const int x86_shift1 = ~m_486;
559 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
560 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
561 /* Set for machines where the type and dependencies are resolved on SSE register
562 parts instead of whole registers, so we may maintain just lower part of
563 scalar values in proper format leaving the upper part undefined. */
564 const int x86_sse_partial_regs = m_ATHLON_K8;
565 /* Athlon optimizes partial-register FPS special case, thus avoiding the
566 need for extra instructions beforehand */
567 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
568 const int x86_sse_typeless_stores = m_ATHLON_K8;
569 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
570 const int x86_use_ffreep = m_ATHLON_K8;
571 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
572 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
573 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
574 /* Some CPU cores are not able to predict more than 4 branch instructions in
575 the 16 byte window. */
576 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
578 /* In case the average insn count for single function invocation is
579 lower than this constant, emit fast (but longer) prologue and
581 #define FAST_PROLOGUE_INSN_COUNT 20
583 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
584 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
585 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
586 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
588 /* Array of the smallest class containing reg number REGNO, indexed by
589 REGNO. Used by REGNO_REG_CLASS in i386.h. */
591 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
594 AREG, DREG, CREG, BREG,
596 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
598 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
599 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
602 /* flags, fpsr, dirflag, frame */
603 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
604 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
606 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
608 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
609 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
610 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
614 /* The "default" register map used in 32bit mode. */
616 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
618 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
619 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
620 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
621 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
622 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
623 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
624 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
627 static int const x86_64_int_parameter_registers[6] =
629 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
630 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
633 static int const x86_64_int_return_registers[4] =
635 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
638 /* The "default" register map used in 64bit mode. */
639 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
641 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
642 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
643 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
644 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
645 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
646 8,9,10,11,12,13,14,15, /* extended integer registers */
647 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
650 /* Define the register numbers to be used in Dwarf debugging information.
651 The SVR4 reference port C compiler uses the following register numbers
652 in its Dwarf output code:
653 0 for %eax (gcc regno = 0)
654 1 for %ecx (gcc regno = 2)
655 2 for %edx (gcc regno = 1)
656 3 for %ebx (gcc regno = 3)
657 4 for %esp (gcc regno = 7)
658 5 for %ebp (gcc regno = 6)
659 6 for %esi (gcc regno = 4)
660 7 for %edi (gcc regno = 5)
661 The following three DWARF register numbers are never generated by
662 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
663 believes these numbers have these meanings.
664 8 for %eip (no gcc equivalent)
665 9 for %eflags (gcc regno = 17)
666 10 for %trapno (no gcc equivalent)
667 It is not at all clear how we should number the FP stack registers
668 for the x86 architecture. If the version of SDB on x86/svr4 were
669 a bit less brain dead with respect to floating-point then we would
670 have a precedent to follow with respect to DWARF register numbers
671 for x86 FP registers, but the SDB on x86/svr4 is so completely
672 broken with respect to FP registers that it is hardly worth thinking
673 of it as something to strive for compatibility with.
674 The version of x86/svr4 SDB I have at the moment does (partially)
675 seem to believe that DWARF register number 11 is associated with
676 the x86 register %st(0), but that's about all. Higher DWARF
677 register numbers don't seem to be associated with anything in
678 particular, and even for DWARF regno 11, SDB only seems to under-
679 stand that it should say that a variable lives in %st(0) (when
680 asked via an `=' command) if we said it was in DWARF regno 11,
681 but SDB still prints garbage when asked for the value of the
682 variable in question (via a `/' command).
683 (Also note that the labels SDB prints for various FP stack regs
684 when doing an `x' command are all wrong.)
685 Note that these problems generally don't affect the native SVR4
686 C compiler because it doesn't allow the use of -O with -g and
687 because when it is *not* optimizing, it allocates a memory
688 location for each floating-point variable, and the memory
689 location is what gets described in the DWARF AT_location
690 attribute for the variable in question.
691 Regardless of the severe mental illness of the x86/svr4 SDB, we
692 do something sensible here and we use the following DWARF
693 register numbers. Note that these are all stack-top-relative
695 11 for %st(0) (gcc regno = 8)
696 12 for %st(1) (gcc regno = 9)
697 13 for %st(2) (gcc regno = 10)
698 14 for %st(3) (gcc regno = 11)
699 15 for %st(4) (gcc regno = 12)
700 16 for %st(5) (gcc regno = 13)
701 17 for %st(6) (gcc regno = 14)
702 18 for %st(7) (gcc regno = 15)
704 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
706 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
707 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
708 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
709 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
710 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
711 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
712 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
715 /* Test and compare insns in i386.md store the information needed to
716 generate branch and scc insns here. */
718 rtx ix86_compare_op0 = NULL_RTX;
719 rtx ix86_compare_op1 = NULL_RTX;
721 #define MAX_386_STACK_LOCALS 3
722 /* Size of the register save area. */
723 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
725 /* Define the structure for the machine field in struct function. */
727 struct stack_local_entry GTY(())
732 struct stack_local_entry *next;
735 /* Structure describing stack frame layout.
736 Stack grows downward:
742 saved frame pointer if frame_pointer_needed
743 <- HARD_FRAME_POINTER
749 > to_allocate <- FRAME_POINTER
761 int outgoing_arguments_size;
764 HOST_WIDE_INT to_allocate;
765 /* The offsets relative to ARG_POINTER. */
766 HOST_WIDE_INT frame_pointer_offset;
767 HOST_WIDE_INT hard_frame_pointer_offset;
768 HOST_WIDE_INT stack_pointer_offset;
770 /* When save_regs_using_mov is set, emit prologue using
771 move instead of push instructions. */
772 bool save_regs_using_mov;
775 /* Used to enable/disable debugging features. */
776 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
777 /* Code model option as passed by user. */
778 const char *ix86_cmodel_string;
780 enum cmodel ix86_cmodel;
782 const char *ix86_asm_string;
783 enum asm_dialect ix86_asm_dialect = ASM_ATT;
785 const char *ix86_tls_dialect_string;
786 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
788 /* Which unit we are generating floating point math for. */
789 enum fpmath_unit ix86_fpmath;
791 /* Which cpu are we scheduling for. */
792 enum processor_type ix86_tune;
793 /* Which instruction set architecture to use. */
794 enum processor_type ix86_arch;
796 /* Strings to hold which cpu and instruction set architecture to use. */
797 const char *ix86_tune_string; /* for -mtune=<xxx> */
798 const char *ix86_arch_string; /* for -march=<xxx> */
799 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
801 /* # of registers to use to pass arguments. */
802 const char *ix86_regparm_string;
804 /* true if sse prefetch instruction is not NOOP. */
805 int x86_prefetch_sse;
807 /* ix86_regparm_string as a number */
810 /* Alignment to use for loops and jumps: */
812 /* Power of two alignment for loops. */
813 const char *ix86_align_loops_string;
815 /* Power of two alignment for non-loop jumps. */
816 const char *ix86_align_jumps_string;
818 /* Power of two alignment for stack boundary in bytes. */
819 const char *ix86_preferred_stack_boundary_string;
821 /* Preferred alignment for stack boundary in bits. */
822 unsigned int ix86_preferred_stack_boundary;
824 /* Values 1-5: see jump.c */
825 int ix86_branch_cost;
826 const char *ix86_branch_cost_string;
828 /* Power of two alignment for functions. */
829 const char *ix86_align_funcs_string;
831 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
832 static char internal_label_prefix[16];
833 static int internal_label_prefix_len;
835 static int local_symbolic_operand (rtx, enum machine_mode);
836 static int tls_symbolic_operand_1 (rtx, enum tls_model);
837 static void output_pic_addr_const (FILE *, rtx, int);
838 static void put_condition_code (enum rtx_code, enum machine_mode,
840 static const char *get_some_local_dynamic_name (void);
841 static int get_some_local_dynamic_name_1 (rtx *, void *);
842 static rtx maybe_get_pool_constant (rtx);
843 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
844 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
846 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
847 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
849 static rtx get_thread_pointer (int);
850 static rtx legitimize_tls_address (rtx, enum tls_model, int);
851 static void get_pc_thunk_name (char [32], unsigned int);
852 static rtx gen_push (rtx);
853 static int memory_address_length (rtx addr);
854 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
855 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
856 static struct machine_function * ix86_init_machine_status (void);
857 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
858 static int ix86_nsaved_regs (void);
859 static void ix86_emit_save_regs (void);
860 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
861 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
862 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
863 static HOST_WIDE_INT ix86_GOT_alias_set (void);
864 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
865 static rtx ix86_expand_aligntest (rtx, int);
866 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
867 static int ix86_issue_rate (void);
868 static int ix86_adjust_cost (rtx, rtx, rtx, int);
869 static int ia32_multipass_dfa_lookahead (void);
870 static void ix86_init_mmx_sse_builtins (void);
871 static rtx x86_this_parameter (tree);
872 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
873 HOST_WIDE_INT, tree);
874 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
875 static void x86_file_start (void);
876 static void ix86_reorg (void);
877 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
878 static tree ix86_build_builtin_va_list (void);
879 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
881 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
885 rtx base, index, disp;
887 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
890 static int ix86_decompose_address (rtx, struct ix86_address *);
891 static int ix86_address_cost (rtx);
892 static bool ix86_cannot_force_const_mem (rtx);
893 static rtx ix86_delegitimize_address (rtx);
895 struct builtin_description;
896 static rtx ix86_expand_sse_comi (const struct builtin_description *,
898 static rtx ix86_expand_sse_compare (const struct builtin_description *,
900 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
901 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
902 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
903 static rtx ix86_expand_store_builtin (enum insn_code, tree);
904 static rtx safe_vector_operand (rtx, enum machine_mode);
905 static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
906 static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
907 enum rtx_code *, enum rtx_code *);
908 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
909 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
910 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
911 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
912 static int ix86_fp_comparison_cost (enum rtx_code code);
913 static unsigned int ix86_select_alt_pic_regnum (void);
914 static int ix86_save_reg (unsigned int, int);
915 static void ix86_compute_frame_layout (struct ix86_frame *);
916 static int ix86_comp_type_attributes (tree, tree);
917 static int ix86_function_regparm (tree, tree);
918 const struct attribute_spec ix86_attribute_table[];
919 static bool ix86_function_ok_for_sibcall (tree, tree);
920 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
921 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
922 static int ix86_value_regno (enum machine_mode);
923 static bool contains_128bit_aligned_vector_p (tree);
924 static rtx ix86_struct_value_rtx (tree, int);
925 static bool ix86_ms_bitfield_layout_p (tree);
926 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
927 static int extended_reg_mentioned_1 (rtx *, void *);
928 static bool ix86_rtx_costs (rtx, int, int, int *);
929 static int min_insn_size (rtx);
930 static tree ix86_md_asm_clobbers (tree clobbers);
931 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
932 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
935 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
936 static void ix86_svr3_asm_out_constructor (rtx, int);
939 /* Register class used for passing given 64bit part of the argument.
940 These represent classes as documented by the PS ABI, with the exception
941 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
942 use SF or DFmode move instead of DImode to avoid reformatting penalties.
944 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
945 whenever possible (upper half does contain padding).
947 enum x86_64_reg_class
950 X86_64_INTEGER_CLASS,
951 X86_64_INTEGERSI_CLASS,
960 static const char * const x86_64_reg_class_name[] =
961 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
963 #define MAX_CLASSES 4
964 static int classify_argument (enum machine_mode, tree,
965 enum x86_64_reg_class [MAX_CLASSES], int);
966 static int examine_argument (enum machine_mode, tree, int, int *, int *);
967 static rtx construct_container (enum machine_mode, tree, int, int, int,
969 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
970 enum x86_64_reg_class);
972 /* Table of constants used by fldpi, fldln2, etc.... */
973 static REAL_VALUE_TYPE ext_80387_constants_table [5];
974 static bool ext_80387_constants_init = 0;
975 static void init_ext_80387_constants (void);
977 /* Initialize the GCC target structure. */
978 #undef TARGET_ATTRIBUTE_TABLE
979 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
980 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
981 # undef TARGET_MERGE_DECL_ATTRIBUTES
982 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
985 #undef TARGET_COMP_TYPE_ATTRIBUTES
986 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
988 #undef TARGET_INIT_BUILTINS
989 #define TARGET_INIT_BUILTINS ix86_init_builtins
991 #undef TARGET_EXPAND_BUILTIN
992 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
994 #undef TARGET_ASM_FUNCTION_EPILOGUE
995 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
997 #undef TARGET_ASM_OPEN_PAREN
998 #define TARGET_ASM_OPEN_PAREN ""
999 #undef TARGET_ASM_CLOSE_PAREN
1000 #define TARGET_ASM_CLOSE_PAREN ""
1002 #undef TARGET_ASM_ALIGNED_HI_OP
1003 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1004 #undef TARGET_ASM_ALIGNED_SI_OP
1005 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1007 #undef TARGET_ASM_ALIGNED_DI_OP
1008 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1011 #undef TARGET_ASM_UNALIGNED_HI_OP
1012 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1013 #undef TARGET_ASM_UNALIGNED_SI_OP
1014 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1015 #undef TARGET_ASM_UNALIGNED_DI_OP
1016 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1018 #undef TARGET_SCHED_ADJUST_COST
1019 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1020 #undef TARGET_SCHED_ISSUE_RATE
1021 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1022 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1023 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1024 ia32_multipass_dfa_lookahead
1026 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1027 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1030 #undef TARGET_HAVE_TLS
1031 #define TARGET_HAVE_TLS true
1033 #undef TARGET_CANNOT_FORCE_CONST_MEM
1034 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1036 #undef TARGET_DELEGITIMIZE_ADDRESS
1037 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1039 #undef TARGET_MS_BITFIELD_LAYOUT_P
1040 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1042 #undef TARGET_ASM_OUTPUT_MI_THUNK
1043 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1044 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1045 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1047 #undef TARGET_ASM_FILE_START
1048 #define TARGET_ASM_FILE_START x86_file_start
1050 #undef TARGET_RTX_COSTS
1051 #define TARGET_RTX_COSTS ix86_rtx_costs
1052 #undef TARGET_ADDRESS_COST
1053 #define TARGET_ADDRESS_COST ix86_address_cost
1055 #undef TARGET_FIXED_CONDITION_CODE_REGS
1056 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1057 #undef TARGET_CC_MODES_COMPATIBLE
1058 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1060 #undef TARGET_MACHINE_DEPENDENT_REORG
1061 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1063 #undef TARGET_BUILD_BUILTIN_VA_LIST
1064 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1066 #undef TARGET_MD_ASM_CLOBBERS
1067 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1069 #undef TARGET_PROMOTE_PROTOTYPES
1070 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1071 #undef TARGET_STRUCT_VALUE_RTX
1072 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1073 #undef TARGET_SETUP_INCOMING_VARARGS
1074 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1075 #undef TARGET_MUST_PASS_IN_STACK
1076 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1077 #undef TARGET_PASS_BY_REFERENCE
1078 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1080 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1081 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1083 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1084 #undef TARGET_INSERT_ATTRIBUTES
1085 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1088 struct gcc_target targetm = TARGET_INITIALIZER;
1091 /* The svr4 ABI for the i386 says that records and unions are returned
1093 #ifndef DEFAULT_PCC_STRUCT_RETURN
1094 #define DEFAULT_PCC_STRUCT_RETURN 1
1097 /* Sometimes certain combinations of command options do not make
1098 sense on a particular target machine. You can define a macro
1099 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1100 defined, is executed once just after all the command options have
1103 Don't use this macro to turn on various extra optimizations for
1104 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1107 override_options (void)
1110 int ix86_tune_defaulted = 0;
1112 /* Comes from final.c -- no real reason to change it. */
1113 #define MAX_CODE_ALIGN 16
1117 const struct processor_costs *cost; /* Processor costs */
1118 const int target_enable; /* Target flags to enable. */
1119 const int target_disable; /* Target flags to disable. */
1120 const int align_loop; /* Default alignments. */
1121 const int align_loop_max_skip;
1122 const int align_jump;
1123 const int align_jump_max_skip;
1124 const int align_func;
1126 const processor_target_table[PROCESSOR_max] =
1128 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1129 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1130 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1131 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1132 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1133 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1134 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1135 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1136 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
1139 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1142 const char *const name; /* processor name or nickname. */
1143 const enum processor_type processor;
1144 const enum pta_flags
1150 PTA_PREFETCH_SSE = 16,
1156 const processor_alias_table[] =
1158 {"i386", PROCESSOR_I386, 0},
1159 {"i486", PROCESSOR_I486, 0},
1160 {"i586", PROCESSOR_PENTIUM, 0},
1161 {"pentium", PROCESSOR_PENTIUM, 0},
1162 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1163 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1164 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1165 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1166 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1167 {"i686", PROCESSOR_PENTIUMPRO, 0},
1168 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1169 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1170 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1171 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1172 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1173 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1174 | PTA_MMX | PTA_PREFETCH_SSE},
1175 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1176 | PTA_MMX | PTA_PREFETCH_SSE},
1177 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1178 | PTA_MMX | PTA_PREFETCH_SSE},
1179 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1180 | PTA_MMX | PTA_PREFETCH_SSE},
1181 {"k6", PROCESSOR_K6, PTA_MMX},
1182 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1183 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1184 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1186 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1187 | PTA_3DNOW | PTA_3DNOW_A},
1188 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1189 | PTA_3DNOW_A | PTA_SSE},
1190 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1191 | PTA_3DNOW_A | PTA_SSE},
1192 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1193 | PTA_3DNOW_A | PTA_SSE},
1194 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1195 | PTA_SSE | PTA_SSE2 },
1196 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1197 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1198 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1199 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1200 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1201 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1202 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1203 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1206 int const pta_size = ARRAY_SIZE (processor_alias_table);
1208 /* Set the default values for switches whose default depends on TARGET_64BIT
1209 in case they weren't overwritten by command line options. */
1212 if (flag_omit_frame_pointer == 2)
1213 flag_omit_frame_pointer = 1;
1214 if (flag_asynchronous_unwind_tables == 2)
1215 flag_asynchronous_unwind_tables = 1;
1216 if (flag_pcc_struct_return == 2)
1217 flag_pcc_struct_return = 0;
1221 if (flag_omit_frame_pointer == 2)
1222 flag_omit_frame_pointer = 0;
1223 if (flag_asynchronous_unwind_tables == 2)
1224 flag_asynchronous_unwind_tables = 0;
1225 if (flag_pcc_struct_return == 2)
1226 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1229 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1230 SUBTARGET_OVERRIDE_OPTIONS;
1233 if (!ix86_tune_string && ix86_arch_string)
1234 ix86_tune_string = ix86_arch_string;
1235 if (!ix86_tune_string)
1237 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1238 ix86_tune_defaulted = 1;
1240 if (!ix86_arch_string)
1241 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1243 if (ix86_cmodel_string != 0)
1245 if (!strcmp (ix86_cmodel_string, "small"))
1246 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1248 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1249 else if (!strcmp (ix86_cmodel_string, "32"))
1250 ix86_cmodel = CM_32;
1251 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1252 ix86_cmodel = CM_KERNEL;
1253 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1254 ix86_cmodel = CM_MEDIUM;
1255 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1256 ix86_cmodel = CM_LARGE;
1258 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1262 ix86_cmodel = CM_32;
1264 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1266 if (ix86_asm_string != 0)
1268 if (!strcmp (ix86_asm_string, "intel"))
1269 ix86_asm_dialect = ASM_INTEL;
1270 else if (!strcmp (ix86_asm_string, "att"))
1271 ix86_asm_dialect = ASM_ATT;
1273 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1275 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1276 error ("code model `%s' not supported in the %s bit mode",
1277 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1278 if (ix86_cmodel == CM_LARGE)
1279 sorry ("code model `large' not supported yet");
1280 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1281 sorry ("%i-bit mode not compiled in",
1282 (target_flags & MASK_64BIT) ? 64 : 32);
1284 for (i = 0; i < pta_size; i++)
1285 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1287 ix86_arch = processor_alias_table[i].processor;
1288 /* Default cpu tuning to the architecture. */
1289 ix86_tune = ix86_arch;
1290 if (processor_alias_table[i].flags & PTA_MMX
1291 && !(target_flags_explicit & MASK_MMX))
1292 target_flags |= MASK_MMX;
1293 if (processor_alias_table[i].flags & PTA_3DNOW
1294 && !(target_flags_explicit & MASK_3DNOW))
1295 target_flags |= MASK_3DNOW;
1296 if (processor_alias_table[i].flags & PTA_3DNOW_A
1297 && !(target_flags_explicit & MASK_3DNOW_A))
1298 target_flags |= MASK_3DNOW_A;
1299 if (processor_alias_table[i].flags & PTA_SSE
1300 && !(target_flags_explicit & MASK_SSE))
1301 target_flags |= MASK_SSE;
1302 if (processor_alias_table[i].flags & PTA_SSE2
1303 && !(target_flags_explicit & MASK_SSE2))
1304 target_flags |= MASK_SSE2;
1305 if (processor_alias_table[i].flags & PTA_SSE3
1306 && !(target_flags_explicit & MASK_SSE3))
1307 target_flags |= MASK_SSE3;
1308 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1309 x86_prefetch_sse = true;
1310 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1312 if (ix86_tune_defaulted)
1314 ix86_tune_string = "x86-64";
1315 for (i = 0; i < pta_size; i++)
1316 if (! strcmp (ix86_tune_string,
1317 processor_alias_table[i].name))
1319 ix86_tune = processor_alias_table[i].processor;
1322 error ("CPU you selected does not support x86-64 "
1329 error ("bad value (%s) for -march= switch", ix86_arch_string);
1331 for (i = 0; i < pta_size; i++)
1332 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1334 ix86_tune = processor_alias_table[i].processor;
1335 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1336 error ("CPU you selected does not support x86-64 instruction set");
1338 /* Intel CPUs have always interpreted SSE prefetch instructions as
1339 NOPs; so, we can enable SSE prefetch instructions even when
1340 -mtune (rather than -march) points us to a processor that has them.
1341 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1342 higher processors. */
1343 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1344 x86_prefetch_sse = true;
1348 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1351 ix86_cost = &size_cost;
1353 ix86_cost = processor_target_table[ix86_tune].cost;
1354 target_flags |= processor_target_table[ix86_tune].target_enable;
1355 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1357 /* Arrange to set up i386_stack_locals for all functions. */
1358 init_machine_status = ix86_init_machine_status;
1360 /* Validate -mregparm= value. */
1361 if (ix86_regparm_string)
1363 i = atoi (ix86_regparm_string);
1364 if (i < 0 || i > REGPARM_MAX)
1365 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1371 ix86_regparm = REGPARM_MAX;
1373 /* If the user has provided any of the -malign-* options,
1374 warn and use that value only if -falign-* is not set.
1375 Remove this code in GCC 3.2 or later. */
1376 if (ix86_align_loops_string)
1378 warning ("-malign-loops is obsolete, use -falign-loops");
1379 if (align_loops == 0)
1381 i = atoi (ix86_align_loops_string);
1382 if (i < 0 || i > MAX_CODE_ALIGN)
1383 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1385 align_loops = 1 << i;
1389 if (ix86_align_jumps_string)
1391 warning ("-malign-jumps is obsolete, use -falign-jumps");
1392 if (align_jumps == 0)
1394 i = atoi (ix86_align_jumps_string);
1395 if (i < 0 || i > MAX_CODE_ALIGN)
1396 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1398 align_jumps = 1 << i;
1402 if (ix86_align_funcs_string)
1404 warning ("-malign-functions is obsolete, use -falign-functions");
1405 if (align_functions == 0)
1407 i = atoi (ix86_align_funcs_string);
1408 if (i < 0 || i > MAX_CODE_ALIGN)
1409 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1411 align_functions = 1 << i;
1415 /* Default align_* from the processor table. */
1416 if (align_loops == 0)
1418 align_loops = processor_target_table[ix86_tune].align_loop;
1419 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1421 if (align_jumps == 0)
1423 align_jumps = processor_target_table[ix86_tune].align_jump;
1424 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1426 if (align_functions == 0)
1428 align_functions = processor_target_table[ix86_tune].align_func;
1431 /* Validate -mpreferred-stack-boundary= value, or provide default.
1432 The default of 128 bits is for Pentium III's SSE __m128, but we
1433 don't want additional code to keep the stack aligned when
1434 optimizing for code size. */
1435 ix86_preferred_stack_boundary = (optimize_size
1436 ? TARGET_64BIT ? 128 : 32
1438 if (ix86_preferred_stack_boundary_string)
1440 i = atoi (ix86_preferred_stack_boundary_string);
1441 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1442 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1443 TARGET_64BIT ? 4 : 2);
1445 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1448 /* Validate -mbranch-cost= value, or provide default. */
1449 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1450 if (ix86_branch_cost_string)
1452 i = atoi (ix86_branch_cost_string);
1454 error ("-mbranch-cost=%d is not between 0 and 5", i);
1456 ix86_branch_cost = i;
1459 if (ix86_tls_dialect_string)
1461 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1462 ix86_tls_dialect = TLS_DIALECT_GNU;
1463 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1464 ix86_tls_dialect = TLS_DIALECT_SUN;
1466 error ("bad value (%s) for -mtls-dialect= switch",
1467 ix86_tls_dialect_string);
1470 /* Keep nonleaf frame pointers. */
1471 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1472 flag_omit_frame_pointer = 1;
1474 /* If we're doing fast math, we don't care about comparison order
1475 wrt NaNs. This lets us use a shorter comparison sequence. */
1476 if (flag_unsafe_math_optimizations)
1477 target_flags &= ~MASK_IEEE_FP;
1479 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1480 since the insns won't need emulation. */
1481 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1482 target_flags &= ~MASK_NO_FANCY_MATH_387;
1484 /* Turn on SSE2 builtins for -msse3. */
1486 target_flags |= MASK_SSE2;
1488 /* Turn on SSE builtins for -msse2. */
1490 target_flags |= MASK_SSE;
1494 if (TARGET_ALIGN_DOUBLE)
1495 error ("-malign-double makes no sense in the 64bit mode");
1497 error ("-mrtd calling convention not supported in the 64bit mode");
1498 /* Enable by default the SSE and MMX builtins. */
1499 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1500 ix86_fpmath = FPMATH_SSE;
1504 ix86_fpmath = FPMATH_387;
1505 /* i386 ABI does not specify red zone. It still makes sense to use it
1506 when programmer takes care to stack from being destroyed. */
1507 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1508 target_flags |= MASK_NO_RED_ZONE;
1511 if (ix86_fpmath_string != 0)
1513 if (! strcmp (ix86_fpmath_string, "387"))
1514 ix86_fpmath = FPMATH_387;
1515 else if (! strcmp (ix86_fpmath_string, "sse"))
1519 warning ("SSE instruction set disabled, using 387 arithmetics");
1520 ix86_fpmath = FPMATH_387;
1523 ix86_fpmath = FPMATH_SSE;
1525 else if (! strcmp (ix86_fpmath_string, "387,sse")
1526 || ! strcmp (ix86_fpmath_string, "sse,387"))
1530 warning ("SSE instruction set disabled, using 387 arithmetics");
1531 ix86_fpmath = FPMATH_387;
1533 else if (!TARGET_80387)
1535 warning ("387 instruction set disabled, using SSE arithmetics");
1536 ix86_fpmath = FPMATH_SSE;
1539 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1542 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1545 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1549 target_flags |= MASK_MMX;
1550 x86_prefetch_sse = true;
1553 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1556 target_flags |= MASK_MMX;
1557 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1558 extensions it adds. */
1559 if (x86_3dnow_a & (1 << ix86_arch))
1560 target_flags |= MASK_3DNOW_A;
1562 if ((x86_accumulate_outgoing_args & TUNEMASK)
1563 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1565 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1567 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1570 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1571 p = strchr (internal_label_prefix, 'X');
1572 internal_label_prefix_len = p - internal_label_prefix;
1578 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1580 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1581 make the problem with not enough registers even worse. */
1582 #ifdef INSN_SCHEDULING
1584 flag_schedule_insns = 0;
1587 /* The default values of these switches depend on the TARGET_64BIT
1588 that is not known at this moment. Mark these values with 2 and
1589 let user the to override these. In case there is no command line option
1590 specifying them, we will set the defaults in override_options. */
1592 flag_omit_frame_pointer = 2;
1593 flag_pcc_struct_return = 2;
1594 flag_asynchronous_unwind_tables = 2;
1597 /* Table of valid machine attributes. */
1598 const struct attribute_spec ix86_attribute_table[] =
1600 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1601 /* Stdcall attribute says callee is responsible for popping arguments
1602 if they are not variable. */
1603 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1604 /* Fastcall attribute says callee is responsible for popping arguments
1605 if they are not variable. */
1606 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1607 /* Cdecl attribute says the callee is a normal C declaration */
1608 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1609 /* Regparm attribute specifies how many integer arguments are to be
1610 passed in registers. */
1611 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1612 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1613 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1614 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1615 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1617 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1618 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1619 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1620 SUBTARGET_ATTRIBUTE_TABLE,
1622 { NULL, 0, 0, false, false, false, NULL }
1625 /* Decide whether we can make a sibling call to a function. DECL is the
1626 declaration of the function being targeted by the call and EXP is the
1627 CALL_EXPR representing the call. */
1630 ix86_function_ok_for_sibcall (tree decl, tree exp)
1632 /* If we are generating position-independent code, we cannot sibcall
1633 optimize any indirect call, or a direct call to a global function,
1634 as the PLT requires %ebx be live. */
1635 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1638 /* If we are returning floats on the 80387 register stack, we cannot
1639 make a sibcall from a function that doesn't return a float to a
1640 function that does or, conversely, from a function that does return
1641 a float to a function that doesn't; the necessary stack adjustment
1642 would not be executed. */
1643 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1644 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1647 /* If this call is indirect, we'll need to be able to use a call-clobbered
1648 register for the address of the target function. Make sure that all
1649 such registers are not used for passing parameters. */
1650 if (!decl && !TARGET_64BIT)
1654 /* We're looking at the CALL_EXPR, we need the type of the function. */
1655 type = TREE_OPERAND (exp, 0); /* pointer expression */
1656 type = TREE_TYPE (type); /* pointer type */
1657 type = TREE_TYPE (type); /* function type */
1659 if (ix86_function_regparm (type, NULL) >= 3)
1661 /* ??? Need to count the actual number of registers to be used,
1662 not the possible number of registers. Fix later. */
1667 /* Otherwise okay. That also includes certain types of indirect calls. */
1671 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1672 arguments as in struct attribute_spec.handler. */
1674 ix86_handle_cdecl_attribute (tree *node, tree name,
1675 tree args ATTRIBUTE_UNUSED,
1676 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1678 if (TREE_CODE (*node) != FUNCTION_TYPE
1679 && TREE_CODE (*node) != METHOD_TYPE
1680 && TREE_CODE (*node) != FIELD_DECL
1681 && TREE_CODE (*node) != TYPE_DECL)
1683 warning ("`%s' attribute only applies to functions",
1684 IDENTIFIER_POINTER (name));
1685 *no_add_attrs = true;
1689 if (is_attribute_p ("fastcall", name))
1691 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1693 error ("fastcall and stdcall attributes are not compatible");
1695 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1697 error ("fastcall and regparm attributes are not compatible");
1700 else if (is_attribute_p ("stdcall", name))
1702 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1704 error ("fastcall and stdcall attributes are not compatible");
1711 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1712 *no_add_attrs = true;
1718 /* Handle a "regparm" attribute;
1719 arguments as in struct attribute_spec.handler. */
1721 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1722 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1724 if (TREE_CODE (*node) != FUNCTION_TYPE
1725 && TREE_CODE (*node) != METHOD_TYPE
1726 && TREE_CODE (*node) != FIELD_DECL
1727 && TREE_CODE (*node) != TYPE_DECL)
1729 warning ("`%s' attribute only applies to functions",
1730 IDENTIFIER_POINTER (name));
1731 *no_add_attrs = true;
1737 cst = TREE_VALUE (args);
1738 if (TREE_CODE (cst) != INTEGER_CST)
1740 warning ("`%s' attribute requires an integer constant argument",
1741 IDENTIFIER_POINTER (name));
1742 *no_add_attrs = true;
1744 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1746 warning ("argument to `%s' attribute larger than %d",
1747 IDENTIFIER_POINTER (name), REGPARM_MAX);
1748 *no_add_attrs = true;
1751 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1753 error ("fastcall and regparm attributes are not compatible");
1760 /* Return 0 if the attributes for two types are incompatible, 1 if they
1761 are compatible, and 2 if they are nearly compatible (which causes a
1762 warning to be generated). */
1765 ix86_comp_type_attributes (tree type1, tree type2)
1767 /* Check for mismatch of non-default calling convention. */
1768 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1770 if (TREE_CODE (type1) != FUNCTION_TYPE)
1773 /* Check for mismatched fastcall types */
1774 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1775 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1778 /* Check for mismatched return types (cdecl vs stdcall). */
1779 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1780 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1782 if (ix86_function_regparm (type1, NULL)
1783 != ix86_function_regparm (type2, NULL))
1788 /* Return the regparm value for a fuctio with the indicated TYPE and DECL.
1789 DECL may be NULL when calling function indirectly
1790 or considering a libcall. */
1793 ix86_function_regparm (tree type, tree decl)
1796 int regparm = ix86_regparm;
1797 bool user_convention = false;
1801 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1804 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1805 user_convention = true;
1808 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1811 user_convention = true;
1814 /* Use register calling convention for local functions when possible. */
1815 if (!TARGET_64BIT && !user_convention && decl
1816 && flag_unit_at_a_time && !profile_flag)
1818 struct cgraph_local_info *i = cgraph_local_info (decl);
1821 /* We can't use regparm(3) for nested functions as these use
1822 static chain pointer in third argument. */
1823 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1833 /* Return true if EAX is live at the start of the function. Used by
1834 ix86_expand_prologue to determine if we need special help before
1835 calling allocate_stack_worker. */
1838 ix86_eax_live_at_start_p (void)
1840 /* Cheat. Don't bother working forward from ix86_function_regparm
1841 to the function type to whether an actual argument is located in
1842 eax. Instead just look at cfg info, which is still close enough
1843 to correct at this point. This gives false positives for broken
1844 functions that might use uninitialized data that happens to be
1845 allocated in eax, but who cares? */
1846 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1849 /* Value is the number of bytes of arguments automatically
1850 popped when returning from a subroutine call.
1851 FUNDECL is the declaration node of the function (as a tree),
1852 FUNTYPE is the data type of the function (as a tree),
1853 or for a library call it is an identifier node for the subroutine name.
1854 SIZE is the number of bytes of arguments passed on the stack.
1856 On the 80386, the RTD insn may be used to pop them if the number
1857 of args is fixed, but if the number is variable then the caller
1858 must pop them all. RTD can't be used for library calls now
1859 because the library is compiled with the Unix compiler.
1860 Use of RTD is a selectable option, since it is incompatible with
1861 standard Unix calling sequences. If the option is not selected,
1862 the caller must always pop the args.
1864 The attribute stdcall is equivalent to RTD on a per module basis. */
1867 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1869 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1871 /* Cdecl functions override -mrtd, and never pop the stack. */
1872 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1874 /* Stdcall and fastcall functions will pop the stack if not
1876 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1877 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1881 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1882 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1883 == void_type_node)))
1887 /* Lose any fake structure return argument if it is passed on the stack. */
1888 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1891 int nregs = ix86_function_regparm (funtype, fundecl);
1894 return GET_MODE_SIZE (Pmode);
1900 /* Argument support functions. */
1902 /* Return true when register may be used to pass function parameters. */
1904 ix86_function_arg_regno_p (int regno)
1908 return (regno < REGPARM_MAX
1909 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1910 if (SSE_REGNO_P (regno) && TARGET_SSE)
1912 /* RAX is used as hidden argument to va_arg functions. */
1915 for (i = 0; i < REGPARM_MAX; i++)
1916 if (regno == x86_64_int_parameter_registers[i])
1921 /* Return if we do not know how to pass TYPE solely in registers. */
1924 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
1926 if (must_pass_in_stack_var_size_or_pad (mode, type))
1928 return (!TARGET_64BIT && type && mode == TImode);
1931 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1932 for a call to a function whose data type is FNTYPE.
1933 For a library call, FNTYPE is 0. */
1936 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1937 tree fntype, /* tree ptr for function decl */
1938 rtx libname, /* SYMBOL_REF of library name or 0 */
1941 static CUMULATIVE_ARGS zero_cum;
1942 tree param, next_param;
1944 if (TARGET_DEBUG_ARG)
1946 fprintf (stderr, "\ninit_cumulative_args (");
1948 fprintf (stderr, "fntype code = %s, ret code = %s",
1949 tree_code_name[(int) TREE_CODE (fntype)],
1950 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1952 fprintf (stderr, "no fntype");
1955 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1960 /* Set up the number of registers to use for passing arguments. */
1962 cum->nregs = ix86_function_regparm (fntype, fndecl);
1964 cum->nregs = ix86_regparm;
1966 cum->sse_nregs = SSE_REGPARM_MAX;
1968 cum->mmx_nregs = MMX_REGPARM_MAX;
1969 cum->warn_sse = true;
1970 cum->warn_mmx = true;
1971 cum->maybe_vaarg = false;
1973 /* Use ecx and edx registers if function has fastcall attribute */
1974 if (fntype && !TARGET_64BIT)
1976 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1983 /* Determine if this function has variable arguments. This is
1984 indicated by the last argument being 'void_type_mode' if there
1985 are no variable arguments. If there are variable arguments, then
1986 we won't pass anything in registers in 32-bit mode. */
1988 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
1990 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1991 param != 0; param = next_param)
1993 next_param = TREE_CHAIN (param);
1994 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2005 cum->maybe_vaarg = true;
2009 if ((!fntype && !libname)
2010 || (fntype && !TYPE_ARG_TYPES (fntype)))
2011 cum->maybe_vaarg = 1;
2013 if (TARGET_DEBUG_ARG)
2014 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2019 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2020 of this code is to classify each 8bytes of incoming argument by the register
2021 class and assign registers accordingly. */
2023 /* Return the union class of CLASS1 and CLASS2.
2024 See the x86-64 PS ABI for details. */
2026 static enum x86_64_reg_class
2027 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2029 /* Rule #1: If both classes are equal, this is the resulting class. */
2030 if (class1 == class2)
2033 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2035 if (class1 == X86_64_NO_CLASS)
2037 if (class2 == X86_64_NO_CLASS)
2040 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2041 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2042 return X86_64_MEMORY_CLASS;
2044 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2045 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2046 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2047 return X86_64_INTEGERSI_CLASS;
2048 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2049 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2050 return X86_64_INTEGER_CLASS;
2052 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
2053 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
2054 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
2055 return X86_64_MEMORY_CLASS;
2057 /* Rule #6: Otherwise class SSE is used. */
2058 return X86_64_SSE_CLASS;
2061 /* Classify the argument of type TYPE and mode MODE.
2062 CLASSES will be filled by the register class used to pass each word
2063 of the operand. The number of words is returned. In case the parameter
2064 should be passed in memory, 0 is returned. As a special case for zero
2065 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2067 BIT_OFFSET is used internally for handling records and specifies offset
2068 of the offset in bits modulo 256 to avoid overflow cases.
2070 See the x86-64 PS ABI for details.
2074 classify_argument (enum machine_mode mode, tree type,
2075 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2077 HOST_WIDE_INT bytes =
2078 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2079 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2081 /* Variable sized entities are always passed/returned in memory. */
2085 if (mode != VOIDmode
2086 && targetm.calls.must_pass_in_stack (mode, type))
2089 if (type && AGGREGATE_TYPE_P (type))
2093 enum x86_64_reg_class subclasses[MAX_CLASSES];
2095 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2099 for (i = 0; i < words; i++)
2100 classes[i] = X86_64_NO_CLASS;
2102 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2103 signalize memory class, so handle it as special case. */
2106 classes[0] = X86_64_NO_CLASS;
2110 /* Classify each field of record and merge classes. */
2111 if (TREE_CODE (type) == RECORD_TYPE)
2113 /* For classes first merge in the field of the subclasses. */
2114 if (TYPE_BINFO (type))
2116 tree binfo, base_binfo;
2119 for (binfo = TYPE_BINFO (type), i = 0;
2120 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2123 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2124 tree type = BINFO_TYPE (base_binfo);
2126 num = classify_argument (TYPE_MODE (type),
2128 (offset + bit_offset) % 256);
2131 for (i = 0; i < num; i++)
2133 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2135 merge_classes (subclasses[i], classes[i + pos]);
2139 /* And now merge the fields of structure. */
2140 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2142 if (TREE_CODE (field) == FIELD_DECL)
2146 /* Bitfields are always classified as integer. Handle them
2147 early, since later code would consider them to be
2148 misaligned integers. */
2149 if (DECL_BIT_FIELD (field))
2151 for (i = int_bit_position (field) / 8 / 8;
2152 i < (int_bit_position (field)
2153 + tree_low_cst (DECL_SIZE (field), 0)
2156 merge_classes (X86_64_INTEGER_CLASS,
2161 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2162 TREE_TYPE (field), subclasses,
2163 (int_bit_position (field)
2164 + bit_offset) % 256);
2167 for (i = 0; i < num; i++)
2170 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2172 merge_classes (subclasses[i], classes[i + pos]);
2178 /* Arrays are handled as small records. */
2179 else if (TREE_CODE (type) == ARRAY_TYPE)
2182 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2183 TREE_TYPE (type), subclasses, bit_offset);
2187 /* The partial classes are now full classes. */
2188 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2189 subclasses[0] = X86_64_SSE_CLASS;
2190 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2191 subclasses[0] = X86_64_INTEGER_CLASS;
2193 for (i = 0; i < words; i++)
2194 classes[i] = subclasses[i % num];
2196 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2197 else if (TREE_CODE (type) == UNION_TYPE
2198 || TREE_CODE (type) == QUAL_UNION_TYPE)
2200 /* For classes first merge in the field of the subclasses. */
2201 if (TYPE_BINFO (type))
2203 tree binfo, base_binfo;
2206 for (binfo = TYPE_BINFO (type), i = 0;
2207 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2210 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2211 tree type = BINFO_TYPE (base_binfo);
2213 num = classify_argument (TYPE_MODE (type),
2215 (offset + (bit_offset % 64)) % 256);
2218 for (i = 0; i < num; i++)
2220 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2222 merge_classes (subclasses[i], classes[i + pos]);
2226 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2228 if (TREE_CODE (field) == FIELD_DECL)
2231 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2232 TREE_TYPE (field), subclasses,
2236 for (i = 0; i < num; i++)
2237 classes[i] = merge_classes (subclasses[i], classes[i]);
2241 else if (TREE_CODE (type) == SET_TYPE)
2245 classes[0] = X86_64_INTEGERSI_CLASS;
2248 else if (bytes <= 8)
2250 classes[0] = X86_64_INTEGER_CLASS;
2253 else if (bytes <= 12)
2255 classes[0] = X86_64_INTEGER_CLASS;
2256 classes[1] = X86_64_INTEGERSI_CLASS;
2261 classes[0] = X86_64_INTEGER_CLASS;
2262 classes[1] = X86_64_INTEGER_CLASS;
2269 /* Final merger cleanup. */
2270 for (i = 0; i < words; i++)
2272 /* If one class is MEMORY, everything should be passed in
2274 if (classes[i] == X86_64_MEMORY_CLASS)
2277 /* The X86_64_SSEUP_CLASS should be always preceded by
2278 X86_64_SSE_CLASS. */
2279 if (classes[i] == X86_64_SSEUP_CLASS
2280 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2281 classes[i] = X86_64_SSE_CLASS;
2283 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2284 if (classes[i] == X86_64_X87UP_CLASS
2285 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2286 classes[i] = X86_64_SSE_CLASS;
2291 /* Compute alignment needed. We align all types to natural boundaries with
2292 exception of XFmode that is aligned to 64bits. */
2293 if (mode != VOIDmode && mode != BLKmode)
2295 int mode_alignment = GET_MODE_BITSIZE (mode);
2298 mode_alignment = 128;
2299 else if (mode == XCmode)
2300 mode_alignment = 256;
2301 if (COMPLEX_MODE_P (mode))
2302 mode_alignment /= 2;
2303 /* Misaligned fields are always returned in memory. */
2304 if (bit_offset % mode_alignment)
2308 /* for V1xx modes, just use the base mode */
2309 if (VECTOR_MODE_P (mode)
2310 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2311 mode = GET_MODE_INNER (mode);
2313 /* Classification of atomic types. */
2323 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2324 classes[0] = X86_64_INTEGERSI_CLASS;
2326 classes[0] = X86_64_INTEGER_CLASS;
2330 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2335 if (!(bit_offset % 64))
2336 classes[0] = X86_64_SSESF_CLASS;
2338 classes[0] = X86_64_SSE_CLASS;
2341 classes[0] = X86_64_SSEDF_CLASS;
2344 classes[0] = X86_64_X87_CLASS;
2345 classes[1] = X86_64_X87UP_CLASS;
2348 classes[0] = X86_64_SSE_CLASS;
2349 classes[1] = X86_64_SSEUP_CLASS;
2352 classes[0] = X86_64_SSE_CLASS;
2355 classes[0] = X86_64_SSEDF_CLASS;
2356 classes[1] = X86_64_SSEDF_CLASS;
2360 /* These modes are larger than 16 bytes. */
2368 classes[0] = X86_64_SSE_CLASS;
2369 classes[1] = X86_64_SSEUP_CLASS;
2375 classes[0] = X86_64_SSE_CLASS;
2381 if (VECTOR_MODE_P (mode))
2385 if (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT)
2387 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2388 classes[0] = X86_64_INTEGERSI_CLASS;
2390 classes[0] = X86_64_INTEGER_CLASS;
2391 classes[1] = X86_64_INTEGER_CLASS;
2392 return 1 + (bytes > 8);
2399 /* Examine the argument and return set number of register required in each
2400 class. Return 0 iff parameter should be passed in memory. */
2402 examine_argument (enum machine_mode mode, tree type, int in_return,
2403 int *int_nregs, int *sse_nregs)
2405 enum x86_64_reg_class class[MAX_CLASSES];
2406 int n = classify_argument (mode, type, class, 0);
2412 for (n--; n >= 0; n--)
2415 case X86_64_INTEGER_CLASS:
2416 case X86_64_INTEGERSI_CLASS:
2419 case X86_64_SSE_CLASS:
2420 case X86_64_SSESF_CLASS:
2421 case X86_64_SSEDF_CLASS:
2424 case X86_64_NO_CLASS:
2425 case X86_64_SSEUP_CLASS:
2427 case X86_64_X87_CLASS:
2428 case X86_64_X87UP_CLASS:
2432 case X86_64_MEMORY_CLASS:
2437 /* Construct container for the argument used by GCC interface. See
2438 FUNCTION_ARG for the detailed description. */
2440 construct_container (enum machine_mode mode, tree type, int in_return,
2441 int nintregs, int nsseregs, const int * intreg,
2444 enum machine_mode tmpmode;
2446 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2447 enum x86_64_reg_class class[MAX_CLASSES];
2451 int needed_sseregs, needed_intregs;
2452 rtx exp[MAX_CLASSES];
2455 n = classify_argument (mode, type, class, 0);
2456 if (TARGET_DEBUG_ARG)
2459 fprintf (stderr, "Memory class\n");
2462 fprintf (stderr, "Classes:");
2463 for (i = 0; i < n; i++)
2465 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2467 fprintf (stderr, "\n");
2472 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2474 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2477 /* First construct simple cases. Avoid SCmode, since we want to use
2478 single register to pass this type. */
2479 if (n == 1 && mode != SCmode)
2482 case X86_64_INTEGER_CLASS:
2483 case X86_64_INTEGERSI_CLASS:
2484 return gen_rtx_REG (mode, intreg[0]);
2485 case X86_64_SSE_CLASS:
2486 case X86_64_SSESF_CLASS:
2487 case X86_64_SSEDF_CLASS:
2488 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2489 case X86_64_X87_CLASS:
2490 return gen_rtx_REG (mode, FIRST_STACK_REG);
2491 case X86_64_NO_CLASS:
2492 /* Zero sized array, struct or class. */
2497 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2499 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2501 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2502 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2503 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2504 && class[1] == X86_64_INTEGER_CLASS
2505 && (mode == CDImode || mode == TImode || mode == TFmode)
2506 && intreg[0] + 1 == intreg[1])
2507 return gen_rtx_REG (mode, intreg[0]);
2509 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2510 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
2512 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2514 /* Otherwise figure out the entries of the PARALLEL. */
2515 for (i = 0; i < n; i++)
2519 case X86_64_NO_CLASS:
2521 case X86_64_INTEGER_CLASS:
2522 case X86_64_INTEGERSI_CLASS:
2523 /* Merge TImodes on aligned occasions here too. */
2524 if (i * 8 + 8 > bytes)
2525 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2526 else if (class[i] == X86_64_INTEGERSI_CLASS)
2530 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2531 if (tmpmode == BLKmode)
2533 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2534 gen_rtx_REG (tmpmode, *intreg),
2538 case X86_64_SSESF_CLASS:
2539 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2540 gen_rtx_REG (SFmode,
2541 SSE_REGNO (sse_regno)),
2545 case X86_64_SSEDF_CLASS:
2546 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2547 gen_rtx_REG (DFmode,
2548 SSE_REGNO (sse_regno)),
2552 case X86_64_SSE_CLASS:
2553 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2557 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2558 gen_rtx_REG (tmpmode,
2559 SSE_REGNO (sse_regno)),
2561 if (tmpmode == TImode)
2569 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2570 for (i = 0; i < nexps; i++)
2571 XVECEXP (ret, 0, i) = exp [i];
2575 /* Update the data in CUM to advance over an argument
2576 of mode MODE and data type TYPE.
2577 (TYPE is null for libcalls where that information may not be available.) */
2580 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2581 enum machine_mode mode, /* current arg mode */
2582 tree type, /* type of the argument or 0 if lib support */
2583 int named) /* whether or not the argument was named */
2586 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2587 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2589 if (TARGET_DEBUG_ARG)
2591 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2592 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2595 int int_nregs, sse_nregs;
2596 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2597 cum->words += words;
2598 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2600 cum->nregs -= int_nregs;
2601 cum->sse_nregs -= sse_nregs;
2602 cum->regno += int_nregs;
2603 cum->sse_regno += sse_nregs;
2606 cum->words += words;
2610 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2611 && (!type || !AGGREGATE_TYPE_P (type)))
2613 cum->sse_words += words;
2614 cum->sse_nregs -= 1;
2615 cum->sse_regno += 1;
2616 if (cum->sse_nregs <= 0)
2622 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2623 && (!type || !AGGREGATE_TYPE_P (type)))
2625 cum->mmx_words += words;
2626 cum->mmx_nregs -= 1;
2627 cum->mmx_regno += 1;
2628 if (cum->mmx_nregs <= 0)
2636 cum->words += words;
2637 cum->nregs -= words;
2638 cum->regno += words;
2640 if (cum->nregs <= 0)
2650 /* Define where to put the arguments to a function.
2651 Value is zero to push the argument on the stack,
2652 or a hard register in which to store the argument.
2654 MODE is the argument's machine mode.
2655 TYPE is the data type of the argument (as a tree).
2656 This is null for libcalls where that information may
2658 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2659 the preceding args and about the function being called.
2660 NAMED is nonzero if this argument is a named parameter
2661 (otherwise it is an extra parameter matching an ellipsis). */
2664 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2665 enum machine_mode mode, /* current arg mode */
2666 tree type, /* type of the argument or 0 if lib support */
2667 int named) /* != 0 for normal args, == 0 for ... args */
2671 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2672 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2673 static bool warnedsse, warnedmmx;
2675 /* Handle a hidden AL argument containing number of registers for varargs
2676 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2678 if (mode == VOIDmode)
2681 return GEN_INT (cum->maybe_vaarg
2682 ? (cum->sse_nregs < 0
2690 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2691 &x86_64_int_parameter_registers [cum->regno],
2696 /* For now, pass fp/complex values on the stack. */
2708 if (words <= cum->nregs)
2710 int regno = cum->regno;
2712 /* Fastcall allocates the first two DWORD (SImode) or
2713 smaller arguments to ECX and EDX. */
2716 if (mode == BLKmode || mode == DImode)
2719 /* ECX not EAX is the first allocated register. */
2723 ret = gen_rtx_REG (mode, regno);
2733 if (!type || !AGGREGATE_TYPE_P (type))
2735 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2738 warning ("SSE vector argument without SSE enabled "
2742 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2749 if (!type || !AGGREGATE_TYPE_P (type))
2751 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2754 warning ("MMX vector argument without MMX enabled "
2758 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2763 if (TARGET_DEBUG_ARG)
2766 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2767 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2770 print_simple_rtl (stderr, ret);
2772 fprintf (stderr, ", stack");
2774 fprintf (stderr, " )\n");
2780 /* A C expression that indicates when an argument must be passed by
2781 reference. If nonzero for an argument, a copy of that argument is
2782 made in memory and a pointer to the argument is passed instead of
2783 the argument itself. The pointer is passed in whatever way is
2784 appropriate for passing a pointer to that type. */
2787 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2788 enum machine_mode mode ATTRIBUTE_UNUSED,
2789 tree type, bool named ATTRIBUTE_UNUSED)
2794 if (type && int_size_in_bytes (type) == -1)
2796 if (TARGET_DEBUG_ARG)
2797 fprintf (stderr, "function_arg_pass_by_reference\n");
2804 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2807 contains_128bit_aligned_vector_p (tree type)
2809 enum machine_mode mode = TYPE_MODE (type);
2810 if (SSE_REG_MODE_P (mode)
2811 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2813 if (TYPE_ALIGN (type) < 128)
2816 if (AGGREGATE_TYPE_P (type))
2818 /* Walk the aggregates recursively. */
2819 if (TREE_CODE (type) == RECORD_TYPE
2820 || TREE_CODE (type) == UNION_TYPE
2821 || TREE_CODE (type) == QUAL_UNION_TYPE)
2825 if (TYPE_BINFO (type))
2827 tree binfo, base_binfo;
2830 for (binfo = TYPE_BINFO (type), i = 0;
2831 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2832 if (contains_128bit_aligned_vector_p (BINFO_TYPE (base_binfo)))
2835 /* And now merge the fields of structure. */
2836 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2838 if (TREE_CODE (field) == FIELD_DECL
2839 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2843 /* Just for use if some languages passes arrays by value. */
2844 else if (TREE_CODE (type) == ARRAY_TYPE)
2846 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2855 /* Gives the alignment boundary, in bits, of an argument with the
2856 specified mode and type. */
2859 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2863 align = TYPE_ALIGN (type);
2865 align = GET_MODE_ALIGNMENT (mode);
2866 if (align < PARM_BOUNDARY)
2867 align = PARM_BOUNDARY;
2870 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2871 make an exception for SSE modes since these require 128bit
2874 The handling here differs from field_alignment. ICC aligns MMX
2875 arguments to 4 byte boundaries, while structure fields are aligned
2876 to 8 byte boundaries. */
2878 align = PARM_BOUNDARY;
2881 if (!SSE_REG_MODE_P (mode))
2882 align = PARM_BOUNDARY;
2886 if (!contains_128bit_aligned_vector_p (type))
2887 align = PARM_BOUNDARY;
2895 /* Return true if N is a possible register number of function value. */
2897 ix86_function_value_regno_p (int regno)
2901 return ((regno) == 0
2902 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2903 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2905 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2906 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2907 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2910 /* Define how to find the value returned by a function.
2911 VALTYPE is the data type of the value (as a tree).
2912 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2913 otherwise, FUNC is 0. */
2915 ix86_function_value (tree valtype)
2919 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2920 REGPARM_MAX, SSE_REGPARM_MAX,
2921 x86_64_int_return_registers, 0);
2922 /* For zero sized structures, construct_container return NULL, but we need
2923 to keep rest of compiler happy by returning meaningful value. */
2925 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2929 return gen_rtx_REG (TYPE_MODE (valtype),
2930 ix86_value_regno (TYPE_MODE (valtype)));
2933 /* Return false iff type is returned in memory. */
2935 ix86_return_in_memory (tree type)
2937 int needed_intregs, needed_sseregs, size;
2938 enum machine_mode mode = TYPE_MODE (type);
2941 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2943 if (mode == BLKmode)
2946 size = int_size_in_bytes (type);
2948 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2951 if (VECTOR_MODE_P (mode) || mode == TImode)
2953 /* User-created vectors small enough to fit in EAX. */
2957 /* MMX/3dNow values are returned on the stack, since we've
2958 got to EMMS/FEMMS before returning. */
2962 /* SSE values are returned in XMM0, except when it doesn't exist. */
2964 return (TARGET_SSE ? 0 : 1);
2975 /* When returning SSE vector types, we have a choice of either
2976 (1) being abi incompatible with a -march switch, or
2977 (2) generating an error.
2978 Given no good solution, I think the safest thing is one warning.
2979 The user won't be able to use -Werror, but....
2981 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
2982 called in response to actually generating a caller or callee that
2983 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
2984 via aggregate_value_p for general type probing from tree-ssa. */
2987 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
2991 if (!TARGET_SSE && type && !warned)
2993 /* Look at the return type of the function, not the function type. */
2994 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
2997 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3000 warning ("SSE vector return without SSE enabled changes the ABI");
3007 /* Define how to find the value returned by a library function
3008 assuming the value has mode MODE. */
3010 ix86_libcall_value (enum machine_mode mode)
3021 return gen_rtx_REG (mode, FIRST_SSE_REG);
3023 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3028 return gen_rtx_REG (mode, 0);
3032 return gen_rtx_REG (mode, ix86_value_regno (mode));
3035 /* Given a mode, return the register to use for a return value. */
3038 ix86_value_regno (enum machine_mode mode)
3040 /* Floating point return values in %st(0). */
3041 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
3042 return FIRST_FLOAT_REG;
3043 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3044 we prevent this case when sse is not available. */
3045 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3046 return FIRST_SSE_REG;
3047 /* Everything else in %eax. */
3051 /* Create the va_list data type. */
3054 ix86_build_builtin_va_list (void)
3056 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3058 /* For i386 we use plain pointer to argument area. */
3060 return build_pointer_type (char_type_node);
3062 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3063 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3065 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3066 unsigned_type_node);
3067 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3068 unsigned_type_node);
3069 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3071 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3074 DECL_FIELD_CONTEXT (f_gpr) = record;
3075 DECL_FIELD_CONTEXT (f_fpr) = record;
3076 DECL_FIELD_CONTEXT (f_ovf) = record;
3077 DECL_FIELD_CONTEXT (f_sav) = record;
3079 TREE_CHAIN (record) = type_decl;
3080 TYPE_NAME (record) = type_decl;
3081 TYPE_FIELDS (record) = f_gpr;
3082 TREE_CHAIN (f_gpr) = f_fpr;
3083 TREE_CHAIN (f_fpr) = f_ovf;
3084 TREE_CHAIN (f_ovf) = f_sav;
3086 layout_type (record);
3088 /* The correct type is an array type of one element. */
3089 return build_array_type (record, build_index_type (size_zero_node));
3092 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3095 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3096 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3099 CUMULATIVE_ARGS next_cum;
3100 rtx save_area = NULL_RTX, mem;
3113 /* Indicate to allocate space on the stack for varargs save area. */
3114 ix86_save_varrargs_registers = 1;
3116 cfun->stack_alignment_needed = 128;
3118 fntype = TREE_TYPE (current_function_decl);
3119 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3120 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3121 != void_type_node));
3123 /* For varargs, we do not want to skip the dummy va_dcl argument.
3124 For stdargs, we do want to skip the last named argument. */
3127 function_arg_advance (&next_cum, mode, type, 1);
3130 save_area = frame_pointer_rtx;
3132 set = get_varargs_alias_set ();
3134 for (i = next_cum.regno; i < ix86_regparm; i++)
3136 mem = gen_rtx_MEM (Pmode,
3137 plus_constant (save_area, i * UNITS_PER_WORD));
3138 set_mem_alias_set (mem, set);
3139 emit_move_insn (mem, gen_rtx_REG (Pmode,
3140 x86_64_int_parameter_registers[i]));
3143 if (next_cum.sse_nregs)
3145 /* Now emit code to save SSE registers. The AX parameter contains number
3146 of SSE parameter registers used to call this function. We use
3147 sse_prologue_save insn template that produces computed jump across
3148 SSE saves. We need some preparation work to get this working. */
3150 label = gen_label_rtx ();
3151 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3153 /* Compute address to jump to :
3154 label - 5*eax + nnamed_sse_arguments*5 */
3155 tmp_reg = gen_reg_rtx (Pmode);
3156 nsse_reg = gen_reg_rtx (Pmode);
3157 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3158 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3159 gen_rtx_MULT (Pmode, nsse_reg,
3161 if (next_cum.sse_regno)
3164 gen_rtx_CONST (DImode,
3165 gen_rtx_PLUS (DImode,
3167 GEN_INT (next_cum.sse_regno * 4))));
3169 emit_move_insn (nsse_reg, label_ref);
3170 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3172 /* Compute address of memory block we save into. We always use pointer
3173 pointing 127 bytes after first byte to store - this is needed to keep
3174 instruction size limited by 4 bytes. */
3175 tmp_reg = gen_reg_rtx (Pmode);
3176 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3177 plus_constant (save_area,
3178 8 * REGPARM_MAX + 127)));
3179 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3180 set_mem_alias_set (mem, set);
3181 set_mem_align (mem, BITS_PER_WORD);
3183 /* And finally do the dirty job! */
3184 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3185 GEN_INT (next_cum.sse_regno), label));
3190 /* Implement va_start. */
3193 ix86_va_start (tree valist, rtx nextarg)
3195 HOST_WIDE_INT words, n_gpr, n_fpr;
3196 tree f_gpr, f_fpr, f_ovf, f_sav;
3197 tree gpr, fpr, ovf, sav, t;
3199 /* Only 64bit target needs something special. */
3202 std_expand_builtin_va_start (valist, nextarg);
3206 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3207 f_fpr = TREE_CHAIN (f_gpr);
3208 f_ovf = TREE_CHAIN (f_fpr);
3209 f_sav = TREE_CHAIN (f_ovf);
3211 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3212 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3213 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3214 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3215 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3217 /* Count number of gp and fp argument registers used. */
3218 words = current_function_args_info.words;
3219 n_gpr = current_function_args_info.regno;
3220 n_fpr = current_function_args_info.sse_regno;
3222 if (TARGET_DEBUG_ARG)
3223 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3224 (int) words, (int) n_gpr, (int) n_fpr);
3226 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3227 build_int_2 (n_gpr * 8, 0));
3228 TREE_SIDE_EFFECTS (t) = 1;
3229 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3231 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3232 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3233 TREE_SIDE_EFFECTS (t) = 1;
3234 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3236 /* Find the overflow area. */
3237 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3239 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3240 build_int_2 (words * UNITS_PER_WORD, 0));
3241 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3242 TREE_SIDE_EFFECTS (t) = 1;
3243 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3245 /* Find the register save area.
3246 Prologue of the function save it right above stack frame. */
3247 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3248 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3249 TREE_SIDE_EFFECTS (t) = 1;
3250 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3253 /* Implement va_arg. */
3256 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3258 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3259 tree f_gpr, f_fpr, f_ovf, f_sav;
3260 tree gpr, fpr, ovf, sav, t;
3262 tree lab_false, lab_over = NULL_TREE;
3268 /* Only 64bit target needs something special. */
3270 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3272 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3273 f_fpr = TREE_CHAIN (f_gpr);
3274 f_ovf = TREE_CHAIN (f_fpr);
3275 f_sav = TREE_CHAIN (f_ovf);
3277 valist = build_fold_indirect_ref (valist);
3278 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3279 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3280 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3281 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3283 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3285 type = build_pointer_type (type);
3286 size = int_size_in_bytes (type);
3287 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3289 container = construct_container (TYPE_MODE (type), type, 0,
3290 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3292 * Pull the value out of the saved registers ...
3295 addr = create_tmp_var (ptr_type_node, "addr");
3296 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3300 int needed_intregs, needed_sseregs;
3302 tree int_addr, sse_addr;
3304 lab_false = create_artificial_label ();
3305 lab_over = create_artificial_label ();
3307 examine_argument (TYPE_MODE (type), type, 0,
3308 &needed_intregs, &needed_sseregs);
3310 need_temp = (!REG_P (container)
3311 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3312 || TYPE_ALIGN (type) > 128));
3314 /* In case we are passing structure, verify that it is consecutive block
3315 on the register save area. If not we need to do moves. */
3316 if (!need_temp && !REG_P (container))
3318 /* Verify that all registers are strictly consecutive */
3319 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3323 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3325 rtx slot = XVECEXP (container, 0, i);
3326 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3327 || INTVAL (XEXP (slot, 1)) != i * 16)
3335 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3337 rtx slot = XVECEXP (container, 0, i);
3338 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3339 || INTVAL (XEXP (slot, 1)) != i * 8)
3351 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3352 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3353 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3354 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3356 /* First ensure that we fit completely in registers. */
3359 t = build_int_2 ((REGPARM_MAX - needed_intregs + 1) * 8, 0);
3360 TREE_TYPE (t) = TREE_TYPE (gpr);
3361 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3362 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3363 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3364 gimplify_and_add (t, pre_p);
3368 t = build_int_2 ((SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3369 + REGPARM_MAX * 8, 0);
3370 TREE_TYPE (t) = TREE_TYPE (fpr);
3371 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3372 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3373 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3374 gimplify_and_add (t, pre_p);
3377 /* Compute index to start of area used for integer regs. */
3380 /* int_addr = gpr + sav; */
3381 t = build2 (PLUS_EXPR, ptr_type_node, sav, gpr);
3382 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3383 gimplify_and_add (t, pre_p);
3387 /* sse_addr = fpr + sav; */
3388 t = build2 (PLUS_EXPR, ptr_type_node, sav, fpr);
3389 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3390 gimplify_and_add (t, pre_p);
3395 tree temp = create_tmp_var (type, "va_arg_tmp");
3398 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3399 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3400 gimplify_and_add (t, pre_p);
3402 for (i = 0; i < XVECLEN (container, 0); i++)
3404 rtx slot = XVECEXP (container, 0, i);
3405 rtx reg = XEXP (slot, 0);
3406 enum machine_mode mode = GET_MODE (reg);
3407 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3408 tree addr_type = build_pointer_type (piece_type);
3411 tree dest_addr, dest;
3413 if (SSE_REGNO_P (REGNO (reg)))
3415 src_addr = sse_addr;
3416 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3420 src_addr = int_addr;
3421 src_offset = REGNO (reg) * 8;
3423 src_addr = fold_convert (addr_type, src_addr);
3424 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3425 size_int (src_offset)));
3426 src = build_fold_indirect_ref (src_addr);
3428 dest_addr = fold_convert (addr_type, addr);
3429 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3430 size_int (INTVAL (XEXP (slot, 1)))));
3431 dest = build_fold_indirect_ref (dest_addr);
3433 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3434 gimplify_and_add (t, pre_p);
3440 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3441 build_int_2 (needed_intregs * 8, 0));
3442 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3443 gimplify_and_add (t, pre_p);
3448 build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3449 build_int_2 (needed_sseregs * 16, 0));
3450 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3451 gimplify_and_add (t, pre_p);
3454 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3455 gimplify_and_add (t, pre_p);
3457 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3458 append_to_statement_list (t, pre_p);
3461 /* ... otherwise out of the overflow area. */
3463 /* Care for on-stack alignment if needed. */
3464 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3468 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3469 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3470 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3472 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3474 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3475 gimplify_and_add (t2, pre_p);
3477 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3478 build_int_2 (rsize * UNITS_PER_WORD, 0));
3479 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3480 gimplify_and_add (t, pre_p);
3484 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3485 append_to_statement_list (t, pre_p);
3488 ptrtype = build_pointer_type (type);
3489 addr = fold_convert (ptrtype, addr);
3492 addr = build_fold_indirect_ref (addr);
3493 return build_fold_indirect_ref (addr);
3496 /* Return nonzero if OP is either a i387 or SSE fp register. */
3498 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3500 return ANY_FP_REG_P (op);
3503 /* Return nonzero if OP is an i387 fp register. */
3505 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3507 return FP_REG_P (op);
3510 /* Return nonzero if OP is a non-fp register_operand. */
3512 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3514 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3517 /* Return nonzero if OP is a register operand other than an
3518 i387 fp register. */
3520 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3522 return register_operand (op, mode) && !FP_REG_P (op);
3525 /* Return nonzero if OP is general operand representable on x86_64. */
3528 x86_64_general_operand (rtx op, enum machine_mode mode)
3531 return general_operand (op, mode);
3532 if (nonimmediate_operand (op, mode))
3534 return x86_64_sign_extended_value (op);
3537 /* Return nonzero if OP is general operand representable on x86_64
3538 as either sign extended or zero extended constant. */
3541 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3544 return general_operand (op, mode);
3545 if (nonimmediate_operand (op, mode))
3547 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3550 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3553 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3556 return nonmemory_operand (op, mode);
3557 if (register_operand (op, mode))
3559 return x86_64_sign_extended_value (op);
3562 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3565 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3567 if (!TARGET_64BIT || !flag_pic)
3568 return nonmemory_operand (op, mode);
3569 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3571 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3576 /* Return nonzero if OPNUM's MEM should be matched
3577 in movabs* patterns. */
3580 ix86_check_movabs (rtx insn, int opnum)
3584 set = PATTERN (insn);
3585 if (GET_CODE (set) == PARALLEL)
3586 set = XVECEXP (set, 0, 0);
3587 if (GET_CODE (set) != SET)
3589 mem = XEXP (set, opnum);
3590 while (GET_CODE (mem) == SUBREG)
3591 mem = SUBREG_REG (mem);
3592 if (GET_CODE (mem) != MEM)
3594 return (volatile_ok || !MEM_VOLATILE_P (mem));
3597 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3600 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3603 return nonmemory_operand (op, mode);
3604 if (register_operand (op, mode))
3606 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3609 /* Return nonzero if OP is immediate operand representable on x86_64. */
3612 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3615 return immediate_operand (op, mode);
3616 return x86_64_sign_extended_value (op);
3619 /* Return nonzero if OP is immediate operand representable on x86_64. */
3622 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3624 return x86_64_zero_extended_value (op);
3627 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3628 for shift & compare patterns, as shifting by 0 does not change flags),
3629 else return zero. */
3632 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3634 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3637 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3638 reference and a constant. */
3641 symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3643 switch (GET_CODE (op))
3651 if (GET_CODE (op) == SYMBOL_REF
3652 || GET_CODE (op) == LABEL_REF
3653 || (GET_CODE (op) == UNSPEC
3654 && (XINT (op, 1) == UNSPEC_GOT
3655 || XINT (op, 1) == UNSPEC_GOTOFF
3656 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3658 if (GET_CODE (op) != PLUS
3659 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3663 if (GET_CODE (op) == SYMBOL_REF
3664 || GET_CODE (op) == LABEL_REF)
3666 /* Only @GOTOFF gets offsets. */
3667 if (GET_CODE (op) != UNSPEC
3668 || XINT (op, 1) != UNSPEC_GOTOFF)
3671 op = XVECEXP (op, 0, 0);
3672 if (GET_CODE (op) == SYMBOL_REF
3673 || GET_CODE (op) == LABEL_REF)
3682 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3685 pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3687 if (GET_CODE (op) != CONST)
3692 if (GET_CODE (op) == UNSPEC
3693 && XINT (op, 1) == UNSPEC_GOTPCREL)
3695 if (GET_CODE (op) == PLUS
3696 && GET_CODE (XEXP (op, 0)) == UNSPEC
3697 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3702 if (GET_CODE (op) == UNSPEC)
3704 if (GET_CODE (op) != PLUS
3705 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3708 if (GET_CODE (op) == UNSPEC)
3714 /* Return true if OP is a symbolic operand that resolves locally. */
3717 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3719 if (GET_CODE (op) == CONST
3720 && GET_CODE (XEXP (op, 0)) == PLUS
3721 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3722 op = XEXP (XEXP (op, 0), 0);
3724 if (GET_CODE (op) == LABEL_REF)
3727 if (GET_CODE (op) != SYMBOL_REF)
3730 if (SYMBOL_REF_LOCAL_P (op))
3733 /* There is, however, a not insubstantial body of code in the rest of
3734 the compiler that assumes it can just stick the results of
3735 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3736 /* ??? This is a hack. Should update the body of the compiler to
3737 always create a DECL an invoke targetm.encode_section_info. */
3738 if (strncmp (XSTR (op, 0), internal_label_prefix,
3739 internal_label_prefix_len) == 0)
3745 /* Test for various thread-local symbols. */
3748 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3750 if (GET_CODE (op) != SYMBOL_REF)
3752 return SYMBOL_REF_TLS_MODEL (op);
3756 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3758 if (GET_CODE (op) != SYMBOL_REF)
3760 return SYMBOL_REF_TLS_MODEL (op) == kind;
3764 global_dynamic_symbolic_operand (rtx op,
3765 enum machine_mode mode ATTRIBUTE_UNUSED)
3767 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3771 local_dynamic_symbolic_operand (rtx op,
3772 enum machine_mode mode ATTRIBUTE_UNUSED)
3774 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3778 initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3780 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3784 local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3786 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3789 /* Test for a valid operand for a call instruction. Don't allow the
3790 arg pointer register or virtual regs since they may decay into
3791 reg + const, which the patterns can't handle. */
3794 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3796 /* Disallow indirect through a virtual register. This leads to
3797 compiler aborts when trying to eliminate them. */
3798 if (GET_CODE (op) == REG
3799 && (op == arg_pointer_rtx
3800 || op == frame_pointer_rtx
3801 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3802 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3805 /* Disallow `call 1234'. Due to varying assembler lameness this
3806 gets either rejected or translated to `call .+1234'. */
3807 if (GET_CODE (op) == CONST_INT)
3810 /* Explicitly allow SYMBOL_REF even if pic. */
3811 if (GET_CODE (op) == SYMBOL_REF)
3814 /* Otherwise we can allow any general_operand in the address. */
3815 return general_operand (op, Pmode);
3818 /* Test for a valid operand for a call instruction. Don't allow the
3819 arg pointer register or virtual regs since they may decay into
3820 reg + const, which the patterns can't handle. */
3823 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3825 /* Disallow indirect through a virtual register. This leads to
3826 compiler aborts when trying to eliminate them. */
3827 if (GET_CODE (op) == REG
3828 && (op == arg_pointer_rtx
3829 || op == frame_pointer_rtx
3830 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3831 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3834 /* Explicitly allow SYMBOL_REF even if pic. */
3835 if (GET_CODE (op) == SYMBOL_REF)
3838 /* Otherwise we can only allow register operands. */
3839 return register_operand (op, Pmode);
3843 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3845 if (GET_CODE (op) == CONST
3846 && GET_CODE (XEXP (op, 0)) == PLUS
3847 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3848 op = XEXP (XEXP (op, 0), 0);
3849 return GET_CODE (op) == SYMBOL_REF;
3852 /* Match exactly zero and one. */
3855 const0_operand (rtx op, enum machine_mode mode)
3857 return op == CONST0_RTX (mode);
3861 const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3863 return op == const1_rtx;
3866 /* Match 2, 4, or 8. Used for leal multiplicands. */
3869 const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3871 return (GET_CODE (op) == CONST_INT
3872 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3876 const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3878 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3882 const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3884 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3888 const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3890 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3894 const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3896 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3900 /* True if this is a constant appropriate for an increment or decrement. */
3903 incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3905 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3906 registers, since carry flag is not set. */
3907 if ((TARGET_PENTIUM4 || TARGET_NOCONA) && !optimize_size)
3909 return op == const1_rtx || op == constm1_rtx;
3912 /* Return nonzero if OP is acceptable as operand of DImode shift
3916 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3919 return nonimmediate_operand (op, mode);
3921 return register_operand (op, mode);
3924 /* Return false if this is the stack pointer, or any other fake
3925 register eliminable to the stack pointer. Otherwise, this is
3928 This is used to prevent esp from being used as an index reg.
3929 Which would only happen in pathological cases. */
3932 reg_no_sp_operand (rtx op, enum machine_mode mode)
3935 if (GET_CODE (t) == SUBREG)
3937 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3940 return register_operand (op, mode);
3944 mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3946 return MMX_REG_P (op);
3949 /* Return false if this is any eliminable register. Otherwise
3953 general_no_elim_operand (rtx op, enum machine_mode mode)
3956 if (GET_CODE (t) == SUBREG)
3958 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3959 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3960 || t == virtual_stack_dynamic_rtx)
3963 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3964 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3967 return general_operand (op, mode);
3970 /* Return false if this is any eliminable register. Otherwise
3971 register_operand or const_int. */
3974 nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
3977 if (GET_CODE (t) == SUBREG)
3979 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3980 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3981 || t == virtual_stack_dynamic_rtx)
3984 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3987 /* Return false if this is any eliminable register or stack register,
3988 otherwise work like register_operand. */
3991 index_register_operand (rtx op, enum machine_mode mode)
3994 if (GET_CODE (t) == SUBREG)
3998 if (t == arg_pointer_rtx
3999 || t == frame_pointer_rtx
4000 || t == virtual_incoming_args_rtx
4001 || t == virtual_stack_vars_rtx
4002 || t == virtual_stack_dynamic_rtx
4003 || REGNO (t) == STACK_POINTER_REGNUM)
4006 return general_operand (op, mode);
4009 /* Return true if op is a Q_REGS class register. */
4012 q_regs_operand (rtx op, enum machine_mode mode)
4014 if (mode != VOIDmode && GET_MODE (op) != mode)
4016 if (GET_CODE (op) == SUBREG)
4017 op = SUBREG_REG (op);
4018 return ANY_QI_REG_P (op);
4021 /* Return true if op is an flags register. */
4024 flags_reg_operand (rtx op, enum machine_mode mode)
4026 if (mode != VOIDmode && GET_MODE (op) != mode)
4028 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
4031 /* Return true if op is a NON_Q_REGS class register. */
4034 non_q_regs_operand (rtx op, enum machine_mode mode)
4036 if (mode != VOIDmode && GET_MODE (op) != mode)
4038 if (GET_CODE (op) == SUBREG)
4039 op = SUBREG_REG (op);
4040 return NON_QI_REG_P (op);
4044 zero_extended_scalar_load_operand (rtx op,
4045 enum machine_mode mode ATTRIBUTE_UNUSED)
4048 if (GET_CODE (op) != MEM)
4050 op = maybe_get_pool_constant (op);
4053 if (GET_CODE (op) != CONST_VECTOR)
4056 (GET_MODE_SIZE (GET_MODE (op)) /
4057 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
4058 for (n_elts--; n_elts > 0; n_elts--)
4060 rtx elt = CONST_VECTOR_ELT (op, n_elts);
4061 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
4067 /* Return 1 when OP is operand acceptable for standard SSE move. */
4069 vector_move_operand (rtx op, enum machine_mode mode)
4071 if (nonimmediate_operand (op, mode))
4073 if (GET_MODE (op) != mode && mode != VOIDmode)
4075 return (op == CONST0_RTX (GET_MODE (op)));
4078 /* Return true if op if a valid address, and does not contain
4079 a segment override. */
4082 no_seg_address_operand (rtx op, enum machine_mode mode)
4084 struct ix86_address parts;
4086 if (! address_operand (op, mode))
4089 if (! ix86_decompose_address (op, &parts))
4092 return parts.seg == SEG_DEFAULT;
4095 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
4098 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4100 enum rtx_code code = GET_CODE (op);
4103 /* Operations supported directly. */
4113 /* These are equivalent to ones above in non-IEEE comparisons. */
4120 return !TARGET_IEEE_FP;
4125 /* Return 1 if OP is a valid comparison operator in valid mode. */
4127 ix86_comparison_operator (rtx op, enum machine_mode mode)
4129 enum machine_mode inmode;
4130 enum rtx_code code = GET_CODE (op);
4131 if (mode != VOIDmode && GET_MODE (op) != mode)
4133 if (!COMPARISON_P (op))
4135 inmode = GET_MODE (XEXP (op, 0));
4137 if (inmode == CCFPmode || inmode == CCFPUmode)
4139 enum rtx_code second_code, bypass_code;
4140 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4141 return (bypass_code == NIL && second_code == NIL);
4148 if (inmode == CCmode || inmode == CCGCmode
4149 || inmode == CCGOCmode || inmode == CCNOmode)
4152 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
4153 if (inmode == CCmode)
4157 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
4165 /* Return 1 if OP is a valid comparison operator testing carry flag
4168 ix86_carry_flag_operator (rtx op, enum machine_mode mode)
4170 enum machine_mode inmode;
4171 enum rtx_code code = GET_CODE (op);
4173 if (mode != VOIDmode && GET_MODE (op) != mode)
4175 if (!COMPARISON_P (op))
4177 inmode = GET_MODE (XEXP (op, 0));
4178 if (GET_CODE (XEXP (op, 0)) != REG
4179 || REGNO (XEXP (op, 0)) != 17
4180 || XEXP (op, 1) != const0_rtx)
4183 if (inmode == CCFPmode || inmode == CCFPUmode)
4185 enum rtx_code second_code, bypass_code;
4187 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4188 if (bypass_code != NIL || second_code != NIL)
4190 code = ix86_fp_compare_code_to_integer (code);
4192 else if (inmode != CCmode)
4197 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4200 fcmov_comparison_operator (rtx op, enum machine_mode mode)
4202 enum machine_mode inmode;
4203 enum rtx_code code = GET_CODE (op);
4205 if (mode != VOIDmode && GET_MODE (op) != mode)
4207 if (!COMPARISON_P (op))
4209 inmode = GET_MODE (XEXP (op, 0));
4210 if (inmode == CCFPmode || inmode == CCFPUmode)
4212 enum rtx_code second_code, bypass_code;
4214 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4215 if (bypass_code != NIL || second_code != NIL)
4217 code = ix86_fp_compare_code_to_integer (code);
4219 /* i387 supports just limited amount of conditional codes. */
4222 case LTU: case GTU: case LEU: case GEU:
4223 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4226 case ORDERED: case UNORDERED:
4234 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4237 promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4239 switch (GET_CODE (op))
4242 /* Modern CPUs have same latency for HImode and SImode multiply,
4243 but 386 and 486 do HImode multiply faster. */
4244 return ix86_tune > PROCESSOR_I486;
4256 /* Nearly general operand, but accept any const_double, since we wish
4257 to be able to drop them into memory rather than have them get pulled
4261 cmp_fp_expander_operand (rtx op, enum machine_mode mode)
4263 if (mode != VOIDmode && mode != GET_MODE (op))
4265 if (GET_CODE (op) == CONST_DOUBLE)
4267 return general_operand (op, mode);
4270 /* Match an SI or HImode register for a zero_extract. */
4273 ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4276 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4277 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4280 if (!register_operand (op, VOIDmode))
4283 /* Be careful to accept only registers having upper parts. */
4284 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4285 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4288 /* Return 1 if this is a valid binary floating-point operation.
4289 OP is the expression matched, and MODE is its mode. */
4292 binary_fp_operator (rtx op, enum machine_mode mode)
4294 if (mode != VOIDmode && mode != GET_MODE (op))
4297 switch (GET_CODE (op))
4303 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4311 mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4313 return GET_CODE (op) == MULT;
4317 div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4319 return GET_CODE (op) == DIV;
4323 arith_or_logical_operator (rtx op, enum machine_mode mode)
4325 return ((mode == VOIDmode || GET_MODE (op) == mode)
4326 && ARITHMETIC_P (op));
4329 /* Returns 1 if OP is memory operand with a displacement. */
4332 memory_displacement_operand (rtx op, enum machine_mode mode)
4334 struct ix86_address parts;
4336 if (! memory_operand (op, mode))
4339 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4342 return parts.disp != NULL_RTX;
4345 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4346 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4348 ??? It seems likely that this will only work because cmpsi is an
4349 expander, and no actual insns use this. */
4352 cmpsi_operand (rtx op, enum machine_mode mode)
4354 if (nonimmediate_operand (op, mode))
4357 if (GET_CODE (op) == AND
4358 && GET_MODE (op) == SImode
4359 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4360 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4361 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4362 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4363 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4364 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4370 /* Returns 1 if OP is memory operand that can not be represented by the
4374 long_memory_operand (rtx op, enum machine_mode mode)
4376 if (! memory_operand (op, mode))
4379 return memory_address_length (op) != 0;
4382 /* Return nonzero if the rtx is known aligned. */
4385 aligned_operand (rtx op, enum machine_mode mode)
4387 struct ix86_address parts;
4389 if (!general_operand (op, mode))
4392 /* Registers and immediate operands are always "aligned". */
4393 if (GET_CODE (op) != MEM)
4396 /* Don't even try to do any aligned optimizations with volatiles. */
4397 if (MEM_VOLATILE_P (op))
4402 /* Pushes and pops are only valid on the stack pointer. */
4403 if (GET_CODE (op) == PRE_DEC
4404 || GET_CODE (op) == POST_INC)
4407 /* Decode the address. */
4408 if (! ix86_decompose_address (op, &parts))
4411 /* Look for some component that isn't known to be aligned. */
4415 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4420 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4425 if (GET_CODE (parts.disp) != CONST_INT
4426 || (INTVAL (parts.disp) & 3) != 0)
4430 /* Didn't find one -- this must be an aligned address. */
4434 /* Initialize the table of extra 80387 mathematical constants. */
4437 init_ext_80387_constants (void)
4439 static const char * cst[5] =
4441 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4442 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4443 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4444 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4445 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4449 for (i = 0; i < 5; i++)
4451 real_from_string (&ext_80387_constants_table[i], cst[i]);
4452 /* Ensure each constant is rounded to XFmode precision. */
4453 real_convert (&ext_80387_constants_table[i],
4454 XFmode, &ext_80387_constants_table[i]);
4457 ext_80387_constants_init = 1;
4460 /* Return true if the constant is something that can be loaded with
4461 a special instruction. */
4464 standard_80387_constant_p (rtx x)
4466 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4469 if (x == CONST0_RTX (GET_MODE (x)))
4471 if (x == CONST1_RTX (GET_MODE (x)))
4474 /* For XFmode constants, try to find a special 80387 instruction when
4475 optimizing for size or on those CPUs that benefit from them. */
4476 if (GET_MODE (x) == XFmode
4477 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4482 if (! ext_80387_constants_init)
4483 init_ext_80387_constants ();
4485 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4486 for (i = 0; i < 5; i++)
4487 if (real_identical (&r, &ext_80387_constants_table[i]))
4494 /* Return the opcode of the special instruction to be used to load
4498 standard_80387_constant_opcode (rtx x)
4500 switch (standard_80387_constant_p (x))
4520 /* Return the CONST_DOUBLE representing the 80387 constant that is
4521 loaded by the specified special instruction. The argument IDX
4522 matches the return value from standard_80387_constant_p. */
4525 standard_80387_constant_rtx (int idx)
4529 if (! ext_80387_constants_init)
4530 init_ext_80387_constants ();
4546 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4550 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4553 standard_sse_constant_p (rtx x)
4555 if (x == const0_rtx)
4557 return (x == CONST0_RTX (GET_MODE (x)));
4560 /* Returns 1 if OP contains a symbol reference */
4563 symbolic_reference_mentioned_p (rtx op)
4568 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4571 fmt = GET_RTX_FORMAT (GET_CODE (op));
4572 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4578 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4579 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4583 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4590 /* Return 1 if it is appropriate to emit `ret' instructions in the
4591 body of a function. Do this only if the epilogue is simple, needing a
4592 couple of insns. Prior to reloading, we can't tell how many registers
4593 must be saved, so return 0 then. Return 0 if there is no frame
4594 marker to de-allocate.
4596 If NON_SAVING_SETJMP is defined and true, then it is not possible
4597 for the epilogue to be simple, so return 0. This is a special case
4598 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4599 until final, but jump_optimize may need to know sooner if a
4603 ix86_can_use_return_insn_p (void)
4605 struct ix86_frame frame;
4607 #ifdef NON_SAVING_SETJMP
4608 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4612 if (! reload_completed || frame_pointer_needed)
4615 /* Don't allow more than 32 pop, since that's all we can do
4616 with one instruction. */
4617 if (current_function_pops_args
4618 && current_function_args_size >= 32768)
4621 ix86_compute_frame_layout (&frame);
4622 return frame.to_allocate == 0 && frame.nregs == 0;
4625 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4627 x86_64_sign_extended_value (rtx value)
4629 switch (GET_CODE (value))
4631 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4632 to be at least 32 and this all acceptable constants are
4633 represented as CONST_INT. */
4635 if (HOST_BITS_PER_WIDE_INT == 32)
4639 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4640 return trunc_int_for_mode (val, SImode) == val;
4644 /* For certain code models, the symbolic references are known to fit.
4645 in CM_SMALL_PIC model we know it fits if it is local to the shared
4646 library. Don't count TLS SYMBOL_REFs here, since they should fit
4647 only if inside of UNSPEC handled below. */
4649 /* TLS symbols are not constant. */
4650 if (tls_symbolic_operand (value, Pmode))
4652 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4654 /* For certain code models, the code is near as well. */
4656 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4657 || ix86_cmodel == CM_KERNEL);
4659 /* We also may accept the offsetted memory references in certain special
4662 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4663 switch (XINT (XEXP (value, 0), 1))
4665 case UNSPEC_GOTPCREL:
4667 case UNSPEC_GOTNTPOFF:
4673 if (GET_CODE (XEXP (value, 0)) == PLUS)
4675 rtx op1 = XEXP (XEXP (value, 0), 0);
4676 rtx op2 = XEXP (XEXP (value, 0), 1);
4677 HOST_WIDE_INT offset;
4679 if (ix86_cmodel == CM_LARGE)
4681 if (GET_CODE (op2) != CONST_INT)
4683 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4684 switch (GET_CODE (op1))
4687 /* For CM_SMALL assume that latest object is 16MB before
4688 end of 31bits boundary. We may also accept pretty
4689 large negative constants knowing that all objects are
4690 in the positive half of address space. */
4691 if (ix86_cmodel == CM_SMALL
4692 && offset < 16*1024*1024
4693 && trunc_int_for_mode (offset, SImode) == offset)
4695 /* For CM_KERNEL we know that all object resist in the
4696 negative half of 32bits address space. We may not
4697 accept negative offsets, since they may be just off
4698 and we may accept pretty large positive ones. */
4699 if (ix86_cmodel == CM_KERNEL
4701 && trunc_int_for_mode (offset, SImode) == offset)
4705 /* These conditions are similar to SYMBOL_REF ones, just the
4706 constraints for code models differ. */
4707 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4708 && offset < 16*1024*1024
4709 && trunc_int_for_mode (offset, SImode) == offset)
4711 if (ix86_cmodel == CM_KERNEL
4713 && trunc_int_for_mode (offset, SImode) == offset)
4717 switch (XINT (op1, 1))
4722 && trunc_int_for_mode (offset, SImode) == offset)
4736 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4738 x86_64_zero_extended_value (rtx value)
4740 switch (GET_CODE (value))
4743 if (HOST_BITS_PER_WIDE_INT == 32)
4744 return (GET_MODE (value) == VOIDmode
4745 && !CONST_DOUBLE_HIGH (value));
4749 if (HOST_BITS_PER_WIDE_INT == 32)
4750 return INTVAL (value) >= 0;
4752 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4755 /* For certain code models, the symbolic references are known to fit. */
4757 /* TLS symbols are not constant. */
4758 if (tls_symbolic_operand (value, Pmode))
4760 return ix86_cmodel == CM_SMALL;
4762 /* For certain code models, the code is near as well. */
4764 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4766 /* We also may accept the offsetted memory references in certain special
4769 if (GET_CODE (XEXP (value, 0)) == PLUS)
4771 rtx op1 = XEXP (XEXP (value, 0), 0);
4772 rtx op2 = XEXP (XEXP (value, 0), 1);
4774 if (ix86_cmodel == CM_LARGE)
4776 switch (GET_CODE (op1))
4780 /* For small code model we may accept pretty large positive
4781 offsets, since one bit is available for free. Negative
4782 offsets are limited by the size of NULL pointer area
4783 specified by the ABI. */
4784 if (ix86_cmodel == CM_SMALL
4785 && GET_CODE (op2) == CONST_INT
4786 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4787 && (trunc_int_for_mode (INTVAL (op2), SImode)
4790 /* ??? For the kernel, we may accept adjustment of
4791 -0x10000000, since we know that it will just convert
4792 negative address space to positive, but perhaps this
4793 is not worthwhile. */
4796 /* These conditions are similar to SYMBOL_REF ones, just the
4797 constraints for code models differ. */
4798 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4799 && GET_CODE (op2) == CONST_INT
4800 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4801 && (trunc_int_for_mode (INTVAL (op2), SImode)
4815 /* Value should be nonzero if functions must have frame pointers.
4816 Zero means the frame pointer need not be set up (and parms may
4817 be accessed via the stack pointer) in functions that seem suitable. */
4820 ix86_frame_pointer_required (void)
4822 /* If we accessed previous frames, then the generated code expects
4823 to be able to access the saved ebp value in our frame. */
4824 if (cfun->machine->accesses_prev_frame)
4827 /* Several x86 os'es need a frame pointer for other reasons,
4828 usually pertaining to setjmp. */
4829 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4832 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4833 the frame pointer by default. Turn it back on now if we've not
4834 got a leaf function. */
4835 if (TARGET_OMIT_LEAF_FRAME_POINTER
4836 && (!current_function_is_leaf))
4839 if (current_function_profile)
4845 /* Record that the current function accesses previous call frames. */
4848 ix86_setup_frame_addresses (void)
4850 cfun->machine->accesses_prev_frame = 1;
4853 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4854 # define USE_HIDDEN_LINKONCE 1
4856 # define USE_HIDDEN_LINKONCE 0
4859 static int pic_labels_used;
4861 /* Fills in the label name that should be used for a pc thunk for
4862 the given register. */
4865 get_pc_thunk_name (char name[32], unsigned int regno)
4867 if (USE_HIDDEN_LINKONCE)
4868 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4870 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4874 /* This function generates code for -fpic that loads %ebx with
4875 the return address of the caller and then returns. */
4878 ix86_file_end (void)
4883 for (regno = 0; regno < 8; ++regno)
4887 if (! ((pic_labels_used >> regno) & 1))
4890 get_pc_thunk_name (name, regno);
4892 if (USE_HIDDEN_LINKONCE)
4896 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4898 TREE_PUBLIC (decl) = 1;
4899 TREE_STATIC (decl) = 1;
4900 DECL_ONE_ONLY (decl) = 1;
4902 (*targetm.asm_out.unique_section) (decl, 0);
4903 named_section (decl, NULL, 0);
4905 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4906 fputs ("\t.hidden\t", asm_out_file);
4907 assemble_name (asm_out_file, name);
4908 fputc ('\n', asm_out_file);
4909 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4914 ASM_OUTPUT_LABEL (asm_out_file, name);
4917 xops[0] = gen_rtx_REG (SImode, regno);
4918 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4919 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4920 output_asm_insn ("ret", xops);
4923 if (NEED_INDICATE_EXEC_STACK)
4924 file_end_indicate_exec_stack ();
4927 /* Emit code for the SET_GOT patterns. */
4930 output_set_got (rtx dest)
4935 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4937 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4939 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4942 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4944 output_asm_insn ("call\t%a2", xops);
4947 /* Output the "canonical" label name ("Lxx$pb") here too. This
4948 is what will be referred to by the Mach-O PIC subsystem. */
4949 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4951 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4952 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4955 output_asm_insn ("pop{l}\t%0", xops);
4960 get_pc_thunk_name (name, REGNO (dest));
4961 pic_labels_used |= 1 << REGNO (dest);
4963 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4964 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4965 output_asm_insn ("call\t%X2", xops);
4968 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4969 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4970 else if (!TARGET_MACHO)
4971 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4976 /* Generate an "push" pattern for input ARG. */
4981 return gen_rtx_SET (VOIDmode,
4983 gen_rtx_PRE_DEC (Pmode,
4984 stack_pointer_rtx)),
4988 /* Return >= 0 if there is an unused call-clobbered register available
4989 for the entire function. */
4992 ix86_select_alt_pic_regnum (void)
4994 if (current_function_is_leaf && !current_function_profile)
4997 for (i = 2; i >= 0; --i)
4998 if (!regs_ever_live[i])
5002 return INVALID_REGNUM;
5005 /* Return 1 if we need to save REGNO. */
5007 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5009 if (pic_offset_table_rtx
5010 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5011 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5012 || current_function_profile
5013 || current_function_calls_eh_return
5014 || current_function_uses_const_pool))
5016 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5021 if (current_function_calls_eh_return && maybe_eh_return)
5026 unsigned test = EH_RETURN_DATA_REGNO (i);
5027 if (test == INVALID_REGNUM)
5034 return (regs_ever_live[regno]
5035 && !call_used_regs[regno]
5036 && !fixed_regs[regno]
5037 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5040 /* Return number of registers to be saved on the stack. */
5043 ix86_nsaved_regs (void)
5048 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5049 if (ix86_save_reg (regno, true))
5054 /* Return the offset between two registers, one to be eliminated, and the other
5055 its replacement, at the start of a routine. */
5058 ix86_initial_elimination_offset (int from, int to)
5060 struct ix86_frame frame;
5061 ix86_compute_frame_layout (&frame);
5063 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5064 return frame.hard_frame_pointer_offset;
5065 else if (from == FRAME_POINTER_REGNUM
5066 && to == HARD_FRAME_POINTER_REGNUM)
5067 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5070 if (to != STACK_POINTER_REGNUM)
5072 else if (from == ARG_POINTER_REGNUM)
5073 return frame.stack_pointer_offset;
5074 else if (from != FRAME_POINTER_REGNUM)
5077 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5081 /* Fill structure ix86_frame about frame of currently computed function. */
5084 ix86_compute_frame_layout (struct ix86_frame *frame)
5086 HOST_WIDE_INT total_size;
5087 unsigned int stack_alignment_needed;
5088 HOST_WIDE_INT offset;
5089 unsigned int preferred_alignment;
5090 HOST_WIDE_INT size = get_frame_size ();
5092 frame->nregs = ix86_nsaved_regs ();
5095 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5096 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5098 /* During reload iteration the amount of registers saved can change.
5099 Recompute the value as needed. Do not recompute when amount of registers
5100 didn't change as reload does mutiple calls to the function and does not
5101 expect the decision to change within single iteration. */
5103 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5105 int count = frame->nregs;
5107 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5108 /* The fast prologue uses move instead of push to save registers. This
5109 is significantly longer, but also executes faster as modern hardware
5110 can execute the moves in parallel, but can't do that for push/pop.
5112 Be careful about choosing what prologue to emit: When function takes
5113 many instructions to execute we may use slow version as well as in
5114 case function is known to be outside hot spot (this is known with
5115 feedback only). Weight the size of function by number of registers
5116 to save as it is cheap to use one or two push instructions but very
5117 slow to use many of them. */
5119 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5120 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5121 || (flag_branch_probabilities
5122 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5123 cfun->machine->use_fast_prologue_epilogue = false;
5125 cfun->machine->use_fast_prologue_epilogue
5126 = !expensive_function_p (count);
5128 if (TARGET_PROLOGUE_USING_MOVE
5129 && cfun->machine->use_fast_prologue_epilogue)
5130 frame->save_regs_using_mov = true;
5132 frame->save_regs_using_mov = false;
5135 /* Skip return address and saved base pointer. */
5136 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5138 frame->hard_frame_pointer_offset = offset;
5140 /* Do some sanity checking of stack_alignment_needed and
5141 preferred_alignment, since i386 port is the only using those features
5142 that may break easily. */
5144 if (size && !stack_alignment_needed)
5146 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
5148 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5150 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5153 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5154 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5156 /* Register save area */
5157 offset += frame->nregs * UNITS_PER_WORD;
5160 if (ix86_save_varrargs_registers)
5162 offset += X86_64_VARARGS_SIZE;
5163 frame->va_arg_size = X86_64_VARARGS_SIZE;
5166 frame->va_arg_size = 0;
5168 /* Align start of frame for local function. */
5169 frame->padding1 = ((offset + stack_alignment_needed - 1)
5170 & -stack_alignment_needed) - offset;
5172 offset += frame->padding1;
5174 /* Frame pointer points here. */
5175 frame->frame_pointer_offset = offset;
5179 /* Add outgoing arguments area. Can be skipped if we eliminated
5180 all the function calls as dead code.
5181 Skipping is however impossible when function calls alloca. Alloca
5182 expander assumes that last current_function_outgoing_args_size
5183 of stack frame are unused. */
5184 if (ACCUMULATE_OUTGOING_ARGS
5185 && (!current_function_is_leaf || current_function_calls_alloca))
5187 offset += current_function_outgoing_args_size;
5188 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5191 frame->outgoing_arguments_size = 0;
5193 /* Align stack boundary. Only needed if we're calling another function
5195 if (!current_function_is_leaf || current_function_calls_alloca)
5196 frame->padding2 = ((offset + preferred_alignment - 1)
5197 & -preferred_alignment) - offset;
5199 frame->padding2 = 0;
5201 offset += frame->padding2;
5203 /* We've reached end of stack frame. */
5204 frame->stack_pointer_offset = offset;
5206 /* Size prologue needs to allocate. */
5207 frame->to_allocate =
5208 (size + frame->padding1 + frame->padding2
5209 + frame->outgoing_arguments_size + frame->va_arg_size);
5211 if ((!frame->to_allocate && frame->nregs <= 1)
5212 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5213 frame->save_regs_using_mov = false;
5215 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5216 && current_function_is_leaf)
5218 frame->red_zone_size = frame->to_allocate;
5219 if (frame->save_regs_using_mov)
5220 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5221 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5222 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5225 frame->red_zone_size = 0;
5226 frame->to_allocate -= frame->red_zone_size;
5227 frame->stack_pointer_offset -= frame->red_zone_size;
5229 fprintf (stderr, "nregs: %i\n", frame->nregs);
5230 fprintf (stderr, "size: %i\n", size);
5231 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5232 fprintf (stderr, "padding1: %i\n", frame->padding1);
5233 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5234 fprintf (stderr, "padding2: %i\n", frame->padding2);
5235 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5236 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5237 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5238 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5239 frame->hard_frame_pointer_offset);
5240 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5244 /* Emit code to save registers in the prologue. */
5247 ix86_emit_save_regs (void)
5252 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5253 if (ix86_save_reg (regno, true))
5255 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5256 RTX_FRAME_RELATED_P (insn) = 1;
5260 /* Emit code to save registers using MOV insns. First register
5261 is restored from POINTER + OFFSET. */
5263 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5268 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5269 if (ix86_save_reg (regno, true))
5271 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5273 gen_rtx_REG (Pmode, regno));
5274 RTX_FRAME_RELATED_P (insn) = 1;
5275 offset += UNITS_PER_WORD;
5279 /* Expand prologue or epilogue stack adjustment.
5280 The pattern exist to put a dependency on all ebp-based memory accesses.
5281 STYLE should be negative if instructions should be marked as frame related,
5282 zero if %r11 register is live and cannot be freely used and positive
5286 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5291 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5292 else if (x86_64_immediate_operand (offset, DImode))
5293 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5297 /* r11 is used by indirect sibcall return as well, set before the
5298 epilogue and used after the epilogue. ATM indirect sibcall
5299 shouldn't be used together with huge frame sizes in one
5300 function because of the frame_size check in sibcall.c. */
5303 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5304 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5306 RTX_FRAME_RELATED_P (insn) = 1;
5307 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5311 RTX_FRAME_RELATED_P (insn) = 1;
5314 /* Expand the prologue into a bunch of separate insns. */
5317 ix86_expand_prologue (void)
5321 struct ix86_frame frame;
5322 HOST_WIDE_INT allocate;
5324 ix86_compute_frame_layout (&frame);
5326 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5327 slower on all targets. Also sdb doesn't like it. */
5329 if (frame_pointer_needed)
5331 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5332 RTX_FRAME_RELATED_P (insn) = 1;
5334 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5335 RTX_FRAME_RELATED_P (insn) = 1;
5338 allocate = frame.to_allocate;
5340 if (!frame.save_regs_using_mov)
5341 ix86_emit_save_regs ();
5343 allocate += frame.nregs * UNITS_PER_WORD;
5345 /* When using red zone we may start register saving before allocating
5346 the stack frame saving one cycle of the prologue. */
5347 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5348 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5349 : stack_pointer_rtx,
5350 -frame.nregs * UNITS_PER_WORD);
5354 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5355 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5356 GEN_INT (-allocate), -1);
5359 /* Only valid for Win32. */
5360 rtx eax = gen_rtx_REG (SImode, 0);
5361 bool eax_live = ix86_eax_live_at_start_p ();
5368 emit_insn (gen_push (eax));
5372 insn = emit_move_insn (eax, GEN_INT (allocate));
5373 RTX_FRAME_RELATED_P (insn) = 1;
5375 insn = emit_insn (gen_allocate_stack_worker (eax));
5376 RTX_FRAME_RELATED_P (insn) = 1;
5380 rtx t = plus_constant (stack_pointer_rtx, allocate);
5381 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5385 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5387 if (!frame_pointer_needed || !frame.to_allocate)
5388 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5390 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5391 -frame.nregs * UNITS_PER_WORD);
5394 pic_reg_used = false;
5395 if (pic_offset_table_rtx
5396 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5397 || current_function_profile))
5399 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5401 if (alt_pic_reg_used != INVALID_REGNUM)
5402 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5404 pic_reg_used = true;
5409 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5411 /* Even with accurate pre-reload life analysis, we can wind up
5412 deleting all references to the pic register after reload.
5413 Consider if cross-jumping unifies two sides of a branch
5414 controlled by a comparison vs the only read from a global.
5415 In which case, allow the set_got to be deleted, though we're
5416 too late to do anything about the ebx save in the prologue. */
5417 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5420 /* Prevent function calls from be scheduled before the call to mcount.
5421 In the pic_reg_used case, make sure that the got load isn't deleted. */
5422 if (current_function_profile)
5423 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5426 /* Emit code to restore saved registers using MOV insns. First register
5427 is restored from POINTER + OFFSET. */
5429 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5430 int maybe_eh_return)
5433 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5435 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5436 if (ix86_save_reg (regno, maybe_eh_return))
5438 /* Ensure that adjust_address won't be forced to produce pointer
5439 out of range allowed by x86-64 instruction set. */
5440 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5444 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5445 emit_move_insn (r11, GEN_INT (offset));
5446 emit_insn (gen_adddi3 (r11, r11, pointer));
5447 base_address = gen_rtx_MEM (Pmode, r11);
5450 emit_move_insn (gen_rtx_REG (Pmode, regno),
5451 adjust_address (base_address, Pmode, offset));
5452 offset += UNITS_PER_WORD;
5456 /* Restore function stack, frame, and registers. */
5459 ix86_expand_epilogue (int style)
5462 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5463 struct ix86_frame frame;
5464 HOST_WIDE_INT offset;
5466 ix86_compute_frame_layout (&frame);
5468 /* Calculate start of saved registers relative to ebp. Special care
5469 must be taken for the normal return case of a function using
5470 eh_return: the eax and edx registers are marked as saved, but not
5471 restored along this path. */
5472 offset = frame.nregs;
5473 if (current_function_calls_eh_return && style != 2)
5475 offset *= -UNITS_PER_WORD;
5477 /* If we're only restoring one register and sp is not valid then
5478 using a move instruction to restore the register since it's
5479 less work than reloading sp and popping the register.
5481 The default code result in stack adjustment using add/lea instruction,
5482 while this code results in LEAVE instruction (or discrete equivalent),
5483 so it is profitable in some other cases as well. Especially when there
5484 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5485 and there is exactly one register to pop. This heuristic may need some
5486 tuning in future. */
5487 if ((!sp_valid && frame.nregs <= 1)
5488 || (TARGET_EPILOGUE_USING_MOVE
5489 && cfun->machine->use_fast_prologue_epilogue
5490 && (frame.nregs > 1 || frame.to_allocate))
5491 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5492 || (frame_pointer_needed && TARGET_USE_LEAVE
5493 && cfun->machine->use_fast_prologue_epilogue
5494 && frame.nregs == 1)
5495 || current_function_calls_eh_return)
5497 /* Restore registers. We can use ebp or esp to address the memory
5498 locations. If both are available, default to ebp, since offsets
5499 are known to be small. Only exception is esp pointing directly to the
5500 end of block of saved registers, where we may simplify addressing
5503 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5504 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5505 frame.to_allocate, style == 2);
5507 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5508 offset, style == 2);
5510 /* eh_return epilogues need %ecx added to the stack pointer. */
5513 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5515 if (frame_pointer_needed)
5517 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5518 tmp = plus_constant (tmp, UNITS_PER_WORD);
5519 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5521 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5522 emit_move_insn (hard_frame_pointer_rtx, tmp);
5524 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5529 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5530 tmp = plus_constant (tmp, (frame.to_allocate
5531 + frame.nregs * UNITS_PER_WORD));
5532 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5535 else if (!frame_pointer_needed)
5536 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5537 GEN_INT (frame.to_allocate
5538 + frame.nregs * UNITS_PER_WORD),
5540 /* If not an i386, mov & pop is faster than "leave". */
5541 else if (TARGET_USE_LEAVE || optimize_size
5542 || !cfun->machine->use_fast_prologue_epilogue)
5543 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5546 pro_epilogue_adjust_stack (stack_pointer_rtx,
5547 hard_frame_pointer_rtx,
5550 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5552 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5557 /* First step is to deallocate the stack frame so that we can
5558 pop the registers. */
5561 if (!frame_pointer_needed)
5563 pro_epilogue_adjust_stack (stack_pointer_rtx,
5564 hard_frame_pointer_rtx,
5565 GEN_INT (offset), style);
5567 else if (frame.to_allocate)
5568 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5569 GEN_INT (frame.to_allocate), style);
5571 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5572 if (ix86_save_reg (regno, false))
5575 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5577 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5579 if (frame_pointer_needed)
5581 /* Leave results in shorter dependency chains on CPUs that are
5582 able to grok it fast. */
5583 if (TARGET_USE_LEAVE)
5584 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5585 else if (TARGET_64BIT)
5586 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5588 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5592 /* Sibcall epilogues don't want a return instruction. */
5596 if (current_function_pops_args && current_function_args_size)
5598 rtx popc = GEN_INT (current_function_pops_args);
5600 /* i386 can only pop 64K bytes. If asked to pop more, pop
5601 return address, do explicit add, and jump indirectly to the
5604 if (current_function_pops_args >= 65536)
5606 rtx ecx = gen_rtx_REG (SImode, 2);
5608 /* There is no "pascal" calling convention in 64bit ABI. */
5612 emit_insn (gen_popsi1 (ecx));
5613 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5614 emit_jump_insn (gen_return_indirect_internal (ecx));
5617 emit_jump_insn (gen_return_pop_internal (popc));
5620 emit_jump_insn (gen_return_internal ());
5623 /* Reset from the function's potential modifications. */
5626 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5627 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5629 if (pic_offset_table_rtx)
5630 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5633 /* Extract the parts of an RTL expression that is a valid memory address
5634 for an instruction. Return 0 if the structure of the address is
5635 grossly off. Return -1 if the address contains ASHIFT, so it is not
5636 strictly valid, but still used for computing length of lea instruction. */
5639 ix86_decompose_address (rtx addr, struct ix86_address *out)
5641 rtx base = NULL_RTX;
5642 rtx index = NULL_RTX;
5643 rtx disp = NULL_RTX;
5644 HOST_WIDE_INT scale = 1;
5645 rtx scale_rtx = NULL_RTX;
5647 enum ix86_address_seg seg = SEG_DEFAULT;
5649 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5651 else if (GET_CODE (addr) == PLUS)
5661 addends[n++] = XEXP (op, 1);
5664 while (GET_CODE (op) == PLUS);
5669 for (i = n; i >= 0; --i)
5672 switch (GET_CODE (op))
5677 index = XEXP (op, 0);
5678 scale_rtx = XEXP (op, 1);
5682 if (XINT (op, 1) == UNSPEC_TP
5683 && TARGET_TLS_DIRECT_SEG_REFS
5684 && seg == SEG_DEFAULT)
5685 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5714 else if (GET_CODE (addr) == MULT)
5716 index = XEXP (addr, 0); /* index*scale */
5717 scale_rtx = XEXP (addr, 1);
5719 else if (GET_CODE (addr) == ASHIFT)
5723 /* We're called for lea too, which implements ashift on occasion. */
5724 index = XEXP (addr, 0);
5725 tmp = XEXP (addr, 1);
5726 if (GET_CODE (tmp) != CONST_INT)
5728 scale = INTVAL (tmp);
5729 if ((unsigned HOST_WIDE_INT) scale > 3)
5735 disp = addr; /* displacement */
5737 /* Extract the integral value of scale. */
5740 if (GET_CODE (scale_rtx) != CONST_INT)
5742 scale = INTVAL (scale_rtx);
5745 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5746 if (base && index && scale == 1
5747 && (index == arg_pointer_rtx
5748 || index == frame_pointer_rtx
5749 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5756 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5757 if ((base == hard_frame_pointer_rtx
5758 || base == frame_pointer_rtx
5759 || base == arg_pointer_rtx) && !disp)
5762 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5763 Avoid this by transforming to [%esi+0]. */
5764 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5765 && base && !index && !disp
5767 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5770 /* Special case: encode reg+reg instead of reg*2. */
5771 if (!base && index && scale && scale == 2)
5772 base = index, scale = 1;
5774 /* Special case: scaling cannot be encoded without base or displacement. */
5775 if (!base && !disp && index && scale != 1)
5787 /* Return cost of the memory address x.
5788 For i386, it is better to use a complex address than let gcc copy
5789 the address into a reg and make a new pseudo. But not if the address
5790 requires to two regs - that would mean more pseudos with longer
5793 ix86_address_cost (rtx x)
5795 struct ix86_address parts;
5798 if (!ix86_decompose_address (x, &parts))
5801 /* More complex memory references are better. */
5802 if (parts.disp && parts.disp != const0_rtx)
5804 if (parts.seg != SEG_DEFAULT)
5807 /* Attempt to minimize number of registers in the address. */
5809 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5811 && (!REG_P (parts.index)
5812 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5816 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5818 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5819 && parts.base != parts.index)
5822 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5823 since it's predecode logic can't detect the length of instructions
5824 and it degenerates to vector decoded. Increase cost of such
5825 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5826 to split such addresses or even refuse such addresses at all.
5828 Following addressing modes are affected:
5833 The first and last case may be avoidable by explicitly coding the zero in
5834 memory address, but I don't have AMD-K6 machine handy to check this
5838 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5839 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5840 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5846 /* If X is a machine specific address (i.e. a symbol or label being
5847 referenced as a displacement from the GOT implemented using an
5848 UNSPEC), then return the base term. Otherwise return X. */
5851 ix86_find_base_term (rtx x)
5857 if (GET_CODE (x) != CONST)
5860 if (GET_CODE (term) == PLUS
5861 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5862 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5863 term = XEXP (term, 0);
5864 if (GET_CODE (term) != UNSPEC
5865 || XINT (term, 1) != UNSPEC_GOTPCREL)
5868 term = XVECEXP (term, 0, 0);
5870 if (GET_CODE (term) != SYMBOL_REF
5871 && GET_CODE (term) != LABEL_REF)
5877 term = ix86_delegitimize_address (x);
5879 if (GET_CODE (term) != SYMBOL_REF
5880 && GET_CODE (term) != LABEL_REF)
5886 /* Determine if a given RTX is a valid constant. We already know this
5887 satisfies CONSTANT_P. */
5890 legitimate_constant_p (rtx x)
5894 switch (GET_CODE (x))
5897 /* TLS symbols are not constant. */
5898 if (tls_symbolic_operand (x, Pmode))
5903 inner = XEXP (x, 0);
5905 /* Offsets of TLS symbols are never valid.
5906 Discourage CSE from creating them. */
5907 if (GET_CODE (inner) == PLUS
5908 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5911 if (GET_CODE (inner) == PLUS
5912 || GET_CODE (inner) == MINUS)
5914 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5916 inner = XEXP (inner, 0);
5919 /* Only some unspecs are valid as "constants". */
5920 if (GET_CODE (inner) == UNSPEC)
5921 switch (XINT (inner, 1))
5925 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5927 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5937 /* Otherwise we handle everything else in the move patterns. */
5941 /* Determine if it's legal to put X into the constant pool. This
5942 is not possible for the address of thread-local symbols, which
5943 is checked above. */
5946 ix86_cannot_force_const_mem (rtx x)
5948 return !legitimate_constant_p (x);
5951 /* Determine if a given RTX is a valid constant address. */
5954 constant_address_p (rtx x)
5956 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5959 /* Nonzero if the constant value X is a legitimate general operand
5960 when generating PIC code. It is given that flag_pic is on and
5961 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5964 legitimate_pic_operand_p (rtx x)
5968 switch (GET_CODE (x))
5971 inner = XEXP (x, 0);
5973 /* Only some unspecs are valid as "constants". */
5974 if (GET_CODE (inner) == UNSPEC)
5975 switch (XINT (inner, 1))
5978 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5986 return legitimate_pic_address_disp_p (x);
5993 /* Determine if a given CONST RTX is a valid memory displacement
5997 legitimate_pic_address_disp_p (rtx disp)
6001 /* In 64bit mode we can allow direct addresses of symbols and labels
6002 when they are not dynamic symbols. */
6005 /* TLS references should always be enclosed in UNSPEC. */
6006 if (tls_symbolic_operand (disp, GET_MODE (disp)))
6008 if (GET_CODE (disp) == SYMBOL_REF
6009 && ix86_cmodel == CM_SMALL_PIC
6010 && SYMBOL_REF_LOCAL_P (disp))
6012 if (GET_CODE (disp) == LABEL_REF)
6014 if (GET_CODE (disp) == CONST
6015 && GET_CODE (XEXP (disp, 0)) == PLUS)
6017 rtx op0 = XEXP (XEXP (disp, 0), 0);
6018 rtx op1 = XEXP (XEXP (disp, 0), 1);
6020 /* TLS references should always be enclosed in UNSPEC. */
6021 if (tls_symbolic_operand (op0, GET_MODE (op0)))
6023 if (((GET_CODE (op0) == SYMBOL_REF
6024 && ix86_cmodel == CM_SMALL_PIC
6025 && SYMBOL_REF_LOCAL_P (op0))
6026 || GET_CODE (op0) == LABEL_REF)
6027 && GET_CODE (op1) == CONST_INT
6028 && INTVAL (op1) < 16*1024*1024
6029 && INTVAL (op1) >= -16*1024*1024)
6033 if (GET_CODE (disp) != CONST)
6035 disp = XEXP (disp, 0);
6039 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6040 of GOT tables. We should not need these anyway. */
6041 if (GET_CODE (disp) != UNSPEC
6042 || XINT (disp, 1) != UNSPEC_GOTPCREL)
6045 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6046 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6052 if (GET_CODE (disp) == PLUS)
6054 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6056 disp = XEXP (disp, 0);
6060 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
6061 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
6063 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6064 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6065 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6067 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6068 if (! strcmp (sym_name, "<pic base>"))
6073 if (GET_CODE (disp) != UNSPEC)
6076 switch (XINT (disp, 1))
6081 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6083 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6084 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6085 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6087 case UNSPEC_GOTTPOFF:
6088 case UNSPEC_GOTNTPOFF:
6089 case UNSPEC_INDNTPOFF:
6092 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6094 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6096 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6102 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6103 memory address for an instruction. The MODE argument is the machine mode
6104 for the MEM expression that wants to use this address.
6106 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6107 convert common non-canonical forms to canonical form so that they will
6111 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6113 struct ix86_address parts;
6114 rtx base, index, disp;
6115 HOST_WIDE_INT scale;
6116 const char *reason = NULL;
6117 rtx reason_rtx = NULL_RTX;
6119 if (TARGET_DEBUG_ADDR)
6122 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6123 GET_MODE_NAME (mode), strict);
6127 if (ix86_decompose_address (addr, &parts) <= 0)
6129 reason = "decomposition failed";
6134 index = parts.index;
6136 scale = parts.scale;
6138 /* Validate base register.
6140 Don't allow SUBREG's here, it can lead to spill failures when the base
6141 is one word out of a two word structure, which is represented internally
6148 if (GET_CODE (base) != REG)
6150 reason = "base is not a register";
6154 if (GET_MODE (base) != Pmode)
6156 reason = "base is not in Pmode";
6160 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
6161 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
6163 reason = "base is not valid";
6168 /* Validate index register.
6170 Don't allow SUBREG's here, it can lead to spill failures when the index
6171 is one word out of a two word structure, which is represented internally
6178 if (GET_CODE (index) != REG)
6180 reason = "index is not a register";
6184 if (GET_MODE (index) != Pmode)
6186 reason = "index is not in Pmode";
6190 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
6191 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
6193 reason = "index is not valid";
6198 /* Validate scale factor. */
6201 reason_rtx = GEN_INT (scale);
6204 reason = "scale without index";
6208 if (scale != 2 && scale != 4 && scale != 8)
6210 reason = "scale is not a valid multiplier";
6215 /* Validate displacement. */
6220 if (GET_CODE (disp) == CONST
6221 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6222 switch (XINT (XEXP (disp, 0), 1))
6226 case UNSPEC_GOTPCREL:
6229 goto is_legitimate_pic;
6231 case UNSPEC_GOTTPOFF:
6232 case UNSPEC_GOTNTPOFF:
6233 case UNSPEC_INDNTPOFF:
6239 reason = "invalid address unspec";
6243 else if (flag_pic && (SYMBOLIC_CONST (disp)
6245 && !machopic_operand_p (disp)
6250 if (TARGET_64BIT && (index || base))
6252 /* foo@dtpoff(%rX) is ok. */
6253 if (GET_CODE (disp) != CONST
6254 || GET_CODE (XEXP (disp, 0)) != PLUS
6255 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6256 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6257 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6258 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6260 reason = "non-constant pic memory reference";
6264 else if (! legitimate_pic_address_disp_p (disp))
6266 reason = "displacement is an invalid pic construct";
6270 /* This code used to verify that a symbolic pic displacement
6271 includes the pic_offset_table_rtx register.
6273 While this is good idea, unfortunately these constructs may
6274 be created by "adds using lea" optimization for incorrect
6283 This code is nonsensical, but results in addressing
6284 GOT table with pic_offset_table_rtx base. We can't
6285 just refuse it easily, since it gets matched by
6286 "addsi3" pattern, that later gets split to lea in the
6287 case output register differs from input. While this
6288 can be handled by separate addsi pattern for this case
6289 that never results in lea, this seems to be easier and
6290 correct fix for crash to disable this test. */
6292 else if (GET_CODE (disp) != LABEL_REF
6293 && GET_CODE (disp) != CONST_INT
6294 && (GET_CODE (disp) != CONST
6295 || !legitimate_constant_p (disp))
6296 && (GET_CODE (disp) != SYMBOL_REF
6297 || !legitimate_constant_p (disp)))
6299 reason = "displacement is not constant";
6302 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6304 reason = "displacement is out of range";
6309 /* Everything looks valid. */
6310 if (TARGET_DEBUG_ADDR)
6311 fprintf (stderr, "Success.\n");
6315 if (TARGET_DEBUG_ADDR)
6317 fprintf (stderr, "Error: %s\n", reason);
6318 debug_rtx (reason_rtx);
6323 /* Return an unique alias set for the GOT. */
6325 static HOST_WIDE_INT
6326 ix86_GOT_alias_set (void)
6328 static HOST_WIDE_INT set = -1;
6330 set = new_alias_set ();
6334 /* Return a legitimate reference for ORIG (an address) using the
6335 register REG. If REG is 0, a new pseudo is generated.
6337 There are two types of references that must be handled:
6339 1. Global data references must load the address from the GOT, via
6340 the PIC reg. An insn is emitted to do this load, and the reg is
6343 2. Static data references, constant pool addresses, and code labels
6344 compute the address as an offset from the GOT, whose base is in
6345 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6346 differentiate them from global data objects. The returned
6347 address is the PIC reg + an unspec constant.
6349 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6350 reg also appears in the address. */
6353 legitimize_pic_address (rtx orig, rtx reg)
6361 reg = gen_reg_rtx (Pmode);
6362 /* Use the generic Mach-O PIC machinery. */
6363 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6366 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6368 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6370 /* This symbol may be referenced via a displacement from the PIC
6371 base address (@GOTOFF). */
6373 if (reload_in_progress)
6374 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6375 if (GET_CODE (addr) == CONST)
6376 addr = XEXP (addr, 0);
6377 if (GET_CODE (addr) == PLUS)
6379 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6380 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6383 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6384 new = gen_rtx_CONST (Pmode, new);
6385 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6389 emit_move_insn (reg, new);
6393 else if (GET_CODE (addr) == SYMBOL_REF)
6397 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6398 new = gen_rtx_CONST (Pmode, new);
6399 new = gen_rtx_MEM (Pmode, new);
6400 RTX_UNCHANGING_P (new) = 1;
6401 set_mem_alias_set (new, ix86_GOT_alias_set ());
6404 reg = gen_reg_rtx (Pmode);
6405 /* Use directly gen_movsi, otherwise the address is loaded
6406 into register for CSE. We don't want to CSE this addresses,
6407 instead we CSE addresses from the GOT table, so skip this. */
6408 emit_insn (gen_movsi (reg, new));
6413 /* This symbol must be referenced via a load from the
6414 Global Offset Table (@GOT). */
6416 if (reload_in_progress)
6417 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6418 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6419 new = gen_rtx_CONST (Pmode, new);
6420 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6421 new = gen_rtx_MEM (Pmode, new);
6422 RTX_UNCHANGING_P (new) = 1;
6423 set_mem_alias_set (new, ix86_GOT_alias_set ());
6426 reg = gen_reg_rtx (Pmode);
6427 emit_move_insn (reg, new);
6433 if (GET_CODE (addr) == CONST)
6435 addr = XEXP (addr, 0);
6437 /* We must match stuff we generate before. Assume the only
6438 unspecs that can get here are ours. Not that we could do
6439 anything with them anyway.... */
6440 if (GET_CODE (addr) == UNSPEC
6441 || (GET_CODE (addr) == PLUS
6442 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6444 if (GET_CODE (addr) != PLUS)
6447 if (GET_CODE (addr) == PLUS)
6449 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6451 /* Check first to see if this is a constant offset from a @GOTOFF
6452 symbol reference. */
6453 if (local_symbolic_operand (op0, Pmode)
6454 && GET_CODE (op1) == CONST_INT)
6458 if (reload_in_progress)
6459 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6460 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6462 new = gen_rtx_PLUS (Pmode, new, op1);
6463 new = gen_rtx_CONST (Pmode, new);
6464 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6468 emit_move_insn (reg, new);
6474 if (INTVAL (op1) < -16*1024*1024
6475 || INTVAL (op1) >= 16*1024*1024)
6476 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6481 base = legitimize_pic_address (XEXP (addr, 0), reg);
6482 new = legitimize_pic_address (XEXP (addr, 1),
6483 base == reg ? NULL_RTX : reg);
6485 if (GET_CODE (new) == CONST_INT)
6486 new = plus_constant (base, INTVAL (new));
6489 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6491 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6492 new = XEXP (new, 1);
6494 new = gen_rtx_PLUS (Pmode, base, new);
6502 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6505 get_thread_pointer (int to_reg)
6509 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6513 reg = gen_reg_rtx (Pmode);
6514 insn = gen_rtx_SET (VOIDmode, reg, tp);
6515 insn = emit_insn (insn);
6520 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6521 false if we expect this to be used for a memory address and true if
6522 we expect to load the address into a register. */
6525 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6527 rtx dest, base, off, pic;
6532 case TLS_MODEL_GLOBAL_DYNAMIC:
6533 dest = gen_reg_rtx (Pmode);
6536 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6539 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6540 insns = get_insns ();
6543 emit_libcall_block (insns, dest, rax, x);
6546 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6549 case TLS_MODEL_LOCAL_DYNAMIC:
6550 base = gen_reg_rtx (Pmode);
6553 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6556 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6557 insns = get_insns ();
6560 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6561 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6562 emit_libcall_block (insns, base, rax, note);
6565 emit_insn (gen_tls_local_dynamic_base_32 (base));
6567 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6568 off = gen_rtx_CONST (Pmode, off);
6570 return gen_rtx_PLUS (Pmode, base, off);
6572 case TLS_MODEL_INITIAL_EXEC:
6576 type = UNSPEC_GOTNTPOFF;
6580 if (reload_in_progress)
6581 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6582 pic = pic_offset_table_rtx;
6583 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6585 else if (!TARGET_GNU_TLS)
6587 pic = gen_reg_rtx (Pmode);
6588 emit_insn (gen_set_got (pic));
6589 type = UNSPEC_GOTTPOFF;
6594 type = UNSPEC_INDNTPOFF;
6597 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6598 off = gen_rtx_CONST (Pmode, off);
6600 off = gen_rtx_PLUS (Pmode, pic, off);
6601 off = gen_rtx_MEM (Pmode, off);
6602 RTX_UNCHANGING_P (off) = 1;
6603 set_mem_alias_set (off, ix86_GOT_alias_set ());
6605 if (TARGET_64BIT || TARGET_GNU_TLS)
6607 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6608 off = force_reg (Pmode, off);
6609 return gen_rtx_PLUS (Pmode, base, off);
6613 base = get_thread_pointer (true);
6614 dest = gen_reg_rtx (Pmode);
6615 emit_insn (gen_subsi3 (dest, base, off));
6619 case TLS_MODEL_LOCAL_EXEC:
6620 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6621 (TARGET_64BIT || TARGET_GNU_TLS)
6622 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6623 off = gen_rtx_CONST (Pmode, off);
6625 if (TARGET_64BIT || TARGET_GNU_TLS)
6627 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6628 return gen_rtx_PLUS (Pmode, base, off);
6632 base = get_thread_pointer (true);
6633 dest = gen_reg_rtx (Pmode);
6634 emit_insn (gen_subsi3 (dest, base, off));
6645 /* Try machine-dependent ways of modifying an illegitimate address
6646 to be legitimate. If we find one, return the new, valid address.
6647 This macro is used in only one place: `memory_address' in explow.c.
6649 OLDX is the address as it was before break_out_memory_refs was called.
6650 In some cases it is useful to look at this to decide what needs to be done.
6652 MODE and WIN are passed so that this macro can use
6653 GO_IF_LEGITIMATE_ADDRESS.
6655 It is always safe for this macro to do nothing. It exists to recognize
6656 opportunities to optimize the output.
6658 For the 80386, we handle X+REG by loading X into a register R and
6659 using R+REG. R will go in a general reg and indexing will be used.
6660 However, if REG is a broken-out memory address or multiplication,
6661 nothing needs to be done because REG can certainly go in a general reg.
6663 When -fpic is used, special handling is needed for symbolic references.
6664 See comments by legitimize_pic_address in i386.c for details. */
6667 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6672 if (TARGET_DEBUG_ADDR)
6674 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6675 GET_MODE_NAME (mode));
6679 log = tls_symbolic_operand (x, mode);
6681 return legitimize_tls_address (x, log, false);
6682 if (GET_CODE (x) == CONST
6683 && GET_CODE (XEXP (x, 0)) == PLUS
6684 && (log = tls_symbolic_operand (XEXP (XEXP (x, 0), 0), Pmode)))
6686 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
6687 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
6690 if (flag_pic && SYMBOLIC_CONST (x))
6691 return legitimize_pic_address (x, 0);
6693 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6694 if (GET_CODE (x) == ASHIFT
6695 && GET_CODE (XEXP (x, 1)) == CONST_INT
6696 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6699 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6700 GEN_INT (1 << log));
6703 if (GET_CODE (x) == PLUS)
6705 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6707 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6708 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6709 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6712 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6713 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6714 GEN_INT (1 << log));
6717 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6718 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6719 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6722 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6723 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6724 GEN_INT (1 << log));
6727 /* Put multiply first if it isn't already. */
6728 if (GET_CODE (XEXP (x, 1)) == MULT)
6730 rtx tmp = XEXP (x, 0);
6731 XEXP (x, 0) = XEXP (x, 1);
6736 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6737 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6738 created by virtual register instantiation, register elimination, and
6739 similar optimizations. */
6740 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6743 x = gen_rtx_PLUS (Pmode,
6744 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6745 XEXP (XEXP (x, 1), 0)),
6746 XEXP (XEXP (x, 1), 1));
6750 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6751 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6752 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6753 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6754 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6755 && CONSTANT_P (XEXP (x, 1)))
6758 rtx other = NULL_RTX;
6760 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6762 constant = XEXP (x, 1);
6763 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6765 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6767 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6768 other = XEXP (x, 1);
6776 x = gen_rtx_PLUS (Pmode,
6777 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6778 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6779 plus_constant (other, INTVAL (constant)));
6783 if (changed && legitimate_address_p (mode, x, FALSE))
6786 if (GET_CODE (XEXP (x, 0)) == MULT)
6789 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6792 if (GET_CODE (XEXP (x, 1)) == MULT)
6795 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6799 && GET_CODE (XEXP (x, 1)) == REG
6800 && GET_CODE (XEXP (x, 0)) == REG)
6803 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6806 x = legitimize_pic_address (x, 0);
6809 if (changed && legitimate_address_p (mode, x, FALSE))
6812 if (GET_CODE (XEXP (x, 0)) == REG)
6814 rtx temp = gen_reg_rtx (Pmode);
6815 rtx val = force_operand (XEXP (x, 1), temp);
6817 emit_move_insn (temp, val);
6823 else if (GET_CODE (XEXP (x, 1)) == REG)
6825 rtx temp = gen_reg_rtx (Pmode);
6826 rtx val = force_operand (XEXP (x, 0), temp);
6828 emit_move_insn (temp, val);
6838 /* Print an integer constant expression in assembler syntax. Addition
6839 and subtraction are the only arithmetic that may appear in these
6840 expressions. FILE is the stdio stream to write to, X is the rtx, and
6841 CODE is the operand print code from the output string. */
6844 output_pic_addr_const (FILE *file, rtx x, int code)
6848 switch (GET_CODE (x))
6858 /* Mark the decl as referenced so that cgraph will output the function. */
6859 if (SYMBOL_REF_DECL (x))
6860 mark_decl_referenced (SYMBOL_REF_DECL (x));
6862 assemble_name (file, XSTR (x, 0));
6863 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6864 fputs ("@PLT", file);
6871 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6872 assemble_name (asm_out_file, buf);
6876 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6880 /* This used to output parentheses around the expression,
6881 but that does not work on the 386 (either ATT or BSD assembler). */
6882 output_pic_addr_const (file, XEXP (x, 0), code);
6886 if (GET_MODE (x) == VOIDmode)
6888 /* We can use %d if the number is <32 bits and positive. */
6889 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6890 fprintf (file, "0x%lx%08lx",
6891 (unsigned long) CONST_DOUBLE_HIGH (x),
6892 (unsigned long) CONST_DOUBLE_LOW (x));
6894 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6897 /* We can't handle floating point constants;
6898 PRINT_OPERAND must handle them. */
6899 output_operand_lossage ("floating constant misused");
6903 /* Some assemblers need integer constants to appear first. */
6904 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6906 output_pic_addr_const (file, XEXP (x, 0), code);
6908 output_pic_addr_const (file, XEXP (x, 1), code);
6910 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6912 output_pic_addr_const (file, XEXP (x, 1), code);
6914 output_pic_addr_const (file, XEXP (x, 0), code);
6922 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6923 output_pic_addr_const (file, XEXP (x, 0), code);
6925 output_pic_addr_const (file, XEXP (x, 1), code);
6927 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6931 if (XVECLEN (x, 0) != 1)
6933 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6934 switch (XINT (x, 1))
6937 fputs ("@GOT", file);
6940 fputs ("@GOTOFF", file);
6942 case UNSPEC_GOTPCREL:
6943 fputs ("@GOTPCREL(%rip)", file);
6945 case UNSPEC_GOTTPOFF:
6946 /* FIXME: This might be @TPOFF in Sun ld too. */
6947 fputs ("@GOTTPOFF", file);
6950 fputs ("@TPOFF", file);
6954 fputs ("@TPOFF", file);
6956 fputs ("@NTPOFF", file);
6959 fputs ("@DTPOFF", file);
6961 case UNSPEC_GOTNTPOFF:
6963 fputs ("@GOTTPOFF(%rip)", file);
6965 fputs ("@GOTNTPOFF", file);
6967 case UNSPEC_INDNTPOFF:
6968 fputs ("@INDNTPOFF", file);
6971 output_operand_lossage ("invalid UNSPEC as operand");
6977 output_operand_lossage ("invalid expression as operand");
6981 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6982 We need to handle our special PIC relocations. */
6985 i386_dwarf_output_addr_const (FILE *file, rtx x)
6988 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6992 fprintf (file, "%s", ASM_LONG);
6995 output_pic_addr_const (file, x, '\0');
6997 output_addr_const (file, x);
7001 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
7002 We need to emit DTP-relative relocations. */
7005 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7007 fputs (ASM_LONG, file);
7008 output_addr_const (file, x);
7009 fputs ("@DTPOFF", file);
7015 fputs (", 0", file);
7022 /* In the name of slightly smaller debug output, and to cater to
7023 general assembler losage, recognize PIC+GOTOFF and turn it back
7024 into a direct symbol reference. */
7027 ix86_delegitimize_address (rtx orig_x)
7031 if (GET_CODE (x) == MEM)
7036 if (GET_CODE (x) != CONST
7037 || GET_CODE (XEXP (x, 0)) != UNSPEC
7038 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7039 || GET_CODE (orig_x) != MEM)
7041 return XVECEXP (XEXP (x, 0), 0, 0);
7044 if (GET_CODE (x) != PLUS
7045 || GET_CODE (XEXP (x, 1)) != CONST)
7048 if (GET_CODE (XEXP (x, 0)) == REG
7049 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7050 /* %ebx + GOT/GOTOFF */
7052 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7054 /* %ebx + %reg * scale + GOT/GOTOFF */
7056 if (GET_CODE (XEXP (y, 0)) == REG
7057 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
7059 else if (GET_CODE (XEXP (y, 1)) == REG
7060 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
7064 if (GET_CODE (y) != REG
7065 && GET_CODE (y) != MULT
7066 && GET_CODE (y) != ASHIFT)
7072 x = XEXP (XEXP (x, 1), 0);
7073 if (GET_CODE (x) == UNSPEC
7074 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7075 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7078 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
7079 return XVECEXP (x, 0, 0);
7082 if (GET_CODE (x) == PLUS
7083 && GET_CODE (XEXP (x, 0)) == UNSPEC
7084 && GET_CODE (XEXP (x, 1)) == CONST_INT
7085 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7086 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
7087 && GET_CODE (orig_x) != MEM)))
7089 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
7091 return gen_rtx_PLUS (Pmode, y, x);
7099 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7104 if (mode == CCFPmode || mode == CCFPUmode)
7106 enum rtx_code second_code, bypass_code;
7107 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7108 if (bypass_code != NIL || second_code != NIL)
7110 code = ix86_fp_compare_code_to_integer (code);
7114 code = reverse_condition (code);
7125 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
7130 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
7131 Those same assemblers have the same but opposite losage on cmov. */
7134 suffix = fp ? "nbe" : "a";
7137 if (mode == CCNOmode || mode == CCGOCmode)
7139 else if (mode == CCmode || mode == CCGCmode)
7150 if (mode == CCNOmode || mode == CCGOCmode)
7152 else if (mode == CCmode || mode == CCGCmode)
7161 suffix = fp ? "nb" : "ae";
7164 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
7174 suffix = fp ? "u" : "p";
7177 suffix = fp ? "nu" : "np";
7182 fputs (suffix, file);
7185 /* Print the name of register X to FILE based on its machine mode and number.
7186 If CODE is 'w', pretend the mode is HImode.
7187 If CODE is 'b', pretend the mode is QImode.
7188 If CODE is 'k', pretend the mode is SImode.
7189 If CODE is 'q', pretend the mode is DImode.
7190 If CODE is 'h', pretend the reg is the `high' byte register.
7191 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7194 print_reg (rtx x, int code, FILE *file)
7196 if (REGNO (x) == ARG_POINTER_REGNUM
7197 || REGNO (x) == FRAME_POINTER_REGNUM
7198 || REGNO (x) == FLAGS_REG
7199 || REGNO (x) == FPSR_REG)
7202 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7205 if (code == 'w' || MMX_REG_P (x))
7207 else if (code == 'b')
7209 else if (code == 'k')
7211 else if (code == 'q')
7213 else if (code == 'y')
7215 else if (code == 'h')
7218 code = GET_MODE_SIZE (GET_MODE (x));
7220 /* Irritatingly, AMD extended registers use different naming convention
7221 from the normal registers. */
7222 if (REX_INT_REG_P (x))
7229 error ("extended registers have no high halves");
7232 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7235 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7238 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7241 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7244 error ("unsupported operand size for extended register");
7252 if (STACK_TOP_P (x))
7254 fputs ("st(0)", file);
7261 if (! ANY_FP_REG_P (x))
7262 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7267 fputs (hi_reg_name[REGNO (x)], file);
7270 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7272 fputs (qi_reg_name[REGNO (x)], file);
7275 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7277 fputs (qi_high_reg_name[REGNO (x)], file);
7284 /* Locate some local-dynamic symbol still in use by this function
7285 so that we can print its name in some tls_local_dynamic_base
7289 get_some_local_dynamic_name (void)
7293 if (cfun->machine->some_ld_name)
7294 return cfun->machine->some_ld_name;
7296 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7298 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7299 return cfun->machine->some_ld_name;
7305 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7309 if (GET_CODE (x) == SYMBOL_REF
7310 && local_dynamic_symbolic_operand (x, Pmode))
7312 cfun->machine->some_ld_name = XSTR (x, 0);
7320 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7321 C -- print opcode suffix for set/cmov insn.
7322 c -- like C, but print reversed condition
7323 F,f -- likewise, but for floating-point.
7324 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7326 R -- print the prefix for register names.
7327 z -- print the opcode suffix for the size of the current operand.
7328 * -- print a star (in certain assembler syntax)
7329 A -- print an absolute memory reference.
7330 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7331 s -- print a shift double count, followed by the assemblers argument
7333 b -- print the QImode name of the register for the indicated operand.
7334 %b0 would print %al if operands[0] is reg 0.
7335 w -- likewise, print the HImode name of the register.
7336 k -- likewise, print the SImode name of the register.
7337 q -- likewise, print the DImode name of the register.
7338 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7339 y -- print "st(0)" instead of "st" as a register.
7340 D -- print condition for SSE cmp instruction.
7341 P -- if PIC, print an @PLT suffix.
7342 X -- don't print any sort of PIC '@' suffix for a symbol.
7343 & -- print some in-use local-dynamic symbol name.
7347 print_operand (FILE *file, rtx x, int code)
7354 if (ASSEMBLER_DIALECT == ASM_ATT)
7359 assemble_name (file, get_some_local_dynamic_name ());
7363 if (ASSEMBLER_DIALECT == ASM_ATT)
7365 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7367 /* Intel syntax. For absolute addresses, registers should not
7368 be surrounded by braces. */
7369 if (GET_CODE (x) != REG)
7372 PRINT_OPERAND (file, x, 0);
7380 PRINT_OPERAND (file, x, 0);
7385 if (ASSEMBLER_DIALECT == ASM_ATT)
7390 if (ASSEMBLER_DIALECT == ASM_ATT)
7395 if (ASSEMBLER_DIALECT == ASM_ATT)
7400 if (ASSEMBLER_DIALECT == ASM_ATT)
7405 if (ASSEMBLER_DIALECT == ASM_ATT)
7410 if (ASSEMBLER_DIALECT == ASM_ATT)
7415 /* 387 opcodes don't get size suffixes if the operands are
7417 if (STACK_REG_P (x))
7420 /* Likewise if using Intel opcodes. */
7421 if (ASSEMBLER_DIALECT == ASM_INTEL)
7424 /* This is the size of op from size of operand. */
7425 switch (GET_MODE_SIZE (GET_MODE (x)))
7428 #ifdef HAVE_GAS_FILDS_FISTS
7434 if (GET_MODE (x) == SFmode)
7449 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7451 #ifdef GAS_MNEMONICS
7477 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7479 PRINT_OPERAND (file, x, 0);
7485 /* Little bit of braindamage here. The SSE compare instructions
7486 does use completely different names for the comparisons that the
7487 fp conditional moves. */
7488 switch (GET_CODE (x))
7503 fputs ("unord", file);
7507 fputs ("neq", file);
7511 fputs ("nlt", file);
7515 fputs ("nle", file);
7518 fputs ("ord", file);
7526 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7527 if (ASSEMBLER_DIALECT == ASM_ATT)
7529 switch (GET_MODE (x))
7531 case HImode: putc ('w', file); break;
7533 case SFmode: putc ('l', file); break;
7535 case DFmode: putc ('q', file); break;
7543 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7546 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7547 if (ASSEMBLER_DIALECT == ASM_ATT)
7550 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7553 /* Like above, but reverse condition */
7555 /* Check to see if argument to %c is really a constant
7556 and not a condition code which needs to be reversed. */
7557 if (!COMPARISON_P (x))
7559 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7562 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7565 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7566 if (ASSEMBLER_DIALECT == ASM_ATT)
7569 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7575 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7578 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7581 int pred_val = INTVAL (XEXP (x, 0));
7583 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7584 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7586 int taken = pred_val > REG_BR_PROB_BASE / 2;
7587 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7589 /* Emit hints only in the case default branch prediction
7590 heuristics would fail. */
7591 if (taken != cputaken)
7593 /* We use 3e (DS) prefix for taken branches and
7594 2e (CS) prefix for not taken branches. */
7596 fputs ("ds ; ", file);
7598 fputs ("cs ; ", file);
7605 output_operand_lossage ("invalid operand code `%c'", code);
7609 if (GET_CODE (x) == REG)
7610 print_reg (x, code, file);
7612 else if (GET_CODE (x) == MEM)
7614 /* No `byte ptr' prefix for call instructions. */
7615 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7618 switch (GET_MODE_SIZE (GET_MODE (x)))
7620 case 1: size = "BYTE"; break;
7621 case 2: size = "WORD"; break;
7622 case 4: size = "DWORD"; break;
7623 case 8: size = "QWORD"; break;
7624 case 12: size = "XWORD"; break;
7625 case 16: size = "XMMWORD"; break;
7630 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7633 else if (code == 'w')
7635 else if (code == 'k')
7639 fputs (" PTR ", file);
7643 /* Avoid (%rip) for call operands. */
7644 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7645 && GET_CODE (x) != CONST_INT)
7646 output_addr_const (file, x);
7647 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7648 output_operand_lossage ("invalid constraints for operand");
7653 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7658 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7659 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7661 if (ASSEMBLER_DIALECT == ASM_ATT)
7663 fprintf (file, "0x%08lx", l);
7666 /* These float cases don't actually occur as immediate operands. */
7667 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7671 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7672 fprintf (file, "%s", dstr);
7675 else if (GET_CODE (x) == CONST_DOUBLE
7676 && GET_MODE (x) == XFmode)
7680 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7681 fprintf (file, "%s", dstr);
7688 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7690 if (ASSEMBLER_DIALECT == ASM_ATT)
7693 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7694 || GET_CODE (x) == LABEL_REF)
7696 if (ASSEMBLER_DIALECT == ASM_ATT)
7699 fputs ("OFFSET FLAT:", file);
7702 if (GET_CODE (x) == CONST_INT)
7703 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7705 output_pic_addr_const (file, x, code);
7707 output_addr_const (file, x);
7711 /* Print a memory operand whose address is ADDR. */
7714 print_operand_address (FILE *file, rtx addr)
7716 struct ix86_address parts;
7717 rtx base, index, disp;
7720 if (! ix86_decompose_address (addr, &parts))
7724 index = parts.index;
7726 scale = parts.scale;
7734 if (USER_LABEL_PREFIX[0] == 0)
7736 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7742 if (!base && !index)
7744 /* Displacement only requires special attention. */
7746 if (GET_CODE (disp) == CONST_INT)
7748 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7750 if (USER_LABEL_PREFIX[0] == 0)
7752 fputs ("ds:", file);
7754 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7757 output_pic_addr_const (file, disp, 0);
7759 output_addr_const (file, disp);
7761 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7763 && ((GET_CODE (disp) == SYMBOL_REF
7764 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7765 || GET_CODE (disp) == LABEL_REF
7766 || (GET_CODE (disp) == CONST
7767 && GET_CODE (XEXP (disp, 0)) == PLUS
7768 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7769 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7770 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7771 fputs ("(%rip)", file);
7775 if (ASSEMBLER_DIALECT == ASM_ATT)
7780 output_pic_addr_const (file, disp, 0);
7781 else if (GET_CODE (disp) == LABEL_REF)
7782 output_asm_label (disp);
7784 output_addr_const (file, disp);
7789 print_reg (base, 0, file);
7793 print_reg (index, 0, file);
7795 fprintf (file, ",%d", scale);
7801 rtx offset = NULL_RTX;
7805 /* Pull out the offset of a symbol; print any symbol itself. */
7806 if (GET_CODE (disp) == CONST
7807 && GET_CODE (XEXP (disp, 0)) == PLUS
7808 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7810 offset = XEXP (XEXP (disp, 0), 1);
7811 disp = gen_rtx_CONST (VOIDmode,
7812 XEXP (XEXP (disp, 0), 0));
7816 output_pic_addr_const (file, disp, 0);
7817 else if (GET_CODE (disp) == LABEL_REF)
7818 output_asm_label (disp);
7819 else if (GET_CODE (disp) == CONST_INT)
7822 output_addr_const (file, disp);
7828 print_reg (base, 0, file);
7831 if (INTVAL (offset) >= 0)
7833 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7837 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7844 print_reg (index, 0, file);
7846 fprintf (file, "*%d", scale);
7854 output_addr_const_extra (FILE *file, rtx x)
7858 if (GET_CODE (x) != UNSPEC)
7861 op = XVECEXP (x, 0, 0);
7862 switch (XINT (x, 1))
7864 case UNSPEC_GOTTPOFF:
7865 output_addr_const (file, op);
7866 /* FIXME: This might be @TPOFF in Sun ld. */
7867 fputs ("@GOTTPOFF", file);
7870 output_addr_const (file, op);
7871 fputs ("@TPOFF", file);
7874 output_addr_const (file, op);
7876 fputs ("@TPOFF", file);
7878 fputs ("@NTPOFF", file);
7881 output_addr_const (file, op);
7882 fputs ("@DTPOFF", file);
7884 case UNSPEC_GOTNTPOFF:
7885 output_addr_const (file, op);
7887 fputs ("@GOTTPOFF(%rip)", file);
7889 fputs ("@GOTNTPOFF", file);
7891 case UNSPEC_INDNTPOFF:
7892 output_addr_const (file, op);
7893 fputs ("@INDNTPOFF", file);
7903 /* Split one or more DImode RTL references into pairs of SImode
7904 references. The RTL can be REG, offsettable MEM, integer constant, or
7905 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7906 split and "num" is its length. lo_half and hi_half are output arrays
7907 that parallel "operands". */
7910 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7914 rtx op = operands[num];
7916 /* simplify_subreg refuse to split volatile memory addresses,
7917 but we still have to handle it. */
7918 if (GET_CODE (op) == MEM)
7920 lo_half[num] = adjust_address (op, SImode, 0);
7921 hi_half[num] = adjust_address (op, SImode, 4);
7925 lo_half[num] = simplify_gen_subreg (SImode, op,
7926 GET_MODE (op) == VOIDmode
7927 ? DImode : GET_MODE (op), 0);
7928 hi_half[num] = simplify_gen_subreg (SImode, op,
7929 GET_MODE (op) == VOIDmode
7930 ? DImode : GET_MODE (op), 4);
7934 /* Split one or more TImode RTL references into pairs of SImode
7935 references. The RTL can be REG, offsettable MEM, integer constant, or
7936 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7937 split and "num" is its length. lo_half and hi_half are output arrays
7938 that parallel "operands". */
7941 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7945 rtx op = operands[num];
7947 /* simplify_subreg refuse to split volatile memory addresses, but we
7948 still have to handle it. */
7949 if (GET_CODE (op) == MEM)
7951 lo_half[num] = adjust_address (op, DImode, 0);
7952 hi_half[num] = adjust_address (op, DImode, 8);
7956 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7957 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7962 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7963 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7964 is the expression of the binary operation. The output may either be
7965 emitted here, or returned to the caller, like all output_* functions.
7967 There is no guarantee that the operands are the same mode, as they
7968 might be within FLOAT or FLOAT_EXTEND expressions. */
7970 #ifndef SYSV386_COMPAT
7971 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7972 wants to fix the assemblers because that causes incompatibility
7973 with gcc. No-one wants to fix gcc because that causes
7974 incompatibility with assemblers... You can use the option of
7975 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7976 #define SYSV386_COMPAT 1
7980 output_387_binary_op (rtx insn, rtx *operands)
7982 static char buf[30];
7985 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7987 #ifdef ENABLE_CHECKING
7988 /* Even if we do not want to check the inputs, this documents input
7989 constraints. Which helps in understanding the following code. */
7990 if (STACK_REG_P (operands[0])
7991 && ((REG_P (operands[1])
7992 && REGNO (operands[0]) == REGNO (operands[1])
7993 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7994 || (REG_P (operands[2])
7995 && REGNO (operands[0]) == REGNO (operands[2])
7996 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7997 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8003 switch (GET_CODE (operands[3]))
8006 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8007 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8015 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8016 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8024 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8025 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8033 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8034 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8048 if (GET_MODE (operands[0]) == SFmode)
8049 strcat (buf, "ss\t{%2, %0|%0, %2}");
8051 strcat (buf, "sd\t{%2, %0|%0, %2}");
8056 switch (GET_CODE (operands[3]))
8060 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8062 rtx temp = operands[2];
8063 operands[2] = operands[1];
8067 /* know operands[0] == operands[1]. */
8069 if (GET_CODE (operands[2]) == MEM)
8075 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8077 if (STACK_TOP_P (operands[0]))
8078 /* How is it that we are storing to a dead operand[2]?
8079 Well, presumably operands[1] is dead too. We can't
8080 store the result to st(0) as st(0) gets popped on this
8081 instruction. Instead store to operands[2] (which I
8082 think has to be st(1)). st(1) will be popped later.
8083 gcc <= 2.8.1 didn't have this check and generated
8084 assembly code that the Unixware assembler rejected. */
8085 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8087 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8091 if (STACK_TOP_P (operands[0]))
8092 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8094 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8099 if (GET_CODE (operands[1]) == MEM)
8105 if (GET_CODE (operands[2]) == MEM)
8111 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8114 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8115 derived assemblers, confusingly reverse the direction of
8116 the operation for fsub{r} and fdiv{r} when the
8117 destination register is not st(0). The Intel assembler
8118 doesn't have this brain damage. Read !SYSV386_COMPAT to
8119 figure out what the hardware really does. */
8120 if (STACK_TOP_P (operands[0]))
8121 p = "{p\t%0, %2|rp\t%2, %0}";
8123 p = "{rp\t%2, %0|p\t%0, %2}";
8125 if (STACK_TOP_P (operands[0]))
8126 /* As above for fmul/fadd, we can't store to st(0). */
8127 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8129 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8134 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8137 if (STACK_TOP_P (operands[0]))
8138 p = "{rp\t%0, %1|p\t%1, %0}";
8140 p = "{p\t%1, %0|rp\t%0, %1}";
8142 if (STACK_TOP_P (operands[0]))
8143 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8145 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8150 if (STACK_TOP_P (operands[0]))
8152 if (STACK_TOP_P (operands[1]))
8153 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8155 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8158 else if (STACK_TOP_P (operands[1]))
8161 p = "{\t%1, %0|r\t%0, %1}";
8163 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8169 p = "{r\t%2, %0|\t%0, %2}";
8171 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8184 /* Output code to initialize control word copies used by
8185 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
8186 is set to control word rounding downwards. */
8188 emit_i387_cw_initialization (rtx normal, rtx round_down)
8190 rtx reg = gen_reg_rtx (HImode);
8192 emit_insn (gen_x86_fnstcw_1 (normal));
8193 emit_move_insn (reg, normal);
8194 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8196 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8198 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8199 emit_move_insn (round_down, reg);
8202 /* Output code for INSN to convert a float to a signed int. OPERANDS
8203 are the insn operands. The output may be [HSD]Imode and the input
8204 operand may be [SDX]Fmode. */
8207 output_fix_trunc (rtx insn, rtx *operands)
8209 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8210 int dimode_p = GET_MODE (operands[0]) == DImode;
8212 /* Jump through a hoop or two for DImode, since the hardware has no
8213 non-popping instruction. We used to do this a different way, but
8214 that was somewhat fragile and broke with post-reload splitters. */
8215 if (dimode_p && !stack_top_dies)
8216 output_asm_insn ("fld\t%y1", operands);
8218 if (!STACK_TOP_P (operands[1]))
8221 if (GET_CODE (operands[0]) != MEM)
8224 output_asm_insn ("fldcw\t%3", operands);
8225 if (stack_top_dies || dimode_p)
8226 output_asm_insn ("fistp%z0\t%0", operands);
8228 output_asm_insn ("fist%z0\t%0", operands);
8229 output_asm_insn ("fldcw\t%2", operands);
8234 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8235 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8236 when fucom should be used. */
8239 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8242 rtx cmp_op0 = operands[0];
8243 rtx cmp_op1 = operands[1];
8244 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8249 cmp_op1 = operands[2];
8253 if (GET_MODE (operands[0]) == SFmode)
8255 return "ucomiss\t{%1, %0|%0, %1}";
8257 return "comiss\t{%1, %0|%0, %1}";
8260 return "ucomisd\t{%1, %0|%0, %1}";
8262 return "comisd\t{%1, %0|%0, %1}";
8265 if (! STACK_TOP_P (cmp_op0))
8268 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8270 if (STACK_REG_P (cmp_op1)
8272 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8273 && REGNO (cmp_op1) != FIRST_STACK_REG)
8275 /* If both the top of the 387 stack dies, and the other operand
8276 is also a stack register that dies, then this must be a
8277 `fcompp' float compare */
8281 /* There is no double popping fcomi variant. Fortunately,
8282 eflags is immune from the fstp's cc clobbering. */
8284 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8286 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8287 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
8294 return "fucompp\n\tfnstsw\t%0";
8296 return "fcompp\n\tfnstsw\t%0";
8309 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8311 static const char * const alt[24] =
8323 "fcomi\t{%y1, %0|%0, %y1}",
8324 "fcomip\t{%y1, %0|%0, %y1}",
8325 "fucomi\t{%y1, %0|%0, %y1}",
8326 "fucomip\t{%y1, %0|%0, %y1}",
8333 "fcom%z2\t%y2\n\tfnstsw\t%0",
8334 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8335 "fucom%z2\t%y2\n\tfnstsw\t%0",
8336 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8338 "ficom%z2\t%y2\n\tfnstsw\t%0",
8339 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8347 mask = eflags_p << 3;
8348 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8349 mask |= unordered_p << 1;
8350 mask |= stack_top_dies;
8363 ix86_output_addr_vec_elt (FILE *file, int value)
8365 const char *directive = ASM_LONG;
8370 directive = ASM_QUAD;
8376 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8380 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8383 fprintf (file, "%s%s%d-%s%d\n",
8384 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8385 else if (HAVE_AS_GOTOFF_IN_DATA)
8386 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8388 else if (TARGET_MACHO)
8390 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8391 machopic_output_function_base_name (file);
8392 fprintf(file, "\n");
8396 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8397 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8400 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8404 ix86_expand_clear (rtx dest)
8408 /* We play register width games, which are only valid after reload. */
8409 if (!reload_completed)
8412 /* Avoid HImode and its attendant prefix byte. */
8413 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8414 dest = gen_rtx_REG (SImode, REGNO (dest));
8416 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8418 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8419 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8421 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8422 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8428 /* X is an unchanging MEM. If it is a constant pool reference, return
8429 the constant pool rtx, else NULL. */
8432 maybe_get_pool_constant (rtx x)
8434 x = ix86_delegitimize_address (XEXP (x, 0));
8436 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8437 return get_pool_constant (x);
8443 ix86_expand_move (enum machine_mode mode, rtx operands[])
8445 int strict = (reload_in_progress || reload_completed);
8447 enum tls_model model;
8452 model = tls_symbolic_operand (op1, Pmode);
8455 op1 = legitimize_tls_address (op1, model, true);
8456 op1 = force_operand (op1, op0);
8461 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8466 rtx temp = ((reload_in_progress
8467 || ((op0 && GET_CODE (op0) == REG)
8469 ? op0 : gen_reg_rtx (Pmode));
8470 op1 = machopic_indirect_data_reference (op1, temp);
8471 op1 = machopic_legitimize_pic_address (op1, mode,
8472 temp == op1 ? 0 : temp);
8474 else if (MACHOPIC_INDIRECT)
8475 op1 = machopic_indirect_data_reference (op1, 0);
8479 if (GET_CODE (op0) == MEM)
8480 op1 = force_reg (Pmode, op1);
8482 op1 = legitimize_address (op1, op1, Pmode);
8483 #endif /* TARGET_MACHO */
8487 if (GET_CODE (op0) == MEM
8488 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8489 || !push_operand (op0, mode))
8490 && GET_CODE (op1) == MEM)
8491 op1 = force_reg (mode, op1);
8493 if (push_operand (op0, mode)
8494 && ! general_no_elim_operand (op1, mode))
8495 op1 = copy_to_mode_reg (mode, op1);
8497 /* Force large constants in 64bit compilation into register
8498 to get them CSEed. */
8499 if (TARGET_64BIT && mode == DImode
8500 && immediate_operand (op1, mode)
8501 && !x86_64_zero_extended_value (op1)
8502 && !register_operand (op0, mode)
8503 && optimize && !reload_completed && !reload_in_progress)
8504 op1 = copy_to_mode_reg (mode, op1);
8506 if (FLOAT_MODE_P (mode))
8508 /* If we are loading a floating point constant to a register,
8509 force the value to memory now, since we'll get better code
8510 out the back end. */
8514 else if (GET_CODE (op1) == CONST_DOUBLE)
8516 op1 = validize_mem (force_const_mem (mode, op1));
8517 if (!register_operand (op0, mode))
8519 rtx temp = gen_reg_rtx (mode);
8520 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8521 emit_move_insn (op0, temp);
8528 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8532 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8534 /* Force constants other than zero into memory. We do not know how
8535 the instructions used to build constants modify the upper 64 bits
8536 of the register, once we have that information we may be able
8537 to handle some of them more efficiently. */
8538 if ((reload_in_progress | reload_completed) == 0
8539 && register_operand (operands[0], mode)
8540 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8541 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8543 /* Make operand1 a register if it isn't already. */
8545 && !register_operand (operands[0], mode)
8546 && !register_operand (operands[1], mode))
8548 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8549 emit_move_insn (operands[0], temp);
8553 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8556 /* Attempt to expand a binary operator. Make the expansion closer to the
8557 actual machine, then just general_operand, which will allow 3 separate
8558 memory references (one output, two input) in a single insn. */
8561 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8564 int matching_memory;
8565 rtx src1, src2, dst, op, clob;
8571 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8572 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8573 && (rtx_equal_p (dst, src2)
8574 || immediate_operand (src1, mode)))
8581 /* If the destination is memory, and we do not have matching source
8582 operands, do things in registers. */
8583 matching_memory = 0;
8584 if (GET_CODE (dst) == MEM)
8586 if (rtx_equal_p (dst, src1))
8587 matching_memory = 1;
8588 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8589 && rtx_equal_p (dst, src2))
8590 matching_memory = 2;
8592 dst = gen_reg_rtx (mode);
8595 /* Both source operands cannot be in memory. */
8596 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8598 if (matching_memory != 2)
8599 src2 = force_reg (mode, src2);
8601 src1 = force_reg (mode, src1);
8604 /* If the operation is not commutable, source 1 cannot be a constant
8605 or non-matching memory. */
8606 if ((CONSTANT_P (src1)
8607 || (!matching_memory && GET_CODE (src1) == MEM))
8608 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8609 src1 = force_reg (mode, src1);
8611 /* If optimizing, copy to regs to improve CSE */
8612 if (optimize && ! no_new_pseudos)
8614 if (GET_CODE (dst) == MEM)
8615 dst = gen_reg_rtx (mode);
8616 if (GET_CODE (src1) == MEM)
8617 src1 = force_reg (mode, src1);
8618 if (GET_CODE (src2) == MEM)
8619 src2 = force_reg (mode, src2);
8622 /* Emit the instruction. */
8624 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8625 if (reload_in_progress)
8627 /* Reload doesn't know about the flags register, and doesn't know that
8628 it doesn't want to clobber it. We can only do this with PLUS. */
8635 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8636 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8639 /* Fix up the destination if needed. */
8640 if (dst != operands[0])
8641 emit_move_insn (operands[0], dst);
8644 /* Return TRUE or FALSE depending on whether the binary operator meets the
8645 appropriate constraints. */
8648 ix86_binary_operator_ok (enum rtx_code code,
8649 enum machine_mode mode ATTRIBUTE_UNUSED,
8652 /* Both source operands cannot be in memory. */
8653 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8655 /* If the operation is not commutable, source 1 cannot be a constant. */
8656 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8658 /* If the destination is memory, we must have a matching source operand. */
8659 if (GET_CODE (operands[0]) == MEM
8660 && ! (rtx_equal_p (operands[0], operands[1])
8661 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8662 && rtx_equal_p (operands[0], operands[2]))))
8664 /* If the operation is not commutable and the source 1 is memory, we must
8665 have a matching destination. */
8666 if (GET_CODE (operands[1]) == MEM
8667 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
8668 && ! rtx_equal_p (operands[0], operands[1]))
8673 /* Attempt to expand a unary operator. Make the expansion closer to the
8674 actual machine, then just general_operand, which will allow 2 separate
8675 memory references (one output, one input) in a single insn. */
8678 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8681 int matching_memory;
8682 rtx src, dst, op, clob;
8687 /* If the destination is memory, and we do not have matching source
8688 operands, do things in registers. */
8689 matching_memory = 0;
8690 if (GET_CODE (dst) == MEM)
8692 if (rtx_equal_p (dst, src))
8693 matching_memory = 1;
8695 dst = gen_reg_rtx (mode);
8698 /* When source operand is memory, destination must match. */
8699 if (!matching_memory && GET_CODE (src) == MEM)
8700 src = force_reg (mode, src);
8702 /* If optimizing, copy to regs to improve CSE */
8703 if (optimize && ! no_new_pseudos)
8705 if (GET_CODE (dst) == MEM)
8706 dst = gen_reg_rtx (mode);
8707 if (GET_CODE (src) == MEM)
8708 src = force_reg (mode, src);
8711 /* Emit the instruction. */
8713 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8714 if (reload_in_progress || code == NOT)
8716 /* Reload doesn't know about the flags register, and doesn't know that
8717 it doesn't want to clobber it. */
8724 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8725 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8728 /* Fix up the destination if needed. */
8729 if (dst != operands[0])
8730 emit_move_insn (operands[0], dst);
8733 /* Return TRUE or FALSE depending on whether the unary operator meets the
8734 appropriate constraints. */
8737 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8738 enum machine_mode mode ATTRIBUTE_UNUSED,
8739 rtx operands[2] ATTRIBUTE_UNUSED)
8741 /* If one of operands is memory, source and destination must match. */
8742 if ((GET_CODE (operands[0]) == MEM
8743 || GET_CODE (operands[1]) == MEM)
8744 && ! rtx_equal_p (operands[0], operands[1]))
8749 /* Return TRUE or FALSE depending on whether the first SET in INSN
8750 has source and destination with matching CC modes, and that the
8751 CC mode is at least as constrained as REQ_MODE. */
8754 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8757 enum machine_mode set_mode;
8759 set = PATTERN (insn);
8760 if (GET_CODE (set) == PARALLEL)
8761 set = XVECEXP (set, 0, 0);
8762 if (GET_CODE (set) != SET)
8764 if (GET_CODE (SET_SRC (set)) != COMPARE)
8767 set_mode = GET_MODE (SET_DEST (set));
8771 if (req_mode != CCNOmode
8772 && (req_mode != CCmode
8773 || XEXP (SET_SRC (set), 1) != const0_rtx))
8777 if (req_mode == CCGCmode)
8781 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8785 if (req_mode == CCZmode)
8795 return (GET_MODE (SET_SRC (set)) == set_mode);
8798 /* Generate insn patterns to do an integer compare of OPERANDS. */
8801 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8803 enum machine_mode cmpmode;
8806 cmpmode = SELECT_CC_MODE (code, op0, op1);
8807 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8809 /* This is very simple, but making the interface the same as in the
8810 FP case makes the rest of the code easier. */
8811 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8812 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8814 /* Return the test that should be put into the flags user, i.e.
8815 the bcc, scc, or cmov instruction. */
8816 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8819 /* Figure out whether to use ordered or unordered fp comparisons.
8820 Return the appropriate mode to use. */
8823 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8825 /* ??? In order to make all comparisons reversible, we do all comparisons
8826 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8827 all forms trapping and nontrapping comparisons, we can make inequality
8828 comparisons trapping again, since it results in better code when using
8829 FCOM based compares. */
8830 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8834 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8836 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8837 return ix86_fp_compare_mode (code);
8840 /* Only zero flag is needed. */
8842 case NE: /* ZF!=0 */
8844 /* Codes needing carry flag. */
8845 case GEU: /* CF=0 */
8846 case GTU: /* CF=0 & ZF=0 */
8847 case LTU: /* CF=1 */
8848 case LEU: /* CF=1 | ZF=1 */
8850 /* Codes possibly doable only with sign flag when
8851 comparing against zero. */
8852 case GE: /* SF=OF or SF=0 */
8853 case LT: /* SF<>OF or SF=1 */
8854 if (op1 == const0_rtx)
8857 /* For other cases Carry flag is not required. */
8859 /* Codes doable only with sign flag when comparing
8860 against zero, but we miss jump instruction for it
8861 so we need to use relational tests against overflow
8862 that thus needs to be zero. */
8863 case GT: /* ZF=0 & SF=OF */
8864 case LE: /* ZF=1 | SF<>OF */
8865 if (op1 == const0_rtx)
8869 /* strcmp pattern do (use flags) and combine may ask us for proper
8878 /* Return the fixed registers used for condition codes. */
8881 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8888 /* If two condition code modes are compatible, return a condition code
8889 mode which is compatible with both. Otherwise, return
8892 static enum machine_mode
8893 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8898 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8901 if ((m1 == CCGCmode && m2 == CCGOCmode)
8902 || (m1 == CCGOCmode && m2 == CCGCmode))
8930 /* These are only compatible with themselves, which we already
8936 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8939 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8941 enum rtx_code swapped_code = swap_condition (code);
8942 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8943 || (ix86_fp_comparison_cost (swapped_code)
8944 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8947 /* Swap, force into registers, or otherwise massage the two operands
8948 to a fp comparison. The operands are updated in place; the new
8949 comparison code is returned. */
8951 static enum rtx_code
8952 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8954 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8955 rtx op0 = *pop0, op1 = *pop1;
8956 enum machine_mode op_mode = GET_MODE (op0);
8957 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8959 /* All of the unordered compare instructions only work on registers.
8960 The same is true of the XFmode compare instructions. The same is
8961 true of the fcomi compare instructions. */
8964 && (fpcmp_mode == CCFPUmode
8965 || op_mode == XFmode
8966 || ix86_use_fcomi_compare (code)))
8968 op0 = force_reg (op_mode, op0);
8969 op1 = force_reg (op_mode, op1);
8973 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8974 things around if they appear profitable, otherwise force op0
8977 if (standard_80387_constant_p (op0) == 0
8978 || (GET_CODE (op0) == MEM
8979 && ! (standard_80387_constant_p (op1) == 0
8980 || GET_CODE (op1) == MEM)))
8983 tmp = op0, op0 = op1, op1 = tmp;
8984 code = swap_condition (code);
8987 if (GET_CODE (op0) != REG)
8988 op0 = force_reg (op_mode, op0);
8990 if (CONSTANT_P (op1))
8992 if (standard_80387_constant_p (op1))
8993 op1 = force_reg (op_mode, op1);
8995 op1 = validize_mem (force_const_mem (op_mode, op1));
8999 /* Try to rearrange the comparison to make it cheaper. */
9000 if (ix86_fp_comparison_cost (code)
9001 > ix86_fp_comparison_cost (swap_condition (code))
9002 && (GET_CODE (op1) == REG || !no_new_pseudos))
9005 tmp = op0, op0 = op1, op1 = tmp;
9006 code = swap_condition (code);
9007 if (GET_CODE (op0) != REG)
9008 op0 = force_reg (op_mode, op0);
9016 /* Convert comparison codes we use to represent FP comparison to integer
9017 code that will result in proper branch. Return UNKNOWN if no such code
9019 static enum rtx_code
9020 ix86_fp_compare_code_to_integer (enum rtx_code code)
9049 /* Split comparison code CODE into comparisons we can do using branch
9050 instructions. BYPASS_CODE is comparison code for branch that will
9051 branch around FIRST_CODE and SECOND_CODE. If some of branches
9052 is not required, set value to NIL.
9053 We never require more than two branches. */
9055 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
9056 enum rtx_code *first_code,
9057 enum rtx_code *second_code)
9063 /* The fcomi comparison sets flags as follows:
9073 case GT: /* GTU - CF=0 & ZF=0 */
9074 case GE: /* GEU - CF=0 */
9075 case ORDERED: /* PF=0 */
9076 case UNORDERED: /* PF=1 */
9077 case UNEQ: /* EQ - ZF=1 */
9078 case UNLT: /* LTU - CF=1 */
9079 case UNLE: /* LEU - CF=1 | ZF=1 */
9080 case LTGT: /* EQ - ZF=0 */
9082 case LT: /* LTU - CF=1 - fails on unordered */
9084 *bypass_code = UNORDERED;
9086 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
9088 *bypass_code = UNORDERED;
9090 case EQ: /* EQ - ZF=1 - fails on unordered */
9092 *bypass_code = UNORDERED;
9094 case NE: /* NE - ZF=0 - fails on unordered */
9096 *second_code = UNORDERED;
9098 case UNGE: /* GEU - CF=0 - fails on unordered */
9100 *second_code = UNORDERED;
9102 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
9104 *second_code = UNORDERED;
9109 if (!TARGET_IEEE_FP)
9116 /* Return cost of comparison done fcom + arithmetics operations on AX.
9117 All following functions do use number of instructions as a cost metrics.
9118 In future this should be tweaked to compute bytes for optimize_size and
9119 take into account performance of various instructions on various CPUs. */
9121 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9123 if (!TARGET_IEEE_FP)
9125 /* The cost of code output by ix86_expand_fp_compare. */
9153 /* Return cost of comparison done using fcomi operation.
9154 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9156 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9158 enum rtx_code bypass_code, first_code, second_code;
9159 /* Return arbitrarily high cost when instruction is not supported - this
9160 prevents gcc from using it. */
9163 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9164 return (bypass_code != NIL || second_code != NIL) + 2;
9167 /* Return cost of comparison done using sahf operation.
9168 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9170 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9172 enum rtx_code bypass_code, first_code, second_code;
9173 /* Return arbitrarily high cost when instruction is not preferred - this
9174 avoids gcc from using it. */
9175 if (!TARGET_USE_SAHF && !optimize_size)
9177 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9178 return (bypass_code != NIL || second_code != NIL) + 3;
9181 /* Compute cost of the comparison done using any method.
9182 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9184 ix86_fp_comparison_cost (enum rtx_code code)
9186 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9189 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9190 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9192 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9193 if (min > sahf_cost)
9195 if (min > fcomi_cost)
9200 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9203 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9204 rtx *second_test, rtx *bypass_test)
9206 enum machine_mode fpcmp_mode, intcmp_mode;
9208 int cost = ix86_fp_comparison_cost (code);
9209 enum rtx_code bypass_code, first_code, second_code;
9211 fpcmp_mode = ix86_fp_compare_mode (code);
9212 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9215 *second_test = NULL_RTX;
9217 *bypass_test = NULL_RTX;
9219 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9221 /* Do fcomi/sahf based test when profitable. */
9222 if ((bypass_code == NIL || bypass_test)
9223 && (second_code == NIL || second_test)
9224 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9228 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9229 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9235 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9236 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9238 scratch = gen_reg_rtx (HImode);
9239 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9240 emit_insn (gen_x86_sahf_1 (scratch));
9243 /* The FP codes work out to act like unsigned. */
9244 intcmp_mode = fpcmp_mode;
9246 if (bypass_code != NIL)
9247 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9248 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9250 if (second_code != NIL)
9251 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9252 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9257 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9258 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9259 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9261 scratch = gen_reg_rtx (HImode);
9262 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9264 /* In the unordered case, we have to check C2 for NaN's, which
9265 doesn't happen to work out to anything nice combination-wise.
9266 So do some bit twiddling on the value we've got in AH to come
9267 up with an appropriate set of condition codes. */
9269 intcmp_mode = CCNOmode;
9274 if (code == GT || !TARGET_IEEE_FP)
9276 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9281 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9282 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9283 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9284 intcmp_mode = CCmode;
9290 if (code == LT && TARGET_IEEE_FP)
9292 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9293 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9294 intcmp_mode = CCmode;
9299 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9305 if (code == GE || !TARGET_IEEE_FP)
9307 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9312 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9313 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9320 if (code == LE && TARGET_IEEE_FP)
9322 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9323 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9324 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9325 intcmp_mode = CCmode;
9330 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9336 if (code == EQ && TARGET_IEEE_FP)
9338 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9339 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9340 intcmp_mode = CCmode;
9345 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9352 if (code == NE && TARGET_IEEE_FP)
9354 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9355 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9361 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9367 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9371 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9380 /* Return the test that should be put into the flags user, i.e.
9381 the bcc, scc, or cmov instruction. */
9382 return gen_rtx_fmt_ee (code, VOIDmode,
9383 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9388 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9391 op0 = ix86_compare_op0;
9392 op1 = ix86_compare_op1;
9395 *second_test = NULL_RTX;
9397 *bypass_test = NULL_RTX;
9399 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9400 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9401 second_test, bypass_test);
9403 ret = ix86_expand_int_compare (code, op0, op1);
9408 /* Return true if the CODE will result in nontrivial jump sequence. */
9410 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9412 enum rtx_code bypass_code, first_code, second_code;
9415 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9416 return bypass_code != NIL || second_code != NIL;
9420 ix86_expand_branch (enum rtx_code code, rtx label)
9424 switch (GET_MODE (ix86_compare_op0))
9430 tmp = ix86_expand_compare (code, NULL, NULL);
9431 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9432 gen_rtx_LABEL_REF (VOIDmode, label),
9434 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9443 enum rtx_code bypass_code, first_code, second_code;
9445 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9448 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9450 /* Check whether we will use the natural sequence with one jump. If
9451 so, we can expand jump early. Otherwise delay expansion by
9452 creating compound insn to not confuse optimizers. */
9453 if (bypass_code == NIL && second_code == NIL
9456 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9457 gen_rtx_LABEL_REF (VOIDmode, label),
9462 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9463 ix86_compare_op0, ix86_compare_op1);
9464 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9465 gen_rtx_LABEL_REF (VOIDmode, label),
9467 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9469 use_fcomi = ix86_use_fcomi_compare (code);
9470 vec = rtvec_alloc (3 + !use_fcomi);
9471 RTVEC_ELT (vec, 0) = tmp;
9473 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9475 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9478 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9480 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9488 /* Expand DImode branch into multiple compare+branch. */
9490 rtx lo[2], hi[2], label2;
9491 enum rtx_code code1, code2, code3;
9493 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9495 tmp = ix86_compare_op0;
9496 ix86_compare_op0 = ix86_compare_op1;
9497 ix86_compare_op1 = tmp;
9498 code = swap_condition (code);
9500 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9501 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9503 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9504 avoid two branches. This costs one extra insn, so disable when
9505 optimizing for size. */
9507 if ((code == EQ || code == NE)
9509 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9514 if (hi[1] != const0_rtx)
9515 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9516 NULL_RTX, 0, OPTAB_WIDEN);
9519 if (lo[1] != const0_rtx)
9520 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9521 NULL_RTX, 0, OPTAB_WIDEN);
9523 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9524 NULL_RTX, 0, OPTAB_WIDEN);
9526 ix86_compare_op0 = tmp;
9527 ix86_compare_op1 = const0_rtx;
9528 ix86_expand_branch (code, label);
9532 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9533 op1 is a constant and the low word is zero, then we can just
9534 examine the high word. */
9536 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9539 case LT: case LTU: case GE: case GEU:
9540 ix86_compare_op0 = hi[0];
9541 ix86_compare_op1 = hi[1];
9542 ix86_expand_branch (code, label);
9548 /* Otherwise, we need two or three jumps. */
9550 label2 = gen_label_rtx ();
9553 code2 = swap_condition (code);
9554 code3 = unsigned_condition (code);
9558 case LT: case GT: case LTU: case GTU:
9561 case LE: code1 = LT; code2 = GT; break;
9562 case GE: code1 = GT; code2 = LT; break;
9563 case LEU: code1 = LTU; code2 = GTU; break;
9564 case GEU: code1 = GTU; code2 = LTU; break;
9566 case EQ: code1 = NIL; code2 = NE; break;
9567 case NE: code2 = NIL; break;
9575 * if (hi(a) < hi(b)) goto true;
9576 * if (hi(a) > hi(b)) goto false;
9577 * if (lo(a) < lo(b)) goto true;
9581 ix86_compare_op0 = hi[0];
9582 ix86_compare_op1 = hi[1];
9585 ix86_expand_branch (code1, label);
9587 ix86_expand_branch (code2, label2);
9589 ix86_compare_op0 = lo[0];
9590 ix86_compare_op1 = lo[1];
9591 ix86_expand_branch (code3, label);
9594 emit_label (label2);
9603 /* Split branch based on floating point condition. */
9605 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9606 rtx target1, rtx target2, rtx tmp)
9609 rtx label = NULL_RTX;
9611 int bypass_probability = -1, second_probability = -1, probability = -1;
9614 if (target2 != pc_rtx)
9617 code = reverse_condition_maybe_unordered (code);
9622 condition = ix86_expand_fp_compare (code, op1, op2,
9623 tmp, &second, &bypass);
9625 if (split_branch_probability >= 0)
9627 /* Distribute the probabilities across the jumps.
9628 Assume the BYPASS and SECOND to be always test
9630 probability = split_branch_probability;
9632 /* Value of 1 is low enough to make no need for probability
9633 to be updated. Later we may run some experiments and see
9634 if unordered values are more frequent in practice. */
9636 bypass_probability = 1;
9638 second_probability = 1;
9640 if (bypass != NULL_RTX)
9642 label = gen_label_rtx ();
9643 i = emit_jump_insn (gen_rtx_SET
9645 gen_rtx_IF_THEN_ELSE (VOIDmode,
9647 gen_rtx_LABEL_REF (VOIDmode,
9650 if (bypass_probability >= 0)
9652 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9653 GEN_INT (bypass_probability),
9656 i = emit_jump_insn (gen_rtx_SET
9658 gen_rtx_IF_THEN_ELSE (VOIDmode,
9659 condition, target1, target2)));
9660 if (probability >= 0)
9662 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9663 GEN_INT (probability),
9665 if (second != NULL_RTX)
9667 i = emit_jump_insn (gen_rtx_SET
9669 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9671 if (second_probability >= 0)
9673 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9674 GEN_INT (second_probability),
9677 if (label != NULL_RTX)
9682 ix86_expand_setcc (enum rtx_code code, rtx dest)
9684 rtx ret, tmp, tmpreg, equiv;
9685 rtx second_test, bypass_test;
9687 if (GET_MODE (ix86_compare_op0) == DImode
9689 return 0; /* FAIL */
9691 if (GET_MODE (dest) != QImode)
9694 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9695 PUT_MODE (ret, QImode);
9700 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9701 if (bypass_test || second_test)
9703 rtx test = second_test;
9705 rtx tmp2 = gen_reg_rtx (QImode);
9712 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9714 PUT_MODE (test, QImode);
9715 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9718 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9720 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9723 /* Attach a REG_EQUAL note describing the comparison result. */
9724 equiv = simplify_gen_relational (code, QImode,
9725 GET_MODE (ix86_compare_op0),
9726 ix86_compare_op0, ix86_compare_op1);
9727 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9729 return 1; /* DONE */
9732 /* Expand comparison setting or clearing carry flag. Return true when
9733 successful and set pop for the operation. */
9735 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9737 enum machine_mode mode =
9738 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9740 /* Do not handle DImode compares that go trought special path. Also we can't
9741 deal with FP compares yet. This is possible to add. */
9742 if ((mode == DImode && !TARGET_64BIT))
9744 if (FLOAT_MODE_P (mode))
9746 rtx second_test = NULL, bypass_test = NULL;
9747 rtx compare_op, compare_seq;
9749 /* Shortcut: following common codes never translate into carry flag compares. */
9750 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9751 || code == ORDERED || code == UNORDERED)
9754 /* These comparisons require zero flag; swap operands so they won't. */
9755 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9761 code = swap_condition (code);
9764 /* Try to expand the comparison and verify that we end up with carry flag
9765 based comparison. This is fails to be true only when we decide to expand
9766 comparison using arithmetic that is not too common scenario. */
9768 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9769 &second_test, &bypass_test);
9770 compare_seq = get_insns ();
9773 if (second_test || bypass_test)
9775 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9776 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9777 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9779 code = GET_CODE (compare_op);
9780 if (code != LTU && code != GEU)
9782 emit_insn (compare_seq);
9786 if (!INTEGRAL_MODE_P (mode))
9794 /* Convert a==0 into (unsigned)a<1. */
9797 if (op1 != const0_rtx)
9800 code = (code == EQ ? LTU : GEU);
9803 /* Convert a>b into b<a or a>=b-1. */
9806 if (GET_CODE (op1) == CONST_INT)
9808 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9809 /* Bail out on overflow. We still can swap operands but that
9810 would force loading of the constant into register. */
9811 if (op1 == const0_rtx
9812 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9814 code = (code == GTU ? GEU : LTU);
9821 code = (code == GTU ? LTU : GEU);
9825 /* Convert a>=0 into (unsigned)a<0x80000000. */
9828 if (mode == DImode || op1 != const0_rtx)
9830 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9831 code = (code == LT ? GEU : LTU);
9835 if (mode == DImode || op1 != constm1_rtx)
9837 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9838 code = (code == LE ? GEU : LTU);
9844 /* Swapping operands may cause constant to appear as first operand. */
9845 if (!nonimmediate_operand (op0, VOIDmode))
9849 op0 = force_reg (mode, op0);
9851 ix86_compare_op0 = op0;
9852 ix86_compare_op1 = op1;
9853 *pop = ix86_expand_compare (code, NULL, NULL);
9854 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9860 ix86_expand_int_movcc (rtx operands[])
9862 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9863 rtx compare_seq, compare_op;
9864 rtx second_test, bypass_test;
9865 enum machine_mode mode = GET_MODE (operands[0]);
9866 bool sign_bit_compare_p = false;;
9869 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9870 compare_seq = get_insns ();
9873 compare_code = GET_CODE (compare_op);
9875 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9876 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9877 sign_bit_compare_p = true;
9879 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9880 HImode insns, we'd be swallowed in word prefix ops. */
9882 if ((mode != HImode || TARGET_FAST_PREFIX)
9883 && (mode != DImode || TARGET_64BIT)
9884 && GET_CODE (operands[2]) == CONST_INT
9885 && GET_CODE (operands[3]) == CONST_INT)
9887 rtx out = operands[0];
9888 HOST_WIDE_INT ct = INTVAL (operands[2]);
9889 HOST_WIDE_INT cf = INTVAL (operands[3]);
9893 /* Sign bit compares are better done using shifts than we do by using
9895 if (sign_bit_compare_p
9896 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9897 ix86_compare_op1, &compare_op))
9899 /* Detect overlap between destination and compare sources. */
9902 if (!sign_bit_compare_p)
9906 compare_code = GET_CODE (compare_op);
9908 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9909 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9912 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9915 /* To simplify rest of code, restrict to the GEU case. */
9916 if (compare_code == LTU)
9918 HOST_WIDE_INT tmp = ct;
9921 compare_code = reverse_condition (compare_code);
9922 code = reverse_condition (code);
9927 PUT_CODE (compare_op,
9928 reverse_condition_maybe_unordered
9929 (GET_CODE (compare_op)));
9931 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9935 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9936 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9937 tmp = gen_reg_rtx (mode);
9940 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9942 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9946 if (code == GT || code == GE)
9947 code = reverse_condition (code);
9950 HOST_WIDE_INT tmp = ct;
9955 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9956 ix86_compare_op1, VOIDmode, 0, -1);
9969 tmp = expand_simple_binop (mode, PLUS,
9971 copy_rtx (tmp), 1, OPTAB_DIRECT);
9982 tmp = expand_simple_binop (mode, IOR,
9984 copy_rtx (tmp), 1, OPTAB_DIRECT);
9986 else if (diff == -1 && ct)
9996 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9998 tmp = expand_simple_binop (mode, PLUS,
9999 copy_rtx (tmp), GEN_INT (cf),
10000 copy_rtx (tmp), 1, OPTAB_DIRECT);
10008 * andl cf - ct, dest
10018 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10021 tmp = expand_simple_binop (mode, AND,
10023 gen_int_mode (cf - ct, mode),
10024 copy_rtx (tmp), 1, OPTAB_DIRECT);
10026 tmp = expand_simple_binop (mode, PLUS,
10027 copy_rtx (tmp), GEN_INT (ct),
10028 copy_rtx (tmp), 1, OPTAB_DIRECT);
10031 if (!rtx_equal_p (tmp, out))
10032 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
10034 return 1; /* DONE */
10040 tmp = ct, ct = cf, cf = tmp;
10042 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10044 /* We may be reversing unordered compare to normal compare, that
10045 is not valid in general (we may convert non-trapping condition
10046 to trapping one), however on i386 we currently emit all
10047 comparisons unordered. */
10048 compare_code = reverse_condition_maybe_unordered (compare_code);
10049 code = reverse_condition_maybe_unordered (code);
10053 compare_code = reverse_condition (compare_code);
10054 code = reverse_condition (code);
10058 compare_code = NIL;
10059 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
10060 && GET_CODE (ix86_compare_op1) == CONST_INT)
10062 if (ix86_compare_op1 == const0_rtx
10063 && (code == LT || code == GE))
10064 compare_code = code;
10065 else if (ix86_compare_op1 == constm1_rtx)
10069 else if (code == GT)
10074 /* Optimize dest = (op0 < 0) ? -1 : cf. */
10075 if (compare_code != NIL
10076 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
10077 && (cf == -1 || ct == -1))
10079 /* If lea code below could be used, only optimize
10080 if it results in a 2 insn sequence. */
10082 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
10083 || diff == 3 || diff == 5 || diff == 9)
10084 || (compare_code == LT && ct == -1)
10085 || (compare_code == GE && cf == -1))
10088 * notl op1 (if necessary)
10096 code = reverse_condition (code);
10099 out = emit_store_flag (out, code, ix86_compare_op0,
10100 ix86_compare_op1, VOIDmode, 0, -1);
10102 out = expand_simple_binop (mode, IOR,
10104 out, 1, OPTAB_DIRECT);
10105 if (out != operands[0])
10106 emit_move_insn (operands[0], out);
10108 return 1; /* DONE */
10113 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
10114 || diff == 3 || diff == 5 || diff == 9)
10115 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
10116 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
10122 * lea cf(dest*(ct-cf)),dest
10126 * This also catches the degenerate setcc-only case.
10132 out = emit_store_flag (out, code, ix86_compare_op0,
10133 ix86_compare_op1, VOIDmode, 0, 1);
10136 /* On x86_64 the lea instruction operates on Pmode, so we need
10137 to get arithmetics done in proper mode to match. */
10139 tmp = copy_rtx (out);
10143 out1 = copy_rtx (out);
10144 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10148 tmp = gen_rtx_PLUS (mode, tmp, out1);
10154 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10157 if (!rtx_equal_p (tmp, out))
10160 out = force_operand (tmp, copy_rtx (out));
10162 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10164 if (!rtx_equal_p (out, operands[0]))
10165 emit_move_insn (operands[0], copy_rtx (out));
10167 return 1; /* DONE */
10171 * General case: Jumpful:
10172 * xorl dest,dest cmpl op1, op2
10173 * cmpl op1, op2 movl ct, dest
10174 * setcc dest jcc 1f
10175 * decl dest movl cf, dest
10176 * andl (cf-ct),dest 1:
10179 * Size 20. Size 14.
10181 * This is reasonably steep, but branch mispredict costs are
10182 * high on modern cpus, so consider failing only if optimizing
10186 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10187 && BRANCH_COST >= 2)
10193 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10194 /* We may be reversing unordered compare to normal compare,
10195 that is not valid in general (we may convert non-trapping
10196 condition to trapping one), however on i386 we currently
10197 emit all comparisons unordered. */
10198 code = reverse_condition_maybe_unordered (code);
10201 code = reverse_condition (code);
10202 if (compare_code != NIL)
10203 compare_code = reverse_condition (compare_code);
10207 if (compare_code != NIL)
10209 /* notl op1 (if needed)
10214 For x < 0 (resp. x <= -1) there will be no notl,
10215 so if possible swap the constants to get rid of the
10217 True/false will be -1/0 while code below (store flag
10218 followed by decrement) is 0/-1, so the constants need
10219 to be exchanged once more. */
10221 if (compare_code == GE || !cf)
10223 code = reverse_condition (code);
10228 HOST_WIDE_INT tmp = cf;
10233 out = emit_store_flag (out, code, ix86_compare_op0,
10234 ix86_compare_op1, VOIDmode, 0, -1);
10238 out = emit_store_flag (out, code, ix86_compare_op0,
10239 ix86_compare_op1, VOIDmode, 0, 1);
10241 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10242 copy_rtx (out), 1, OPTAB_DIRECT);
10245 out = expand_simple_binop (mode, AND, copy_rtx (out),
10246 gen_int_mode (cf - ct, mode),
10247 copy_rtx (out), 1, OPTAB_DIRECT);
10249 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10250 copy_rtx (out), 1, OPTAB_DIRECT);
10251 if (!rtx_equal_p (out, operands[0]))
10252 emit_move_insn (operands[0], copy_rtx (out));
10254 return 1; /* DONE */
10258 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10260 /* Try a few things more with specific constants and a variable. */
10263 rtx var, orig_out, out, tmp;
10265 if (BRANCH_COST <= 2)
10266 return 0; /* FAIL */
10268 /* If one of the two operands is an interesting constant, load a
10269 constant with the above and mask it in with a logical operation. */
10271 if (GET_CODE (operands[2]) == CONST_INT)
10274 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10275 operands[3] = constm1_rtx, op = and_optab;
10276 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10277 operands[3] = const0_rtx, op = ior_optab;
10279 return 0; /* FAIL */
10281 else if (GET_CODE (operands[3]) == CONST_INT)
10284 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10285 operands[2] = constm1_rtx, op = and_optab;
10286 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
10287 operands[2] = const0_rtx, op = ior_optab;
10289 return 0; /* FAIL */
10292 return 0; /* FAIL */
10294 orig_out = operands[0];
10295 tmp = gen_reg_rtx (mode);
10298 /* Recurse to get the constant loaded. */
10299 if (ix86_expand_int_movcc (operands) == 0)
10300 return 0; /* FAIL */
10302 /* Mask in the interesting variable. */
10303 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10305 if (!rtx_equal_p (out, orig_out))
10306 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10308 return 1; /* DONE */
10312 * For comparison with above,
10322 if (! nonimmediate_operand (operands[2], mode))
10323 operands[2] = force_reg (mode, operands[2]);
10324 if (! nonimmediate_operand (operands[3], mode))
10325 operands[3] = force_reg (mode, operands[3]);
10327 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10329 rtx tmp = gen_reg_rtx (mode);
10330 emit_move_insn (tmp, operands[3]);
10333 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10335 rtx tmp = gen_reg_rtx (mode);
10336 emit_move_insn (tmp, operands[2]);
10340 if (! register_operand (operands[2], VOIDmode)
10342 || ! register_operand (operands[3], VOIDmode)))
10343 operands[2] = force_reg (mode, operands[2]);
10346 && ! register_operand (operands[3], VOIDmode))
10347 operands[3] = force_reg (mode, operands[3]);
10349 emit_insn (compare_seq);
10350 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10351 gen_rtx_IF_THEN_ELSE (mode,
10352 compare_op, operands[2],
10355 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10356 gen_rtx_IF_THEN_ELSE (mode,
10358 copy_rtx (operands[3]),
10359 copy_rtx (operands[0]))));
10361 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10362 gen_rtx_IF_THEN_ELSE (mode,
10364 copy_rtx (operands[2]),
10365 copy_rtx (operands[0]))));
10367 return 1; /* DONE */
10371 ix86_expand_fp_movcc (rtx operands[])
10373 enum rtx_code code;
10375 rtx compare_op, second_test, bypass_test;
10377 /* For SF/DFmode conditional moves based on comparisons
10378 in same mode, we may want to use SSE min/max instructions. */
10379 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10380 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10381 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10382 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
10383 && (!TARGET_IEEE_FP
10384 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10385 /* We may be called from the post-reload splitter. */
10386 && (!REG_P (operands[0])
10387 || SSE_REG_P (operands[0])
10388 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10390 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10391 code = GET_CODE (operands[1]);
10393 /* See if we have (cross) match between comparison operands and
10394 conditional move operands. */
10395 if (rtx_equal_p (operands[2], op1))
10400 code = reverse_condition_maybe_unordered (code);
10402 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10404 /* Check for min operation. */
10405 if (code == LT || code == UNLE)
10413 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10414 if (memory_operand (op0, VOIDmode))
10415 op0 = force_reg (GET_MODE (operands[0]), op0);
10416 if (GET_MODE (operands[0]) == SFmode)
10417 emit_insn (gen_minsf3 (operands[0], op0, op1));
10419 emit_insn (gen_mindf3 (operands[0], op0, op1));
10422 /* Check for max operation. */
10423 if (code == GT || code == UNGE)
10431 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10432 if (memory_operand (op0, VOIDmode))
10433 op0 = force_reg (GET_MODE (operands[0]), op0);
10434 if (GET_MODE (operands[0]) == SFmode)
10435 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10437 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10441 /* Manage condition to be sse_comparison_operator. In case we are
10442 in non-ieee mode, try to canonicalize the destination operand
10443 to be first in the comparison - this helps reload to avoid extra
10445 if (!sse_comparison_operator (operands[1], VOIDmode)
10446 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10448 rtx tmp = ix86_compare_op0;
10449 ix86_compare_op0 = ix86_compare_op1;
10450 ix86_compare_op1 = tmp;
10451 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10452 VOIDmode, ix86_compare_op0,
10455 /* Similarly try to manage result to be first operand of conditional
10456 move. We also don't support the NE comparison on SSE, so try to
10458 if ((rtx_equal_p (operands[0], operands[3])
10459 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10460 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10462 rtx tmp = operands[2];
10463 operands[2] = operands[3];
10465 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10466 (GET_CODE (operands[1])),
10467 VOIDmode, ix86_compare_op0,
10470 if (GET_MODE (operands[0]) == SFmode)
10471 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10472 operands[2], operands[3],
10473 ix86_compare_op0, ix86_compare_op1));
10475 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10476 operands[2], operands[3],
10477 ix86_compare_op0, ix86_compare_op1));
10481 /* The floating point conditional move instructions don't directly
10482 support conditions resulting from a signed integer comparison. */
10484 code = GET_CODE (operands[1]);
10485 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10487 /* The floating point conditional move instructions don't directly
10488 support signed integer comparisons. */
10490 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10492 if (second_test != NULL || bypass_test != NULL)
10494 tmp = gen_reg_rtx (QImode);
10495 ix86_expand_setcc (code, tmp);
10497 ix86_compare_op0 = tmp;
10498 ix86_compare_op1 = const0_rtx;
10499 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10501 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10503 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10504 emit_move_insn (tmp, operands[3]);
10507 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10509 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10510 emit_move_insn (tmp, operands[2]);
10514 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10515 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10520 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10521 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10526 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10527 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10535 /* Expand conditional increment or decrement using adb/sbb instructions.
10536 The default case using setcc followed by the conditional move can be
10537 done by generic code. */
10539 ix86_expand_int_addcc (rtx operands[])
10541 enum rtx_code code = GET_CODE (operands[1]);
10543 rtx val = const0_rtx;
10544 bool fpcmp = false;
10545 enum machine_mode mode = GET_MODE (operands[0]);
10547 if (operands[3] != const1_rtx
10548 && operands[3] != constm1_rtx)
10550 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10551 ix86_compare_op1, &compare_op))
10553 code = GET_CODE (compare_op);
10555 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10556 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10559 code = ix86_fp_compare_code_to_integer (code);
10566 PUT_CODE (compare_op,
10567 reverse_condition_maybe_unordered
10568 (GET_CODE (compare_op)));
10570 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10572 PUT_MODE (compare_op, mode);
10574 /* Construct either adc or sbb insn. */
10575 if ((code == LTU) == (operands[3] == constm1_rtx))
10577 switch (GET_MODE (operands[0]))
10580 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10583 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10586 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10589 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10597 switch (GET_MODE (operands[0]))
10600 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10603 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10606 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10609 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10615 return 1; /* DONE */
10619 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10620 works for floating pointer parameters and nonoffsetable memories.
10621 For pushes, it returns just stack offsets; the values will be saved
10622 in the right order. Maximally three parts are generated. */
10625 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10630 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10632 size = (GET_MODE_SIZE (mode) + 4) / 8;
10634 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10636 if (size < 2 || size > 3)
10639 /* Optimize constant pool reference to immediates. This is used by fp
10640 moves, that force all constants to memory to allow combining. */
10641 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10643 rtx tmp = maybe_get_pool_constant (operand);
10648 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10650 /* The only non-offsetable memories we handle are pushes. */
10651 if (! push_operand (operand, VOIDmode))
10654 operand = copy_rtx (operand);
10655 PUT_MODE (operand, Pmode);
10656 parts[0] = parts[1] = parts[2] = operand;
10658 else if (!TARGET_64BIT)
10660 if (mode == DImode)
10661 split_di (&operand, 1, &parts[0], &parts[1]);
10664 if (REG_P (operand))
10666 if (!reload_completed)
10668 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10669 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10671 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10673 else if (offsettable_memref_p (operand))
10675 operand = adjust_address (operand, SImode, 0);
10676 parts[0] = operand;
10677 parts[1] = adjust_address (operand, SImode, 4);
10679 parts[2] = adjust_address (operand, SImode, 8);
10681 else if (GET_CODE (operand) == CONST_DOUBLE)
10686 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10690 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10691 parts[2] = gen_int_mode (l[2], SImode);
10694 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10699 parts[1] = gen_int_mode (l[1], SImode);
10700 parts[0] = gen_int_mode (l[0], SImode);
10708 if (mode == TImode)
10709 split_ti (&operand, 1, &parts[0], &parts[1]);
10710 if (mode == XFmode || mode == TFmode)
10712 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10713 if (REG_P (operand))
10715 if (!reload_completed)
10717 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10718 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10720 else if (offsettable_memref_p (operand))
10722 operand = adjust_address (operand, DImode, 0);
10723 parts[0] = operand;
10724 parts[1] = adjust_address (operand, upper_mode, 8);
10726 else if (GET_CODE (operand) == CONST_DOUBLE)
10731 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10732 real_to_target (l, &r, mode);
10733 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10734 if (HOST_BITS_PER_WIDE_INT >= 64)
10737 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10738 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10741 parts[0] = immed_double_const (l[0], l[1], DImode);
10742 if (upper_mode == SImode)
10743 parts[1] = gen_int_mode (l[2], SImode);
10744 else if (HOST_BITS_PER_WIDE_INT >= 64)
10747 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10748 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10751 parts[1] = immed_double_const (l[2], l[3], DImode);
10761 /* Emit insns to perform a move or push of DI, DF, and XF values.
10762 Return false when normal moves are needed; true when all required
10763 insns have been emitted. Operands 2-4 contain the input values
10764 int the correct order; operands 5-7 contain the output values. */
10767 ix86_split_long_move (rtx operands[])
10772 int collisions = 0;
10773 enum machine_mode mode = GET_MODE (operands[0]);
10775 /* The DFmode expanders may ask us to move double.
10776 For 64bit target this is single move. By hiding the fact
10777 here we simplify i386.md splitters. */
10778 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10780 /* Optimize constant pool reference to immediates. This is used by
10781 fp moves, that force all constants to memory to allow combining. */
10783 if (GET_CODE (operands[1]) == MEM
10784 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10785 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10786 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10787 if (push_operand (operands[0], VOIDmode))
10789 operands[0] = copy_rtx (operands[0]);
10790 PUT_MODE (operands[0], Pmode);
10793 operands[0] = gen_lowpart (DImode, operands[0]);
10794 operands[1] = gen_lowpart (DImode, operands[1]);
10795 emit_move_insn (operands[0], operands[1]);
10799 /* The only non-offsettable memory we handle is push. */
10800 if (push_operand (operands[0], VOIDmode))
10802 else if (GET_CODE (operands[0]) == MEM
10803 && ! offsettable_memref_p (operands[0]))
10806 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10807 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10809 /* When emitting push, take care for source operands on the stack. */
10810 if (push && GET_CODE (operands[1]) == MEM
10811 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10814 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10815 XEXP (part[1][2], 0));
10816 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10817 XEXP (part[1][1], 0));
10820 /* We need to do copy in the right order in case an address register
10821 of the source overlaps the destination. */
10822 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10824 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10826 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10829 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10832 /* Collision in the middle part can be handled by reordering. */
10833 if (collisions == 1 && nparts == 3
10834 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10837 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10838 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10841 /* If there are more collisions, we can't handle it by reordering.
10842 Do an lea to the last part and use only one colliding move. */
10843 else if (collisions > 1)
10849 base = part[0][nparts - 1];
10851 /* Handle the case when the last part isn't valid for lea.
10852 Happens in 64-bit mode storing the 12-byte XFmode. */
10853 if (GET_MODE (base) != Pmode)
10854 base = gen_rtx_REG (Pmode, REGNO (base));
10856 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10857 part[1][0] = replace_equiv_address (part[1][0], base);
10858 part[1][1] = replace_equiv_address (part[1][1],
10859 plus_constant (base, UNITS_PER_WORD));
10861 part[1][2] = replace_equiv_address (part[1][2],
10862 plus_constant (base, 8));
10872 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10873 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10874 emit_move_insn (part[0][2], part[1][2]);
10879 /* In 64bit mode we don't have 32bit push available. In case this is
10880 register, it is OK - we will just use larger counterpart. We also
10881 retype memory - these comes from attempt to avoid REX prefix on
10882 moving of second half of TFmode value. */
10883 if (GET_MODE (part[1][1]) == SImode)
10885 if (GET_CODE (part[1][1]) == MEM)
10886 part[1][1] = adjust_address (part[1][1], DImode, 0);
10887 else if (REG_P (part[1][1]))
10888 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10891 if (GET_MODE (part[1][0]) == SImode)
10892 part[1][0] = part[1][1];
10895 emit_move_insn (part[0][1], part[1][1]);
10896 emit_move_insn (part[0][0], part[1][0]);
10900 /* Choose correct order to not overwrite the source before it is copied. */
10901 if ((REG_P (part[0][0])
10902 && REG_P (part[1][1])
10903 && (REGNO (part[0][0]) == REGNO (part[1][1])
10905 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10907 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10911 operands[2] = part[0][2];
10912 operands[3] = part[0][1];
10913 operands[4] = part[0][0];
10914 operands[5] = part[1][2];
10915 operands[6] = part[1][1];
10916 operands[7] = part[1][0];
10920 operands[2] = part[0][1];
10921 operands[3] = part[0][0];
10922 operands[5] = part[1][1];
10923 operands[6] = part[1][0];
10930 operands[2] = part[0][0];
10931 operands[3] = part[0][1];
10932 operands[4] = part[0][2];
10933 operands[5] = part[1][0];
10934 operands[6] = part[1][1];
10935 operands[7] = part[1][2];
10939 operands[2] = part[0][0];
10940 operands[3] = part[0][1];
10941 operands[5] = part[1][0];
10942 operands[6] = part[1][1];
10945 emit_move_insn (operands[2], operands[5]);
10946 emit_move_insn (operands[3], operands[6]);
10948 emit_move_insn (operands[4], operands[7]);
10954 ix86_split_ashldi (rtx *operands, rtx scratch)
10956 rtx low[2], high[2];
10959 if (GET_CODE (operands[2]) == CONST_INT)
10961 split_di (operands, 2, low, high);
10962 count = INTVAL (operands[2]) & 63;
10966 emit_move_insn (high[0], low[1]);
10967 emit_move_insn (low[0], const0_rtx);
10970 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10974 if (!rtx_equal_p (operands[0], operands[1]))
10975 emit_move_insn (operands[0], operands[1]);
10976 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10977 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10982 if (!rtx_equal_p (operands[0], operands[1]))
10983 emit_move_insn (operands[0], operands[1]);
10985 split_di (operands, 1, low, high);
10987 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10988 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10990 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10992 if (! no_new_pseudos)
10993 scratch = force_reg (SImode, const0_rtx);
10995 emit_move_insn (scratch, const0_rtx);
10997 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
11001 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
11006 ix86_split_ashrdi (rtx *operands, rtx scratch)
11008 rtx low[2], high[2];
11011 if (GET_CODE (operands[2]) == CONST_INT)
11013 split_di (operands, 2, low, high);
11014 count = INTVAL (operands[2]) & 63;
11018 emit_move_insn (high[0], high[1]);
11019 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
11020 emit_move_insn (low[0], high[0]);
11023 else if (count >= 32)
11025 emit_move_insn (low[0], high[1]);
11027 if (! reload_completed)
11028 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
11031 emit_move_insn (high[0], low[0]);
11032 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
11036 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
11040 if (!rtx_equal_p (operands[0], operands[1]))
11041 emit_move_insn (operands[0], operands[1]);
11042 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11043 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
11048 if (!rtx_equal_p (operands[0], operands[1]))
11049 emit_move_insn (operands[0], operands[1]);
11051 split_di (operands, 1, low, high);
11053 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11054 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
11056 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
11058 if (! no_new_pseudos)
11059 scratch = gen_reg_rtx (SImode);
11060 emit_move_insn (scratch, high[0]);
11061 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
11062 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11066 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
11071 ix86_split_lshrdi (rtx *operands, rtx scratch)
11073 rtx low[2], high[2];
11076 if (GET_CODE (operands[2]) == CONST_INT)
11078 split_di (operands, 2, low, high);
11079 count = INTVAL (operands[2]) & 63;
11083 emit_move_insn (low[0], high[1]);
11084 emit_move_insn (high[0], const0_rtx);
11087 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
11091 if (!rtx_equal_p (operands[0], operands[1]))
11092 emit_move_insn (operands[0], operands[1]);
11093 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11094 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
11099 if (!rtx_equal_p (operands[0], operands[1]))
11100 emit_move_insn (operands[0], operands[1]);
11102 split_di (operands, 1, low, high);
11104 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11105 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
11107 /* Heh. By reversing the arguments, we can reuse this pattern. */
11108 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
11110 if (! no_new_pseudos)
11111 scratch = force_reg (SImode, const0_rtx);
11113 emit_move_insn (scratch, const0_rtx);
11115 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11119 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
11123 /* Helper function for the string operations below. Dest VARIABLE whether
11124 it is aligned to VALUE bytes. If true, jump to the label. */
11126 ix86_expand_aligntest (rtx variable, int value)
11128 rtx label = gen_label_rtx ();
11129 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
11130 if (GET_MODE (variable) == DImode)
11131 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
11133 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11134 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
11139 /* Adjust COUNTER by the VALUE. */
11141 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
11143 if (GET_MODE (countreg) == DImode)
11144 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11146 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11149 /* Zero extend possibly SImode EXP to Pmode register. */
11151 ix86_zero_extend_to_Pmode (rtx exp)
11154 if (GET_MODE (exp) == VOIDmode)
11155 return force_reg (Pmode, exp);
11156 if (GET_MODE (exp) == Pmode)
11157 return copy_to_mode_reg (Pmode, exp);
11158 r = gen_reg_rtx (Pmode);
11159 emit_insn (gen_zero_extendsidi2 (r, exp));
11163 /* Expand string move (memcpy) operation. Use i386 string operations when
11164 profitable. expand_clrmem contains similar code. */
11166 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11168 rtx srcreg, destreg, countreg, srcexp, destexp;
11169 enum machine_mode counter_mode;
11170 HOST_WIDE_INT align = 0;
11171 unsigned HOST_WIDE_INT count = 0;
11173 if (GET_CODE (align_exp) == CONST_INT)
11174 align = INTVAL (align_exp);
11176 /* Can't use any of this if the user has appropriated esi or edi. */
11177 if (global_regs[4] || global_regs[5])
11180 /* This simple hack avoids all inlining code and simplifies code below. */
11181 if (!TARGET_ALIGN_STRINGOPS)
11184 if (GET_CODE (count_exp) == CONST_INT)
11186 count = INTVAL (count_exp);
11187 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11191 /* Figure out proper mode for counter. For 32bits it is always SImode,
11192 for 64bits use SImode when possible, otherwise DImode.
11193 Set count to number of bytes copied when known at compile time. */
11194 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11195 || x86_64_zero_extended_value (count_exp))
11196 counter_mode = SImode;
11198 counter_mode = DImode;
11200 if (counter_mode != SImode && counter_mode != DImode)
11203 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11204 if (destreg != XEXP (dst, 0))
11205 dst = replace_equiv_address_nv (dst, destreg);
11206 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11207 if (srcreg != XEXP (src, 0))
11208 src = replace_equiv_address_nv (src, srcreg);
11210 /* When optimizing for size emit simple rep ; movsb instruction for
11211 counts not divisible by 4. */
11213 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11215 emit_insn (gen_cld ());
11216 countreg = ix86_zero_extend_to_Pmode (count_exp);
11217 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11218 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11219 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11223 /* For constant aligned (or small unaligned) copies use rep movsl
11224 followed by code copying the rest. For PentiumPro ensure 8 byte
11225 alignment to allow rep movsl acceleration. */
11227 else if (count != 0
11229 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11230 || optimize_size || count < (unsigned int) 64))
11232 unsigned HOST_WIDE_INT offset = 0;
11233 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11234 rtx srcmem, dstmem;
11236 emit_insn (gen_cld ());
11237 if (count & ~(size - 1))
11239 countreg = copy_to_mode_reg (counter_mode,
11240 GEN_INT ((count >> (size == 4 ? 2 : 3))
11241 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11242 countreg = ix86_zero_extend_to_Pmode (countreg);
11244 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11245 GEN_INT (size == 4 ? 2 : 3));
11246 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11247 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11249 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11250 countreg, destexp, srcexp));
11251 offset = count & ~(size - 1);
11253 if (size == 8 && (count & 0x04))
11255 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11257 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11259 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11264 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11266 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11268 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11273 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11275 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11277 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11280 /* The generic code based on the glibc implementation:
11281 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11282 allowing accelerated copying there)
11283 - copy the data using rep movsl
11284 - copy the rest. */
11289 rtx srcmem, dstmem;
11290 int desired_alignment = (TARGET_PENTIUMPRO
11291 && (count == 0 || count >= (unsigned int) 260)
11292 ? 8 : UNITS_PER_WORD);
11293 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11294 dst = change_address (dst, BLKmode, destreg);
11295 src = change_address (src, BLKmode, srcreg);
11297 /* In case we don't know anything about the alignment, default to
11298 library version, since it is usually equally fast and result in
11301 Also emit call when we know that the count is large and call overhead
11302 will not be important. */
11303 if (!TARGET_INLINE_ALL_STRINGOPS
11304 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11307 if (TARGET_SINGLE_STRINGOP)
11308 emit_insn (gen_cld ());
11310 countreg2 = gen_reg_rtx (Pmode);
11311 countreg = copy_to_mode_reg (counter_mode, count_exp);
11313 /* We don't use loops to align destination and to copy parts smaller
11314 than 4 bytes, because gcc is able to optimize such code better (in
11315 the case the destination or the count really is aligned, gcc is often
11316 able to predict the branches) and also it is friendlier to the
11317 hardware branch prediction.
11319 Using loops is beneficial for generic case, because we can
11320 handle small counts using the loops. Many CPUs (such as Athlon)
11321 have large REP prefix setup costs.
11323 This is quite costly. Maybe we can revisit this decision later or
11324 add some customizability to this code. */
11326 if (count == 0 && align < desired_alignment)
11328 label = gen_label_rtx ();
11329 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11330 LEU, 0, counter_mode, 1, label);
11334 rtx label = ix86_expand_aligntest (destreg, 1);
11335 srcmem = change_address (src, QImode, srcreg);
11336 dstmem = change_address (dst, QImode, destreg);
11337 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11338 ix86_adjust_counter (countreg, 1);
11339 emit_label (label);
11340 LABEL_NUSES (label) = 1;
11344 rtx label = ix86_expand_aligntest (destreg, 2);
11345 srcmem = change_address (src, HImode, srcreg);
11346 dstmem = change_address (dst, HImode, destreg);
11347 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11348 ix86_adjust_counter (countreg, 2);
11349 emit_label (label);
11350 LABEL_NUSES (label) = 1;
11352 if (align <= 4 && desired_alignment > 4)
11354 rtx label = ix86_expand_aligntest (destreg, 4);
11355 srcmem = change_address (src, SImode, srcreg);
11356 dstmem = change_address (dst, SImode, destreg);
11357 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11358 ix86_adjust_counter (countreg, 4);
11359 emit_label (label);
11360 LABEL_NUSES (label) = 1;
11363 if (label && desired_alignment > 4 && !TARGET_64BIT)
11365 emit_label (label);
11366 LABEL_NUSES (label) = 1;
11369 if (!TARGET_SINGLE_STRINGOP)
11370 emit_insn (gen_cld ());
11373 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11375 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11379 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11380 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11382 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11383 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11384 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11385 countreg2, destexp, srcexp));
11389 emit_label (label);
11390 LABEL_NUSES (label) = 1;
11392 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11394 srcmem = change_address (src, SImode, srcreg);
11395 dstmem = change_address (dst, SImode, destreg);
11396 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11398 if ((align <= 4 || count == 0) && TARGET_64BIT)
11400 rtx label = ix86_expand_aligntest (countreg, 4);
11401 srcmem = change_address (src, SImode, srcreg);
11402 dstmem = change_address (dst, SImode, destreg);
11403 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11404 emit_label (label);
11405 LABEL_NUSES (label) = 1;
11407 if (align > 2 && count != 0 && (count & 2))
11409 srcmem = change_address (src, HImode, srcreg);
11410 dstmem = change_address (dst, HImode, destreg);
11411 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11413 if (align <= 2 || count == 0)
11415 rtx label = ix86_expand_aligntest (countreg, 2);
11416 srcmem = change_address (src, HImode, srcreg);
11417 dstmem = change_address (dst, HImode, destreg);
11418 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11419 emit_label (label);
11420 LABEL_NUSES (label) = 1;
11422 if (align > 1 && count != 0 && (count & 1))
11424 srcmem = change_address (src, QImode, srcreg);
11425 dstmem = change_address (dst, QImode, destreg);
11426 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11428 if (align <= 1 || count == 0)
11430 rtx label = ix86_expand_aligntest (countreg, 1);
11431 srcmem = change_address (src, QImode, srcreg);
11432 dstmem = change_address (dst, QImode, destreg);
11433 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11434 emit_label (label);
11435 LABEL_NUSES (label) = 1;
11442 /* Expand string clear operation (bzero). Use i386 string operations when
11443 profitable. expand_movmem contains similar code. */
11445 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
11447 rtx destreg, zeroreg, countreg, destexp;
11448 enum machine_mode counter_mode;
11449 HOST_WIDE_INT align = 0;
11450 unsigned HOST_WIDE_INT count = 0;
11452 if (GET_CODE (align_exp) == CONST_INT)
11453 align = INTVAL (align_exp);
11455 /* Can't use any of this if the user has appropriated esi. */
11456 if (global_regs[4])
11459 /* This simple hack avoids all inlining code and simplifies code below. */
11460 if (!TARGET_ALIGN_STRINGOPS)
11463 if (GET_CODE (count_exp) == CONST_INT)
11465 count = INTVAL (count_exp);
11466 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11469 /* Figure out proper mode for counter. For 32bits it is always SImode,
11470 for 64bits use SImode when possible, otherwise DImode.
11471 Set count to number of bytes copied when known at compile time. */
11472 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11473 || x86_64_zero_extended_value (count_exp))
11474 counter_mode = SImode;
11476 counter_mode = DImode;
11478 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11479 if (destreg != XEXP (dst, 0))
11480 dst = replace_equiv_address_nv (dst, destreg);
11482 emit_insn (gen_cld ());
11484 /* When optimizing for size emit simple rep ; movsb instruction for
11485 counts not divisible by 4. */
11487 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11489 countreg = ix86_zero_extend_to_Pmode (count_exp);
11490 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11491 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11492 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11494 else if (count != 0
11496 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11497 || optimize_size || count < (unsigned int) 64))
11499 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11500 unsigned HOST_WIDE_INT offset = 0;
11502 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11503 if (count & ~(size - 1))
11505 countreg = copy_to_mode_reg (counter_mode,
11506 GEN_INT ((count >> (size == 4 ? 2 : 3))
11507 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11508 countreg = ix86_zero_extend_to_Pmode (countreg);
11509 destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
11510 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11511 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11512 offset = count & ~(size - 1);
11514 if (size == 8 && (count & 0x04))
11516 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11518 emit_insn (gen_strset (destreg, mem,
11519 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11524 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11526 emit_insn (gen_strset (destreg, mem,
11527 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11532 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11534 emit_insn (gen_strset (destreg, mem,
11535 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11542 /* Compute desired alignment of the string operation. */
11543 int desired_alignment = (TARGET_PENTIUMPRO
11544 && (count == 0 || count >= (unsigned int) 260)
11545 ? 8 : UNITS_PER_WORD);
11547 /* In case we don't know anything about the alignment, default to
11548 library version, since it is usually equally fast and result in
11551 Also emit call when we know that the count is large and call overhead
11552 will not be important. */
11553 if (!TARGET_INLINE_ALL_STRINGOPS
11554 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11557 if (TARGET_SINGLE_STRINGOP)
11558 emit_insn (gen_cld ());
11560 countreg2 = gen_reg_rtx (Pmode);
11561 countreg = copy_to_mode_reg (counter_mode, count_exp);
11562 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11563 /* Get rid of MEM_OFFSET, it won't be accurate. */
11564 dst = change_address (dst, BLKmode, destreg);
11566 if (count == 0 && align < desired_alignment)
11568 label = gen_label_rtx ();
11569 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11570 LEU, 0, counter_mode, 1, label);
11574 rtx label = ix86_expand_aligntest (destreg, 1);
11575 emit_insn (gen_strset (destreg, dst,
11576 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11577 ix86_adjust_counter (countreg, 1);
11578 emit_label (label);
11579 LABEL_NUSES (label) = 1;
11583 rtx label = ix86_expand_aligntest (destreg, 2);
11584 emit_insn (gen_strset (destreg, dst,
11585 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11586 ix86_adjust_counter (countreg, 2);
11587 emit_label (label);
11588 LABEL_NUSES (label) = 1;
11590 if (align <= 4 && desired_alignment > 4)
11592 rtx label = ix86_expand_aligntest (destreg, 4);
11593 emit_insn (gen_strset (destreg, dst,
11595 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11597 ix86_adjust_counter (countreg, 4);
11598 emit_label (label);
11599 LABEL_NUSES (label) = 1;
11602 if (label && desired_alignment > 4 && !TARGET_64BIT)
11604 emit_label (label);
11605 LABEL_NUSES (label) = 1;
11609 if (!TARGET_SINGLE_STRINGOP)
11610 emit_insn (gen_cld ());
11613 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11615 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11619 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11620 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11622 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11623 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11627 emit_label (label);
11628 LABEL_NUSES (label) = 1;
11631 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11632 emit_insn (gen_strset (destreg, dst,
11633 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11634 if (TARGET_64BIT && (align <= 4 || count == 0))
11636 rtx label = ix86_expand_aligntest (countreg, 4);
11637 emit_insn (gen_strset (destreg, dst,
11638 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11639 emit_label (label);
11640 LABEL_NUSES (label) = 1;
11642 if (align > 2 && count != 0 && (count & 2))
11643 emit_insn (gen_strset (destreg, dst,
11644 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11645 if (align <= 2 || count == 0)
11647 rtx label = ix86_expand_aligntest (countreg, 2);
11648 emit_insn (gen_strset (destreg, dst,
11649 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11650 emit_label (label);
11651 LABEL_NUSES (label) = 1;
11653 if (align > 1 && count != 0 && (count & 1))
11654 emit_insn (gen_strset (destreg, dst,
11655 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11656 if (align <= 1 || count == 0)
11658 rtx label = ix86_expand_aligntest (countreg, 1);
11659 emit_insn (gen_strset (destreg, dst,
11660 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11661 emit_label (label);
11662 LABEL_NUSES (label) = 1;
11668 /* Expand strlen. */
11670 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11672 rtx addr, scratch1, scratch2, scratch3, scratch4;
11674 /* The generic case of strlen expander is long. Avoid it's
11675 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11677 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11678 && !TARGET_INLINE_ALL_STRINGOPS
11680 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11683 addr = force_reg (Pmode, XEXP (src, 0));
11684 scratch1 = gen_reg_rtx (Pmode);
11686 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11689 /* Well it seems that some optimizer does not combine a call like
11690 foo(strlen(bar), strlen(bar));
11691 when the move and the subtraction is done here. It does calculate
11692 the length just once when these instructions are done inside of
11693 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11694 often used and I use one fewer register for the lifetime of
11695 output_strlen_unroll() this is better. */
11697 emit_move_insn (out, addr);
11699 ix86_expand_strlensi_unroll_1 (out, src, align);
11701 /* strlensi_unroll_1 returns the address of the zero at the end of
11702 the string, like memchr(), so compute the length by subtracting
11703 the start address. */
11705 emit_insn (gen_subdi3 (out, out, addr));
11707 emit_insn (gen_subsi3 (out, out, addr));
11712 scratch2 = gen_reg_rtx (Pmode);
11713 scratch3 = gen_reg_rtx (Pmode);
11714 scratch4 = force_reg (Pmode, constm1_rtx);
11716 emit_move_insn (scratch3, addr);
11717 eoschar = force_reg (QImode, eoschar);
11719 emit_insn (gen_cld ());
11720 src = replace_equiv_address_nv (src, scratch3);
11722 /* If .md starts supporting :P, this can be done in .md. */
11723 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11724 scratch4), UNSPEC_SCAS);
11725 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
11728 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11729 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11733 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11734 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11740 /* Expand the appropriate insns for doing strlen if not just doing
11743 out = result, initialized with the start address
11744 align_rtx = alignment of the address.
11745 scratch = scratch register, initialized with the startaddress when
11746 not aligned, otherwise undefined
11748 This is just the body. It needs the initializations mentioned above and
11749 some address computing at the end. These things are done in i386.md. */
11752 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11756 rtx align_2_label = NULL_RTX;
11757 rtx align_3_label = NULL_RTX;
11758 rtx align_4_label = gen_label_rtx ();
11759 rtx end_0_label = gen_label_rtx ();
11761 rtx tmpreg = gen_reg_rtx (SImode);
11762 rtx scratch = gen_reg_rtx (SImode);
11766 if (GET_CODE (align_rtx) == CONST_INT)
11767 align = INTVAL (align_rtx);
11769 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11771 /* Is there a known alignment and is it less than 4? */
11774 rtx scratch1 = gen_reg_rtx (Pmode);
11775 emit_move_insn (scratch1, out);
11776 /* Is there a known alignment and is it not 2? */
11779 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11780 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11782 /* Leave just the 3 lower bits. */
11783 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11784 NULL_RTX, 0, OPTAB_WIDEN);
11786 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11787 Pmode, 1, align_4_label);
11788 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
11789 Pmode, 1, align_2_label);
11790 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
11791 Pmode, 1, align_3_label);
11795 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11796 check if is aligned to 4 - byte. */
11798 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
11799 NULL_RTX, 0, OPTAB_WIDEN);
11801 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11802 Pmode, 1, align_4_label);
11805 mem = change_address (src, QImode, out);
11807 /* Now compare the bytes. */
11809 /* Compare the first n unaligned byte on a byte per byte basis. */
11810 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11811 QImode, 1, end_0_label);
11813 /* Increment the address. */
11815 emit_insn (gen_adddi3 (out, out, const1_rtx));
11817 emit_insn (gen_addsi3 (out, out, const1_rtx));
11819 /* Not needed with an alignment of 2 */
11822 emit_label (align_2_label);
11824 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11828 emit_insn (gen_adddi3 (out, out, const1_rtx));
11830 emit_insn (gen_addsi3 (out, out, const1_rtx));
11832 emit_label (align_3_label);
11835 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11839 emit_insn (gen_adddi3 (out, out, const1_rtx));
11841 emit_insn (gen_addsi3 (out, out, const1_rtx));
11844 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11845 align this loop. It gives only huge programs, but does not help to
11847 emit_label (align_4_label);
11849 mem = change_address (src, SImode, out);
11850 emit_move_insn (scratch, mem);
11852 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11854 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11856 /* This formula yields a nonzero result iff one of the bytes is zero.
11857 This saves three branches inside loop and many cycles. */
11859 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11860 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11861 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11862 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11863 gen_int_mode (0x80808080, SImode)));
11864 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11869 rtx reg = gen_reg_rtx (SImode);
11870 rtx reg2 = gen_reg_rtx (Pmode);
11871 emit_move_insn (reg, tmpreg);
11872 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11874 /* If zero is not in the first two bytes, move two bytes forward. */
11875 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11876 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11877 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11878 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11879 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11882 /* Emit lea manually to avoid clobbering of flags. */
11883 emit_insn (gen_rtx_SET (SImode, reg2,
11884 gen_rtx_PLUS (Pmode, out, const2_rtx)));
11886 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11887 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11888 emit_insn (gen_rtx_SET (VOIDmode, out,
11889 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11896 rtx end_2_label = gen_label_rtx ();
11897 /* Is zero in the first two bytes? */
11899 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11900 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11901 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11902 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11903 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11905 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11906 JUMP_LABEL (tmp) = end_2_label;
11908 /* Not in the first two. Move two bytes forward. */
11909 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11911 emit_insn (gen_adddi3 (out, out, const2_rtx));
11913 emit_insn (gen_addsi3 (out, out, const2_rtx));
11915 emit_label (end_2_label);
11919 /* Avoid branch in fixing the byte. */
11920 tmpreg = gen_lowpart (QImode, tmpreg);
11921 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11922 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11924 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11926 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11928 emit_label (end_0_label);
11932 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11933 rtx callarg2 ATTRIBUTE_UNUSED,
11934 rtx pop, int sibcall)
11936 rtx use = NULL, call;
11938 if (pop == const0_rtx)
11940 if (TARGET_64BIT && pop)
11944 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11945 fnaddr = machopic_indirect_call_target (fnaddr);
11947 /* Static functions and indirect calls don't need the pic register. */
11948 if (! TARGET_64BIT && flag_pic
11949 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11950 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11951 use_reg (&use, pic_offset_table_rtx);
11953 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11955 rtx al = gen_rtx_REG (QImode, 0);
11956 emit_move_insn (al, callarg2);
11957 use_reg (&use, al);
11959 #endif /* TARGET_MACHO */
11961 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11963 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11964 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11966 if (sibcall && TARGET_64BIT
11967 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11970 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11971 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11972 emit_move_insn (fnaddr, addr);
11973 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11976 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11978 call = gen_rtx_SET (VOIDmode, retval, call);
11981 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11982 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11983 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11986 call = emit_call_insn (call);
11988 CALL_INSN_FUNCTION_USAGE (call) = use;
11992 /* Clear stack slot assignments remembered from previous functions.
11993 This is called from INIT_EXPANDERS once before RTL is emitted for each
11996 static struct machine_function *
11997 ix86_init_machine_status (void)
11999 struct machine_function *f;
12001 f = ggc_alloc_cleared (sizeof (struct machine_function));
12002 f->use_fast_prologue_epilogue_nregs = -1;
12007 /* Return a MEM corresponding to a stack slot with mode MODE.
12008 Allocate a new slot if necessary.
12010 The RTL for a function can have several slots available: N is
12011 which slot to use. */
12014 assign_386_stack_local (enum machine_mode mode, int n)
12016 struct stack_local_entry *s;
12018 if (n < 0 || n >= MAX_386_STACK_LOCALS)
12021 for (s = ix86_stack_locals; s; s = s->next)
12022 if (s->mode == mode && s->n == n)
12025 s = (struct stack_local_entry *)
12026 ggc_alloc (sizeof (struct stack_local_entry));
12029 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
12031 s->next = ix86_stack_locals;
12032 ix86_stack_locals = s;
12036 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12038 static GTY(()) rtx ix86_tls_symbol;
12040 ix86_tls_get_addr (void)
12043 if (!ix86_tls_symbol)
12045 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
12046 (TARGET_GNU_TLS && !TARGET_64BIT)
12047 ? "___tls_get_addr"
12048 : "__tls_get_addr");
12051 return ix86_tls_symbol;
12054 /* Calculate the length of the memory address in the instruction
12055 encoding. Does not include the one-byte modrm, opcode, or prefix. */
12058 memory_address_length (rtx addr)
12060 struct ix86_address parts;
12061 rtx base, index, disp;
12064 if (GET_CODE (addr) == PRE_DEC
12065 || GET_CODE (addr) == POST_INC
12066 || GET_CODE (addr) == PRE_MODIFY
12067 || GET_CODE (addr) == POST_MODIFY)
12070 if (! ix86_decompose_address (addr, &parts))
12074 index = parts.index;
12079 - esp as the base always wants an index,
12080 - ebp as the base always wants a displacement. */
12082 /* Register Indirect. */
12083 if (base && !index && !disp)
12085 /* esp (for its index) and ebp (for its displacement) need
12086 the two-byte modrm form. */
12087 if (addr == stack_pointer_rtx
12088 || addr == arg_pointer_rtx
12089 || addr == frame_pointer_rtx
12090 || addr == hard_frame_pointer_rtx)
12094 /* Direct Addressing. */
12095 else if (disp && !base && !index)
12100 /* Find the length of the displacement constant. */
12103 if (GET_CODE (disp) == CONST_INT
12104 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
12110 /* ebp always wants a displacement. */
12111 else if (base == hard_frame_pointer_rtx)
12114 /* An index requires the two-byte modrm form.... */
12116 /* ...like esp, which always wants an index. */
12117 || base == stack_pointer_rtx
12118 || base == arg_pointer_rtx
12119 || base == frame_pointer_rtx)
12126 /* Compute default value for "length_immediate" attribute. When SHORTFORM
12127 is set, expect that insn have 8bit immediate alternative. */
12129 ix86_attr_length_immediate_default (rtx insn, int shortform)
12133 extract_insn_cached (insn);
12134 for (i = recog_data.n_operands - 1; i >= 0; --i)
12135 if (CONSTANT_P (recog_data.operand[i]))
12140 && GET_CODE (recog_data.operand[i]) == CONST_INT
12141 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
12145 switch (get_attr_mode (insn))
12156 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
12161 fatal_insn ("unknown insn mode", insn);
12167 /* Compute default value for "length_address" attribute. */
12169 ix86_attr_length_address_default (rtx insn)
12173 if (get_attr_type (insn) == TYPE_LEA)
12175 rtx set = PATTERN (insn);
12176 if (GET_CODE (set) == SET)
12178 else if (GET_CODE (set) == PARALLEL
12179 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12180 set = XVECEXP (set, 0, 0);
12183 #ifdef ENABLE_CHECKING
12189 return memory_address_length (SET_SRC (set));
12192 extract_insn_cached (insn);
12193 for (i = recog_data.n_operands - 1; i >= 0; --i)
12194 if (GET_CODE (recog_data.operand[i]) == MEM)
12196 return memory_address_length (XEXP (recog_data.operand[i], 0));
12202 /* Return the maximum number of instructions a cpu can issue. */
12205 ix86_issue_rate (void)
12209 case PROCESSOR_PENTIUM:
12213 case PROCESSOR_PENTIUMPRO:
12214 case PROCESSOR_PENTIUM4:
12215 case PROCESSOR_ATHLON:
12217 case PROCESSOR_NOCONA:
12225 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12226 by DEP_INSN and nothing set by DEP_INSN. */
12229 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12233 /* Simplify the test for uninteresting insns. */
12234 if (insn_type != TYPE_SETCC
12235 && insn_type != TYPE_ICMOV
12236 && insn_type != TYPE_FCMOV
12237 && insn_type != TYPE_IBR)
12240 if ((set = single_set (dep_insn)) != 0)
12242 set = SET_DEST (set);
12245 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12246 && XVECLEN (PATTERN (dep_insn), 0) == 2
12247 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12248 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12250 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12251 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12256 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12259 /* This test is true if the dependent insn reads the flags but
12260 not any other potentially set register. */
12261 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12264 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12270 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12271 address with operands set by DEP_INSN. */
12274 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12278 if (insn_type == TYPE_LEA
12281 addr = PATTERN (insn);
12282 if (GET_CODE (addr) == SET)
12284 else if (GET_CODE (addr) == PARALLEL
12285 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12286 addr = XVECEXP (addr, 0, 0);
12289 addr = SET_SRC (addr);
12294 extract_insn_cached (insn);
12295 for (i = recog_data.n_operands - 1; i >= 0; --i)
12296 if (GET_CODE (recog_data.operand[i]) == MEM)
12298 addr = XEXP (recog_data.operand[i], 0);
12305 return modified_in_p (addr, dep_insn);
12309 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12311 enum attr_type insn_type, dep_insn_type;
12312 enum attr_memory memory;
12314 int dep_insn_code_number;
12316 /* Anti and output dependencies have zero cost on all CPUs. */
12317 if (REG_NOTE_KIND (link) != 0)
12320 dep_insn_code_number = recog_memoized (dep_insn);
12322 /* If we can't recognize the insns, we can't really do anything. */
12323 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12326 insn_type = get_attr_type (insn);
12327 dep_insn_type = get_attr_type (dep_insn);
12331 case PROCESSOR_PENTIUM:
12332 /* Address Generation Interlock adds a cycle of latency. */
12333 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12336 /* ??? Compares pair with jump/setcc. */
12337 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12340 /* Floating point stores require value to be ready one cycle earlier. */
12341 if (insn_type == TYPE_FMOV
12342 && get_attr_memory (insn) == MEMORY_STORE
12343 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12347 case PROCESSOR_PENTIUMPRO:
12348 memory = get_attr_memory (insn);
12350 /* INT->FP conversion is expensive. */
12351 if (get_attr_fp_int_src (dep_insn))
12354 /* There is one cycle extra latency between an FP op and a store. */
12355 if (insn_type == TYPE_FMOV
12356 && (set = single_set (dep_insn)) != NULL_RTX
12357 && (set2 = single_set (insn)) != NULL_RTX
12358 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12359 && GET_CODE (SET_DEST (set2)) == MEM)
12362 /* Show ability of reorder buffer to hide latency of load by executing
12363 in parallel with previous instruction in case
12364 previous instruction is not needed to compute the address. */
12365 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12366 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12368 /* Claim moves to take one cycle, as core can issue one load
12369 at time and the next load can start cycle later. */
12370 if (dep_insn_type == TYPE_IMOV
12371 || dep_insn_type == TYPE_FMOV)
12379 memory = get_attr_memory (insn);
12381 /* The esp dependency is resolved before the instruction is really
12383 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12384 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12387 /* INT->FP conversion is expensive. */
12388 if (get_attr_fp_int_src (dep_insn))
12391 /* Show ability of reorder buffer to hide latency of load by executing
12392 in parallel with previous instruction in case
12393 previous instruction is not needed to compute the address. */
12394 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12395 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12397 /* Claim moves to take one cycle, as core can issue one load
12398 at time and the next load can start cycle later. */
12399 if (dep_insn_type == TYPE_IMOV
12400 || dep_insn_type == TYPE_FMOV)
12409 case PROCESSOR_ATHLON:
12411 memory = get_attr_memory (insn);
12413 /* Show ability of reorder buffer to hide latency of load by executing
12414 in parallel with previous instruction in case
12415 previous instruction is not needed to compute the address. */
12416 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12417 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12419 enum attr_unit unit = get_attr_unit (insn);
12422 /* Because of the difference between the length of integer and
12423 floating unit pipeline preparation stages, the memory operands
12424 for floating point are cheaper.
12426 ??? For Athlon it the difference is most probably 2. */
12427 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12430 loadcost = TARGET_ATHLON ? 2 : 0;
12432 if (cost >= loadcost)
12445 /* How many alternative schedules to try. This should be as wide as the
12446 scheduling freedom in the DFA, but no wider. Making this value too
12447 large results extra work for the scheduler. */
12450 ia32_multipass_dfa_lookahead (void)
12452 if (ix86_tune == PROCESSOR_PENTIUM)
12455 if (ix86_tune == PROCESSOR_PENTIUMPRO
12456 || ix86_tune == PROCESSOR_K6)
12464 /* Compute the alignment given to a constant that is being placed in memory.
12465 EXP is the constant and ALIGN is the alignment that the object would
12467 The value of this function is used instead of that alignment to align
12471 ix86_constant_alignment (tree exp, int align)
12473 if (TREE_CODE (exp) == REAL_CST)
12475 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12477 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12480 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12481 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12482 return BITS_PER_WORD;
12487 /* Compute the alignment for a static variable.
12488 TYPE is the data type, and ALIGN is the alignment that
12489 the object would ordinarily have. The value of this function is used
12490 instead of that alignment to align the object. */
12493 ix86_data_alignment (tree type, int align)
12495 if (AGGREGATE_TYPE_P (type)
12496 && TYPE_SIZE (type)
12497 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12498 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12499 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12502 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12503 to 16byte boundary. */
12506 if (AGGREGATE_TYPE_P (type)
12507 && TYPE_SIZE (type)
12508 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12509 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12510 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12514 if (TREE_CODE (type) == ARRAY_TYPE)
12516 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12518 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12521 else if (TREE_CODE (type) == COMPLEX_TYPE)
12524 if (TYPE_MODE (type) == DCmode && align < 64)
12526 if (TYPE_MODE (type) == XCmode && align < 128)
12529 else if ((TREE_CODE (type) == RECORD_TYPE
12530 || TREE_CODE (type) == UNION_TYPE
12531 || TREE_CODE (type) == QUAL_UNION_TYPE)
12532 && TYPE_FIELDS (type))
12534 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12536 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12539 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12540 || TREE_CODE (type) == INTEGER_TYPE)
12542 if (TYPE_MODE (type) == DFmode && align < 64)
12544 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12551 /* Compute the alignment for a local variable.
12552 TYPE is the data type, and ALIGN is the alignment that
12553 the object would ordinarily have. The value of this macro is used
12554 instead of that alignment to align the object. */
12557 ix86_local_alignment (tree type, int align)
12559 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12560 to 16byte boundary. */
12563 if (AGGREGATE_TYPE_P (type)
12564 && TYPE_SIZE (type)
12565 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12566 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12567 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12570 if (TREE_CODE (type) == ARRAY_TYPE)
12572 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12574 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12577 else if (TREE_CODE (type) == COMPLEX_TYPE)
12579 if (TYPE_MODE (type) == DCmode && align < 64)
12581 if (TYPE_MODE (type) == XCmode && align < 128)
12584 else if ((TREE_CODE (type) == RECORD_TYPE
12585 || TREE_CODE (type) == UNION_TYPE
12586 || TREE_CODE (type) == QUAL_UNION_TYPE)
12587 && TYPE_FIELDS (type))
12589 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12591 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12594 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12595 || TREE_CODE (type) == INTEGER_TYPE)
12598 if (TYPE_MODE (type) == DFmode && align < 64)
12600 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12606 /* Emit RTL insns to initialize the variable parts of a trampoline.
12607 FNADDR is an RTX for the address of the function's pure code.
12608 CXT is an RTX for the static chain value for the function. */
12610 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12614 /* Compute offset from the end of the jmp to the target function. */
12615 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12616 plus_constant (tramp, 10),
12617 NULL_RTX, 1, OPTAB_DIRECT);
12618 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12619 gen_int_mode (0xb9, QImode));
12620 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12621 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12622 gen_int_mode (0xe9, QImode));
12623 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12628 /* Try to load address using shorter movl instead of movabs.
12629 We may want to support movq for kernel mode, but kernel does not use
12630 trampolines at the moment. */
12631 if (x86_64_zero_extended_value (fnaddr))
12633 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12634 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12635 gen_int_mode (0xbb41, HImode));
12636 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12637 gen_lowpart (SImode, fnaddr));
12642 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12643 gen_int_mode (0xbb49, HImode));
12644 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12648 /* Load static chain using movabs to r10. */
12649 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12650 gen_int_mode (0xba49, HImode));
12651 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12654 /* Jump to the r11 */
12655 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12656 gen_int_mode (0xff49, HImode));
12657 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12658 gen_int_mode (0xe3, QImode));
12660 if (offset > TRAMPOLINE_SIZE)
12664 #ifdef ENABLE_EXECUTE_STACK
12665 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
12666 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12670 #define def_builtin(MASK, NAME, TYPE, CODE) \
12672 if ((MASK) & target_flags \
12673 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12674 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12675 NULL, NULL_TREE); \
12678 struct builtin_description
12680 const unsigned int mask;
12681 const enum insn_code icode;
12682 const char *const name;
12683 const enum ix86_builtins code;
12684 const enum rtx_code comparison;
12685 const unsigned int flag;
12688 static const struct builtin_description bdesc_comi[] =
12690 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12691 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12692 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12693 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12694 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12695 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12696 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12697 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12698 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12699 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12700 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12701 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12702 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12703 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12704 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12705 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12706 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12707 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12708 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12709 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12710 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12711 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12712 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12713 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12716 static const struct builtin_description bdesc_2arg[] =
12719 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12720 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12721 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12722 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12723 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12724 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12725 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12726 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12728 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12729 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12730 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12731 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12732 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12733 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12734 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12735 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12736 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12737 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12738 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12739 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12740 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12741 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12742 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12743 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12744 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12745 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12746 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12747 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12749 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12750 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12751 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12752 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12754 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12755 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12756 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12757 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12759 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12760 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12761 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12762 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12763 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12766 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12767 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12768 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12769 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12770 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12771 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12772 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12773 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12775 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12776 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12777 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12778 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12779 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12780 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12781 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12782 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12784 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12785 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12786 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12788 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12789 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12790 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12791 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12793 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12794 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12796 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12797 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12798 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12799 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12800 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12801 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12803 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12804 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12805 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12806 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12808 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12809 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12810 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12811 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12812 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12813 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12816 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12817 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12818 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12820 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12821 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12822 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12824 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12825 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12826 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12827 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12828 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12829 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12831 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12832 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12833 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12834 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12835 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12836 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12838 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12839 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12840 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12841 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12843 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12844 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12847 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12848 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12849 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12850 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12851 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12852 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12853 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12854 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12856 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12857 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12858 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12859 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12860 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12861 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12862 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12863 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12864 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12865 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12866 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12867 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12868 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12869 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12870 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12871 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12872 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12873 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12874 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12875 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12877 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12878 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12879 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12880 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12882 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12883 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12884 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12885 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12887 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12888 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12889 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12892 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12893 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12894 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12895 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12896 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12897 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12898 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12899 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12901 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12902 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12903 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12904 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12905 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12906 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12907 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12908 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12910 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12911 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12913 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12914 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12915 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12916 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12918 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12919 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12921 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12922 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12923 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12924 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12925 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12926 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12928 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12929 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12930 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12931 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12933 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12934 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12935 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12936 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12937 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12938 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12939 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12940 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12942 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12943 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12944 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12946 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12947 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12949 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
12950 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
12952 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12953 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12954 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12955 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12956 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12957 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12959 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12960 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12961 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12962 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12963 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12964 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12966 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12967 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12968 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12969 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12971 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12973 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12974 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12975 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12976 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12979 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12980 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12981 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12982 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12983 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12984 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
12987 static const struct builtin_description bdesc_1arg[] =
12989 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12990 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12992 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12993 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12994 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12996 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12997 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12998 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12999 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13000 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13001 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13003 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13004 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13005 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
13006 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
13008 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13010 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13011 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13013 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13014 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13015 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13016 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13017 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13019 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13021 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13022 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13023 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13024 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13026 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13027 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13028 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13030 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
13033 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13034 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
13035 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
13039 ix86_init_builtins (void)
13042 ix86_init_mmx_sse_builtins ();
13045 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13046 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13049 ix86_init_mmx_sse_builtins (void)
13051 const struct builtin_description * d;
13054 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
13055 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
13056 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
13057 tree V2DI_type_node = build_vector_type_for_mode (intDI_type_node, V2DImode);
13058 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
13059 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
13060 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
13061 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
13062 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
13063 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
13065 tree pchar_type_node = build_pointer_type (char_type_node);
13066 tree pcchar_type_node = build_pointer_type (
13067 build_type_variant (char_type_node, 1, 0));
13068 tree pfloat_type_node = build_pointer_type (float_type_node);
13069 tree pcfloat_type_node = build_pointer_type (
13070 build_type_variant (float_type_node, 1, 0));
13071 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13072 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13073 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13076 tree int_ftype_v4sf_v4sf
13077 = build_function_type_list (integer_type_node,
13078 V4SF_type_node, V4SF_type_node, NULL_TREE);
13079 tree v4si_ftype_v4sf_v4sf
13080 = build_function_type_list (V4SI_type_node,
13081 V4SF_type_node, V4SF_type_node, NULL_TREE);
13082 /* MMX/SSE/integer conversions. */
13083 tree int_ftype_v4sf
13084 = build_function_type_list (integer_type_node,
13085 V4SF_type_node, NULL_TREE);
13086 tree int64_ftype_v4sf
13087 = build_function_type_list (long_long_integer_type_node,
13088 V4SF_type_node, NULL_TREE);
13089 tree int_ftype_v8qi
13090 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13091 tree v4sf_ftype_v4sf_int
13092 = build_function_type_list (V4SF_type_node,
13093 V4SF_type_node, integer_type_node, NULL_TREE);
13094 tree v4sf_ftype_v4sf_int64
13095 = build_function_type_list (V4SF_type_node,
13096 V4SF_type_node, long_long_integer_type_node,
13098 tree v4sf_ftype_v4sf_v2si
13099 = build_function_type_list (V4SF_type_node,
13100 V4SF_type_node, V2SI_type_node, NULL_TREE);
13101 tree int_ftype_v4hi_int
13102 = build_function_type_list (integer_type_node,
13103 V4HI_type_node, integer_type_node, NULL_TREE);
13104 tree v4hi_ftype_v4hi_int_int
13105 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13106 integer_type_node, integer_type_node,
13108 /* Miscellaneous. */
13109 tree v8qi_ftype_v4hi_v4hi
13110 = build_function_type_list (V8QI_type_node,
13111 V4HI_type_node, V4HI_type_node, NULL_TREE);
13112 tree v4hi_ftype_v2si_v2si
13113 = build_function_type_list (V4HI_type_node,
13114 V2SI_type_node, V2SI_type_node, NULL_TREE);
13115 tree v4sf_ftype_v4sf_v4sf_int
13116 = build_function_type_list (V4SF_type_node,
13117 V4SF_type_node, V4SF_type_node,
13118 integer_type_node, NULL_TREE);
13119 tree v2si_ftype_v4hi_v4hi
13120 = build_function_type_list (V2SI_type_node,
13121 V4HI_type_node, V4HI_type_node, NULL_TREE);
13122 tree v4hi_ftype_v4hi_int
13123 = build_function_type_list (V4HI_type_node,
13124 V4HI_type_node, integer_type_node, NULL_TREE);
13125 tree v4hi_ftype_v4hi_di
13126 = build_function_type_list (V4HI_type_node,
13127 V4HI_type_node, long_long_unsigned_type_node,
13129 tree v2si_ftype_v2si_di
13130 = build_function_type_list (V2SI_type_node,
13131 V2SI_type_node, long_long_unsigned_type_node,
13133 tree void_ftype_void
13134 = build_function_type (void_type_node, void_list_node);
13135 tree void_ftype_unsigned
13136 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13137 tree void_ftype_unsigned_unsigned
13138 = build_function_type_list (void_type_node, unsigned_type_node,
13139 unsigned_type_node, NULL_TREE);
13140 tree void_ftype_pcvoid_unsigned_unsigned
13141 = build_function_type_list (void_type_node, const_ptr_type_node,
13142 unsigned_type_node, unsigned_type_node,
13144 tree unsigned_ftype_void
13145 = build_function_type (unsigned_type_node, void_list_node);
13147 = build_function_type (long_long_unsigned_type_node, void_list_node);
13148 tree v4sf_ftype_void
13149 = build_function_type (V4SF_type_node, void_list_node);
13150 tree v2si_ftype_v4sf
13151 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13152 /* Loads/stores. */
13153 tree void_ftype_v8qi_v8qi_pchar
13154 = build_function_type_list (void_type_node,
13155 V8QI_type_node, V8QI_type_node,
13156 pchar_type_node, NULL_TREE);
13157 tree v4sf_ftype_pcfloat
13158 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13159 /* @@@ the type is bogus */
13160 tree v4sf_ftype_v4sf_pv2si
13161 = build_function_type_list (V4SF_type_node,
13162 V4SF_type_node, pv2si_type_node, NULL_TREE);
13163 tree void_ftype_pv2si_v4sf
13164 = build_function_type_list (void_type_node,
13165 pv2si_type_node, V4SF_type_node, NULL_TREE);
13166 tree void_ftype_pfloat_v4sf
13167 = build_function_type_list (void_type_node,
13168 pfloat_type_node, V4SF_type_node, NULL_TREE);
13169 tree void_ftype_pdi_di
13170 = build_function_type_list (void_type_node,
13171 pdi_type_node, long_long_unsigned_type_node,
13173 tree void_ftype_pv2di_v2di
13174 = build_function_type_list (void_type_node,
13175 pv2di_type_node, V2DI_type_node, NULL_TREE);
13176 /* Normal vector unops. */
13177 tree v4sf_ftype_v4sf
13178 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13180 /* Normal vector binops. */
13181 tree v4sf_ftype_v4sf_v4sf
13182 = build_function_type_list (V4SF_type_node,
13183 V4SF_type_node, V4SF_type_node, NULL_TREE);
13184 tree v8qi_ftype_v8qi_v8qi
13185 = build_function_type_list (V8QI_type_node,
13186 V8QI_type_node, V8QI_type_node, NULL_TREE);
13187 tree v4hi_ftype_v4hi_v4hi
13188 = build_function_type_list (V4HI_type_node,
13189 V4HI_type_node, V4HI_type_node, NULL_TREE);
13190 tree v2si_ftype_v2si_v2si
13191 = build_function_type_list (V2SI_type_node,
13192 V2SI_type_node, V2SI_type_node, NULL_TREE);
13193 tree di_ftype_di_di
13194 = build_function_type_list (long_long_unsigned_type_node,
13195 long_long_unsigned_type_node,
13196 long_long_unsigned_type_node, NULL_TREE);
13198 tree v2si_ftype_v2sf
13199 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13200 tree v2sf_ftype_v2si
13201 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13202 tree v2si_ftype_v2si
13203 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13204 tree v2sf_ftype_v2sf
13205 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13206 tree v2sf_ftype_v2sf_v2sf
13207 = build_function_type_list (V2SF_type_node,
13208 V2SF_type_node, V2SF_type_node, NULL_TREE);
13209 tree v2si_ftype_v2sf_v2sf
13210 = build_function_type_list (V2SI_type_node,
13211 V2SF_type_node, V2SF_type_node, NULL_TREE);
13212 tree pint_type_node = build_pointer_type (integer_type_node);
13213 tree pcint_type_node = build_pointer_type (
13214 build_type_variant (integer_type_node, 1, 0));
13215 tree pdouble_type_node = build_pointer_type (double_type_node);
13216 tree pcdouble_type_node = build_pointer_type (
13217 build_type_variant (double_type_node, 1, 0));
13218 tree int_ftype_v2df_v2df
13219 = build_function_type_list (integer_type_node,
13220 V2DF_type_node, V2DF_type_node, NULL_TREE);
13223 = build_function_type (intTI_type_node, void_list_node);
13224 tree v2di_ftype_void
13225 = build_function_type (V2DI_type_node, void_list_node);
13226 tree ti_ftype_ti_ti
13227 = build_function_type_list (intTI_type_node,
13228 intTI_type_node, intTI_type_node, NULL_TREE);
13229 tree void_ftype_pcvoid
13230 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13232 = build_function_type_list (V2DI_type_node,
13233 long_long_unsigned_type_node, NULL_TREE);
13235 = build_function_type_list (long_long_unsigned_type_node,
13236 V2DI_type_node, NULL_TREE);
13237 tree v4sf_ftype_v4si
13238 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13239 tree v4si_ftype_v4sf
13240 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13241 tree v2df_ftype_v4si
13242 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13243 tree v4si_ftype_v2df
13244 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13245 tree v2si_ftype_v2df
13246 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13247 tree v4sf_ftype_v2df
13248 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13249 tree v2df_ftype_v2si
13250 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13251 tree v2df_ftype_v4sf
13252 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13253 tree int_ftype_v2df
13254 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13255 tree int64_ftype_v2df
13256 = build_function_type_list (long_long_integer_type_node,
13257 V2DF_type_node, NULL_TREE);
13258 tree v2df_ftype_v2df_int
13259 = build_function_type_list (V2DF_type_node,
13260 V2DF_type_node, integer_type_node, NULL_TREE);
13261 tree v2df_ftype_v2df_int64
13262 = build_function_type_list (V2DF_type_node,
13263 V2DF_type_node, long_long_integer_type_node,
13265 tree v4sf_ftype_v4sf_v2df
13266 = build_function_type_list (V4SF_type_node,
13267 V4SF_type_node, V2DF_type_node, NULL_TREE);
13268 tree v2df_ftype_v2df_v4sf
13269 = build_function_type_list (V2DF_type_node,
13270 V2DF_type_node, V4SF_type_node, NULL_TREE);
13271 tree v2df_ftype_v2df_v2df_int
13272 = build_function_type_list (V2DF_type_node,
13273 V2DF_type_node, V2DF_type_node,
13276 tree v2df_ftype_v2df_pv2si
13277 = build_function_type_list (V2DF_type_node,
13278 V2DF_type_node, pv2si_type_node, NULL_TREE);
13279 tree void_ftype_pv2si_v2df
13280 = build_function_type_list (void_type_node,
13281 pv2si_type_node, V2DF_type_node, NULL_TREE);
13282 tree void_ftype_pdouble_v2df
13283 = build_function_type_list (void_type_node,
13284 pdouble_type_node, V2DF_type_node, NULL_TREE);
13285 tree void_ftype_pint_int
13286 = build_function_type_list (void_type_node,
13287 pint_type_node, integer_type_node, NULL_TREE);
13288 tree void_ftype_v16qi_v16qi_pchar
13289 = build_function_type_list (void_type_node,
13290 V16QI_type_node, V16QI_type_node,
13291 pchar_type_node, NULL_TREE);
13292 tree v2df_ftype_pcdouble
13293 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13294 tree v2df_ftype_v2df_v2df
13295 = build_function_type_list (V2DF_type_node,
13296 V2DF_type_node, V2DF_type_node, NULL_TREE);
13297 tree v16qi_ftype_v16qi_v16qi
13298 = build_function_type_list (V16QI_type_node,
13299 V16QI_type_node, V16QI_type_node, NULL_TREE);
13300 tree v8hi_ftype_v8hi_v8hi
13301 = build_function_type_list (V8HI_type_node,
13302 V8HI_type_node, V8HI_type_node, NULL_TREE);
13303 tree v4si_ftype_v4si_v4si
13304 = build_function_type_list (V4SI_type_node,
13305 V4SI_type_node, V4SI_type_node, NULL_TREE);
13306 tree v2di_ftype_v2di_v2di
13307 = build_function_type_list (V2DI_type_node,
13308 V2DI_type_node, V2DI_type_node, NULL_TREE);
13309 tree v2di_ftype_v2df_v2df
13310 = build_function_type_list (V2DI_type_node,
13311 V2DF_type_node, V2DF_type_node, NULL_TREE);
13312 tree v2df_ftype_v2df
13313 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13314 tree v2df_ftype_double
13315 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13316 tree v2df_ftype_double_double
13317 = build_function_type_list (V2DF_type_node,
13318 double_type_node, double_type_node, NULL_TREE);
13319 tree int_ftype_v8hi_int
13320 = build_function_type_list (integer_type_node,
13321 V8HI_type_node, integer_type_node, NULL_TREE);
13322 tree v8hi_ftype_v8hi_int_int
13323 = build_function_type_list (V8HI_type_node,
13324 V8HI_type_node, integer_type_node,
13325 integer_type_node, NULL_TREE);
13326 tree v2di_ftype_v2di_int
13327 = build_function_type_list (V2DI_type_node,
13328 V2DI_type_node, integer_type_node, NULL_TREE);
13329 tree v4si_ftype_v4si_int
13330 = build_function_type_list (V4SI_type_node,
13331 V4SI_type_node, integer_type_node, NULL_TREE);
13332 tree v8hi_ftype_v8hi_int
13333 = build_function_type_list (V8HI_type_node,
13334 V8HI_type_node, integer_type_node, NULL_TREE);
13335 tree v8hi_ftype_v8hi_v2di
13336 = build_function_type_list (V8HI_type_node,
13337 V8HI_type_node, V2DI_type_node, NULL_TREE);
13338 tree v4si_ftype_v4si_v2di
13339 = build_function_type_list (V4SI_type_node,
13340 V4SI_type_node, V2DI_type_node, NULL_TREE);
13341 tree v4si_ftype_v8hi_v8hi
13342 = build_function_type_list (V4SI_type_node,
13343 V8HI_type_node, V8HI_type_node, NULL_TREE);
13344 tree di_ftype_v8qi_v8qi
13345 = build_function_type_list (long_long_unsigned_type_node,
13346 V8QI_type_node, V8QI_type_node, NULL_TREE);
13347 tree di_ftype_v2si_v2si
13348 = build_function_type_list (long_long_unsigned_type_node,
13349 V2SI_type_node, V2SI_type_node, NULL_TREE);
13350 tree v2di_ftype_v16qi_v16qi
13351 = build_function_type_list (V2DI_type_node,
13352 V16QI_type_node, V16QI_type_node, NULL_TREE);
13353 tree v2di_ftype_v4si_v4si
13354 = build_function_type_list (V2DI_type_node,
13355 V4SI_type_node, V4SI_type_node, NULL_TREE);
13356 tree int_ftype_v16qi
13357 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13358 tree v16qi_ftype_pcchar
13359 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13360 tree void_ftype_pchar_v16qi
13361 = build_function_type_list (void_type_node,
13362 pchar_type_node, V16QI_type_node, NULL_TREE);
13363 tree v4si_ftype_pcint
13364 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13365 tree void_ftype_pcint_v4si
13366 = build_function_type_list (void_type_node,
13367 pcint_type_node, V4SI_type_node, NULL_TREE);
13368 tree v2di_ftype_v2di
13369 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13372 tree float128_type;
13374 /* The __float80 type. */
13375 if (TYPE_MODE (long_double_type_node) == XFmode)
13376 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13380 /* The __float80 type. */
13381 float80_type = make_node (REAL_TYPE);
13382 TYPE_PRECISION (float80_type) = 96;
13383 layout_type (float80_type);
13384 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13387 float128_type = make_node (REAL_TYPE);
13388 TYPE_PRECISION (float128_type) = 128;
13389 layout_type (float128_type);
13390 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13392 /* Add all builtins that are more or less simple operations on two
13394 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13396 /* Use one of the operands; the target can have a different mode for
13397 mask-generating compares. */
13398 enum machine_mode mode;
13403 mode = insn_data[d->icode].operand[1].mode;
13408 type = v16qi_ftype_v16qi_v16qi;
13411 type = v8hi_ftype_v8hi_v8hi;
13414 type = v4si_ftype_v4si_v4si;
13417 type = v2di_ftype_v2di_v2di;
13420 type = v2df_ftype_v2df_v2df;
13423 type = ti_ftype_ti_ti;
13426 type = v4sf_ftype_v4sf_v4sf;
13429 type = v8qi_ftype_v8qi_v8qi;
13432 type = v4hi_ftype_v4hi_v4hi;
13435 type = v2si_ftype_v2si_v2si;
13438 type = di_ftype_di_di;
13445 /* Override for comparisons. */
13446 if (d->icode == CODE_FOR_maskcmpv4sf3
13447 || d->icode == CODE_FOR_maskncmpv4sf3
13448 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13449 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13450 type = v4si_ftype_v4sf_v4sf;
13452 if (d->icode == CODE_FOR_maskcmpv2df3
13453 || d->icode == CODE_FOR_maskncmpv2df3
13454 || d->icode == CODE_FOR_vmmaskcmpv2df3
13455 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13456 type = v2di_ftype_v2df_v2df;
13458 def_builtin (d->mask, d->name, type, d->code);
13461 /* Add the remaining MMX insns with somewhat more complicated types. */
13462 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13463 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13464 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13465 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13466 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13468 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13469 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13470 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13472 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13473 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13475 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13476 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13478 /* comi/ucomi insns. */
13479 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13480 if (d->mask == MASK_SSE2)
13481 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13483 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13485 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13486 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13487 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13489 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13490 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13491 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13492 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13493 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13494 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13495 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13496 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13497 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13498 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13499 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13501 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13502 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13504 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13506 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13507 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13508 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13509 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13510 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13511 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13513 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13514 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13515 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13516 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13518 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13519 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13520 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13521 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13523 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13525 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13527 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13528 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13529 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13530 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13531 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13532 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13534 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13536 /* Original 3DNow! */
13537 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13538 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13539 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13540 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13541 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13542 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13543 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13544 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13545 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13546 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13547 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13548 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13549 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13550 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13551 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13552 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13553 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13554 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13555 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13556 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13558 /* 3DNow! extension as used in the Athlon CPU. */
13559 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13560 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13561 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13562 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13563 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13564 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13566 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13569 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13570 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13572 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13573 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13574 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13576 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13577 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13578 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13579 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13580 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13581 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13583 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13584 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13585 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13586 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13588 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13589 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13590 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13591 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13592 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13594 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13595 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13596 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13597 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13599 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13600 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13602 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13604 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13605 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13607 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13608 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13609 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13610 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13611 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13613 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13615 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13616 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13617 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13618 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13620 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13621 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13622 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13624 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13625 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13626 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13627 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13629 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13630 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13631 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13632 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13633 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13634 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13635 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13637 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13638 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13639 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13641 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13642 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13643 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13644 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13645 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13646 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13647 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13649 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13651 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
13652 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
13654 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13655 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13656 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13658 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13659 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13660 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13662 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13663 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13665 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13666 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13667 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13668 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13670 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13671 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13672 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13673 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13675 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13676 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13678 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13680 /* Prescott New Instructions. */
13681 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
13682 void_ftype_pcvoid_unsigned_unsigned,
13683 IX86_BUILTIN_MONITOR);
13684 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
13685 void_ftype_unsigned_unsigned,
13686 IX86_BUILTIN_MWAIT);
13687 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
13689 IX86_BUILTIN_MOVSHDUP);
13690 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
13692 IX86_BUILTIN_MOVSLDUP);
13693 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
13694 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13695 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
13696 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13697 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
13698 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13701 /* Errors in the source file can cause expand_expr to return const0_rtx
13702 where we expect a vector. To avoid crashing, use one of the vector
13703 clear instructions. */
13705 safe_vector_operand (rtx x, enum machine_mode mode)
13707 if (x != const0_rtx)
13709 x = gen_reg_rtx (mode);
13711 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13712 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13713 : gen_rtx_SUBREG (DImode, x, 0)));
13715 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13716 : gen_rtx_SUBREG (V4SFmode, x, 0),
13717 CONST0_RTX (V4SFmode)));
13721 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13724 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13727 tree arg0 = TREE_VALUE (arglist);
13728 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13729 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13730 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13731 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13732 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13733 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13735 if (VECTOR_MODE_P (mode0))
13736 op0 = safe_vector_operand (op0, mode0);
13737 if (VECTOR_MODE_P (mode1))
13738 op1 = safe_vector_operand (op1, mode1);
13741 || GET_MODE (target) != tmode
13742 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13743 target = gen_reg_rtx (tmode);
13745 if (GET_MODE (op1) == SImode && mode1 == TImode)
13747 rtx x = gen_reg_rtx (V4SImode);
13748 emit_insn (gen_sse2_loadd (x, op1));
13749 op1 = gen_lowpart (TImode, x);
13752 /* In case the insn wants input operands in modes different from
13753 the result, abort. */
13754 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13755 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
13758 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13759 op0 = copy_to_mode_reg (mode0, op0);
13760 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13761 op1 = copy_to_mode_reg (mode1, op1);
13763 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13764 yet one of the two must not be a memory. This is normally enforced
13765 by expanders, but we didn't bother to create one here. */
13766 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13767 op0 = copy_to_mode_reg (mode0, op0);
13769 pat = GEN_FCN (icode) (target, op0, op1);
13776 /* Subroutine of ix86_expand_builtin to take care of stores. */
13779 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13782 tree arg0 = TREE_VALUE (arglist);
13783 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13784 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13785 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13786 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13787 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13789 if (VECTOR_MODE_P (mode1))
13790 op1 = safe_vector_operand (op1, mode1);
13792 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13793 op1 = copy_to_mode_reg (mode1, op1);
13795 pat = GEN_FCN (icode) (op0, op1);
13801 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13804 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13805 rtx target, int do_load)
13808 tree arg0 = TREE_VALUE (arglist);
13809 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13810 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13811 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13814 || GET_MODE (target) != tmode
13815 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13816 target = gen_reg_rtx (tmode);
13818 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13821 if (VECTOR_MODE_P (mode0))
13822 op0 = safe_vector_operand (op0, mode0);
13824 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13825 op0 = copy_to_mode_reg (mode0, op0);
13828 pat = GEN_FCN (icode) (target, op0);
13835 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13836 sqrtss, rsqrtss, rcpss. */
13839 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13842 tree arg0 = TREE_VALUE (arglist);
13843 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13844 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13845 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13848 || GET_MODE (target) != tmode
13849 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13850 target = gen_reg_rtx (tmode);
13852 if (VECTOR_MODE_P (mode0))
13853 op0 = safe_vector_operand (op0, mode0);
13855 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13856 op0 = copy_to_mode_reg (mode0, op0);
13859 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13860 op1 = copy_to_mode_reg (mode0, op1);
13862 pat = GEN_FCN (icode) (target, op0, op1);
13869 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13872 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13876 tree arg0 = TREE_VALUE (arglist);
13877 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13878 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13879 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13881 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13882 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13883 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13884 enum rtx_code comparison = d->comparison;
13886 if (VECTOR_MODE_P (mode0))
13887 op0 = safe_vector_operand (op0, mode0);
13888 if (VECTOR_MODE_P (mode1))
13889 op1 = safe_vector_operand (op1, mode1);
13891 /* Swap operands if we have a comparison that isn't available in
13895 rtx tmp = gen_reg_rtx (mode1);
13896 emit_move_insn (tmp, op1);
13902 || GET_MODE (target) != tmode
13903 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13904 target = gen_reg_rtx (tmode);
13906 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13907 op0 = copy_to_mode_reg (mode0, op0);
13908 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13909 op1 = copy_to_mode_reg (mode1, op1);
13911 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13912 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13919 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13922 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13926 tree arg0 = TREE_VALUE (arglist);
13927 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13928 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13929 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13931 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13932 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13933 enum rtx_code comparison = d->comparison;
13935 if (VECTOR_MODE_P (mode0))
13936 op0 = safe_vector_operand (op0, mode0);
13937 if (VECTOR_MODE_P (mode1))
13938 op1 = safe_vector_operand (op1, mode1);
13940 /* Swap operands if we have a comparison that isn't available in
13949 target = gen_reg_rtx (SImode);
13950 emit_move_insn (target, const0_rtx);
13951 target = gen_rtx_SUBREG (QImode, target, 0);
13953 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13954 op0 = copy_to_mode_reg (mode0, op0);
13955 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13956 op1 = copy_to_mode_reg (mode1, op1);
13958 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13959 pat = GEN_FCN (d->icode) (op0, op1);
13963 emit_insn (gen_rtx_SET (VOIDmode,
13964 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13965 gen_rtx_fmt_ee (comparison, QImode,
13969 return SUBREG_REG (target);
13972 /* Expand an expression EXP that calls a built-in function,
13973 with result going to TARGET if that's convenient
13974 (and in mode MODE if that's convenient).
13975 SUBTARGET may be used as the target for computing one of EXP's operands.
13976 IGNORE is nonzero if the value is to be ignored. */
13979 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13980 enum machine_mode mode ATTRIBUTE_UNUSED,
13981 int ignore ATTRIBUTE_UNUSED)
13983 const struct builtin_description *d;
13985 enum insn_code icode;
13986 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13987 tree arglist = TREE_OPERAND (exp, 1);
13988 tree arg0, arg1, arg2;
13989 rtx op0, op1, op2, pat;
13990 enum machine_mode tmode, mode0, mode1, mode2;
13991 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13995 case IX86_BUILTIN_EMMS:
13996 emit_insn (gen_emms ());
13999 case IX86_BUILTIN_SFENCE:
14000 emit_insn (gen_sfence ());
14003 case IX86_BUILTIN_PEXTRW:
14004 case IX86_BUILTIN_PEXTRW128:
14005 icode = (fcode == IX86_BUILTIN_PEXTRW
14006 ? CODE_FOR_mmx_pextrw
14007 : CODE_FOR_sse2_pextrw);
14008 arg0 = TREE_VALUE (arglist);
14009 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14010 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14011 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14012 tmode = insn_data[icode].operand[0].mode;
14013 mode0 = insn_data[icode].operand[1].mode;
14014 mode1 = insn_data[icode].operand[2].mode;
14016 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14017 op0 = copy_to_mode_reg (mode0, op0);
14018 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14020 error ("selector must be an integer constant in the range 0..%i",
14021 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
14022 return gen_reg_rtx (tmode);
14025 || GET_MODE (target) != tmode
14026 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14027 target = gen_reg_rtx (tmode);
14028 pat = GEN_FCN (icode) (target, op0, op1);
14034 case IX86_BUILTIN_PINSRW:
14035 case IX86_BUILTIN_PINSRW128:
14036 icode = (fcode == IX86_BUILTIN_PINSRW
14037 ? CODE_FOR_mmx_pinsrw
14038 : CODE_FOR_sse2_pinsrw);
14039 arg0 = TREE_VALUE (arglist);
14040 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14041 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14042 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14043 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14044 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14045 tmode = insn_data[icode].operand[0].mode;
14046 mode0 = insn_data[icode].operand[1].mode;
14047 mode1 = insn_data[icode].operand[2].mode;
14048 mode2 = insn_data[icode].operand[3].mode;
14050 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14051 op0 = copy_to_mode_reg (mode0, op0);
14052 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14053 op1 = copy_to_mode_reg (mode1, op1);
14054 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14056 error ("selector must be an integer constant in the range 0..%i",
14057 fcode == IX86_BUILTIN_PINSRW ? 15:255);
14061 || GET_MODE (target) != tmode
14062 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14063 target = gen_reg_rtx (tmode);
14064 pat = GEN_FCN (icode) (target, op0, op1, op2);
14070 case IX86_BUILTIN_MASKMOVQ:
14071 case IX86_BUILTIN_MASKMOVDQU:
14072 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14073 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14074 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14075 : CODE_FOR_sse2_maskmovdqu));
14076 /* Note the arg order is different from the operand order. */
14077 arg1 = TREE_VALUE (arglist);
14078 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14079 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14080 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14081 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14082 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14083 mode0 = insn_data[icode].operand[0].mode;
14084 mode1 = insn_data[icode].operand[1].mode;
14085 mode2 = insn_data[icode].operand[2].mode;
14087 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14088 op0 = copy_to_mode_reg (mode0, op0);
14089 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14090 op1 = copy_to_mode_reg (mode1, op1);
14091 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14092 op2 = copy_to_mode_reg (mode2, op2);
14093 pat = GEN_FCN (icode) (op0, op1, op2);
14099 case IX86_BUILTIN_SQRTSS:
14100 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14101 case IX86_BUILTIN_RSQRTSS:
14102 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14103 case IX86_BUILTIN_RCPSS:
14104 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14106 case IX86_BUILTIN_LOADAPS:
14107 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14109 case IX86_BUILTIN_LOADUPS:
14110 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14112 case IX86_BUILTIN_STOREAPS:
14113 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14115 case IX86_BUILTIN_STOREUPS:
14116 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14118 case IX86_BUILTIN_LOADSS:
14119 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14121 case IX86_BUILTIN_STORESS:
14122 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14124 case IX86_BUILTIN_LOADHPS:
14125 case IX86_BUILTIN_LOADLPS:
14126 case IX86_BUILTIN_LOADHPD:
14127 case IX86_BUILTIN_LOADLPD:
14128 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14129 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14130 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14131 : CODE_FOR_sse2_movsd);
14132 arg0 = TREE_VALUE (arglist);
14133 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14134 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14135 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14136 tmode = insn_data[icode].operand[0].mode;
14137 mode0 = insn_data[icode].operand[1].mode;
14138 mode1 = insn_data[icode].operand[2].mode;
14140 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14141 op0 = copy_to_mode_reg (mode0, op0);
14142 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14144 || GET_MODE (target) != tmode
14145 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14146 target = gen_reg_rtx (tmode);
14147 pat = GEN_FCN (icode) (target, op0, op1);
14153 case IX86_BUILTIN_STOREHPS:
14154 case IX86_BUILTIN_STORELPS:
14155 case IX86_BUILTIN_STOREHPD:
14156 case IX86_BUILTIN_STORELPD:
14157 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14158 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14159 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14160 : CODE_FOR_sse2_movsd);
14161 arg0 = TREE_VALUE (arglist);
14162 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14163 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14164 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14165 mode0 = insn_data[icode].operand[1].mode;
14166 mode1 = insn_data[icode].operand[2].mode;
14168 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14169 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14170 op1 = copy_to_mode_reg (mode1, op1);
14172 pat = GEN_FCN (icode) (op0, op0, op1);
14178 case IX86_BUILTIN_MOVNTPS:
14179 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14180 case IX86_BUILTIN_MOVNTQ:
14181 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14183 case IX86_BUILTIN_LDMXCSR:
14184 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14185 target = assign_386_stack_local (SImode, 0);
14186 emit_move_insn (target, op0);
14187 emit_insn (gen_ldmxcsr (target));
14190 case IX86_BUILTIN_STMXCSR:
14191 target = assign_386_stack_local (SImode, 0);
14192 emit_insn (gen_stmxcsr (target));
14193 return copy_to_mode_reg (SImode, target);
14195 case IX86_BUILTIN_SHUFPS:
14196 case IX86_BUILTIN_SHUFPD:
14197 icode = (fcode == IX86_BUILTIN_SHUFPS
14198 ? CODE_FOR_sse_shufps
14199 : CODE_FOR_sse2_shufpd);
14200 arg0 = TREE_VALUE (arglist);
14201 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14202 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14203 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14204 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14205 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14206 tmode = insn_data[icode].operand[0].mode;
14207 mode0 = insn_data[icode].operand[1].mode;
14208 mode1 = insn_data[icode].operand[2].mode;
14209 mode2 = insn_data[icode].operand[3].mode;
14211 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14212 op0 = copy_to_mode_reg (mode0, op0);
14213 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14214 op1 = copy_to_mode_reg (mode1, op1);
14215 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14217 /* @@@ better error message */
14218 error ("mask must be an immediate");
14219 return gen_reg_rtx (tmode);
14222 || GET_MODE (target) != tmode
14223 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14224 target = gen_reg_rtx (tmode);
14225 pat = GEN_FCN (icode) (target, op0, op1, op2);
14231 case IX86_BUILTIN_PSHUFW:
14232 case IX86_BUILTIN_PSHUFD:
14233 case IX86_BUILTIN_PSHUFHW:
14234 case IX86_BUILTIN_PSHUFLW:
14235 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14236 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14237 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14238 : CODE_FOR_mmx_pshufw);
14239 arg0 = TREE_VALUE (arglist);
14240 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14241 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14242 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14243 tmode = insn_data[icode].operand[0].mode;
14244 mode1 = insn_data[icode].operand[1].mode;
14245 mode2 = insn_data[icode].operand[2].mode;
14247 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14248 op0 = copy_to_mode_reg (mode1, op0);
14249 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14251 /* @@@ better error message */
14252 error ("mask must be an immediate");
14256 || GET_MODE (target) != tmode
14257 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14258 target = gen_reg_rtx (tmode);
14259 pat = GEN_FCN (icode) (target, op0, op1);
14265 case IX86_BUILTIN_PSLLDQI128:
14266 case IX86_BUILTIN_PSRLDQI128:
14267 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14268 : CODE_FOR_sse2_lshrti3);
14269 arg0 = TREE_VALUE (arglist);
14270 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14271 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14272 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14273 tmode = insn_data[icode].operand[0].mode;
14274 mode1 = insn_data[icode].operand[1].mode;
14275 mode2 = insn_data[icode].operand[2].mode;
14277 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14279 op0 = copy_to_reg (op0);
14280 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14282 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14284 error ("shift must be an immediate");
14287 target = gen_reg_rtx (V2DImode);
14288 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14294 case IX86_BUILTIN_FEMMS:
14295 emit_insn (gen_femms ());
14298 case IX86_BUILTIN_PAVGUSB:
14299 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14301 case IX86_BUILTIN_PF2ID:
14302 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14304 case IX86_BUILTIN_PFACC:
14305 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14307 case IX86_BUILTIN_PFADD:
14308 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14310 case IX86_BUILTIN_PFCMPEQ:
14311 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14313 case IX86_BUILTIN_PFCMPGE:
14314 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14316 case IX86_BUILTIN_PFCMPGT:
14317 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14319 case IX86_BUILTIN_PFMAX:
14320 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14322 case IX86_BUILTIN_PFMIN:
14323 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14325 case IX86_BUILTIN_PFMUL:
14326 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14328 case IX86_BUILTIN_PFRCP:
14329 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14331 case IX86_BUILTIN_PFRCPIT1:
14332 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14334 case IX86_BUILTIN_PFRCPIT2:
14335 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14337 case IX86_BUILTIN_PFRSQIT1:
14338 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14340 case IX86_BUILTIN_PFRSQRT:
14341 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14343 case IX86_BUILTIN_PFSUB:
14344 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14346 case IX86_BUILTIN_PFSUBR:
14347 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14349 case IX86_BUILTIN_PI2FD:
14350 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14352 case IX86_BUILTIN_PMULHRW:
14353 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14355 case IX86_BUILTIN_PF2IW:
14356 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14358 case IX86_BUILTIN_PFNACC:
14359 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14361 case IX86_BUILTIN_PFPNACC:
14362 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14364 case IX86_BUILTIN_PI2FW:
14365 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14367 case IX86_BUILTIN_PSWAPDSI:
14368 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14370 case IX86_BUILTIN_PSWAPDSF:
14371 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14373 case IX86_BUILTIN_SSE_ZERO:
14374 target = gen_reg_rtx (V4SFmode);
14375 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14378 case IX86_BUILTIN_MMX_ZERO:
14379 target = gen_reg_rtx (DImode);
14380 emit_insn (gen_mmx_clrdi (target));
14383 case IX86_BUILTIN_CLRTI:
14384 target = gen_reg_rtx (V2DImode);
14385 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14389 case IX86_BUILTIN_SQRTSD:
14390 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14391 case IX86_BUILTIN_LOADAPD:
14392 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14393 case IX86_BUILTIN_LOADUPD:
14394 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14396 case IX86_BUILTIN_STOREAPD:
14397 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14398 case IX86_BUILTIN_STOREUPD:
14399 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14401 case IX86_BUILTIN_LOADSD:
14402 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14404 case IX86_BUILTIN_STORESD:
14405 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14407 case IX86_BUILTIN_SETPD1:
14408 target = assign_386_stack_local (DFmode, 0);
14409 arg0 = TREE_VALUE (arglist);
14410 emit_move_insn (adjust_address (target, DFmode, 0),
14411 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14412 op0 = gen_reg_rtx (V2DFmode);
14413 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14414 emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
14417 case IX86_BUILTIN_SETPD:
14418 target = assign_386_stack_local (V2DFmode, 0);
14419 arg0 = TREE_VALUE (arglist);
14420 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14421 emit_move_insn (adjust_address (target, DFmode, 0),
14422 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14423 emit_move_insn (adjust_address (target, DFmode, 8),
14424 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14425 op0 = gen_reg_rtx (V2DFmode);
14426 emit_insn (gen_sse2_movapd (op0, target));
14429 case IX86_BUILTIN_LOADRPD:
14430 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14431 gen_reg_rtx (V2DFmode), 1);
14432 emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
14435 case IX86_BUILTIN_LOADPD1:
14436 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14437 gen_reg_rtx (V2DFmode), 1);
14438 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14441 case IX86_BUILTIN_STOREPD1:
14442 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14443 case IX86_BUILTIN_STORERPD:
14444 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14446 case IX86_BUILTIN_CLRPD:
14447 target = gen_reg_rtx (V2DFmode);
14448 emit_insn (gen_sse_clrv2df (target));
14451 case IX86_BUILTIN_MFENCE:
14452 emit_insn (gen_sse2_mfence ());
14454 case IX86_BUILTIN_LFENCE:
14455 emit_insn (gen_sse2_lfence ());
14458 case IX86_BUILTIN_CLFLUSH:
14459 arg0 = TREE_VALUE (arglist);
14460 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14461 icode = CODE_FOR_sse2_clflush;
14462 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14463 op0 = copy_to_mode_reg (Pmode, op0);
14465 emit_insn (gen_sse2_clflush (op0));
14468 case IX86_BUILTIN_MOVNTPD:
14469 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14470 case IX86_BUILTIN_MOVNTDQ:
14471 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14472 case IX86_BUILTIN_MOVNTI:
14473 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14475 case IX86_BUILTIN_LOADDQA:
14476 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14477 case IX86_BUILTIN_LOADDQU:
14478 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14479 case IX86_BUILTIN_LOADD:
14480 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14482 case IX86_BUILTIN_STOREDQA:
14483 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14484 case IX86_BUILTIN_STOREDQU:
14485 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14486 case IX86_BUILTIN_STORED:
14487 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14489 case IX86_BUILTIN_MONITOR:
14490 arg0 = TREE_VALUE (arglist);
14491 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14492 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14493 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14494 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14495 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14497 op0 = copy_to_mode_reg (SImode, op0);
14499 op1 = copy_to_mode_reg (SImode, op1);
14501 op2 = copy_to_mode_reg (SImode, op2);
14502 emit_insn (gen_monitor (op0, op1, op2));
14505 case IX86_BUILTIN_MWAIT:
14506 arg0 = TREE_VALUE (arglist);
14507 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14508 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14509 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14511 op0 = copy_to_mode_reg (SImode, op0);
14513 op1 = copy_to_mode_reg (SImode, op1);
14514 emit_insn (gen_mwait (op0, op1));
14517 case IX86_BUILTIN_LOADDDUP:
14518 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14520 case IX86_BUILTIN_LDDQU:
14521 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14528 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14529 if (d->code == fcode)
14531 /* Compares are treated specially. */
14532 if (d->icode == CODE_FOR_maskcmpv4sf3
14533 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14534 || d->icode == CODE_FOR_maskncmpv4sf3
14535 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14536 || d->icode == CODE_FOR_maskcmpv2df3
14537 || d->icode == CODE_FOR_vmmaskcmpv2df3
14538 || d->icode == CODE_FOR_maskncmpv2df3
14539 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14540 return ix86_expand_sse_compare (d, arglist, target);
14542 return ix86_expand_binop_builtin (d->icode, arglist, target);
14545 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14546 if (d->code == fcode)
14547 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14549 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14550 if (d->code == fcode)
14551 return ix86_expand_sse_comi (d, arglist, target);
14553 /* @@@ Should really do something sensible here. */
14557 /* Store OPERAND to the memory after reload is completed. This means
14558 that we can't easily use assign_stack_local. */
14560 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14563 if (!reload_completed)
14565 if (TARGET_RED_ZONE)
14567 result = gen_rtx_MEM (mode,
14568 gen_rtx_PLUS (Pmode,
14570 GEN_INT (-RED_ZONE_SIZE)));
14571 emit_move_insn (result, operand);
14573 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14579 operand = gen_lowpart (DImode, operand);
14583 gen_rtx_SET (VOIDmode,
14584 gen_rtx_MEM (DImode,
14585 gen_rtx_PRE_DEC (DImode,
14586 stack_pointer_rtx)),
14592 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14601 split_di (&operand, 1, operands, operands + 1);
14603 gen_rtx_SET (VOIDmode,
14604 gen_rtx_MEM (SImode,
14605 gen_rtx_PRE_DEC (Pmode,
14606 stack_pointer_rtx)),
14609 gen_rtx_SET (VOIDmode,
14610 gen_rtx_MEM (SImode,
14611 gen_rtx_PRE_DEC (Pmode,
14612 stack_pointer_rtx)),
14617 /* It is better to store HImodes as SImodes. */
14618 if (!TARGET_PARTIAL_REG_STALL)
14619 operand = gen_lowpart (SImode, operand);
14623 gen_rtx_SET (VOIDmode,
14624 gen_rtx_MEM (GET_MODE (operand),
14625 gen_rtx_PRE_DEC (SImode,
14626 stack_pointer_rtx)),
14632 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14637 /* Free operand from the memory. */
14639 ix86_free_from_memory (enum machine_mode mode)
14641 if (!TARGET_RED_ZONE)
14645 if (mode == DImode || TARGET_64BIT)
14647 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14651 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14652 to pop or add instruction if registers are available. */
14653 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14654 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14659 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14660 QImode must go into class Q_REGS.
14661 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14662 movdf to do mem-to-mem moves through integer regs. */
14664 ix86_preferred_reload_class (rtx x, enum reg_class class)
14666 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14668 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14670 /* SSE can't load any constant directly yet. */
14671 if (SSE_CLASS_P (class))
14673 /* Floats can load 0 and 1. */
14674 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14676 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14677 if (MAYBE_SSE_CLASS_P (class))
14678 return (reg_class_subset_p (class, GENERAL_REGS)
14679 ? GENERAL_REGS : FLOAT_REGS);
14683 /* General regs can load everything. */
14684 if (reg_class_subset_p (class, GENERAL_REGS))
14685 return GENERAL_REGS;
14686 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14687 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14690 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14692 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14697 /* If we are copying between general and FP registers, we need a memory
14698 location. The same is true for SSE and MMX registers.
14700 The macro can't work reliably when one of the CLASSES is class containing
14701 registers from multiple units (SSE, MMX, integer). We avoid this by never
14702 combining those units in single alternative in the machine description.
14703 Ensure that this constraint holds to avoid unexpected surprises.
14705 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14706 enforce these sanity checks. */
14708 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14709 enum machine_mode mode, int strict)
14711 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14712 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14713 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14714 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14715 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14716 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14723 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14724 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14725 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14726 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14727 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14729 /* Return the cost of moving data from a register in class CLASS1 to
14730 one in class CLASS2.
14732 It is not required that the cost always equal 2 when FROM is the same as TO;
14733 on some machines it is expensive to move between registers if they are not
14734 general registers. */
14736 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14737 enum reg_class class2)
14739 /* In case we require secondary memory, compute cost of the store followed
14740 by load. In order to avoid bad register allocation choices, we need
14741 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14743 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14747 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14748 MEMORY_MOVE_COST (mode, class1, 1));
14749 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14750 MEMORY_MOVE_COST (mode, class2, 1));
14752 /* In case of copying from general_purpose_register we may emit multiple
14753 stores followed by single load causing memory size mismatch stall.
14754 Count this as arbitrarily high cost of 20. */
14755 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14758 /* In the case of FP/MMX moves, the registers actually overlap, and we
14759 have to switch modes in order to treat them differently. */
14760 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14761 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14767 /* Moves between SSE/MMX and integer unit are expensive. */
14768 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14769 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14770 return ix86_cost->mmxsse_to_integer;
14771 if (MAYBE_FLOAT_CLASS_P (class1))
14772 return ix86_cost->fp_move;
14773 if (MAYBE_SSE_CLASS_P (class1))
14774 return ix86_cost->sse_move;
14775 if (MAYBE_MMX_CLASS_P (class1))
14776 return ix86_cost->mmx_move;
14780 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14782 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14784 /* Flags and only flags can only hold CCmode values. */
14785 if (CC_REGNO_P (regno))
14786 return GET_MODE_CLASS (mode) == MODE_CC;
14787 if (GET_MODE_CLASS (mode) == MODE_CC
14788 || GET_MODE_CLASS (mode) == MODE_RANDOM
14789 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14791 if (FP_REGNO_P (regno))
14792 return VALID_FP_MODE_P (mode);
14793 if (SSE_REGNO_P (regno))
14794 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14795 if (MMX_REGNO_P (regno))
14797 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14798 /* We handle both integer and floats in the general purpose registers.
14799 In future we should be able to handle vector modes as well. */
14800 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14802 /* Take care for QImode values - they can be in non-QI regs, but then
14803 they do cause partial register stalls. */
14804 if (regno < 4 || mode != QImode || TARGET_64BIT)
14806 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14809 /* Return the cost of moving data of mode M between a
14810 register and memory. A value of 2 is the default; this cost is
14811 relative to those in `REGISTER_MOVE_COST'.
14813 If moving between registers and memory is more expensive than
14814 between two registers, you should define this macro to express the
14817 Model also increased moving costs of QImode registers in non
14821 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14823 if (FLOAT_CLASS_P (class))
14840 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14842 if (SSE_CLASS_P (class))
14845 switch (GET_MODE_SIZE (mode))
14859 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14861 if (MMX_CLASS_P (class))
14864 switch (GET_MODE_SIZE (mode))
14875 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14877 switch (GET_MODE_SIZE (mode))
14881 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14882 : ix86_cost->movzbl_load);
14884 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14885 : ix86_cost->int_store[0] + 4);
14888 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14890 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14891 if (mode == TFmode)
14893 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14894 * (((int) GET_MODE_SIZE (mode)
14895 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14899 /* Compute a (partial) cost for rtx X. Return true if the complete
14900 cost has been computed, and false if subexpressions should be
14901 scanned. In either case, *TOTAL contains the cost result. */
14904 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14906 enum machine_mode mode = GET_MODE (x);
14914 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14916 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14918 else if (flag_pic && SYMBOLIC_CONST (x)
14920 || (!GET_CODE (x) != LABEL_REF
14921 && (GET_CODE (x) != SYMBOL_REF
14922 || !SYMBOL_REF_LOCAL_P (x)))))
14929 if (mode == VOIDmode)
14932 switch (standard_80387_constant_p (x))
14937 default: /* Other constants */
14942 /* Start with (MEM (SYMBOL_REF)), since that's where
14943 it'll probably end up. Add a penalty for size. */
14944 *total = (COSTS_N_INSNS (1)
14945 + (flag_pic != 0 && !TARGET_64BIT)
14946 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14952 /* The zero extensions is often completely free on x86_64, so make
14953 it as cheap as possible. */
14954 if (TARGET_64BIT && mode == DImode
14955 && GET_MODE (XEXP (x, 0)) == SImode)
14957 else if (TARGET_ZERO_EXTEND_WITH_AND)
14958 *total = COSTS_N_INSNS (ix86_cost->add);
14960 *total = COSTS_N_INSNS (ix86_cost->movzx);
14964 *total = COSTS_N_INSNS (ix86_cost->movsx);
14968 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14969 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14971 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14974 *total = COSTS_N_INSNS (ix86_cost->add);
14977 if ((value == 2 || value == 3)
14978 && ix86_cost->lea <= ix86_cost->shift_const)
14980 *total = COSTS_N_INSNS (ix86_cost->lea);
14990 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14992 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14994 if (INTVAL (XEXP (x, 1)) > 32)
14995 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14997 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
15001 if (GET_CODE (XEXP (x, 1)) == AND)
15002 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
15004 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
15009 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15010 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15012 *total = COSTS_N_INSNS (ix86_cost->shift_var);
15017 if (FLOAT_MODE_P (mode))
15019 *total = COSTS_N_INSNS (ix86_cost->fmul);
15024 rtx op0 = XEXP (x, 0);
15025 rtx op1 = XEXP (x, 1);
15027 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15029 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15030 for (nbits = 0; value != 0; value &= value - 1)
15034 /* This is arbitrary. */
15037 /* Compute costs correctly for widening multiplication. */
15038 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
15039 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
15040 == GET_MODE_SIZE (mode))
15042 int is_mulwiden = 0;
15043 enum machine_mode inner_mode = GET_MODE (op0);
15045 if (GET_CODE (op0) == GET_CODE (op1))
15046 is_mulwiden = 1, op1 = XEXP (op1, 0);
15047 else if (GET_CODE (op1) == CONST_INT)
15049 if (GET_CODE (op0) == SIGN_EXTEND)
15050 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
15053 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
15057 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
15060 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15061 + nbits * ix86_cost->mult_bit)
15062 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
15071 if (FLOAT_MODE_P (mode))
15072 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15074 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15078 if (FLOAT_MODE_P (mode))
15079 *total = COSTS_N_INSNS (ix86_cost->fadd);
15080 else if (GET_MODE_CLASS (mode) == MODE_INT
15081 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15083 if (GET_CODE (XEXP (x, 0)) == PLUS
15084 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15085 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15086 && CONSTANT_P (XEXP (x, 1)))
15088 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15089 if (val == 2 || val == 4 || val == 8)
15091 *total = COSTS_N_INSNS (ix86_cost->lea);
15092 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15093 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15095 *total += rtx_cost (XEXP (x, 1), outer_code);
15099 else if (GET_CODE (XEXP (x, 0)) == MULT
15100 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15102 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15103 if (val == 2 || val == 4 || val == 8)
15105 *total = COSTS_N_INSNS (ix86_cost->lea);
15106 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15107 *total += rtx_cost (XEXP (x, 1), outer_code);
15111 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15113 *total = COSTS_N_INSNS (ix86_cost->lea);
15114 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15115 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15116 *total += rtx_cost (XEXP (x, 1), outer_code);
15123 if (FLOAT_MODE_P (mode))
15125 *total = COSTS_N_INSNS (ix86_cost->fadd);
15133 if (!TARGET_64BIT && mode == DImode)
15135 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15136 + (rtx_cost (XEXP (x, 0), outer_code)
15137 << (GET_MODE (XEXP (x, 0)) != DImode))
15138 + (rtx_cost (XEXP (x, 1), outer_code)
15139 << (GET_MODE (XEXP (x, 1)) != DImode)));
15145 if (FLOAT_MODE_P (mode))
15147 *total = COSTS_N_INSNS (ix86_cost->fchs);
15153 if (!TARGET_64BIT && mode == DImode)
15154 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15156 *total = COSTS_N_INSNS (ix86_cost->add);
15160 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15165 if (FLOAT_MODE_P (mode))
15166 *total = COSTS_N_INSNS (ix86_cost->fabs);
15170 if (FLOAT_MODE_P (mode))
15171 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15175 if (XINT (x, 1) == UNSPEC_TP)
15184 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15186 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
15189 fputs ("\tpushl $", asm_out_file);
15190 assemble_name (asm_out_file, XSTR (symbol, 0));
15191 fputc ('\n', asm_out_file);
15197 static int current_machopic_label_num;
15199 /* Given a symbol name and its associated stub, write out the
15200 definition of the stub. */
15203 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15205 unsigned int length;
15206 char *binder_name, *symbol_name, lazy_ptr_name[32];
15207 int label = ++current_machopic_label_num;
15209 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15210 symb = (*targetm.strip_name_encoding) (symb);
15212 length = strlen (stub);
15213 binder_name = alloca (length + 32);
15214 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15216 length = strlen (symb);
15217 symbol_name = alloca (length + 32);
15218 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15220 sprintf (lazy_ptr_name, "L%d$lz", label);
15223 machopic_picsymbol_stub_section ();
15225 machopic_symbol_stub_section ();
15227 fprintf (file, "%s:\n", stub);
15228 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15232 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15233 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15234 fprintf (file, "\tjmp %%edx\n");
15237 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15239 fprintf (file, "%s:\n", binder_name);
15243 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15244 fprintf (file, "\tpushl %%eax\n");
15247 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15249 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15251 machopic_lazy_symbol_ptr_section ();
15252 fprintf (file, "%s:\n", lazy_ptr_name);
15253 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15254 fprintf (file, "\t.long %s\n", binder_name);
15256 #endif /* TARGET_MACHO */
15258 /* Order the registers for register allocator. */
15261 x86_order_regs_for_local_alloc (void)
15266 /* First allocate the local general purpose registers. */
15267 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15268 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15269 reg_alloc_order [pos++] = i;
15271 /* Global general purpose registers. */
15272 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15273 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15274 reg_alloc_order [pos++] = i;
15276 /* x87 registers come first in case we are doing FP math
15278 if (!TARGET_SSE_MATH)
15279 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15280 reg_alloc_order [pos++] = i;
15282 /* SSE registers. */
15283 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15284 reg_alloc_order [pos++] = i;
15285 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15286 reg_alloc_order [pos++] = i;
15288 /* x87 registers. */
15289 if (TARGET_SSE_MATH)
15290 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15291 reg_alloc_order [pos++] = i;
15293 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15294 reg_alloc_order [pos++] = i;
15296 /* Initialize the rest of array as we do not allocate some registers
15298 while (pos < FIRST_PSEUDO_REGISTER)
15299 reg_alloc_order [pos++] = 0;
15302 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15303 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15306 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15307 struct attribute_spec.handler. */
15309 ix86_handle_struct_attribute (tree *node, tree name,
15310 tree args ATTRIBUTE_UNUSED,
15311 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15314 if (DECL_P (*node))
15316 if (TREE_CODE (*node) == TYPE_DECL)
15317 type = &TREE_TYPE (*node);
15322 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15323 || TREE_CODE (*type) == UNION_TYPE)))
15325 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15326 *no_add_attrs = true;
15329 else if ((is_attribute_p ("ms_struct", name)
15330 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15331 || ((is_attribute_p ("gcc_struct", name)
15332 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15334 warning ("`%s' incompatible attribute ignored",
15335 IDENTIFIER_POINTER (name));
15336 *no_add_attrs = true;
15343 ix86_ms_bitfield_layout_p (tree record_type)
15345 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15346 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15347 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15350 /* Returns an expression indicating where the this parameter is
15351 located on entry to the FUNCTION. */
15354 x86_this_parameter (tree function)
15356 tree type = TREE_TYPE (function);
15360 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15361 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15364 if (ix86_function_regparm (type, function) > 0)
15368 parm = TYPE_ARG_TYPES (type);
15369 /* Figure out whether or not the function has a variable number of
15371 for (; parm; parm = TREE_CHAIN (parm))
15372 if (TREE_VALUE (parm) == void_type_node)
15374 /* If not, the this parameter is in the first argument. */
15378 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15380 return gen_rtx_REG (SImode, regno);
15384 if (aggregate_value_p (TREE_TYPE (type), type))
15385 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15387 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15390 /* Determine whether x86_output_mi_thunk can succeed. */
15393 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15394 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15395 HOST_WIDE_INT vcall_offset, tree function)
15397 /* 64-bit can handle anything. */
15401 /* For 32-bit, everything's fine if we have one free register. */
15402 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15405 /* Need a free register for vcall_offset. */
15409 /* Need a free register for GOT references. */
15410 if (flag_pic && !(*targetm.binds_local_p) (function))
15413 /* Otherwise ok. */
15417 /* Output the assembler code for a thunk function. THUNK_DECL is the
15418 declaration for the thunk function itself, FUNCTION is the decl for
15419 the target function. DELTA is an immediate constant offset to be
15420 added to THIS. If VCALL_OFFSET is nonzero, the word at
15421 *(*this + vcall_offset) should be added to THIS. */
15424 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15425 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15426 HOST_WIDE_INT vcall_offset, tree function)
15429 rtx this = x86_this_parameter (function);
15432 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15433 pull it in now and let DELTA benefit. */
15436 else if (vcall_offset)
15438 /* Put the this parameter into %eax. */
15440 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15441 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15444 this_reg = NULL_RTX;
15446 /* Adjust the this parameter by a fixed constant. */
15449 xops[0] = GEN_INT (delta);
15450 xops[1] = this_reg ? this_reg : this;
15453 if (!x86_64_general_operand (xops[0], DImode))
15455 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15457 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15461 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15464 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15467 /* Adjust the this parameter by a value stored in the vtable. */
15471 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15474 int tmp_regno = 2 /* ECX */;
15475 if (lookup_attribute ("fastcall",
15476 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15477 tmp_regno = 0 /* EAX */;
15478 tmp = gen_rtx_REG (SImode, tmp_regno);
15481 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15484 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15486 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15488 /* Adjust the this parameter. */
15489 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15490 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15492 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15493 xops[0] = GEN_INT (vcall_offset);
15495 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15496 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15498 xops[1] = this_reg;
15500 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15502 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15505 /* If necessary, drop THIS back to its stack slot. */
15506 if (this_reg && this_reg != this)
15508 xops[0] = this_reg;
15510 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15513 xops[0] = XEXP (DECL_RTL (function), 0);
15516 if (!flag_pic || (*targetm.binds_local_p) (function))
15517 output_asm_insn ("jmp\t%P0", xops);
15520 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15521 tmp = gen_rtx_CONST (Pmode, tmp);
15522 tmp = gen_rtx_MEM (QImode, tmp);
15524 output_asm_insn ("jmp\t%A0", xops);
15529 if (!flag_pic || (*targetm.binds_local_p) (function))
15530 output_asm_insn ("jmp\t%P0", xops);
15535 rtx sym_ref = XEXP (DECL_RTL (function), 0);
15536 tmp = (gen_rtx_SYMBOL_REF
15538 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
15539 tmp = gen_rtx_MEM (QImode, tmp);
15541 output_asm_insn ("jmp\t%0", xops);
15544 #endif /* TARGET_MACHO */
15546 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15547 output_set_got (tmp);
15550 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15551 output_asm_insn ("jmp\t{*}%1", xops);
15557 x86_file_start (void)
15559 default_file_start ();
15560 if (X86_FILE_START_VERSION_DIRECTIVE)
15561 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15562 if (X86_FILE_START_FLTUSED)
15563 fputs ("\t.global\t__fltused\n", asm_out_file);
15564 if (ix86_asm_dialect == ASM_INTEL)
15565 fputs ("\t.intel_syntax\n", asm_out_file);
15569 x86_field_alignment (tree field, int computed)
15571 enum machine_mode mode;
15572 tree type = TREE_TYPE (field);
15574 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15576 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15577 ? get_inner_array_type (type) : type);
15578 if (mode == DFmode || mode == DCmode
15579 || GET_MODE_CLASS (mode) == MODE_INT
15580 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15581 return MIN (32, computed);
15585 /* Output assembler code to FILE to increment profiler label # LABELNO
15586 for profiling a function entry. */
15588 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15593 #ifndef NO_PROFILE_COUNTERS
15594 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15596 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15600 #ifndef NO_PROFILE_COUNTERS
15601 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15603 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15607 #ifndef NO_PROFILE_COUNTERS
15608 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15609 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15611 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15615 #ifndef NO_PROFILE_COUNTERS
15616 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15617 PROFILE_COUNT_REGISTER);
15619 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15623 /* We don't have exact information about the insn sizes, but we may assume
15624 quite safely that we are informed about all 1 byte insns and memory
15625 address sizes. This is enough to eliminate unnecessary padding in
15629 min_insn_size (rtx insn)
15633 if (!INSN_P (insn) || !active_insn_p (insn))
15636 /* Discard alignments we've emit and jump instructions. */
15637 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15638 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15640 if (GET_CODE (insn) == JUMP_INSN
15641 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15642 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15645 /* Important case - calls are always 5 bytes.
15646 It is common to have many calls in the row. */
15647 if (GET_CODE (insn) == CALL_INSN
15648 && symbolic_reference_mentioned_p (PATTERN (insn))
15649 && !SIBLING_CALL_P (insn))
15651 if (get_attr_length (insn) <= 1)
15654 /* For normal instructions we may rely on the sizes of addresses
15655 and the presence of symbol to require 4 bytes of encoding.
15656 This is not the case for jumps where references are PC relative. */
15657 if (GET_CODE (insn) != JUMP_INSN)
15659 l = get_attr_length_address (insn);
15660 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15669 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
15673 ix86_avoid_jump_misspredicts (void)
15675 rtx insn, start = get_insns ();
15676 int nbytes = 0, njumps = 0;
15679 /* Look for all minimal intervals of instructions containing 4 jumps.
15680 The intervals are bounded by START and INSN. NBYTES is the total
15681 size of instructions in the interval including INSN and not including
15682 START. When the NBYTES is smaller than 16 bytes, it is possible
15683 that the end of START and INSN ends up in the same 16byte page.
15685 The smallest offset in the page INSN can start is the case where START
15686 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15687 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15689 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15692 nbytes += min_insn_size (insn);
15694 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
15695 INSN_UID (insn), min_insn_size (insn));
15696 if ((GET_CODE (insn) == JUMP_INSN
15697 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15698 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15699 || GET_CODE (insn) == CALL_INSN)
15706 start = NEXT_INSN (start);
15707 if ((GET_CODE (start) == JUMP_INSN
15708 && GET_CODE (PATTERN (start)) != ADDR_VEC
15709 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15710 || GET_CODE (start) == CALL_INSN)
15711 njumps--, isjump = 1;
15714 nbytes -= min_insn_size (start);
15719 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
15720 INSN_UID (start), INSN_UID (insn), nbytes);
15722 if (njumps == 3 && isjump && nbytes < 16)
15724 int padsize = 15 - nbytes + min_insn_size (insn);
15727 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
15728 INSN_UID (insn), padsize);
15729 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15734 /* AMD Athlon works faster
15735 when RET is not destination of conditional jump or directly preceded
15736 by other jump instruction. We avoid the penalty by inserting NOP just
15737 before the RET instructions in such cases. */
15739 ix86_pad_returns (void)
15743 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15745 basic_block bb = e->src;
15746 rtx ret = BB_END (bb);
15748 bool replace = false;
15750 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15751 || !maybe_hot_bb_p (bb))
15753 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15754 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15756 if (prev && GET_CODE (prev) == CODE_LABEL)
15759 for (e = bb->pred; e; e = e->pred_next)
15760 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15761 && !(e->flags & EDGE_FALLTHRU))
15766 prev = prev_active_insn (ret);
15768 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15769 || GET_CODE (prev) == CALL_INSN))
15771 /* Empty functions get branch mispredict even when the jump destination
15772 is not visible to us. */
15773 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15778 emit_insn_before (gen_return_internal_long (), ret);
15784 /* Implement machine specific optimizations. We implement padding of returns
15785 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
15789 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
15790 ix86_pad_returns ();
15791 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
15792 ix86_avoid_jump_misspredicts ();
15795 /* Return nonzero when QImode register that must be represented via REX prefix
15798 x86_extended_QIreg_mentioned_p (rtx insn)
15801 extract_insn_cached (insn);
15802 for (i = 0; i < recog_data.n_operands; i++)
15803 if (REG_P (recog_data.operand[i])
15804 && REGNO (recog_data.operand[i]) >= 4)
15809 /* Return nonzero when P points to register encoded via REX prefix.
15810 Called via for_each_rtx. */
15812 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15814 unsigned int regno;
15817 regno = REGNO (*p);
15818 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15821 /* Return true when INSN mentions register that must be encoded using REX
15824 x86_extended_reg_mentioned_p (rtx insn)
15826 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15829 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15830 optabs would emit if we didn't have TFmode patterns. */
15833 x86_emit_floatuns (rtx operands[2])
15835 rtx neglab, donelab, i0, i1, f0, in, out;
15836 enum machine_mode mode, inmode;
15838 inmode = GET_MODE (operands[1]);
15839 if (inmode != SImode
15840 && inmode != DImode)
15844 in = force_reg (inmode, operands[1]);
15845 mode = GET_MODE (out);
15846 neglab = gen_label_rtx ();
15847 donelab = gen_label_rtx ();
15848 i1 = gen_reg_rtx (Pmode);
15849 f0 = gen_reg_rtx (mode);
15851 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15853 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15854 emit_jump_insn (gen_jump (donelab));
15857 emit_label (neglab);
15859 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15860 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15861 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15862 expand_float (f0, i0, 0);
15863 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15865 emit_label (donelab);
15868 /* Initialize vector TARGET via VALS. */
15870 ix86_expand_vector_init (rtx target, rtx vals)
15872 enum machine_mode mode = GET_MODE (target);
15873 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15874 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
15877 for (i = n_elts - 1; i >= 0; i--)
15878 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
15879 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
15882 /* Few special cases first...
15883 ... constants are best loaded from constant pool. */
15886 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15890 /* ... values where only first field is non-constant are best loaded
15891 from the pool and overwritten via move later. */
15894 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
15895 GET_MODE_INNER (mode), 0);
15897 op = force_reg (mode, op);
15898 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
15899 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15900 switch (GET_MODE (target))
15903 emit_insn (gen_sse2_movsd (target, target, op));
15906 emit_insn (gen_sse_movss (target, target, op));
15914 /* And the busy sequence doing rotations. */
15915 switch (GET_MODE (target))
15920 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
15922 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
15924 vecop0 = force_reg (V2DFmode, vecop0);
15925 vecop1 = force_reg (V2DFmode, vecop1);
15926 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
15932 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
15934 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
15936 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
15938 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
15939 rtx tmp1 = gen_reg_rtx (V4SFmode);
15940 rtx tmp2 = gen_reg_rtx (V4SFmode);
15942 vecop0 = force_reg (V4SFmode, vecop0);
15943 vecop1 = force_reg (V4SFmode, vecop1);
15944 vecop2 = force_reg (V4SFmode, vecop2);
15945 vecop3 = force_reg (V4SFmode, vecop3);
15946 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
15947 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
15948 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
15956 /* Worker function for TARGET_MD_ASM_CLOBBERS.
15958 We do this in the new i386 backend to maintain source compatibility
15959 with the old cc0-based compiler. */
15962 ix86_md_asm_clobbers (tree clobbers)
15964 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
15966 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
15968 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
15973 /* Worker function for REVERSE_CONDITION. */
15976 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
15978 return (mode != CCFPmode && mode != CCFPUmode
15979 ? reverse_condition (code)
15980 : reverse_condition_maybe_unordered (code));
15983 /* Output code to perform an x87 FP register move, from OPERANDS[1]
15987 output_387_reg_move (rtx insn, rtx *operands)
15989 if (REG_P (operands[1])
15990 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15992 if (REGNO (operands[0]) == FIRST_STACK_REG
15993 && TARGET_USE_FFREEP)
15994 return "ffreep\t%y0";
15995 return "fstp\t%y0";
15997 if (STACK_TOP_P (operands[0]))
15998 return "fld%z1\t%y1";
16002 /* Output code to perform a conditional jump to LABEL, if C2 flag in
16003 FP status register is set. */
16006 ix86_emit_fp_unordered_jump (rtx label)
16008 rtx reg = gen_reg_rtx (HImode);
16011 emit_insn (gen_x86_fnstsw_1 (reg));
16013 if (TARGET_USE_SAHF)
16015 emit_insn (gen_x86_sahf_1 (reg));
16017 temp = gen_rtx_REG (CCmode, FLAGS_REG);
16018 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
16022 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
16024 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16025 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
16028 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
16029 gen_rtx_LABEL_REF (VOIDmode, label),
16031 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
16032 emit_jump_insn (temp);
16035 /* Output code to perform a log1p XFmode calculation. */
16037 void ix86_emit_i387_log1p (rtx op0, rtx op1)
16039 rtx label1 = gen_label_rtx ();
16040 rtx label2 = gen_label_rtx ();
16042 rtx tmp = gen_reg_rtx (XFmode);
16043 rtx tmp2 = gen_reg_rtx (XFmode);
16045 emit_insn (gen_absxf2 (tmp, op1));
16046 emit_insn (gen_cmpxf (tmp,
16047 CONST_DOUBLE_FROM_REAL_VALUE (
16048 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
16050 emit_jump_insn (gen_bge (label1));
16052 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
16053 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
16054 emit_jump (label2);
16056 emit_label (label1);
16057 emit_move_insn (tmp, CONST1_RTX (XFmode));
16058 emit_insn (gen_addxf3 (tmp, op1, tmp));
16059 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
16060 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
16062 emit_label (label2);
16065 #include "gt-i386.h"