1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #ifndef CHECK_STACK_LIMIT
53 #define CHECK_STACK_LIMIT (-1)
56 /* Return index of given mode in mult and division cost tables. */
57 #define MODE_INDEX(mode) \
58 ((mode) == QImode ? 0 \
59 : (mode) == HImode ? 1 \
60 : (mode) == SImode ? 2 \
61 : (mode) == DImode ? 3 \
64 /* Processor costs (relative to an add) */
66 struct processor_costs size_cost = { /* costs for tunning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
86 {2, 2, 2}, /* cost of loading integer registers */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
109 /* Processor costs (relative to an add) */
111 struct processor_costs i386_cost = { /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {8, 8, 8}, /* cost of loading integer registers */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
155 struct processor_costs i486_cost = { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of loading integer registers */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
199 struct processor_costs pentium_cost = {
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {4, 4, 6}, /* cost of loading integer registers */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
243 struct processor_costs pentiumpro_cost = {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
258 Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
263 {4, 4, 6}, /* cost of loading integer registers */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
287 struct processor_costs k6_cost = {
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 4}, /* cost of loading integer registers */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
331 struct processor_costs athlon_cost = {
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
345 in QImode, HImode and SImode.
346 Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
350 in SFmode, DFmode and XFmode */
351 {6, 6, 8}, /* cost of loading integer registers */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
354 in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
359 in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
361 in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
375 struct processor_costs k8_cost = {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {6, 6, 8}, /* cost of loading integer registers */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
419 struct processor_costs pentium4_cost = {
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
439 {4, 4, 6}, /* cost of loading integer registers */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
463 struct processor_costs nocona_cost = {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
483 {4, 4, 4}, /* cost of loading integer registers */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
506 const struct processor_costs *ix86_cost = &pentium_cost;
508 /* Processor feature/optimization bitmasks. */
509 #define m_386 (1<<PROCESSOR_I386)
510 #define m_486 (1<<PROCESSOR_I486)
511 #define m_PENT (1<<PROCESSOR_PENTIUM)
512 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
513 #define m_K6 (1<<PROCESSOR_K6)
514 #define m_ATHLON (1<<PROCESSOR_ATHLON)
515 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
516 #define m_K8 (1<<PROCESSOR_K8)
517 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
518 #define m_NOCONA (1<<PROCESSOR_NOCONA)
520 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
521 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
522 const int x86_zero_extend_with_and = m_486 | m_PENT;
523 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
524 const int x86_double_with_add = ~m_386;
525 const int x86_use_bit_test = m_386;
526 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
527 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
528 const int x86_3dnow_a = m_ATHLON_K8;
529 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
530 const int x86_branch_hints = m_PENT4 | m_NOCONA;
531 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
532 const int x86_partial_reg_stall = m_PPRO;
533 const int x86_use_loop = m_K6;
534 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
535 const int x86_use_mov0 = m_K6;
536 const int x86_use_cltd = ~(m_PENT | m_K6);
537 const int x86_read_modify_write = ~m_PENT;
538 const int x86_read_modify = ~(m_PENT | m_PPRO);
539 const int x86_split_long_moves = m_PPRO;
540 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
541 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
542 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
543 const int x86_qimode_math = ~(0);
544 const int x86_promote_qi_regs = 0;
545 const int x86_himode_math = ~(m_PPRO);
546 const int x86_promote_hi_regs = m_PPRO;
547 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
548 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
549 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
550 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
551 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
552 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
553 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
554 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
555 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
556 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
557 const int x86_decompose_lea = m_PENT4 | m_NOCONA;
558 const int x86_shift1 = ~m_486;
559 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
560 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
561 /* Set for machines where the type and dependencies are resolved on SSE register
562 parts instead of whole registers, so we may maintain just lower part of
563 scalar values in proper format leaving the upper part undefined. */
564 const int x86_sse_partial_regs = m_ATHLON_K8;
565 /* Athlon optimizes partial-register FPS special case, thus avoiding the
566 need for extra instructions beforehand */
567 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
568 const int x86_sse_typeless_stores = m_ATHLON_K8;
569 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
570 const int x86_use_ffreep = m_ATHLON_K8;
571 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
572 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
573 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
574 /* Some CPU cores are not able to predict more than 4 branch instructions in
575 the 16 byte window. */
576 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
578 /* In case the average insn count for single function invocation is
579 lower than this constant, emit fast (but longer) prologue and
581 #define FAST_PROLOGUE_INSN_COUNT 20
583 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
584 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
585 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
586 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
588 /* Array of the smallest class containing reg number REGNO, indexed by
589 REGNO. Used by REGNO_REG_CLASS in i386.h. */
591 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
594 AREG, DREG, CREG, BREG,
596 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
598 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
599 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
602 /* flags, fpsr, dirflag, frame */
603 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
604 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
606 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
608 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
609 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
610 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
614 /* The "default" register map used in 32bit mode. */
616 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
618 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
619 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
620 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
621 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
622 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
623 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
624 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
627 static int const x86_64_int_parameter_registers[6] =
629 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
630 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
633 static int const x86_64_int_return_registers[4] =
635 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
638 /* The "default" register map used in 64bit mode. */
639 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
641 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
642 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
643 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
644 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
645 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
646 8,9,10,11,12,13,14,15, /* extended integer registers */
647 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
650 /* Define the register numbers to be used in Dwarf debugging information.
651 The SVR4 reference port C compiler uses the following register numbers
652 in its Dwarf output code:
653 0 for %eax (gcc regno = 0)
654 1 for %ecx (gcc regno = 2)
655 2 for %edx (gcc regno = 1)
656 3 for %ebx (gcc regno = 3)
657 4 for %esp (gcc regno = 7)
658 5 for %ebp (gcc regno = 6)
659 6 for %esi (gcc regno = 4)
660 7 for %edi (gcc regno = 5)
661 The following three DWARF register numbers are never generated by
662 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
663 believes these numbers have these meanings.
664 8 for %eip (no gcc equivalent)
665 9 for %eflags (gcc regno = 17)
666 10 for %trapno (no gcc equivalent)
667 It is not at all clear how we should number the FP stack registers
668 for the x86 architecture. If the version of SDB on x86/svr4 were
669 a bit less brain dead with respect to floating-point then we would
670 have a precedent to follow with respect to DWARF register numbers
671 for x86 FP registers, but the SDB on x86/svr4 is so completely
672 broken with respect to FP registers that it is hardly worth thinking
673 of it as something to strive for compatibility with.
674 The version of x86/svr4 SDB I have at the moment does (partially)
675 seem to believe that DWARF register number 11 is associated with
676 the x86 register %st(0), but that's about all. Higher DWARF
677 register numbers don't seem to be associated with anything in
678 particular, and even for DWARF regno 11, SDB only seems to under-
679 stand that it should say that a variable lives in %st(0) (when
680 asked via an `=' command) if we said it was in DWARF regno 11,
681 but SDB still prints garbage when asked for the value of the
682 variable in question (via a `/' command).
683 (Also note that the labels SDB prints for various FP stack regs
684 when doing an `x' command are all wrong.)
685 Note that these problems generally don't affect the native SVR4
686 C compiler because it doesn't allow the use of -O with -g and
687 because when it is *not* optimizing, it allocates a memory
688 location for each floating-point variable, and the memory
689 location is what gets described in the DWARF AT_location
690 attribute for the variable in question.
691 Regardless of the severe mental illness of the x86/svr4 SDB, we
692 do something sensible here and we use the following DWARF
693 register numbers. Note that these are all stack-top-relative
695 11 for %st(0) (gcc regno = 8)
696 12 for %st(1) (gcc regno = 9)
697 13 for %st(2) (gcc regno = 10)
698 14 for %st(3) (gcc regno = 11)
699 15 for %st(4) (gcc regno = 12)
700 16 for %st(5) (gcc regno = 13)
701 17 for %st(6) (gcc regno = 14)
702 18 for %st(7) (gcc regno = 15)
704 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
706 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
707 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
708 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
709 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
710 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
711 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
712 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
715 /* Test and compare insns in i386.md store the information needed to
716 generate branch and scc insns here. */
718 rtx ix86_compare_op0 = NULL_RTX;
719 rtx ix86_compare_op1 = NULL_RTX;
721 #define MAX_386_STACK_LOCALS 3
722 /* Size of the register save area. */
723 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
725 /* Define the structure for the machine field in struct function. */
727 struct stack_local_entry GTY(())
732 struct stack_local_entry *next;
735 /* Structure describing stack frame layout.
736 Stack grows downward:
742 saved frame pointer if frame_pointer_needed
743 <- HARD_FRAME_POINTER
749 > to_allocate <- FRAME_POINTER
761 int outgoing_arguments_size;
764 HOST_WIDE_INT to_allocate;
765 /* The offsets relative to ARG_POINTER. */
766 HOST_WIDE_INT frame_pointer_offset;
767 HOST_WIDE_INT hard_frame_pointer_offset;
768 HOST_WIDE_INT stack_pointer_offset;
770 /* When save_regs_using_mov is set, emit prologue using
771 move instead of push instructions. */
772 bool save_regs_using_mov;
775 /* Used to enable/disable debugging features. */
776 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
777 /* Code model option as passed by user. */
778 const char *ix86_cmodel_string;
780 enum cmodel ix86_cmodel;
782 const char *ix86_asm_string;
783 enum asm_dialect ix86_asm_dialect = ASM_ATT;
785 const char *ix86_tls_dialect_string;
786 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
788 /* Which unit we are generating floating point math for. */
789 enum fpmath_unit ix86_fpmath;
791 /* Which cpu are we scheduling for. */
792 enum processor_type ix86_tune;
793 /* Which instruction set architecture to use. */
794 enum processor_type ix86_arch;
796 /* Strings to hold which cpu and instruction set architecture to use. */
797 const char *ix86_tune_string; /* for -mtune=<xxx> */
798 const char *ix86_arch_string; /* for -march=<xxx> */
799 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
801 /* # of registers to use to pass arguments. */
802 const char *ix86_regparm_string;
804 /* true if sse prefetch instruction is not NOOP. */
805 int x86_prefetch_sse;
807 /* ix86_regparm_string as a number */
810 /* Alignment to use for loops and jumps: */
812 /* Power of two alignment for loops. */
813 const char *ix86_align_loops_string;
815 /* Power of two alignment for non-loop jumps. */
816 const char *ix86_align_jumps_string;
818 /* Power of two alignment for stack boundary in bytes. */
819 const char *ix86_preferred_stack_boundary_string;
821 /* Preferred alignment for stack boundary in bits. */
822 int ix86_preferred_stack_boundary;
824 /* Values 1-5: see jump.c */
825 int ix86_branch_cost;
826 const char *ix86_branch_cost_string;
828 /* Power of two alignment for functions. */
829 const char *ix86_align_funcs_string;
831 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
832 static char internal_label_prefix[16];
833 static int internal_label_prefix_len;
835 static int local_symbolic_operand (rtx, enum machine_mode);
836 static int tls_symbolic_operand_1 (rtx, enum tls_model);
837 static void output_pic_addr_const (FILE *, rtx, int);
838 static void put_condition_code (enum rtx_code, enum machine_mode,
840 static const char *get_some_local_dynamic_name (void);
841 static int get_some_local_dynamic_name_1 (rtx *, void *);
842 static rtx maybe_get_pool_constant (rtx);
843 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
844 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
846 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
847 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
849 static rtx get_thread_pointer (int);
850 static rtx legitimize_tls_address (rtx, enum tls_model, int);
851 static void get_pc_thunk_name (char [32], unsigned int);
852 static rtx gen_push (rtx);
853 static int memory_address_length (rtx addr);
854 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
855 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
856 static struct machine_function * ix86_init_machine_status (void);
857 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
858 static int ix86_nsaved_regs (void);
859 static void ix86_emit_save_regs (void);
860 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
861 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
862 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
863 static HOST_WIDE_INT ix86_GOT_alias_set (void);
864 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
865 static rtx ix86_expand_aligntest (rtx, int);
866 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
867 static int ix86_issue_rate (void);
868 static int ix86_adjust_cost (rtx, rtx, rtx, int);
869 static int ia32_use_dfa_pipeline_interface (void);
870 static int ia32_multipass_dfa_lookahead (void);
871 static void ix86_init_mmx_sse_builtins (void);
872 static rtx x86_this_parameter (tree);
873 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
874 HOST_WIDE_INT, tree);
875 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
876 static void x86_file_start (void);
877 static void ix86_reorg (void);
878 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
879 static tree ix86_build_builtin_va_list (void);
880 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
882 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
886 rtx base, index, disp;
888 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
891 static int ix86_decompose_address (rtx, struct ix86_address *);
892 static int ix86_address_cost (rtx);
893 static bool ix86_cannot_force_const_mem (rtx);
894 static rtx ix86_delegitimize_address (rtx);
896 struct builtin_description;
897 static rtx ix86_expand_sse_comi (const struct builtin_description *,
899 static rtx ix86_expand_sse_compare (const struct builtin_description *,
901 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
902 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
903 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
904 static rtx ix86_expand_store_builtin (enum insn_code, tree);
905 static rtx safe_vector_operand (rtx, enum machine_mode);
906 static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
907 static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
908 enum rtx_code *, enum rtx_code *);
909 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
910 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
911 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
912 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
913 static int ix86_fp_comparison_cost (enum rtx_code code);
914 static unsigned int ix86_select_alt_pic_regnum (void);
915 static int ix86_save_reg (unsigned int, int);
916 static void ix86_compute_frame_layout (struct ix86_frame *);
917 static int ix86_comp_type_attributes (tree, tree);
918 static int ix86_function_regparm (tree, tree);
919 const struct attribute_spec ix86_attribute_table[];
920 static bool ix86_function_ok_for_sibcall (tree, tree);
921 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
922 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
923 static int ix86_value_regno (enum machine_mode);
924 static bool contains_128bit_aligned_vector_p (tree);
925 static rtx ix86_struct_value_rtx (tree, int);
926 static bool ix86_ms_bitfield_layout_p (tree);
927 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
928 static int extended_reg_mentioned_1 (rtx *, void *);
929 static bool ix86_rtx_costs (rtx, int, int, int *);
930 static int min_insn_size (rtx);
931 static tree ix86_md_asm_clobbers (tree clobbers);
933 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
934 static void ix86_svr3_asm_out_constructor (rtx, int);
937 /* Register class used for passing given 64bit part of the argument.
938 These represent classes as documented by the PS ABI, with the exception
939 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
940 use SF or DFmode move instead of DImode to avoid reformatting penalties.
942 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
943 whenever possible (upper half does contain padding).
945 enum x86_64_reg_class
948 X86_64_INTEGER_CLASS,
949 X86_64_INTEGERSI_CLASS,
958 static const char * const x86_64_reg_class_name[] =
959 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
961 #define MAX_CLASSES 4
962 static int classify_argument (enum machine_mode, tree,
963 enum x86_64_reg_class [MAX_CLASSES], int);
964 static int examine_argument (enum machine_mode, tree, int, int *, int *);
965 static rtx construct_container (enum machine_mode, tree, int, int, int,
967 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
968 enum x86_64_reg_class);
970 /* Table of constants used by fldpi, fldln2, etc.... */
971 static REAL_VALUE_TYPE ext_80387_constants_table [5];
972 static bool ext_80387_constants_init = 0;
973 static void init_ext_80387_constants (void);
975 /* Initialize the GCC target structure. */
976 #undef TARGET_ATTRIBUTE_TABLE
977 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
978 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
979 # undef TARGET_MERGE_DECL_ATTRIBUTES
980 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
983 #undef TARGET_COMP_TYPE_ATTRIBUTES
984 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
986 #undef TARGET_INIT_BUILTINS
987 #define TARGET_INIT_BUILTINS ix86_init_builtins
989 #undef TARGET_EXPAND_BUILTIN
990 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
992 #undef TARGET_ASM_FUNCTION_EPILOGUE
993 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
995 #undef TARGET_ASM_OPEN_PAREN
996 #define TARGET_ASM_OPEN_PAREN ""
997 #undef TARGET_ASM_CLOSE_PAREN
998 #define TARGET_ASM_CLOSE_PAREN ""
1000 #undef TARGET_ASM_ALIGNED_HI_OP
1001 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1002 #undef TARGET_ASM_ALIGNED_SI_OP
1003 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1005 #undef TARGET_ASM_ALIGNED_DI_OP
1006 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1009 #undef TARGET_ASM_UNALIGNED_HI_OP
1010 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1011 #undef TARGET_ASM_UNALIGNED_SI_OP
1012 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1013 #undef TARGET_ASM_UNALIGNED_DI_OP
1014 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1016 #undef TARGET_SCHED_ADJUST_COST
1017 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1018 #undef TARGET_SCHED_ISSUE_RATE
1019 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1020 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
1021 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
1022 ia32_use_dfa_pipeline_interface
1023 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1024 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1025 ia32_multipass_dfa_lookahead
1027 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1028 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1031 #undef TARGET_HAVE_TLS
1032 #define TARGET_HAVE_TLS true
1034 #undef TARGET_CANNOT_FORCE_CONST_MEM
1035 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1037 #undef TARGET_DELEGITIMIZE_ADDRESS
1038 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1040 #undef TARGET_MS_BITFIELD_LAYOUT_P
1041 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1043 #undef TARGET_ASM_OUTPUT_MI_THUNK
1044 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1045 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1046 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1048 #undef TARGET_ASM_FILE_START
1049 #define TARGET_ASM_FILE_START x86_file_start
1051 #undef TARGET_RTX_COSTS
1052 #define TARGET_RTX_COSTS ix86_rtx_costs
1053 #undef TARGET_ADDRESS_COST
1054 #define TARGET_ADDRESS_COST ix86_address_cost
1056 #undef TARGET_FIXED_CONDITION_CODE_REGS
1057 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1058 #undef TARGET_CC_MODES_COMPATIBLE
1059 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1061 #undef TARGET_MACHINE_DEPENDENT_REORG
1062 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1064 #undef TARGET_BUILD_BUILTIN_VA_LIST
1065 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1067 #undef TARGET_MD_ASM_CLOBBERS
1068 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1070 #undef TARGET_PROMOTE_PROTOTYPES
1071 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1072 #undef TARGET_STRUCT_VALUE_RTX
1073 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1074 #undef TARGET_SETUP_INCOMING_VARARGS
1075 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1077 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1078 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1080 struct gcc_target targetm = TARGET_INITIALIZER;
1083 /* The svr4 ABI for the i386 says that records and unions are returned
1085 #ifndef DEFAULT_PCC_STRUCT_RETURN
1086 #define DEFAULT_PCC_STRUCT_RETURN 1
1089 /* Sometimes certain combinations of command options do not make
1090 sense on a particular target machine. You can define a macro
1091 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1092 defined, is executed once just after all the command options have
1095 Don't use this macro to turn on various extra optimizations for
1096 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1099 override_options (void)
1102 /* Comes from final.c -- no real reason to change it. */
1103 #define MAX_CODE_ALIGN 16
1107 const struct processor_costs *cost; /* Processor costs */
1108 const int target_enable; /* Target flags to enable. */
1109 const int target_disable; /* Target flags to disable. */
1110 const int align_loop; /* Default alignments. */
1111 const int align_loop_max_skip;
1112 const int align_jump;
1113 const int align_jump_max_skip;
1114 const int align_func;
1116 const processor_target_table[PROCESSOR_max] =
1118 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1119 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1120 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1121 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1122 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1123 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1124 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1125 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1126 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
1129 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1132 const char *const name; /* processor name or nickname. */
1133 const enum processor_type processor;
1134 const enum pta_flags
1140 PTA_PREFETCH_SSE = 16,
1146 const processor_alias_table[] =
1148 {"i386", PROCESSOR_I386, 0},
1149 {"i486", PROCESSOR_I486, 0},
1150 {"i586", PROCESSOR_PENTIUM, 0},
1151 {"pentium", PROCESSOR_PENTIUM, 0},
1152 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1153 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1154 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1155 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1156 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1157 {"i686", PROCESSOR_PENTIUMPRO, 0},
1158 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1159 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1160 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1161 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1162 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1163 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1164 | PTA_MMX | PTA_PREFETCH_SSE},
1165 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1166 | PTA_MMX | PTA_PREFETCH_SSE},
1167 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1168 | PTA_MMX | PTA_PREFETCH_SSE},
1169 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1170 | PTA_MMX | PTA_PREFETCH_SSE},
1171 {"k6", PROCESSOR_K6, PTA_MMX},
1172 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1173 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1174 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1176 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1177 | PTA_3DNOW | PTA_3DNOW_A},
1178 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1179 | PTA_3DNOW_A | PTA_SSE},
1180 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1181 | PTA_3DNOW_A | PTA_SSE},
1182 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1183 | PTA_3DNOW_A | PTA_SSE},
1184 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1185 | PTA_SSE | PTA_SSE2 },
1186 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1187 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1188 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1189 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1190 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1191 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1192 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1193 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1196 int const pta_size = ARRAY_SIZE (processor_alias_table);
1198 /* Set the default values for switches whose default depends on TARGET_64BIT
1199 in case they weren't overwritten by command line options. */
1202 if (flag_omit_frame_pointer == 2)
1203 flag_omit_frame_pointer = 1;
1204 if (flag_asynchronous_unwind_tables == 2)
1205 flag_asynchronous_unwind_tables = 1;
1206 if (flag_pcc_struct_return == 2)
1207 flag_pcc_struct_return = 0;
1211 if (flag_omit_frame_pointer == 2)
1212 flag_omit_frame_pointer = 0;
1213 if (flag_asynchronous_unwind_tables == 2)
1214 flag_asynchronous_unwind_tables = 0;
1215 if (flag_pcc_struct_return == 2)
1216 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1219 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1220 SUBTARGET_OVERRIDE_OPTIONS;
1223 if (!ix86_tune_string && ix86_arch_string)
1224 ix86_tune_string = ix86_arch_string;
1225 if (!ix86_tune_string)
1226 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1227 if (!ix86_arch_string)
1228 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1230 if (ix86_cmodel_string != 0)
1232 if (!strcmp (ix86_cmodel_string, "small"))
1233 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1235 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1236 else if (!strcmp (ix86_cmodel_string, "32"))
1237 ix86_cmodel = CM_32;
1238 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1239 ix86_cmodel = CM_KERNEL;
1240 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1241 ix86_cmodel = CM_MEDIUM;
1242 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1243 ix86_cmodel = CM_LARGE;
1245 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1249 ix86_cmodel = CM_32;
1251 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1253 if (ix86_asm_string != 0)
1255 if (!strcmp (ix86_asm_string, "intel"))
1256 ix86_asm_dialect = ASM_INTEL;
1257 else if (!strcmp (ix86_asm_string, "att"))
1258 ix86_asm_dialect = ASM_ATT;
1260 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1262 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1263 error ("code model `%s' not supported in the %s bit mode",
1264 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1265 if (ix86_cmodel == CM_LARGE)
1266 sorry ("code model `large' not supported yet");
1267 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1268 sorry ("%i-bit mode not compiled in",
1269 (target_flags & MASK_64BIT) ? 64 : 32);
1271 for (i = 0; i < pta_size; i++)
1272 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1274 ix86_arch = processor_alias_table[i].processor;
1275 /* Default cpu tuning to the architecture. */
1276 ix86_tune = ix86_arch;
1277 if (processor_alias_table[i].flags & PTA_MMX
1278 && !(target_flags_explicit & MASK_MMX))
1279 target_flags |= MASK_MMX;
1280 if (processor_alias_table[i].flags & PTA_3DNOW
1281 && !(target_flags_explicit & MASK_3DNOW))
1282 target_flags |= MASK_3DNOW;
1283 if (processor_alias_table[i].flags & PTA_3DNOW_A
1284 && !(target_flags_explicit & MASK_3DNOW_A))
1285 target_flags |= MASK_3DNOW_A;
1286 if (processor_alias_table[i].flags & PTA_SSE
1287 && !(target_flags_explicit & MASK_SSE))
1288 target_flags |= MASK_SSE;
1289 if (processor_alias_table[i].flags & PTA_SSE2
1290 && !(target_flags_explicit & MASK_SSE2))
1291 target_flags |= MASK_SSE2;
1292 if (processor_alias_table[i].flags & PTA_SSE3
1293 && !(target_flags_explicit & MASK_SSE3))
1294 target_flags |= MASK_SSE3;
1295 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1296 x86_prefetch_sse = true;
1297 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1298 error ("CPU you selected does not support x86-64 instruction set");
1303 error ("bad value (%s) for -march= switch", ix86_arch_string);
1305 for (i = 0; i < pta_size; i++)
1306 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1308 ix86_tune = processor_alias_table[i].processor;
1309 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1310 error ("CPU you selected does not support x86-64 instruction set");
1313 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1314 x86_prefetch_sse = true;
1316 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1319 ix86_cost = &size_cost;
1321 ix86_cost = processor_target_table[ix86_tune].cost;
1322 target_flags |= processor_target_table[ix86_tune].target_enable;
1323 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1325 /* Arrange to set up i386_stack_locals for all functions. */
1326 init_machine_status = ix86_init_machine_status;
1328 /* Validate -mregparm= value. */
1329 if (ix86_regparm_string)
1331 i = atoi (ix86_regparm_string);
1332 if (i < 0 || i > REGPARM_MAX)
1333 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1339 ix86_regparm = REGPARM_MAX;
1341 /* If the user has provided any of the -malign-* options,
1342 warn and use that value only if -falign-* is not set.
1343 Remove this code in GCC 3.2 or later. */
1344 if (ix86_align_loops_string)
1346 warning ("-malign-loops is obsolete, use -falign-loops");
1347 if (align_loops == 0)
1349 i = atoi (ix86_align_loops_string);
1350 if (i < 0 || i > MAX_CODE_ALIGN)
1351 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1353 align_loops = 1 << i;
1357 if (ix86_align_jumps_string)
1359 warning ("-malign-jumps is obsolete, use -falign-jumps");
1360 if (align_jumps == 0)
1362 i = atoi (ix86_align_jumps_string);
1363 if (i < 0 || i > MAX_CODE_ALIGN)
1364 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1366 align_jumps = 1 << i;
1370 if (ix86_align_funcs_string)
1372 warning ("-malign-functions is obsolete, use -falign-functions");
1373 if (align_functions == 0)
1375 i = atoi (ix86_align_funcs_string);
1376 if (i < 0 || i > MAX_CODE_ALIGN)
1377 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1379 align_functions = 1 << i;
1383 /* Default align_* from the processor table. */
1384 if (align_loops == 0)
1386 align_loops = processor_target_table[ix86_tune].align_loop;
1387 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1389 if (align_jumps == 0)
1391 align_jumps = processor_target_table[ix86_tune].align_jump;
1392 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1394 if (align_functions == 0)
1396 align_functions = processor_target_table[ix86_tune].align_func;
1399 /* Validate -mpreferred-stack-boundary= value, or provide default.
1400 The default of 128 bits is for Pentium III's SSE __m128, but we
1401 don't want additional code to keep the stack aligned when
1402 optimizing for code size. */
1403 ix86_preferred_stack_boundary = (optimize_size
1404 ? TARGET_64BIT ? 128 : 32
1406 if (ix86_preferred_stack_boundary_string)
1408 i = atoi (ix86_preferred_stack_boundary_string);
1409 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1410 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1411 TARGET_64BIT ? 4 : 2);
1413 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1416 /* Validate -mbranch-cost= value, or provide default. */
1417 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1418 if (ix86_branch_cost_string)
1420 i = atoi (ix86_branch_cost_string);
1422 error ("-mbranch-cost=%d is not between 0 and 5", i);
1424 ix86_branch_cost = i;
1427 if (ix86_tls_dialect_string)
1429 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1430 ix86_tls_dialect = TLS_DIALECT_GNU;
1431 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1432 ix86_tls_dialect = TLS_DIALECT_SUN;
1434 error ("bad value (%s) for -mtls-dialect= switch",
1435 ix86_tls_dialect_string);
1438 /* Keep nonleaf frame pointers. */
1439 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1440 flag_omit_frame_pointer = 1;
1442 /* If we're doing fast math, we don't care about comparison order
1443 wrt NaNs. This lets us use a shorter comparison sequence. */
1444 if (flag_unsafe_math_optimizations)
1445 target_flags &= ~MASK_IEEE_FP;
1447 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1448 since the insns won't need emulation. */
1449 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1450 target_flags &= ~MASK_NO_FANCY_MATH_387;
1452 /* Turn on SSE2 builtins for -msse3. */
1454 target_flags |= MASK_SSE2;
1456 /* Turn on SSE builtins for -msse2. */
1458 target_flags |= MASK_SSE;
1462 if (TARGET_ALIGN_DOUBLE)
1463 error ("-malign-double makes no sense in the 64bit mode");
1465 error ("-mrtd calling convention not supported in the 64bit mode");
1466 /* Enable by default the SSE and MMX builtins. */
1467 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1468 ix86_fpmath = FPMATH_SSE;
1472 ix86_fpmath = FPMATH_387;
1473 /* i386 ABI does not specify red zone. It still makes sense to use it
1474 when programmer takes care to stack from being destroyed. */
1475 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1476 target_flags |= MASK_NO_RED_ZONE;
1479 if (ix86_fpmath_string != 0)
1481 if (! strcmp (ix86_fpmath_string, "387"))
1482 ix86_fpmath = FPMATH_387;
1483 else if (! strcmp (ix86_fpmath_string, "sse"))
1487 warning ("SSE instruction set disabled, using 387 arithmetics");
1488 ix86_fpmath = FPMATH_387;
1491 ix86_fpmath = FPMATH_SSE;
1493 else if (! strcmp (ix86_fpmath_string, "387,sse")
1494 || ! strcmp (ix86_fpmath_string, "sse,387"))
1498 warning ("SSE instruction set disabled, using 387 arithmetics");
1499 ix86_fpmath = FPMATH_387;
1501 else if (!TARGET_80387)
1503 warning ("387 instruction set disabled, using SSE arithmetics");
1504 ix86_fpmath = FPMATH_SSE;
1507 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1510 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1513 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1517 target_flags |= MASK_MMX;
1518 x86_prefetch_sse = true;
1521 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1524 target_flags |= MASK_MMX;
1525 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1526 extensions it adds. */
1527 if (x86_3dnow_a & (1 << ix86_arch))
1528 target_flags |= MASK_3DNOW_A;
1530 if ((x86_accumulate_outgoing_args & TUNEMASK)
1531 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1533 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1535 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1538 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1539 p = strchr (internal_label_prefix, 'X');
1540 internal_label_prefix_len = p - internal_label_prefix;
1546 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1548 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1549 make the problem with not enough registers even worse. */
1550 #ifdef INSN_SCHEDULING
1552 flag_schedule_insns = 0;
1555 /* The default values of these switches depend on the TARGET_64BIT
1556 that is not known at this moment. Mark these values with 2 and
1557 let user the to override these. In case there is no command line option
1558 specifying them, we will set the defaults in override_options. */
1560 flag_omit_frame_pointer = 2;
1561 flag_pcc_struct_return = 2;
1562 flag_asynchronous_unwind_tables = 2;
1565 /* Table of valid machine attributes. */
1566 const struct attribute_spec ix86_attribute_table[] =
1568 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1569 /* Stdcall attribute says callee is responsible for popping arguments
1570 if they are not variable. */
1571 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1572 /* Fastcall attribute says callee is responsible for popping arguments
1573 if they are not variable. */
1574 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1575 /* Cdecl attribute says the callee is a normal C declaration */
1576 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1577 /* Regparm attribute specifies how many integer arguments are to be
1578 passed in registers. */
1579 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1580 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1581 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1582 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1583 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1585 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1586 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1587 { NULL, 0, 0, false, false, false, NULL }
1590 /* Decide whether we can make a sibling call to a function. DECL is the
1591 declaration of the function being targeted by the call and EXP is the
1592 CALL_EXPR representing the call. */
1595 ix86_function_ok_for_sibcall (tree decl, tree exp)
1597 /* If we are generating position-independent code, we cannot sibcall
1598 optimize any indirect call, or a direct call to a global function,
1599 as the PLT requires %ebx be live. */
1600 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1603 /* If we are returning floats on the 80387 register stack, we cannot
1604 make a sibcall from a function that doesn't return a float to a
1605 function that does or, conversely, from a function that does return
1606 a float to a function that doesn't; the necessary stack adjustment
1607 would not be executed. */
1608 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1609 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1612 /* If this call is indirect, we'll need to be able to use a call-clobbered
1613 register for the address of the target function. Make sure that all
1614 such registers are not used for passing parameters. */
1615 if (!decl && !TARGET_64BIT)
1619 /* We're looking at the CALL_EXPR, we need the type of the function. */
1620 type = TREE_OPERAND (exp, 0); /* pointer expression */
1621 type = TREE_TYPE (type); /* pointer type */
1622 type = TREE_TYPE (type); /* function type */
1624 if (ix86_function_regparm (type, NULL) >= 3)
1626 /* ??? Need to count the actual number of registers to be used,
1627 not the possible number of registers. Fix later. */
1632 /* Otherwise okay. That also includes certain types of indirect calls. */
1636 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1637 arguments as in struct attribute_spec.handler. */
1639 ix86_handle_cdecl_attribute (tree *node, tree name,
1640 tree args ATTRIBUTE_UNUSED,
1641 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1643 if (TREE_CODE (*node) != FUNCTION_TYPE
1644 && TREE_CODE (*node) != METHOD_TYPE
1645 && TREE_CODE (*node) != FIELD_DECL
1646 && TREE_CODE (*node) != TYPE_DECL)
1648 warning ("`%s' attribute only applies to functions",
1649 IDENTIFIER_POINTER (name));
1650 *no_add_attrs = true;
1654 if (is_attribute_p ("fastcall", name))
1656 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1658 error ("fastcall and stdcall attributes are not compatible");
1660 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1662 error ("fastcall and regparm attributes are not compatible");
1665 else if (is_attribute_p ("stdcall", name))
1667 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1669 error ("fastcall and stdcall attributes are not compatible");
1676 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1677 *no_add_attrs = true;
1683 /* Handle a "regparm" attribute;
1684 arguments as in struct attribute_spec.handler. */
1686 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1687 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1689 if (TREE_CODE (*node) != FUNCTION_TYPE
1690 && TREE_CODE (*node) != METHOD_TYPE
1691 && TREE_CODE (*node) != FIELD_DECL
1692 && TREE_CODE (*node) != TYPE_DECL)
1694 warning ("`%s' attribute only applies to functions",
1695 IDENTIFIER_POINTER (name));
1696 *no_add_attrs = true;
1702 cst = TREE_VALUE (args);
1703 if (TREE_CODE (cst) != INTEGER_CST)
1705 warning ("`%s' attribute requires an integer constant argument",
1706 IDENTIFIER_POINTER (name));
1707 *no_add_attrs = true;
1709 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1711 warning ("argument to `%s' attribute larger than %d",
1712 IDENTIFIER_POINTER (name), REGPARM_MAX);
1713 *no_add_attrs = true;
1716 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1718 error ("fastcall and regparm attributes are not compatible");
1725 /* Return 0 if the attributes for two types are incompatible, 1 if they
1726 are compatible, and 2 if they are nearly compatible (which causes a
1727 warning to be generated). */
1730 ix86_comp_type_attributes (tree type1, tree type2)
1732 /* Check for mismatch of non-default calling convention. */
1733 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1735 if (TREE_CODE (type1) != FUNCTION_TYPE)
1738 /* Check for mismatched fastcall types */
1739 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1740 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1743 /* Check for mismatched return types (cdecl vs stdcall). */
1744 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1745 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1747 if (ix86_function_regparm (type1, NULL)
1748 != ix86_function_regparm (type2, NULL))
1753 /* Return the regparm value for a fuctio with the indicated TYPE and DECL.
1754 DECL may be NULL when calling function indirectly
1755 or considering a libcall. */
1758 ix86_function_regparm (tree type, tree decl)
1761 int regparm = ix86_regparm;
1762 bool user_convention = false;
1766 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1769 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1770 user_convention = true;
1773 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1776 user_convention = true;
1779 /* Use register calling convention for local functions when possible. */
1780 if (!TARGET_64BIT && !user_convention && decl
1781 && flag_unit_at_a_time && !profile_flag)
1783 struct cgraph_local_info *i = cgraph_local_info (decl);
1786 /* We can't use regparm(3) for nested functions as these use
1787 static chain pointer in third argument. */
1788 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1798 /* Return true if EAX is live at the start of the function. Used by
1799 ix86_expand_prologue to determine if we need special help before
1800 calling allocate_stack_worker. */
1803 ix86_eax_live_at_start_p (void)
1805 /* Cheat. Don't bother working forward from ix86_function_regparm
1806 to the function type to whether an actual argument is located in
1807 eax. Instead just look at cfg info, which is still close enough
1808 to correct at this point. This gives false positives for broken
1809 functions that might use uninitialized data that happens to be
1810 allocated in eax, but who cares? */
1811 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1814 /* Value is the number of bytes of arguments automatically
1815 popped when returning from a subroutine call.
1816 FUNDECL is the declaration node of the function (as a tree),
1817 FUNTYPE is the data type of the function (as a tree),
1818 or for a library call it is an identifier node for the subroutine name.
1819 SIZE is the number of bytes of arguments passed on the stack.
1821 On the 80386, the RTD insn may be used to pop them if the number
1822 of args is fixed, but if the number is variable then the caller
1823 must pop them all. RTD can't be used for library calls now
1824 because the library is compiled with the Unix compiler.
1825 Use of RTD is a selectable option, since it is incompatible with
1826 standard Unix calling sequences. If the option is not selected,
1827 the caller must always pop the args.
1829 The attribute stdcall is equivalent to RTD on a per module basis. */
1832 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1834 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1836 /* Cdecl functions override -mrtd, and never pop the stack. */
1837 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1839 /* Stdcall and fastcall functions will pop the stack if not
1841 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1842 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1846 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1847 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1848 == void_type_node)))
1852 /* Lose any fake structure return argument if it is passed on the stack. */
1853 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1856 int nregs = ix86_function_regparm (funtype, fundecl);
1859 return GET_MODE_SIZE (Pmode);
1865 /* Argument support functions. */
1867 /* Return true when register may be used to pass function parameters. */
1869 ix86_function_arg_regno_p (int regno)
1873 return (regno < REGPARM_MAX
1874 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1875 if (SSE_REGNO_P (regno) && TARGET_SSE)
1877 /* RAX is used as hidden argument to va_arg functions. */
1880 for (i = 0; i < REGPARM_MAX; i++)
1881 if (regno == x86_64_int_parameter_registers[i])
1886 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1887 for a call to a function whose data type is FNTYPE.
1888 For a library call, FNTYPE is 0. */
1891 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1892 tree fntype, /* tree ptr for function decl */
1893 rtx libname, /* SYMBOL_REF of library name or 0 */
1896 static CUMULATIVE_ARGS zero_cum;
1897 tree param, next_param;
1899 if (TARGET_DEBUG_ARG)
1901 fprintf (stderr, "\ninit_cumulative_args (");
1903 fprintf (stderr, "fntype code = %s, ret code = %s",
1904 tree_code_name[(int) TREE_CODE (fntype)],
1905 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1907 fprintf (stderr, "no fntype");
1910 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1915 /* Set up the number of registers to use for passing arguments. */
1917 cum->nregs = ix86_function_regparm (fntype, fndecl);
1919 cum->nregs = ix86_regparm;
1920 cum->sse_nregs = SSE_REGPARM_MAX;
1921 cum->mmx_nregs = MMX_REGPARM_MAX;
1922 cum->warn_sse = true;
1923 cum->warn_mmx = true;
1924 cum->maybe_vaarg = false;
1926 /* Use ecx and edx registers if function has fastcall attribute */
1927 if (fntype && !TARGET_64BIT)
1929 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1937 /* Determine if this function has variable arguments. This is
1938 indicated by the last argument being 'void_type_mode' if there
1939 are no variable arguments. If there are variable arguments, then
1940 we won't pass anything in registers */
1942 if (cum->nregs || !TARGET_MMX || !TARGET_SSE)
1944 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1945 param != 0; param = next_param)
1947 next_param = TREE_CHAIN (param);
1948 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1959 cum->maybe_vaarg = true;
1963 if ((!fntype && !libname)
1964 || (fntype && !TYPE_ARG_TYPES (fntype)))
1965 cum->maybe_vaarg = 1;
1967 if (TARGET_DEBUG_ARG)
1968 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1973 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1974 of this code is to classify each 8bytes of incoming argument by the register
1975 class and assign registers accordingly. */
1977 /* Return the union class of CLASS1 and CLASS2.
1978 See the x86-64 PS ABI for details. */
1980 static enum x86_64_reg_class
1981 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1983 /* Rule #1: If both classes are equal, this is the resulting class. */
1984 if (class1 == class2)
1987 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1989 if (class1 == X86_64_NO_CLASS)
1991 if (class2 == X86_64_NO_CLASS)
1994 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1995 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1996 return X86_64_MEMORY_CLASS;
1998 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1999 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2000 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2001 return X86_64_INTEGERSI_CLASS;
2002 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2003 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2004 return X86_64_INTEGER_CLASS;
2006 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
2007 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
2008 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
2009 return X86_64_MEMORY_CLASS;
2011 /* Rule #6: Otherwise class SSE is used. */
2012 return X86_64_SSE_CLASS;
2015 /* Classify the argument of type TYPE and mode MODE.
2016 CLASSES will be filled by the register class used to pass each word
2017 of the operand. The number of words is returned. In case the parameter
2018 should be passed in memory, 0 is returned. As a special case for zero
2019 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2021 BIT_OFFSET is used internally for handling records and specifies offset
2022 of the offset in bits modulo 256 to avoid overflow cases.
2024 See the x86-64 PS ABI for details.
2028 classify_argument (enum machine_mode mode, tree type,
2029 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2031 HOST_WIDE_INT bytes =
2032 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2033 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2035 /* Variable sized entities are always passed/returned in memory. */
2039 if (mode != VOIDmode
2040 && MUST_PASS_IN_STACK (mode, type))
2043 if (type && AGGREGATE_TYPE_P (type))
2047 enum x86_64_reg_class subclasses[MAX_CLASSES];
2049 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2053 for (i = 0; i < words; i++)
2054 classes[i] = X86_64_NO_CLASS;
2056 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2057 signalize memory class, so handle it as special case. */
2060 classes[0] = X86_64_NO_CLASS;
2064 /* Classify each field of record and merge classes. */
2065 if (TREE_CODE (type) == RECORD_TYPE)
2067 /* For classes first merge in the field of the subclasses. */
2068 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2070 tree bases = TYPE_BINFO_BASETYPES (type);
2071 int n_bases = TREE_VEC_LENGTH (bases);
2074 for (i = 0; i < n_bases; ++i)
2076 tree binfo = TREE_VEC_ELT (bases, i);
2078 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2079 tree type = BINFO_TYPE (binfo);
2081 num = classify_argument (TYPE_MODE (type),
2083 (offset + bit_offset) % 256);
2086 for (i = 0; i < num; i++)
2088 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2090 merge_classes (subclasses[i], classes[i + pos]);
2094 /* And now merge the fields of structure. */
2095 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2097 if (TREE_CODE (field) == FIELD_DECL)
2101 /* Bitfields are always classified as integer. Handle them
2102 early, since later code would consider them to be
2103 misaligned integers. */
2104 if (DECL_BIT_FIELD (field))
2106 for (i = int_bit_position (field) / 8 / 8;
2107 i < (int_bit_position (field)
2108 + tree_low_cst (DECL_SIZE (field), 0)
2111 merge_classes (X86_64_INTEGER_CLASS,
2116 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2117 TREE_TYPE (field), subclasses,
2118 (int_bit_position (field)
2119 + bit_offset) % 256);
2122 for (i = 0; i < num; i++)
2125 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2127 merge_classes (subclasses[i], classes[i + pos]);
2133 /* Arrays are handled as small records. */
2134 else if (TREE_CODE (type) == ARRAY_TYPE)
2137 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2138 TREE_TYPE (type), subclasses, bit_offset);
2142 /* The partial classes are now full classes. */
2143 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2144 subclasses[0] = X86_64_SSE_CLASS;
2145 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2146 subclasses[0] = X86_64_INTEGER_CLASS;
2148 for (i = 0; i < words; i++)
2149 classes[i] = subclasses[i % num];
2151 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2152 else if (TREE_CODE (type) == UNION_TYPE
2153 || TREE_CODE (type) == QUAL_UNION_TYPE)
2155 /* For classes first merge in the field of the subclasses. */
2156 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2158 tree bases = TYPE_BINFO_BASETYPES (type);
2159 int n_bases = TREE_VEC_LENGTH (bases);
2162 for (i = 0; i < n_bases; ++i)
2164 tree binfo = TREE_VEC_ELT (bases, i);
2166 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2167 tree type = BINFO_TYPE (binfo);
2169 num = classify_argument (TYPE_MODE (type),
2171 (offset + (bit_offset % 64)) % 256);
2174 for (i = 0; i < num; i++)
2176 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2178 merge_classes (subclasses[i], classes[i + pos]);
2182 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2184 if (TREE_CODE (field) == FIELD_DECL)
2187 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2188 TREE_TYPE (field), subclasses,
2192 for (i = 0; i < num; i++)
2193 classes[i] = merge_classes (subclasses[i], classes[i]);
2197 else if (TREE_CODE (type) == SET_TYPE)
2201 classes[0] = X86_64_INTEGERSI_CLASS;
2204 else if (bytes <= 8)
2206 classes[0] = X86_64_INTEGER_CLASS;
2209 else if (bytes <= 12)
2211 classes[0] = X86_64_INTEGER_CLASS;
2212 classes[1] = X86_64_INTEGERSI_CLASS;
2217 classes[0] = X86_64_INTEGER_CLASS;
2218 classes[1] = X86_64_INTEGER_CLASS;
2225 /* Final merger cleanup. */
2226 for (i = 0; i < words; i++)
2228 /* If one class is MEMORY, everything should be passed in
2230 if (classes[i] == X86_64_MEMORY_CLASS)
2233 /* The X86_64_SSEUP_CLASS should be always preceded by
2234 X86_64_SSE_CLASS. */
2235 if (classes[i] == X86_64_SSEUP_CLASS
2236 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2237 classes[i] = X86_64_SSE_CLASS;
2239 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2240 if (classes[i] == X86_64_X87UP_CLASS
2241 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2242 classes[i] = X86_64_SSE_CLASS;
2247 /* Compute alignment needed. We align all types to natural boundaries with
2248 exception of XFmode that is aligned to 64bits. */
2249 if (mode != VOIDmode && mode != BLKmode)
2251 int mode_alignment = GET_MODE_BITSIZE (mode);
2254 mode_alignment = 128;
2255 else if (mode == XCmode)
2256 mode_alignment = 256;
2257 if (COMPLEX_MODE_P (mode))
2258 mode_alignment /= 2;
2259 /* Misaligned fields are always returned in memory. */
2260 if (bit_offset % mode_alignment)
2264 /* Classification of atomic types. */
2274 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2275 classes[0] = X86_64_INTEGERSI_CLASS;
2277 classes[0] = X86_64_INTEGER_CLASS;
2281 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2284 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2285 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2288 if (!(bit_offset % 64))
2289 classes[0] = X86_64_SSESF_CLASS;
2291 classes[0] = X86_64_SSE_CLASS;
2294 classes[0] = X86_64_SSEDF_CLASS;
2297 classes[0] = X86_64_X87_CLASS;
2298 classes[1] = X86_64_X87UP_CLASS;
2304 classes[0] = X86_64_X87_CLASS;
2305 classes[1] = X86_64_X87UP_CLASS;
2306 classes[2] = X86_64_X87_CLASS;
2307 classes[3] = X86_64_X87UP_CLASS;
2310 classes[0] = X86_64_SSEDF_CLASS;
2311 classes[1] = X86_64_SSEDF_CLASS;
2314 classes[0] = X86_64_SSE_CLASS;
2322 classes[0] = X86_64_SSE_CLASS;
2323 classes[1] = X86_64_SSEUP_CLASS;
2338 /* Examine the argument and return set number of register required in each
2339 class. Return 0 iff parameter should be passed in memory. */
2341 examine_argument (enum machine_mode mode, tree type, int in_return,
2342 int *int_nregs, int *sse_nregs)
2344 enum x86_64_reg_class class[MAX_CLASSES];
2345 int n = classify_argument (mode, type, class, 0);
2351 for (n--; n >= 0; n--)
2354 case X86_64_INTEGER_CLASS:
2355 case X86_64_INTEGERSI_CLASS:
2358 case X86_64_SSE_CLASS:
2359 case X86_64_SSESF_CLASS:
2360 case X86_64_SSEDF_CLASS:
2363 case X86_64_NO_CLASS:
2364 case X86_64_SSEUP_CLASS:
2366 case X86_64_X87_CLASS:
2367 case X86_64_X87UP_CLASS:
2371 case X86_64_MEMORY_CLASS:
2376 /* Construct container for the argument used by GCC interface. See
2377 FUNCTION_ARG for the detailed description. */
2379 construct_container (enum machine_mode mode, tree type, int in_return,
2380 int nintregs, int nsseregs, const int * intreg,
2383 enum machine_mode tmpmode;
2385 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2386 enum x86_64_reg_class class[MAX_CLASSES];
2390 int needed_sseregs, needed_intregs;
2391 rtx exp[MAX_CLASSES];
2394 n = classify_argument (mode, type, class, 0);
2395 if (TARGET_DEBUG_ARG)
2398 fprintf (stderr, "Memory class\n");
2401 fprintf (stderr, "Classes:");
2402 for (i = 0; i < n; i++)
2404 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2406 fprintf (stderr, "\n");
2411 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2413 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2416 /* First construct simple cases. Avoid SCmode, since we want to use
2417 single register to pass this type. */
2418 if (n == 1 && mode != SCmode)
2421 case X86_64_INTEGER_CLASS:
2422 case X86_64_INTEGERSI_CLASS:
2423 return gen_rtx_REG (mode, intreg[0]);
2424 case X86_64_SSE_CLASS:
2425 case X86_64_SSESF_CLASS:
2426 case X86_64_SSEDF_CLASS:
2427 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2428 case X86_64_X87_CLASS:
2429 return gen_rtx_REG (mode, FIRST_STACK_REG);
2430 case X86_64_NO_CLASS:
2431 /* Zero sized array, struct or class. */
2436 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2438 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2440 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2441 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2442 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2443 && class[1] == X86_64_INTEGER_CLASS
2444 && (mode == CDImode || mode == TImode || mode == TFmode)
2445 && intreg[0] + 1 == intreg[1])
2446 return gen_rtx_REG (mode, intreg[0]);
2448 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2449 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
2451 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2453 /* Otherwise figure out the entries of the PARALLEL. */
2454 for (i = 0; i < n; i++)
2458 case X86_64_NO_CLASS:
2460 case X86_64_INTEGER_CLASS:
2461 case X86_64_INTEGERSI_CLASS:
2462 /* Merge TImodes on aligned occasions here too. */
2463 if (i * 8 + 8 > bytes)
2464 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2465 else if (class[i] == X86_64_INTEGERSI_CLASS)
2469 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2470 if (tmpmode == BLKmode)
2472 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2473 gen_rtx_REG (tmpmode, *intreg),
2477 case X86_64_SSESF_CLASS:
2478 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2479 gen_rtx_REG (SFmode,
2480 SSE_REGNO (sse_regno)),
2484 case X86_64_SSEDF_CLASS:
2485 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2486 gen_rtx_REG (DFmode,
2487 SSE_REGNO (sse_regno)),
2491 case X86_64_SSE_CLASS:
2492 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2496 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2497 gen_rtx_REG (tmpmode,
2498 SSE_REGNO (sse_regno)),
2500 if (tmpmode == TImode)
2508 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2509 for (i = 0; i < nexps; i++)
2510 XVECEXP (ret, 0, i) = exp [i];
2514 /* Update the data in CUM to advance over an argument
2515 of mode MODE and data type TYPE.
2516 (TYPE is null for libcalls where that information may not be available.) */
2519 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2520 enum machine_mode mode, /* current arg mode */
2521 tree type, /* type of the argument or 0 if lib support */
2522 int named) /* whether or not the argument was named */
2525 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2526 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2528 if (TARGET_DEBUG_ARG)
2530 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2531 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2534 int int_nregs, sse_nregs;
2535 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2536 cum->words += words;
2537 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2539 cum->nregs -= int_nregs;
2540 cum->sse_nregs -= sse_nregs;
2541 cum->regno += int_nregs;
2542 cum->sse_regno += sse_nregs;
2545 cum->words += words;
2549 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2550 && (!type || !AGGREGATE_TYPE_P (type)))
2552 cum->sse_words += words;
2553 cum->sse_nregs -= 1;
2554 cum->sse_regno += 1;
2555 if (cum->sse_nregs <= 0)
2561 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2562 && (!type || !AGGREGATE_TYPE_P (type)))
2564 cum->mmx_words += words;
2565 cum->mmx_nregs -= 1;
2566 cum->mmx_regno += 1;
2567 if (cum->mmx_nregs <= 0)
2575 cum->words += words;
2576 cum->nregs -= words;
2577 cum->regno += words;
2579 if (cum->nregs <= 0)
2589 /* Define where to put the arguments to a function.
2590 Value is zero to push the argument on the stack,
2591 or a hard register in which to store the argument.
2593 MODE is the argument's machine mode.
2594 TYPE is the data type of the argument (as a tree).
2595 This is null for libcalls where that information may
2597 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2598 the preceding args and about the function being called.
2599 NAMED is nonzero if this argument is a named parameter
2600 (otherwise it is an extra parameter matching an ellipsis). */
2603 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2604 enum machine_mode mode, /* current arg mode */
2605 tree type, /* type of the argument or 0 if lib support */
2606 int named) /* != 0 for normal args, == 0 for ... args */
2610 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2611 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2612 static bool warnedsse, warnedmmx;
2614 /* Handle a hidden AL argument containing number of registers for varargs
2615 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2617 if (mode == VOIDmode)
2620 return GEN_INT (cum->maybe_vaarg
2621 ? (cum->sse_nregs < 0
2629 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2630 &x86_64_int_parameter_registers [cum->regno],
2635 /* For now, pass fp/complex values on the stack. */
2647 if (words <= cum->nregs)
2649 int regno = cum->regno;
2651 /* Fastcall allocates the first two DWORD (SImode) or
2652 smaller arguments to ECX and EDX. */
2655 if (mode == BLKmode || mode == DImode)
2658 /* ECX not EAX is the first allocated register. */
2662 ret = gen_rtx_REG (mode, regno);
2672 if (!type || !AGGREGATE_TYPE_P (type))
2674 if (!TARGET_SSE && !warnedmmx && cum->warn_sse)
2677 warning ("SSE vector argument without SSE enabled "
2681 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2688 if (!type || !AGGREGATE_TYPE_P (type))
2690 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2693 warning ("MMX vector argument without MMX enabled "
2697 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2702 if (TARGET_DEBUG_ARG)
2705 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2706 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2709 print_simple_rtl (stderr, ret);
2711 fprintf (stderr, ", stack");
2713 fprintf (stderr, " )\n");
2719 /* A C expression that indicates when an argument must be passed by
2720 reference. If nonzero for an argument, a copy of that argument is
2721 made in memory and a pointer to the argument is passed instead of
2722 the argument itself. The pointer is passed in whatever way is
2723 appropriate for passing a pointer to that type. */
2726 function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2727 enum machine_mode mode ATTRIBUTE_UNUSED,
2728 tree type, int named ATTRIBUTE_UNUSED)
2733 if (type && int_size_in_bytes (type) == -1)
2735 if (TARGET_DEBUG_ARG)
2736 fprintf (stderr, "function_arg_pass_by_reference\n");
2743 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2746 contains_128bit_aligned_vector_p (tree type)
2748 enum machine_mode mode = TYPE_MODE (type);
2749 if (SSE_REG_MODE_P (mode)
2750 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2752 if (TYPE_ALIGN (type) < 128)
2755 if (AGGREGATE_TYPE_P (type))
2757 /* Walk the aggregates recursively. */
2758 if (TREE_CODE (type) == RECORD_TYPE
2759 || TREE_CODE (type) == UNION_TYPE
2760 || TREE_CODE (type) == QUAL_UNION_TYPE)
2764 if (TYPE_BINFO (type) != NULL
2765 && TYPE_BINFO_BASETYPES (type) != NULL)
2767 tree bases = TYPE_BINFO_BASETYPES (type);
2768 int n_bases = TREE_VEC_LENGTH (bases);
2771 for (i = 0; i < n_bases; ++i)
2773 tree binfo = TREE_VEC_ELT (bases, i);
2774 tree type = BINFO_TYPE (binfo);
2776 if (contains_128bit_aligned_vector_p (type))
2780 /* And now merge the fields of structure. */
2781 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2783 if (TREE_CODE (field) == FIELD_DECL
2784 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2788 /* Just for use if some languages passes arrays by value. */
2789 else if (TREE_CODE (type) == ARRAY_TYPE)
2791 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2800 /* Gives the alignment boundary, in bits, of an argument with the
2801 specified mode and type. */
2804 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2808 align = TYPE_ALIGN (type);
2810 align = GET_MODE_ALIGNMENT (mode);
2811 if (align < PARM_BOUNDARY)
2812 align = PARM_BOUNDARY;
2815 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2816 make an exception for SSE modes since these require 128bit
2819 The handling here differs from field_alignment. ICC aligns MMX
2820 arguments to 4 byte boundaries, while structure fields are aligned
2821 to 8 byte boundaries. */
2824 if (!SSE_REG_MODE_P (mode))
2825 align = PARM_BOUNDARY;
2829 if (!contains_128bit_aligned_vector_p (type))
2830 align = PARM_BOUNDARY;
2838 /* Return true if N is a possible register number of function value. */
2840 ix86_function_value_regno_p (int regno)
2844 return ((regno) == 0
2845 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2846 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2848 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2849 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2850 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2853 /* Define how to find the value returned by a function.
2854 VALTYPE is the data type of the value (as a tree).
2855 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2856 otherwise, FUNC is 0. */
2858 ix86_function_value (tree valtype)
2862 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2863 REGPARM_MAX, SSE_REGPARM_MAX,
2864 x86_64_int_return_registers, 0);
2865 /* For zero sized structures, construct_container return NULL, but we need
2866 to keep rest of compiler happy by returning meaningful value. */
2868 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2872 return gen_rtx_REG (TYPE_MODE (valtype),
2873 ix86_value_regno (TYPE_MODE (valtype)));
2876 /* Return false iff type is returned in memory. */
2878 ix86_return_in_memory (tree type)
2880 int needed_intregs, needed_sseregs, size;
2881 enum machine_mode mode = TYPE_MODE (type);
2884 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2886 if (mode == BLKmode)
2889 size = int_size_in_bytes (type);
2891 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2894 if (VECTOR_MODE_P (mode) || mode == TImode)
2896 /* User-created vectors small enough to fit in EAX. */
2900 /* MMX/3dNow values are returned on the stack, since we've
2901 got to EMMS/FEMMS before returning. */
2905 /* SSE values are returned in XMM0, except when it doesn't exist. */
2907 return (TARGET_SSE ? 0 : 1);
2918 /* When returning SSE vector types, we have a choice of either
2919 (1) being abi incompatible with a -march switch, or
2920 (2) generating an error.
2921 Given no good solution, I think the safest thing is one warning.
2922 The user won't be able to use -Werror, but....
2924 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
2925 called in response to actually generating a caller or callee that
2926 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
2927 via aggregate_value_p for general type probing from tree-ssa. */
2930 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
2934 if (!TARGET_SSE && type && !warned)
2936 /* Look at the return type of the function, not the function type. */
2937 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
2940 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2943 warning ("SSE vector return without SSE enabled changes the ABI");
2950 /* Define how to find the value returned by a library function
2951 assuming the value has mode MODE. */
2953 ix86_libcall_value (enum machine_mode mode)
2963 return gen_rtx_REG (mode, FIRST_SSE_REG);
2966 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2971 return gen_rtx_REG (mode, 0);
2975 return gen_rtx_REG (mode, ix86_value_regno (mode));
2978 /* Given a mode, return the register to use for a return value. */
2981 ix86_value_regno (enum machine_mode mode)
2983 /* Floating point return values in %st(0). */
2984 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2985 return FIRST_FLOAT_REG;
2986 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2987 we prevent this case when sse is not available. */
2988 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2989 return FIRST_SSE_REG;
2990 /* Everything else in %eax. */
2994 /* Create the va_list data type. */
2997 ix86_build_builtin_va_list (void)
2999 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3001 /* For i386 we use plain pointer to argument area. */
3003 return build_pointer_type (char_type_node);
3005 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3006 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3008 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3009 unsigned_type_node);
3010 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3011 unsigned_type_node);
3012 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3014 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3017 DECL_FIELD_CONTEXT (f_gpr) = record;
3018 DECL_FIELD_CONTEXT (f_fpr) = record;
3019 DECL_FIELD_CONTEXT (f_ovf) = record;
3020 DECL_FIELD_CONTEXT (f_sav) = record;
3022 TREE_CHAIN (record) = type_decl;
3023 TYPE_NAME (record) = type_decl;
3024 TYPE_FIELDS (record) = f_gpr;
3025 TREE_CHAIN (f_gpr) = f_fpr;
3026 TREE_CHAIN (f_fpr) = f_ovf;
3027 TREE_CHAIN (f_ovf) = f_sav;
3029 layout_type (record);
3031 /* The correct type is an array type of one element. */
3032 return build_array_type (record, build_index_type (size_zero_node));
3035 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3038 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3039 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3042 CUMULATIVE_ARGS next_cum;
3043 rtx save_area = NULL_RTX, mem;
3056 /* Indicate to allocate space on the stack for varargs save area. */
3057 ix86_save_varrargs_registers = 1;
3059 cfun->stack_alignment_needed = 128;
3061 fntype = TREE_TYPE (current_function_decl);
3062 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3063 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3064 != void_type_node));
3066 /* For varargs, we do not want to skip the dummy va_dcl argument.
3067 For stdargs, we do want to skip the last named argument. */
3070 function_arg_advance (&next_cum, mode, type, 1);
3073 save_area = frame_pointer_rtx;
3075 set = get_varargs_alias_set ();
3077 for (i = next_cum.regno; i < ix86_regparm; i++)
3079 mem = gen_rtx_MEM (Pmode,
3080 plus_constant (save_area, i * UNITS_PER_WORD));
3081 set_mem_alias_set (mem, set);
3082 emit_move_insn (mem, gen_rtx_REG (Pmode,
3083 x86_64_int_parameter_registers[i]));
3086 if (next_cum.sse_nregs)
3088 /* Now emit code to save SSE registers. The AX parameter contains number
3089 of SSE parameter registers used to call this function. We use
3090 sse_prologue_save insn template that produces computed jump across
3091 SSE saves. We need some preparation work to get this working. */
3093 label = gen_label_rtx ();
3094 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3096 /* Compute address to jump to :
3097 label - 5*eax + nnamed_sse_arguments*5 */
3098 tmp_reg = gen_reg_rtx (Pmode);
3099 nsse_reg = gen_reg_rtx (Pmode);
3100 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3101 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3102 gen_rtx_MULT (Pmode, nsse_reg,
3104 if (next_cum.sse_regno)
3107 gen_rtx_CONST (DImode,
3108 gen_rtx_PLUS (DImode,
3110 GEN_INT (next_cum.sse_regno * 4))));
3112 emit_move_insn (nsse_reg, label_ref);
3113 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3115 /* Compute address of memory block we save into. We always use pointer
3116 pointing 127 bytes after first byte to store - this is needed to keep
3117 instruction size limited by 4 bytes. */
3118 tmp_reg = gen_reg_rtx (Pmode);
3119 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3120 plus_constant (save_area,
3121 8 * REGPARM_MAX + 127)));
3122 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3123 set_mem_alias_set (mem, set);
3124 set_mem_align (mem, BITS_PER_WORD);
3126 /* And finally do the dirty job! */
3127 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3128 GEN_INT (next_cum.sse_regno), label));
3133 /* Implement va_start. */
3136 ix86_va_start (tree valist, rtx nextarg)
3138 HOST_WIDE_INT words, n_gpr, n_fpr;
3139 tree f_gpr, f_fpr, f_ovf, f_sav;
3140 tree gpr, fpr, ovf, sav, t;
3142 /* Only 64bit target needs something special. */
3145 std_expand_builtin_va_start (valist, nextarg);
3149 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3150 f_fpr = TREE_CHAIN (f_gpr);
3151 f_ovf = TREE_CHAIN (f_fpr);
3152 f_sav = TREE_CHAIN (f_ovf);
3154 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3155 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3156 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3157 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3158 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3160 /* Count number of gp and fp argument registers used. */
3161 words = current_function_args_info.words;
3162 n_gpr = current_function_args_info.regno;
3163 n_fpr = current_function_args_info.sse_regno;
3165 if (TARGET_DEBUG_ARG)
3166 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3167 (int) words, (int) n_gpr, (int) n_fpr);
3169 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3170 build_int_2 (n_gpr * 8, 0));
3171 TREE_SIDE_EFFECTS (t) = 1;
3172 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3174 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3175 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3176 TREE_SIDE_EFFECTS (t) = 1;
3177 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3179 /* Find the overflow area. */
3180 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3182 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3183 build_int_2 (words * UNITS_PER_WORD, 0));
3184 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3185 TREE_SIDE_EFFECTS (t) = 1;
3186 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3188 /* Find the register save area.
3189 Prologue of the function save it right above stack frame. */
3190 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3191 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3192 TREE_SIDE_EFFECTS (t) = 1;
3193 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3196 /* Implement va_arg. */
3199 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3201 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3202 tree f_gpr, f_fpr, f_ovf, f_sav;
3203 tree gpr, fpr, ovf, sav, t;
3205 tree lab_false, lab_over = NULL_TREE;
3211 /* Only 64bit target needs something special. */
3213 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3215 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3216 f_fpr = TREE_CHAIN (f_gpr);
3217 f_ovf = TREE_CHAIN (f_fpr);
3218 f_sav = TREE_CHAIN (f_ovf);
3220 valist = build_fold_indirect_ref (valist);
3221 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3222 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3223 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3224 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3226 size = int_size_in_bytes (type);
3229 /* Variable-size types are passed by reference. */
3231 type = build_pointer_type (type);
3232 size = int_size_in_bytes (type);
3234 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3236 container = construct_container (TYPE_MODE (type), type, 0,
3237 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3239 * Pull the value out of the saved registers ...
3242 addr = create_tmp_var (ptr_type_node, "addr");
3243 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3247 int needed_intregs, needed_sseregs;
3249 tree int_addr, sse_addr;
3251 lab_false = create_artificial_label ();
3252 lab_over = create_artificial_label ();
3254 examine_argument (TYPE_MODE (type), type, 0,
3255 &needed_intregs, &needed_sseregs);
3258 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3259 || TYPE_ALIGN (type) > 128);
3261 /* In case we are passing structure, verify that it is consecutive block
3262 on the register save area. If not we need to do moves. */
3263 if (!need_temp && !REG_P (container))
3265 /* Verify that all registers are strictly consecutive */
3266 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3270 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3272 rtx slot = XVECEXP (container, 0, i);
3273 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3274 || INTVAL (XEXP (slot, 1)) != i * 16)
3282 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3284 rtx slot = XVECEXP (container, 0, i);
3285 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3286 || INTVAL (XEXP (slot, 1)) != i * 8)
3298 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3299 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3300 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3301 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3303 /* First ensure that we fit completely in registers. */
3306 t = build_int_2 ((REGPARM_MAX - needed_intregs + 1) * 8, 0);
3307 TREE_TYPE (t) = TREE_TYPE (gpr);
3308 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3309 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3310 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3311 gimplify_and_add (t, pre_p);
3315 t = build_int_2 ((SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3316 + REGPARM_MAX * 8, 0);
3317 TREE_TYPE (t) = TREE_TYPE (fpr);
3318 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3319 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3320 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3321 gimplify_and_add (t, pre_p);
3324 /* Compute index to start of area used for integer regs. */
3327 /* int_addr = gpr + sav; */
3328 t = build2 (PLUS_EXPR, ptr_type_node, sav, gpr);
3329 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3330 gimplify_and_add (t, pre_p);
3334 /* sse_addr = fpr + sav; */
3335 t = build2 (PLUS_EXPR, ptr_type_node, sav, fpr);
3336 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3337 gimplify_and_add (t, pre_p);
3342 tree temp = create_tmp_var (type, "va_arg_tmp");
3345 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3346 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3347 gimplify_and_add (t, pre_p);
3349 for (i = 0; i < XVECLEN (container, 0); i++)
3351 rtx slot = XVECEXP (container, 0, i);
3352 rtx reg = XEXP (slot, 0);
3353 enum machine_mode mode = GET_MODE (reg);
3354 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3355 tree addr_type = build_pointer_type (piece_type);
3358 tree dest_addr, dest;
3360 if (SSE_REGNO_P (REGNO (reg)))
3362 src_addr = sse_addr;
3363 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3367 src_addr = int_addr;
3368 src_offset = REGNO (reg) * 8;
3370 src_addr = fold_convert (addr_type, src_addr);
3371 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3372 size_int (src_offset)));
3373 src = build_fold_indirect_ref (src_addr);
3375 dest_addr = fold_convert (addr_type, addr);
3376 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3377 size_int (INTVAL (XEXP (slot, 1)))));
3378 dest = build_fold_indirect_ref (dest_addr);
3380 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3381 gimplify_and_add (t, pre_p);
3387 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3388 build_int_2 (needed_intregs * 8, 0));
3389 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3390 gimplify_and_add (t, pre_p);
3395 build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3396 build_int_2 (needed_sseregs * 16, 0));
3397 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3398 gimplify_and_add (t, pre_p);
3401 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3402 gimplify_and_add (t, pre_p);
3404 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3405 append_to_statement_list (t, pre_p);
3408 /* ... otherwise out of the overflow area. */
3410 /* Care for on-stack alignment if needed. */
3411 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3415 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3416 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3417 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3419 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3421 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3422 gimplify_and_add (t2, pre_p);
3424 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3425 build_int_2 (rsize * UNITS_PER_WORD, 0));
3426 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3427 gimplify_and_add (t, pre_p);
3431 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3432 append_to_statement_list (t, pre_p);
3435 ptrtype = build_pointer_type (type);
3436 addr = fold_convert (ptrtype, addr);
3439 addr = build_fold_indirect_ref (addr);
3440 return build_fold_indirect_ref (addr);
3443 /* Return nonzero if OP is either a i387 or SSE fp register. */
3445 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3447 return ANY_FP_REG_P (op);
3450 /* Return nonzero if OP is an i387 fp register. */
3452 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3454 return FP_REG_P (op);
3457 /* Return nonzero if OP is a non-fp register_operand. */
3459 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3461 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3464 /* Return nonzero if OP is a register operand other than an
3465 i387 fp register. */
3467 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3469 return register_operand (op, mode) && !FP_REG_P (op);
3472 /* Return nonzero if OP is general operand representable on x86_64. */
3475 x86_64_general_operand (rtx op, enum machine_mode mode)
3478 return general_operand (op, mode);
3479 if (nonimmediate_operand (op, mode))
3481 return x86_64_sign_extended_value (op);
3484 /* Return nonzero if OP is general operand representable on x86_64
3485 as either sign extended or zero extended constant. */
3488 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3491 return general_operand (op, mode);
3492 if (nonimmediate_operand (op, mode))
3494 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3497 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3500 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3503 return nonmemory_operand (op, mode);
3504 if (register_operand (op, mode))
3506 return x86_64_sign_extended_value (op);
3509 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3512 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3514 if (!TARGET_64BIT || !flag_pic)
3515 return nonmemory_operand (op, mode);
3516 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3518 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3523 /* Return nonzero if OPNUM's MEM should be matched
3524 in movabs* patterns. */
3527 ix86_check_movabs (rtx insn, int opnum)
3531 set = PATTERN (insn);
3532 if (GET_CODE (set) == PARALLEL)
3533 set = XVECEXP (set, 0, 0);
3534 if (GET_CODE (set) != SET)
3536 mem = XEXP (set, opnum);
3537 while (GET_CODE (mem) == SUBREG)
3538 mem = SUBREG_REG (mem);
3539 if (GET_CODE (mem) != MEM)
3541 return (volatile_ok || !MEM_VOLATILE_P (mem));
3544 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3547 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3550 return nonmemory_operand (op, mode);
3551 if (register_operand (op, mode))
3553 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3556 /* Return nonzero if OP is immediate operand representable on x86_64. */
3559 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3562 return immediate_operand (op, mode);
3563 return x86_64_sign_extended_value (op);
3566 /* Return nonzero if OP is immediate operand representable on x86_64. */
3569 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3571 return x86_64_zero_extended_value (op);
3574 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3575 for shift & compare patterns, as shifting by 0 does not change flags),
3576 else return zero. */
3579 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3581 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3584 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3585 reference and a constant. */
3588 symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3590 switch (GET_CODE (op))
3598 if (GET_CODE (op) == SYMBOL_REF
3599 || GET_CODE (op) == LABEL_REF
3600 || (GET_CODE (op) == UNSPEC
3601 && (XINT (op, 1) == UNSPEC_GOT
3602 || XINT (op, 1) == UNSPEC_GOTOFF
3603 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3605 if (GET_CODE (op) != PLUS
3606 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3610 if (GET_CODE (op) == SYMBOL_REF
3611 || GET_CODE (op) == LABEL_REF)
3613 /* Only @GOTOFF gets offsets. */
3614 if (GET_CODE (op) != UNSPEC
3615 || XINT (op, 1) != UNSPEC_GOTOFF)
3618 op = XVECEXP (op, 0, 0);
3619 if (GET_CODE (op) == SYMBOL_REF
3620 || GET_CODE (op) == LABEL_REF)
3629 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3632 pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3634 if (GET_CODE (op) != CONST)
3639 if (GET_CODE (op) == UNSPEC
3640 && XINT (op, 1) == UNSPEC_GOTPCREL)
3642 if (GET_CODE (op) == PLUS
3643 && GET_CODE (XEXP (op, 0)) == UNSPEC
3644 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3649 if (GET_CODE (op) == UNSPEC)
3651 if (GET_CODE (op) != PLUS
3652 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3655 if (GET_CODE (op) == UNSPEC)
3661 /* Return true if OP is a symbolic operand that resolves locally. */
3664 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3666 if (GET_CODE (op) == CONST
3667 && GET_CODE (XEXP (op, 0)) == PLUS
3668 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3669 op = XEXP (XEXP (op, 0), 0);
3671 if (GET_CODE (op) == LABEL_REF)
3674 if (GET_CODE (op) != SYMBOL_REF)
3677 if (SYMBOL_REF_LOCAL_P (op))
3680 /* There is, however, a not insubstantial body of code in the rest of
3681 the compiler that assumes it can just stick the results of
3682 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3683 /* ??? This is a hack. Should update the body of the compiler to
3684 always create a DECL an invoke targetm.encode_section_info. */
3685 if (strncmp (XSTR (op, 0), internal_label_prefix,
3686 internal_label_prefix_len) == 0)
3692 /* Test for various thread-local symbols. */
3695 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3697 if (GET_CODE (op) != SYMBOL_REF)
3699 return SYMBOL_REF_TLS_MODEL (op);
3703 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3705 if (GET_CODE (op) != SYMBOL_REF)
3707 return SYMBOL_REF_TLS_MODEL (op) == kind;
3711 global_dynamic_symbolic_operand (rtx op,
3712 enum machine_mode mode ATTRIBUTE_UNUSED)
3714 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3718 local_dynamic_symbolic_operand (rtx op,
3719 enum machine_mode mode ATTRIBUTE_UNUSED)
3721 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3725 initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3727 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3731 local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3733 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3736 /* Test for a valid operand for a call instruction. Don't allow the
3737 arg pointer register or virtual regs since they may decay into
3738 reg + const, which the patterns can't handle. */
3741 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3743 /* Disallow indirect through a virtual register. This leads to
3744 compiler aborts when trying to eliminate them. */
3745 if (GET_CODE (op) == REG
3746 && (op == arg_pointer_rtx
3747 || op == frame_pointer_rtx
3748 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3749 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3752 /* Disallow `call 1234'. Due to varying assembler lameness this
3753 gets either rejected or translated to `call .+1234'. */
3754 if (GET_CODE (op) == CONST_INT)
3757 /* Explicitly allow SYMBOL_REF even if pic. */
3758 if (GET_CODE (op) == SYMBOL_REF)
3761 /* Otherwise we can allow any general_operand in the address. */
3762 return general_operand (op, Pmode);
3765 /* Test for a valid operand for a call instruction. Don't allow the
3766 arg pointer register or virtual regs since they may decay into
3767 reg + const, which the patterns can't handle. */
3770 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3772 /* Disallow indirect through a virtual register. This leads to
3773 compiler aborts when trying to eliminate them. */
3774 if (GET_CODE (op) == REG
3775 && (op == arg_pointer_rtx
3776 || op == frame_pointer_rtx
3777 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3778 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3781 /* Explicitly allow SYMBOL_REF even if pic. */
3782 if (GET_CODE (op) == SYMBOL_REF)
3785 /* Otherwise we can only allow register operands. */
3786 return register_operand (op, Pmode);
3790 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3792 if (GET_CODE (op) == CONST
3793 && GET_CODE (XEXP (op, 0)) == PLUS
3794 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3795 op = XEXP (XEXP (op, 0), 0);
3796 return GET_CODE (op) == SYMBOL_REF;
3799 /* Match exactly zero and one. */
3802 const0_operand (rtx op, enum machine_mode mode)
3804 return op == CONST0_RTX (mode);
3808 const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3810 return op == const1_rtx;
3813 /* Match 2, 4, or 8. Used for leal multiplicands. */
3816 const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3818 return (GET_CODE (op) == CONST_INT
3819 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3823 const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3825 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3829 const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3831 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3835 const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3837 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3841 const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3843 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3847 /* True if this is a constant appropriate for an increment or decrement. */
3850 incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3852 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3853 registers, since carry flag is not set. */
3854 if ((TARGET_PENTIUM4 || TARGET_NOCONA) && !optimize_size)
3856 return op == const1_rtx || op == constm1_rtx;
3859 /* Return nonzero if OP is acceptable as operand of DImode shift
3863 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3866 return nonimmediate_operand (op, mode);
3868 return register_operand (op, mode);
3871 /* Return false if this is the stack pointer, or any other fake
3872 register eliminable to the stack pointer. Otherwise, this is
3875 This is used to prevent esp from being used as an index reg.
3876 Which would only happen in pathological cases. */
3879 reg_no_sp_operand (rtx op, enum machine_mode mode)
3882 if (GET_CODE (t) == SUBREG)
3884 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3887 return register_operand (op, mode);
3891 mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3893 return MMX_REG_P (op);
3896 /* Return false if this is any eliminable register. Otherwise
3900 general_no_elim_operand (rtx op, enum machine_mode mode)
3903 if (GET_CODE (t) == SUBREG)
3905 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3906 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3907 || t == virtual_stack_dynamic_rtx)
3910 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3911 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3914 return general_operand (op, mode);
3917 /* Return false if this is any eliminable register. Otherwise
3918 register_operand or const_int. */
3921 nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
3924 if (GET_CODE (t) == SUBREG)
3926 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3927 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3928 || t == virtual_stack_dynamic_rtx)
3931 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3934 /* Return false if this is any eliminable register or stack register,
3935 otherwise work like register_operand. */
3938 index_register_operand (rtx op, enum machine_mode mode)
3941 if (GET_CODE (t) == SUBREG)
3945 if (t == arg_pointer_rtx
3946 || t == frame_pointer_rtx
3947 || t == virtual_incoming_args_rtx
3948 || t == virtual_stack_vars_rtx
3949 || t == virtual_stack_dynamic_rtx
3950 || REGNO (t) == STACK_POINTER_REGNUM)
3953 return general_operand (op, mode);
3956 /* Return true if op is a Q_REGS class register. */
3959 q_regs_operand (rtx op, enum machine_mode mode)
3961 if (mode != VOIDmode && GET_MODE (op) != mode)
3963 if (GET_CODE (op) == SUBREG)
3964 op = SUBREG_REG (op);
3965 return ANY_QI_REG_P (op);
3968 /* Return true if op is an flags register. */
3971 flags_reg_operand (rtx op, enum machine_mode mode)
3973 if (mode != VOIDmode && GET_MODE (op) != mode)
3975 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3978 /* Return true if op is a NON_Q_REGS class register. */
3981 non_q_regs_operand (rtx op, enum machine_mode mode)
3983 if (mode != VOIDmode && GET_MODE (op) != mode)
3985 if (GET_CODE (op) == SUBREG)
3986 op = SUBREG_REG (op);
3987 return NON_QI_REG_P (op);
3991 zero_extended_scalar_load_operand (rtx op,
3992 enum machine_mode mode ATTRIBUTE_UNUSED)
3995 if (GET_CODE (op) != MEM)
3997 op = maybe_get_pool_constant (op);
4000 if (GET_CODE (op) != CONST_VECTOR)
4003 (GET_MODE_SIZE (GET_MODE (op)) /
4004 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
4005 for (n_elts--; n_elts > 0; n_elts--)
4007 rtx elt = CONST_VECTOR_ELT (op, n_elts);
4008 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
4014 /* Return 1 when OP is operand acceptable for standard SSE move. */
4016 vector_move_operand (rtx op, enum machine_mode mode)
4018 if (nonimmediate_operand (op, mode))
4020 if (GET_MODE (op) != mode && mode != VOIDmode)
4022 return (op == CONST0_RTX (GET_MODE (op)));
4025 /* Return true if op if a valid address, and does not contain
4026 a segment override. */
4029 no_seg_address_operand (rtx op, enum machine_mode mode)
4031 struct ix86_address parts;
4033 if (! address_operand (op, mode))
4036 if (! ix86_decompose_address (op, &parts))
4039 return parts.seg == SEG_DEFAULT;
4042 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
4045 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4047 enum rtx_code code = GET_CODE (op);
4050 /* Operations supported directly. */
4060 /* These are equivalent to ones above in non-IEEE comparisons. */
4067 return !TARGET_IEEE_FP;
4072 /* Return 1 if OP is a valid comparison operator in valid mode. */
4074 ix86_comparison_operator (rtx op, enum machine_mode mode)
4076 enum machine_mode inmode;
4077 enum rtx_code code = GET_CODE (op);
4078 if (mode != VOIDmode && GET_MODE (op) != mode)
4080 if (!COMPARISON_P (op))
4082 inmode = GET_MODE (XEXP (op, 0));
4084 if (inmode == CCFPmode || inmode == CCFPUmode)
4086 enum rtx_code second_code, bypass_code;
4087 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4088 return (bypass_code == NIL && second_code == NIL);
4095 if (inmode == CCmode || inmode == CCGCmode
4096 || inmode == CCGOCmode || inmode == CCNOmode)
4099 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
4100 if (inmode == CCmode)
4104 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
4112 /* Return 1 if OP is a valid comparison operator testing carry flag
4115 ix86_carry_flag_operator (rtx op, enum machine_mode mode)
4117 enum machine_mode inmode;
4118 enum rtx_code code = GET_CODE (op);
4120 if (mode != VOIDmode && GET_MODE (op) != mode)
4122 if (!COMPARISON_P (op))
4124 inmode = GET_MODE (XEXP (op, 0));
4125 if (GET_CODE (XEXP (op, 0)) != REG
4126 || REGNO (XEXP (op, 0)) != 17
4127 || XEXP (op, 1) != const0_rtx)
4130 if (inmode == CCFPmode || inmode == CCFPUmode)
4132 enum rtx_code second_code, bypass_code;
4134 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4135 if (bypass_code != NIL || second_code != NIL)
4137 code = ix86_fp_compare_code_to_integer (code);
4139 else if (inmode != CCmode)
4144 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4147 fcmov_comparison_operator (rtx op, enum machine_mode mode)
4149 enum machine_mode inmode;
4150 enum rtx_code code = GET_CODE (op);
4152 if (mode != VOIDmode && GET_MODE (op) != mode)
4154 if (!COMPARISON_P (op))
4156 inmode = GET_MODE (XEXP (op, 0));
4157 if (inmode == CCFPmode || inmode == CCFPUmode)
4159 enum rtx_code second_code, bypass_code;
4161 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4162 if (bypass_code != NIL || second_code != NIL)
4164 code = ix86_fp_compare_code_to_integer (code);
4166 /* i387 supports just limited amount of conditional codes. */
4169 case LTU: case GTU: case LEU: case GEU:
4170 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4173 case ORDERED: case UNORDERED:
4181 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4184 promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4186 switch (GET_CODE (op))
4189 /* Modern CPUs have same latency for HImode and SImode multiply,
4190 but 386 and 486 do HImode multiply faster. */
4191 return ix86_tune > PROCESSOR_I486;
4203 /* Nearly general operand, but accept any const_double, since we wish
4204 to be able to drop them into memory rather than have them get pulled
4208 cmp_fp_expander_operand (rtx op, enum machine_mode mode)
4210 if (mode != VOIDmode && mode != GET_MODE (op))
4212 if (GET_CODE (op) == CONST_DOUBLE)
4214 return general_operand (op, mode);
4217 /* Match an SI or HImode register for a zero_extract. */
4220 ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4223 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4224 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4227 if (!register_operand (op, VOIDmode))
4230 /* Be careful to accept only registers having upper parts. */
4231 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4232 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4235 /* Return 1 if this is a valid binary floating-point operation.
4236 OP is the expression matched, and MODE is its mode. */
4239 binary_fp_operator (rtx op, enum machine_mode mode)
4241 if (mode != VOIDmode && mode != GET_MODE (op))
4244 switch (GET_CODE (op))
4250 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4258 mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4260 return GET_CODE (op) == MULT;
4264 div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4266 return GET_CODE (op) == DIV;
4270 arith_or_logical_operator (rtx op, enum machine_mode mode)
4272 return ((mode == VOIDmode || GET_MODE (op) == mode)
4273 && ARITHMETIC_P (op));
4276 /* Returns 1 if OP is memory operand with a displacement. */
4279 memory_displacement_operand (rtx op, enum machine_mode mode)
4281 struct ix86_address parts;
4283 if (! memory_operand (op, mode))
4286 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4289 return parts.disp != NULL_RTX;
4292 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4293 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4295 ??? It seems likely that this will only work because cmpsi is an
4296 expander, and no actual insns use this. */
4299 cmpsi_operand (rtx op, enum machine_mode mode)
4301 if (nonimmediate_operand (op, mode))
4304 if (GET_CODE (op) == AND
4305 && GET_MODE (op) == SImode
4306 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4307 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4308 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4309 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4310 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4311 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4317 /* Returns 1 if OP is memory operand that can not be represented by the
4321 long_memory_operand (rtx op, enum machine_mode mode)
4323 if (! memory_operand (op, mode))
4326 return memory_address_length (op) != 0;
4329 /* Return nonzero if the rtx is known aligned. */
4332 aligned_operand (rtx op, enum machine_mode mode)
4334 struct ix86_address parts;
4336 if (!general_operand (op, mode))
4339 /* Registers and immediate operands are always "aligned". */
4340 if (GET_CODE (op) != MEM)
4343 /* Don't even try to do any aligned optimizations with volatiles. */
4344 if (MEM_VOLATILE_P (op))
4349 /* Pushes and pops are only valid on the stack pointer. */
4350 if (GET_CODE (op) == PRE_DEC
4351 || GET_CODE (op) == POST_INC)
4354 /* Decode the address. */
4355 if (! ix86_decompose_address (op, &parts))
4358 /* Look for some component that isn't known to be aligned. */
4362 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4367 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4372 if (GET_CODE (parts.disp) != CONST_INT
4373 || (INTVAL (parts.disp) & 3) != 0)
4377 /* Didn't find one -- this must be an aligned address. */
4381 /* Initialize the table of extra 80387 mathematical constants. */
4384 init_ext_80387_constants (void)
4386 static const char * cst[5] =
4388 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4389 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4390 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4391 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4392 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4396 for (i = 0; i < 5; i++)
4398 real_from_string (&ext_80387_constants_table[i], cst[i]);
4399 /* Ensure each constant is rounded to XFmode precision. */
4400 real_convert (&ext_80387_constants_table[i],
4401 XFmode, &ext_80387_constants_table[i]);
4404 ext_80387_constants_init = 1;
4407 /* Return true if the constant is something that can be loaded with
4408 a special instruction. */
4411 standard_80387_constant_p (rtx x)
4413 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4416 if (x == CONST0_RTX (GET_MODE (x)))
4418 if (x == CONST1_RTX (GET_MODE (x)))
4421 /* For XFmode constants, try to find a special 80387 instruction when
4422 optimizing for size or on those CPUs that benefit from them. */
4423 if (GET_MODE (x) == XFmode
4424 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4429 if (! ext_80387_constants_init)
4430 init_ext_80387_constants ();
4432 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4433 for (i = 0; i < 5; i++)
4434 if (real_identical (&r, &ext_80387_constants_table[i]))
4441 /* Return the opcode of the special instruction to be used to load
4445 standard_80387_constant_opcode (rtx x)
4447 switch (standard_80387_constant_p (x))
4467 /* Return the CONST_DOUBLE representing the 80387 constant that is
4468 loaded by the specified special instruction. The argument IDX
4469 matches the return value from standard_80387_constant_p. */
4472 standard_80387_constant_rtx (int idx)
4476 if (! ext_80387_constants_init)
4477 init_ext_80387_constants ();
4493 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4497 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4500 standard_sse_constant_p (rtx x)
4502 if (x == const0_rtx)
4504 return (x == CONST0_RTX (GET_MODE (x)));
4507 /* Returns 1 if OP contains a symbol reference */
4510 symbolic_reference_mentioned_p (rtx op)
4515 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4518 fmt = GET_RTX_FORMAT (GET_CODE (op));
4519 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4525 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4526 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4530 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4537 /* Return 1 if it is appropriate to emit `ret' instructions in the
4538 body of a function. Do this only if the epilogue is simple, needing a
4539 couple of insns. Prior to reloading, we can't tell how many registers
4540 must be saved, so return 0 then. Return 0 if there is no frame
4541 marker to de-allocate.
4543 If NON_SAVING_SETJMP is defined and true, then it is not possible
4544 for the epilogue to be simple, so return 0. This is a special case
4545 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4546 until final, but jump_optimize may need to know sooner if a
4550 ix86_can_use_return_insn_p (void)
4552 struct ix86_frame frame;
4554 #ifdef NON_SAVING_SETJMP
4555 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4559 if (! reload_completed || frame_pointer_needed)
4562 /* Don't allow more than 32 pop, since that's all we can do
4563 with one instruction. */
4564 if (current_function_pops_args
4565 && current_function_args_size >= 32768)
4568 ix86_compute_frame_layout (&frame);
4569 return frame.to_allocate == 0 && frame.nregs == 0;
4572 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4574 x86_64_sign_extended_value (rtx value)
4576 switch (GET_CODE (value))
4578 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4579 to be at least 32 and this all acceptable constants are
4580 represented as CONST_INT. */
4582 if (HOST_BITS_PER_WIDE_INT == 32)
4586 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4587 return trunc_int_for_mode (val, SImode) == val;
4591 /* For certain code models, the symbolic references are known to fit.
4592 in CM_SMALL_PIC model we know it fits if it is local to the shared
4593 library. Don't count TLS SYMBOL_REFs here, since they should fit
4594 only if inside of UNSPEC handled below. */
4596 /* TLS symbols are not constant. */
4597 if (tls_symbolic_operand (value, Pmode))
4599 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4601 /* For certain code models, the code is near as well. */
4603 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4604 || ix86_cmodel == CM_KERNEL);
4606 /* We also may accept the offsetted memory references in certain special
4609 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4610 switch (XINT (XEXP (value, 0), 1))
4612 case UNSPEC_GOTPCREL:
4614 case UNSPEC_GOTNTPOFF:
4620 if (GET_CODE (XEXP (value, 0)) == PLUS)
4622 rtx op1 = XEXP (XEXP (value, 0), 0);
4623 rtx op2 = XEXP (XEXP (value, 0), 1);
4624 HOST_WIDE_INT offset;
4626 if (ix86_cmodel == CM_LARGE)
4628 if (GET_CODE (op2) != CONST_INT)
4630 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4631 switch (GET_CODE (op1))
4634 /* For CM_SMALL assume that latest object is 16MB before
4635 end of 31bits boundary. We may also accept pretty
4636 large negative constants knowing that all objects are
4637 in the positive half of address space. */
4638 if (ix86_cmodel == CM_SMALL
4639 && offset < 16*1024*1024
4640 && trunc_int_for_mode (offset, SImode) == offset)
4642 /* For CM_KERNEL we know that all object resist in the
4643 negative half of 32bits address space. We may not
4644 accept negative offsets, since they may be just off
4645 and we may accept pretty large positive ones. */
4646 if (ix86_cmodel == CM_KERNEL
4648 && trunc_int_for_mode (offset, SImode) == offset)
4652 /* These conditions are similar to SYMBOL_REF ones, just the
4653 constraints for code models differ. */
4654 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4655 && offset < 16*1024*1024
4656 && trunc_int_for_mode (offset, SImode) == offset)
4658 if (ix86_cmodel == CM_KERNEL
4660 && trunc_int_for_mode (offset, SImode) == offset)
4664 switch (XINT (op1, 1))
4669 && trunc_int_for_mode (offset, SImode) == offset)
4683 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4685 x86_64_zero_extended_value (rtx value)
4687 switch (GET_CODE (value))
4690 if (HOST_BITS_PER_WIDE_INT == 32)
4691 return (GET_MODE (value) == VOIDmode
4692 && !CONST_DOUBLE_HIGH (value));
4696 if (HOST_BITS_PER_WIDE_INT == 32)
4697 return INTVAL (value) >= 0;
4699 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4702 /* For certain code models, the symbolic references are known to fit. */
4704 /* TLS symbols are not constant. */
4705 if (tls_symbolic_operand (value, Pmode))
4707 return ix86_cmodel == CM_SMALL;
4709 /* For certain code models, the code is near as well. */
4711 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4713 /* We also may accept the offsetted memory references in certain special
4716 if (GET_CODE (XEXP (value, 0)) == PLUS)
4718 rtx op1 = XEXP (XEXP (value, 0), 0);
4719 rtx op2 = XEXP (XEXP (value, 0), 1);
4721 if (ix86_cmodel == CM_LARGE)
4723 switch (GET_CODE (op1))
4727 /* For small code model we may accept pretty large positive
4728 offsets, since one bit is available for free. Negative
4729 offsets are limited by the size of NULL pointer area
4730 specified by the ABI. */
4731 if (ix86_cmodel == CM_SMALL
4732 && GET_CODE (op2) == CONST_INT
4733 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4734 && (trunc_int_for_mode (INTVAL (op2), SImode)
4737 /* ??? For the kernel, we may accept adjustment of
4738 -0x10000000, since we know that it will just convert
4739 negative address space to positive, but perhaps this
4740 is not worthwhile. */
4743 /* These conditions are similar to SYMBOL_REF ones, just the
4744 constraints for code models differ. */
4745 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4746 && GET_CODE (op2) == CONST_INT
4747 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4748 && (trunc_int_for_mode (INTVAL (op2), SImode)
4762 /* Value should be nonzero if functions must have frame pointers.
4763 Zero means the frame pointer need not be set up (and parms may
4764 be accessed via the stack pointer) in functions that seem suitable. */
4767 ix86_frame_pointer_required (void)
4769 /* If we accessed previous frames, then the generated code expects
4770 to be able to access the saved ebp value in our frame. */
4771 if (cfun->machine->accesses_prev_frame)
4774 /* Several x86 os'es need a frame pointer for other reasons,
4775 usually pertaining to setjmp. */
4776 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4779 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4780 the frame pointer by default. Turn it back on now if we've not
4781 got a leaf function. */
4782 if (TARGET_OMIT_LEAF_FRAME_POINTER
4783 && (!current_function_is_leaf))
4786 if (current_function_profile)
4792 /* Record that the current function accesses previous call frames. */
4795 ix86_setup_frame_addresses (void)
4797 cfun->machine->accesses_prev_frame = 1;
4800 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4801 # define USE_HIDDEN_LINKONCE 1
4803 # define USE_HIDDEN_LINKONCE 0
4806 static int pic_labels_used;
4808 /* Fills in the label name that should be used for a pc thunk for
4809 the given register. */
4812 get_pc_thunk_name (char name[32], unsigned int regno)
4814 if (USE_HIDDEN_LINKONCE)
4815 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4817 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4821 /* This function generates code for -fpic that loads %ebx with
4822 the return address of the caller and then returns. */
4825 ix86_file_end (void)
4830 for (regno = 0; regno < 8; ++regno)
4834 if (! ((pic_labels_used >> regno) & 1))
4837 get_pc_thunk_name (name, regno);
4839 if (USE_HIDDEN_LINKONCE)
4843 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4845 TREE_PUBLIC (decl) = 1;
4846 TREE_STATIC (decl) = 1;
4847 DECL_ONE_ONLY (decl) = 1;
4849 (*targetm.asm_out.unique_section) (decl, 0);
4850 named_section (decl, NULL, 0);
4852 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4853 fputs ("\t.hidden\t", asm_out_file);
4854 assemble_name (asm_out_file, name);
4855 fputc ('\n', asm_out_file);
4856 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4861 ASM_OUTPUT_LABEL (asm_out_file, name);
4864 xops[0] = gen_rtx_REG (SImode, regno);
4865 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4866 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4867 output_asm_insn ("ret", xops);
4870 if (NEED_INDICATE_EXEC_STACK)
4871 file_end_indicate_exec_stack ();
4874 /* Emit code for the SET_GOT patterns. */
4877 output_set_got (rtx dest)
4882 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4884 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4886 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4889 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4891 output_asm_insn ("call\t%a2", xops);
4894 /* Output the "canonical" label name ("Lxx$pb") here too. This
4895 is what will be referred to by the Mach-O PIC subsystem. */
4896 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4898 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4899 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4902 output_asm_insn ("pop{l}\t%0", xops);
4907 get_pc_thunk_name (name, REGNO (dest));
4908 pic_labels_used |= 1 << REGNO (dest);
4910 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4911 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4912 output_asm_insn ("call\t%X2", xops);
4915 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4916 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4917 else if (!TARGET_MACHO)
4918 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4923 /* Generate an "push" pattern for input ARG. */
4928 return gen_rtx_SET (VOIDmode,
4930 gen_rtx_PRE_DEC (Pmode,
4931 stack_pointer_rtx)),
4935 /* Return >= 0 if there is an unused call-clobbered register available
4936 for the entire function. */
4939 ix86_select_alt_pic_regnum (void)
4941 if (current_function_is_leaf && !current_function_profile)
4944 for (i = 2; i >= 0; --i)
4945 if (!regs_ever_live[i])
4949 return INVALID_REGNUM;
4952 /* Return 1 if we need to save REGNO. */
4954 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4956 if (pic_offset_table_rtx
4957 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4958 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4959 || current_function_profile
4960 || current_function_calls_eh_return
4961 || current_function_uses_const_pool))
4963 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4968 if (current_function_calls_eh_return && maybe_eh_return)
4973 unsigned test = EH_RETURN_DATA_REGNO (i);
4974 if (test == INVALID_REGNUM)
4981 return (regs_ever_live[regno]
4982 && !call_used_regs[regno]
4983 && !fixed_regs[regno]
4984 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4987 /* Return number of registers to be saved on the stack. */
4990 ix86_nsaved_regs (void)
4995 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4996 if (ix86_save_reg (regno, true))
5001 /* Return the offset between two registers, one to be eliminated, and the other
5002 its replacement, at the start of a routine. */
5005 ix86_initial_elimination_offset (int from, int to)
5007 struct ix86_frame frame;
5008 ix86_compute_frame_layout (&frame);
5010 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5011 return frame.hard_frame_pointer_offset;
5012 else if (from == FRAME_POINTER_REGNUM
5013 && to == HARD_FRAME_POINTER_REGNUM)
5014 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5017 if (to != STACK_POINTER_REGNUM)
5019 else if (from == ARG_POINTER_REGNUM)
5020 return frame.stack_pointer_offset;
5021 else if (from != FRAME_POINTER_REGNUM)
5024 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5028 /* Fill structure ix86_frame about frame of currently computed function. */
5031 ix86_compute_frame_layout (struct ix86_frame *frame)
5033 HOST_WIDE_INT total_size;
5034 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5035 HOST_WIDE_INT offset;
5036 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5037 HOST_WIDE_INT size = get_frame_size ();
5039 frame->nregs = ix86_nsaved_regs ();
5042 /* During reload iteration the amount of registers saved can change.
5043 Recompute the value as needed. Do not recompute when amount of registers
5044 didn't change as reload does mutiple calls to the function and does not
5045 expect the decision to change within single iteration. */
5047 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5049 int count = frame->nregs;
5051 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5052 /* The fast prologue uses move instead of push to save registers. This
5053 is significantly longer, but also executes faster as modern hardware
5054 can execute the moves in parallel, but can't do that for push/pop.
5056 Be careful about choosing what prologue to emit: When function takes
5057 many instructions to execute we may use slow version as well as in
5058 case function is known to be outside hot spot (this is known with
5059 feedback only). Weight the size of function by number of registers
5060 to save as it is cheap to use one or two push instructions but very
5061 slow to use many of them. */
5063 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5064 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5065 || (flag_branch_probabilities
5066 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5067 cfun->machine->use_fast_prologue_epilogue = false;
5069 cfun->machine->use_fast_prologue_epilogue
5070 = !expensive_function_p (count);
5072 if (TARGET_PROLOGUE_USING_MOVE
5073 && cfun->machine->use_fast_prologue_epilogue)
5074 frame->save_regs_using_mov = true;
5076 frame->save_regs_using_mov = false;
5079 /* Skip return address and saved base pointer. */
5080 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5082 frame->hard_frame_pointer_offset = offset;
5084 /* Do some sanity checking of stack_alignment_needed and
5085 preferred_alignment, since i386 port is the only using those features
5086 that may break easily. */
5088 if (size && !stack_alignment_needed)
5090 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
5092 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5094 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5097 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5098 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5100 /* Register save area */
5101 offset += frame->nregs * UNITS_PER_WORD;
5104 if (ix86_save_varrargs_registers)
5106 offset += X86_64_VARARGS_SIZE;
5107 frame->va_arg_size = X86_64_VARARGS_SIZE;
5110 frame->va_arg_size = 0;
5112 /* Align start of frame for local function. */
5113 frame->padding1 = ((offset + stack_alignment_needed - 1)
5114 & -stack_alignment_needed) - offset;
5116 offset += frame->padding1;
5118 /* Frame pointer points here. */
5119 frame->frame_pointer_offset = offset;
5123 /* Add outgoing arguments area. Can be skipped if we eliminated
5124 all the function calls as dead code.
5125 Skipping is however impossible when function calls alloca. Alloca
5126 expander assumes that last current_function_outgoing_args_size
5127 of stack frame are unused. */
5128 if (ACCUMULATE_OUTGOING_ARGS
5129 && (!current_function_is_leaf || current_function_calls_alloca))
5131 offset += current_function_outgoing_args_size;
5132 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5135 frame->outgoing_arguments_size = 0;
5137 /* Align stack boundary. Only needed if we're calling another function
5139 if (!current_function_is_leaf || current_function_calls_alloca)
5140 frame->padding2 = ((offset + preferred_alignment - 1)
5141 & -preferred_alignment) - offset;
5143 frame->padding2 = 0;
5145 offset += frame->padding2;
5147 /* We've reached end of stack frame. */
5148 frame->stack_pointer_offset = offset;
5150 /* Size prologue needs to allocate. */
5151 frame->to_allocate =
5152 (size + frame->padding1 + frame->padding2
5153 + frame->outgoing_arguments_size + frame->va_arg_size);
5155 if ((!frame->to_allocate && frame->nregs <= 1)
5156 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5157 frame->save_regs_using_mov = false;
5159 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5160 && current_function_is_leaf)
5162 frame->red_zone_size = frame->to_allocate;
5163 if (frame->save_regs_using_mov)
5164 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5165 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5166 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5169 frame->red_zone_size = 0;
5170 frame->to_allocate -= frame->red_zone_size;
5171 frame->stack_pointer_offset -= frame->red_zone_size;
5173 fprintf (stderr, "nregs: %i\n", frame->nregs);
5174 fprintf (stderr, "size: %i\n", size);
5175 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5176 fprintf (stderr, "padding1: %i\n", frame->padding1);
5177 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5178 fprintf (stderr, "padding2: %i\n", frame->padding2);
5179 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5180 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5181 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5182 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5183 frame->hard_frame_pointer_offset);
5184 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5188 /* Emit code to save registers in the prologue. */
5191 ix86_emit_save_regs (void)
5196 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5197 if (ix86_save_reg (regno, true))
5199 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5200 RTX_FRAME_RELATED_P (insn) = 1;
5204 /* Emit code to save registers using MOV insns. First register
5205 is restored from POINTER + OFFSET. */
5207 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5212 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5213 if (ix86_save_reg (regno, true))
5215 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5217 gen_rtx_REG (Pmode, regno));
5218 RTX_FRAME_RELATED_P (insn) = 1;
5219 offset += UNITS_PER_WORD;
5223 /* Expand prologue or epilogue stack adjustment.
5224 The pattern exist to put a dependency on all ebp-based memory accesses.
5225 STYLE should be negative if instructions should be marked as frame related,
5226 zero if %r11 register is live and cannot be freely used and positive
5230 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5235 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5236 else if (x86_64_immediate_operand (offset, DImode))
5237 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5241 /* r11 is used by indirect sibcall return as well, set before the
5242 epilogue and used after the epilogue. ATM indirect sibcall
5243 shouldn't be used together with huge frame sizes in one
5244 function because of the frame_size check in sibcall.c. */
5247 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5248 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5250 RTX_FRAME_RELATED_P (insn) = 1;
5251 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5255 RTX_FRAME_RELATED_P (insn) = 1;
5258 /* Expand the prologue into a bunch of separate insns. */
5261 ix86_expand_prologue (void)
5265 struct ix86_frame frame;
5266 HOST_WIDE_INT allocate;
5268 ix86_compute_frame_layout (&frame);
5270 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5271 slower on all targets. Also sdb doesn't like it. */
5273 if (frame_pointer_needed)
5275 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5276 RTX_FRAME_RELATED_P (insn) = 1;
5278 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5279 RTX_FRAME_RELATED_P (insn) = 1;
5282 allocate = frame.to_allocate;
5284 if (!frame.save_regs_using_mov)
5285 ix86_emit_save_regs ();
5287 allocate += frame.nregs * UNITS_PER_WORD;
5289 /* When using red zone we may start register saving before allocating
5290 the stack frame saving one cycle of the prologue. */
5291 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5292 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5293 : stack_pointer_rtx,
5294 -frame.nregs * UNITS_PER_WORD);
5298 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5299 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5300 GEN_INT (-allocate), -1);
5303 /* Only valid for Win32. */
5304 rtx eax = gen_rtx_REG (SImode, 0);
5305 bool eax_live = ix86_eax_live_at_start_p ();
5312 emit_insn (gen_push (eax));
5316 insn = emit_move_insn (eax, GEN_INT (allocate));
5317 RTX_FRAME_RELATED_P (insn) = 1;
5319 insn = emit_insn (gen_allocate_stack_worker (eax));
5320 RTX_FRAME_RELATED_P (insn) = 1;
5324 rtx t = plus_constant (stack_pointer_rtx, allocate);
5325 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5329 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5331 if (!frame_pointer_needed || !frame.to_allocate)
5332 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5334 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5335 -frame.nregs * UNITS_PER_WORD);
5338 pic_reg_used = false;
5339 if (pic_offset_table_rtx
5340 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5341 || current_function_profile))
5343 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5345 if (alt_pic_reg_used != INVALID_REGNUM)
5346 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5348 pic_reg_used = true;
5353 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5355 /* Even with accurate pre-reload life analysis, we can wind up
5356 deleting all references to the pic register after reload.
5357 Consider if cross-jumping unifies two sides of a branch
5358 controlled by a comparison vs the only read from a global.
5359 In which case, allow the set_got to be deleted, though we're
5360 too late to do anything about the ebx save in the prologue. */
5361 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5364 /* Prevent function calls from be scheduled before the call to mcount.
5365 In the pic_reg_used case, make sure that the got load isn't deleted. */
5366 if (current_function_profile)
5367 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5370 /* Emit code to restore saved registers using MOV insns. First register
5371 is restored from POINTER + OFFSET. */
5373 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5374 int maybe_eh_return)
5377 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5379 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5380 if (ix86_save_reg (regno, maybe_eh_return))
5382 /* Ensure that adjust_address won't be forced to produce pointer
5383 out of range allowed by x86-64 instruction set. */
5384 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5388 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5389 emit_move_insn (r11, GEN_INT (offset));
5390 emit_insn (gen_adddi3 (r11, r11, pointer));
5391 base_address = gen_rtx_MEM (Pmode, r11);
5394 emit_move_insn (gen_rtx_REG (Pmode, regno),
5395 adjust_address (base_address, Pmode, offset));
5396 offset += UNITS_PER_WORD;
5400 /* Restore function stack, frame, and registers. */
5403 ix86_expand_epilogue (int style)
5406 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5407 struct ix86_frame frame;
5408 HOST_WIDE_INT offset;
5410 ix86_compute_frame_layout (&frame);
5412 /* Calculate start of saved registers relative to ebp. Special care
5413 must be taken for the normal return case of a function using
5414 eh_return: the eax and edx registers are marked as saved, but not
5415 restored along this path. */
5416 offset = frame.nregs;
5417 if (current_function_calls_eh_return && style != 2)
5419 offset *= -UNITS_PER_WORD;
5421 /* If we're only restoring one register and sp is not valid then
5422 using a move instruction to restore the register since it's
5423 less work than reloading sp and popping the register.
5425 The default code result in stack adjustment using add/lea instruction,
5426 while this code results in LEAVE instruction (or discrete equivalent),
5427 so it is profitable in some other cases as well. Especially when there
5428 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5429 and there is exactly one register to pop. This heuristic may need some
5430 tuning in future. */
5431 if ((!sp_valid && frame.nregs <= 1)
5432 || (TARGET_EPILOGUE_USING_MOVE
5433 && cfun->machine->use_fast_prologue_epilogue
5434 && (frame.nregs > 1 || frame.to_allocate))
5435 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5436 || (frame_pointer_needed && TARGET_USE_LEAVE
5437 && cfun->machine->use_fast_prologue_epilogue
5438 && frame.nregs == 1)
5439 || current_function_calls_eh_return)
5441 /* Restore registers. We can use ebp or esp to address the memory
5442 locations. If both are available, default to ebp, since offsets
5443 are known to be small. Only exception is esp pointing directly to the
5444 end of block of saved registers, where we may simplify addressing
5447 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5448 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5449 frame.to_allocate, style == 2);
5451 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5452 offset, style == 2);
5454 /* eh_return epilogues need %ecx added to the stack pointer. */
5457 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5459 if (frame_pointer_needed)
5461 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5462 tmp = plus_constant (tmp, UNITS_PER_WORD);
5463 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5465 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5466 emit_move_insn (hard_frame_pointer_rtx, tmp);
5468 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5473 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5474 tmp = plus_constant (tmp, (frame.to_allocate
5475 + frame.nregs * UNITS_PER_WORD));
5476 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5479 else if (!frame_pointer_needed)
5480 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5481 GEN_INT (frame.to_allocate
5482 + frame.nregs * UNITS_PER_WORD),
5484 /* If not an i386, mov & pop is faster than "leave". */
5485 else if (TARGET_USE_LEAVE || optimize_size
5486 || !cfun->machine->use_fast_prologue_epilogue)
5487 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5490 pro_epilogue_adjust_stack (stack_pointer_rtx,
5491 hard_frame_pointer_rtx,
5494 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5496 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5501 /* First step is to deallocate the stack frame so that we can
5502 pop the registers. */
5505 if (!frame_pointer_needed)
5507 pro_epilogue_adjust_stack (stack_pointer_rtx,
5508 hard_frame_pointer_rtx,
5509 GEN_INT (offset), style);
5511 else if (frame.to_allocate)
5512 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5513 GEN_INT (frame.to_allocate), style);
5515 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5516 if (ix86_save_reg (regno, false))
5519 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5521 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5523 if (frame_pointer_needed)
5525 /* Leave results in shorter dependency chains on CPUs that are
5526 able to grok it fast. */
5527 if (TARGET_USE_LEAVE)
5528 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5529 else if (TARGET_64BIT)
5530 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5532 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5536 /* Sibcall epilogues don't want a return instruction. */
5540 if (current_function_pops_args && current_function_args_size)
5542 rtx popc = GEN_INT (current_function_pops_args);
5544 /* i386 can only pop 64K bytes. If asked to pop more, pop
5545 return address, do explicit add, and jump indirectly to the
5548 if (current_function_pops_args >= 65536)
5550 rtx ecx = gen_rtx_REG (SImode, 2);
5552 /* There is no "pascal" calling convention in 64bit ABI. */
5556 emit_insn (gen_popsi1 (ecx));
5557 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5558 emit_jump_insn (gen_return_indirect_internal (ecx));
5561 emit_jump_insn (gen_return_pop_internal (popc));
5564 emit_jump_insn (gen_return_internal ());
5567 /* Reset from the function's potential modifications. */
5570 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5571 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5573 if (pic_offset_table_rtx)
5574 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5577 /* Extract the parts of an RTL expression that is a valid memory address
5578 for an instruction. Return 0 if the structure of the address is
5579 grossly off. Return -1 if the address contains ASHIFT, so it is not
5580 strictly valid, but still used for computing length of lea instruction. */
5583 ix86_decompose_address (rtx addr, struct ix86_address *out)
5585 rtx base = NULL_RTX;
5586 rtx index = NULL_RTX;
5587 rtx disp = NULL_RTX;
5588 HOST_WIDE_INT scale = 1;
5589 rtx scale_rtx = NULL_RTX;
5591 enum ix86_address_seg seg = SEG_DEFAULT;
5593 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5595 else if (GET_CODE (addr) == PLUS)
5605 addends[n++] = XEXP (op, 1);
5608 while (GET_CODE (op) == PLUS);
5613 for (i = n; i >= 0; --i)
5616 switch (GET_CODE (op))
5621 index = XEXP (op, 0);
5622 scale_rtx = XEXP (op, 1);
5626 if (XINT (op, 1) == UNSPEC_TP
5627 && TARGET_TLS_DIRECT_SEG_REFS
5628 && seg == SEG_DEFAULT)
5629 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5658 else if (GET_CODE (addr) == MULT)
5660 index = XEXP (addr, 0); /* index*scale */
5661 scale_rtx = XEXP (addr, 1);
5663 else if (GET_CODE (addr) == ASHIFT)
5667 /* We're called for lea too, which implements ashift on occasion. */
5668 index = XEXP (addr, 0);
5669 tmp = XEXP (addr, 1);
5670 if (GET_CODE (tmp) != CONST_INT)
5672 scale = INTVAL (tmp);
5673 if ((unsigned HOST_WIDE_INT) scale > 3)
5679 disp = addr; /* displacement */
5681 /* Extract the integral value of scale. */
5684 if (GET_CODE (scale_rtx) != CONST_INT)
5686 scale = INTVAL (scale_rtx);
5689 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5690 if (base && index && scale == 1
5691 && (index == arg_pointer_rtx
5692 || index == frame_pointer_rtx
5693 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5700 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5701 if ((base == hard_frame_pointer_rtx
5702 || base == frame_pointer_rtx
5703 || base == arg_pointer_rtx) && !disp)
5706 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5707 Avoid this by transforming to [%esi+0]. */
5708 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5709 && base && !index && !disp
5711 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5714 /* Special case: encode reg+reg instead of reg*2. */
5715 if (!base && index && scale && scale == 2)
5716 base = index, scale = 1;
5718 /* Special case: scaling cannot be encoded without base or displacement. */
5719 if (!base && !disp && index && scale != 1)
5731 /* Return cost of the memory address x.
5732 For i386, it is better to use a complex address than let gcc copy
5733 the address into a reg and make a new pseudo. But not if the address
5734 requires to two regs - that would mean more pseudos with longer
5737 ix86_address_cost (rtx x)
5739 struct ix86_address parts;
5742 if (!ix86_decompose_address (x, &parts))
5745 /* More complex memory references are better. */
5746 if (parts.disp && parts.disp != const0_rtx)
5748 if (parts.seg != SEG_DEFAULT)
5751 /* Attempt to minimize number of registers in the address. */
5753 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5755 && (!REG_P (parts.index)
5756 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5760 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5762 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5763 && parts.base != parts.index)
5766 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5767 since it's predecode logic can't detect the length of instructions
5768 and it degenerates to vector decoded. Increase cost of such
5769 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5770 to split such addresses or even refuse such addresses at all.
5772 Following addressing modes are affected:
5777 The first and last case may be avoidable by explicitly coding the zero in
5778 memory address, but I don't have AMD-K6 machine handy to check this
5782 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5783 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5784 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5790 /* If X is a machine specific address (i.e. a symbol or label being
5791 referenced as a displacement from the GOT implemented using an
5792 UNSPEC), then return the base term. Otherwise return X. */
5795 ix86_find_base_term (rtx x)
5801 if (GET_CODE (x) != CONST)
5804 if (GET_CODE (term) == PLUS
5805 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5806 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5807 term = XEXP (term, 0);
5808 if (GET_CODE (term) != UNSPEC
5809 || XINT (term, 1) != UNSPEC_GOTPCREL)
5812 term = XVECEXP (term, 0, 0);
5814 if (GET_CODE (term) != SYMBOL_REF
5815 && GET_CODE (term) != LABEL_REF)
5821 term = ix86_delegitimize_address (x);
5823 if (GET_CODE (term) != SYMBOL_REF
5824 && GET_CODE (term) != LABEL_REF)
5830 /* Determine if a given RTX is a valid constant. We already know this
5831 satisfies CONSTANT_P. */
5834 legitimate_constant_p (rtx x)
5838 switch (GET_CODE (x))
5841 /* TLS symbols are not constant. */
5842 if (tls_symbolic_operand (x, Pmode))
5847 inner = XEXP (x, 0);
5849 /* Offsets of TLS symbols are never valid.
5850 Discourage CSE from creating them. */
5851 if (GET_CODE (inner) == PLUS
5852 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5855 if (GET_CODE (inner) == PLUS
5856 || GET_CODE (inner) == MINUS)
5858 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5860 inner = XEXP (inner, 0);
5863 /* Only some unspecs are valid as "constants". */
5864 if (GET_CODE (inner) == UNSPEC)
5865 switch (XINT (inner, 1))
5869 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5871 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5881 /* Otherwise we handle everything else in the move patterns. */
5885 /* Determine if it's legal to put X into the constant pool. This
5886 is not possible for the address of thread-local symbols, which
5887 is checked above. */
5890 ix86_cannot_force_const_mem (rtx x)
5892 return !legitimate_constant_p (x);
5895 /* Determine if a given RTX is a valid constant address. */
5898 constant_address_p (rtx x)
5900 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5903 /* Nonzero if the constant value X is a legitimate general operand
5904 when generating PIC code. It is given that flag_pic is on and
5905 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5908 legitimate_pic_operand_p (rtx x)
5912 switch (GET_CODE (x))
5915 inner = XEXP (x, 0);
5917 /* Only some unspecs are valid as "constants". */
5918 if (GET_CODE (inner) == UNSPEC)
5919 switch (XINT (inner, 1))
5922 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5930 return legitimate_pic_address_disp_p (x);
5937 /* Determine if a given CONST RTX is a valid memory displacement
5941 legitimate_pic_address_disp_p (rtx disp)
5945 /* In 64bit mode we can allow direct addresses of symbols and labels
5946 when they are not dynamic symbols. */
5949 /* TLS references should always be enclosed in UNSPEC. */
5950 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5952 if (GET_CODE (disp) == SYMBOL_REF
5953 && ix86_cmodel == CM_SMALL_PIC
5954 && SYMBOL_REF_LOCAL_P (disp))
5956 if (GET_CODE (disp) == LABEL_REF)
5958 if (GET_CODE (disp) == CONST
5959 && GET_CODE (XEXP (disp, 0)) == PLUS)
5961 rtx op0 = XEXP (XEXP (disp, 0), 0);
5962 rtx op1 = XEXP (XEXP (disp, 0), 1);
5964 /* TLS references should always be enclosed in UNSPEC. */
5965 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5967 if (((GET_CODE (op0) == SYMBOL_REF
5968 && ix86_cmodel == CM_SMALL_PIC
5969 && SYMBOL_REF_LOCAL_P (op0))
5970 || GET_CODE (op0) == LABEL_REF)
5971 && GET_CODE (op1) == CONST_INT
5972 && INTVAL (op1) < 16*1024*1024
5973 && INTVAL (op1) >= -16*1024*1024)
5977 if (GET_CODE (disp) != CONST)
5979 disp = XEXP (disp, 0);
5983 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5984 of GOT tables. We should not need these anyway. */
5985 if (GET_CODE (disp) != UNSPEC
5986 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5989 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5990 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5996 if (GET_CODE (disp) == PLUS)
5998 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6000 disp = XEXP (disp, 0);
6004 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
6005 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
6007 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6008 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6009 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6011 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6012 if (! strcmp (sym_name, "<pic base>"))
6017 if (GET_CODE (disp) != UNSPEC)
6020 switch (XINT (disp, 1))
6025 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6027 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6028 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6029 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6031 case UNSPEC_GOTTPOFF:
6032 case UNSPEC_GOTNTPOFF:
6033 case UNSPEC_INDNTPOFF:
6036 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6038 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6040 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6046 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6047 memory address for an instruction. The MODE argument is the machine mode
6048 for the MEM expression that wants to use this address.
6050 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6051 convert common non-canonical forms to canonical form so that they will
6055 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6057 struct ix86_address parts;
6058 rtx base, index, disp;
6059 HOST_WIDE_INT scale;
6060 const char *reason = NULL;
6061 rtx reason_rtx = NULL_RTX;
6063 if (TARGET_DEBUG_ADDR)
6066 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6067 GET_MODE_NAME (mode), strict);
6071 if (ix86_decompose_address (addr, &parts) <= 0)
6073 reason = "decomposition failed";
6078 index = parts.index;
6080 scale = parts.scale;
6082 /* Validate base register.
6084 Don't allow SUBREG's here, it can lead to spill failures when the base
6085 is one word out of a two word structure, which is represented internally
6092 if (GET_CODE (base) != REG)
6094 reason = "base is not a register";
6098 if (GET_MODE (base) != Pmode)
6100 reason = "base is not in Pmode";
6104 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
6105 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
6107 reason = "base is not valid";
6112 /* Validate index register.
6114 Don't allow SUBREG's here, it can lead to spill failures when the index
6115 is one word out of a two word structure, which is represented internally
6122 if (GET_CODE (index) != REG)
6124 reason = "index is not a register";
6128 if (GET_MODE (index) != Pmode)
6130 reason = "index is not in Pmode";
6134 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
6135 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
6137 reason = "index is not valid";
6142 /* Validate scale factor. */
6145 reason_rtx = GEN_INT (scale);
6148 reason = "scale without index";
6152 if (scale != 2 && scale != 4 && scale != 8)
6154 reason = "scale is not a valid multiplier";
6159 /* Validate displacement. */
6164 if (GET_CODE (disp) == CONST
6165 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6166 switch (XINT (XEXP (disp, 0), 1))
6170 case UNSPEC_GOTPCREL:
6173 goto is_legitimate_pic;
6175 case UNSPEC_GOTTPOFF:
6176 case UNSPEC_GOTNTPOFF:
6177 case UNSPEC_INDNTPOFF:
6183 reason = "invalid address unspec";
6187 else if (flag_pic && (SYMBOLIC_CONST (disp)
6189 && !machopic_operand_p (disp)
6194 if (TARGET_64BIT && (index || base))
6196 /* foo@dtpoff(%rX) is ok. */
6197 if (GET_CODE (disp) != CONST
6198 || GET_CODE (XEXP (disp, 0)) != PLUS
6199 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6200 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6201 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6202 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6204 reason = "non-constant pic memory reference";
6208 else if (! legitimate_pic_address_disp_p (disp))
6210 reason = "displacement is an invalid pic construct";
6214 /* This code used to verify that a symbolic pic displacement
6215 includes the pic_offset_table_rtx register.
6217 While this is good idea, unfortunately these constructs may
6218 be created by "adds using lea" optimization for incorrect
6227 This code is nonsensical, but results in addressing
6228 GOT table with pic_offset_table_rtx base. We can't
6229 just refuse it easily, since it gets matched by
6230 "addsi3" pattern, that later gets split to lea in the
6231 case output register differs from input. While this
6232 can be handled by separate addsi pattern for this case
6233 that never results in lea, this seems to be easier and
6234 correct fix for crash to disable this test. */
6236 else if (GET_CODE (disp) != LABEL_REF
6237 && GET_CODE (disp) != CONST_INT
6238 && (GET_CODE (disp) != CONST
6239 || !legitimate_constant_p (disp))
6240 && (GET_CODE (disp) != SYMBOL_REF
6241 || !legitimate_constant_p (disp)))
6243 reason = "displacement is not constant";
6246 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6248 reason = "displacement is out of range";
6253 /* Everything looks valid. */
6254 if (TARGET_DEBUG_ADDR)
6255 fprintf (stderr, "Success.\n");
6259 if (TARGET_DEBUG_ADDR)
6261 fprintf (stderr, "Error: %s\n", reason);
6262 debug_rtx (reason_rtx);
6267 /* Return an unique alias set for the GOT. */
6269 static HOST_WIDE_INT
6270 ix86_GOT_alias_set (void)
6272 static HOST_WIDE_INT set = -1;
6274 set = new_alias_set ();
6278 /* Return a legitimate reference for ORIG (an address) using the
6279 register REG. If REG is 0, a new pseudo is generated.
6281 There are two types of references that must be handled:
6283 1. Global data references must load the address from the GOT, via
6284 the PIC reg. An insn is emitted to do this load, and the reg is
6287 2. Static data references, constant pool addresses, and code labels
6288 compute the address as an offset from the GOT, whose base is in
6289 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6290 differentiate them from global data objects. The returned
6291 address is the PIC reg + an unspec constant.
6293 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6294 reg also appears in the address. */
6297 legitimize_pic_address (rtx orig, rtx reg)
6305 reg = gen_reg_rtx (Pmode);
6306 /* Use the generic Mach-O PIC machinery. */
6307 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6310 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6312 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6314 /* This symbol may be referenced via a displacement from the PIC
6315 base address (@GOTOFF). */
6317 if (reload_in_progress)
6318 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6319 if (GET_CODE (addr) == CONST)
6320 addr = XEXP (addr, 0);
6321 if (GET_CODE (addr) == PLUS)
6323 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6324 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6327 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6328 new = gen_rtx_CONST (Pmode, new);
6329 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6333 emit_move_insn (reg, new);
6337 else if (GET_CODE (addr) == SYMBOL_REF)
6341 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6342 new = gen_rtx_CONST (Pmode, new);
6343 new = gen_rtx_MEM (Pmode, new);
6344 RTX_UNCHANGING_P (new) = 1;
6345 set_mem_alias_set (new, ix86_GOT_alias_set ());
6348 reg = gen_reg_rtx (Pmode);
6349 /* Use directly gen_movsi, otherwise the address is loaded
6350 into register for CSE. We don't want to CSE this addresses,
6351 instead we CSE addresses from the GOT table, so skip this. */
6352 emit_insn (gen_movsi (reg, new));
6357 /* This symbol must be referenced via a load from the
6358 Global Offset Table (@GOT). */
6360 if (reload_in_progress)
6361 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6362 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6363 new = gen_rtx_CONST (Pmode, new);
6364 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6365 new = gen_rtx_MEM (Pmode, new);
6366 RTX_UNCHANGING_P (new) = 1;
6367 set_mem_alias_set (new, ix86_GOT_alias_set ());
6370 reg = gen_reg_rtx (Pmode);
6371 emit_move_insn (reg, new);
6377 if (GET_CODE (addr) == CONST)
6379 addr = XEXP (addr, 0);
6381 /* We must match stuff we generate before. Assume the only
6382 unspecs that can get here are ours. Not that we could do
6383 anything with them anyway.... */
6384 if (GET_CODE (addr) == UNSPEC
6385 || (GET_CODE (addr) == PLUS
6386 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6388 if (GET_CODE (addr) != PLUS)
6391 if (GET_CODE (addr) == PLUS)
6393 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6395 /* Check first to see if this is a constant offset from a @GOTOFF
6396 symbol reference. */
6397 if (local_symbolic_operand (op0, Pmode)
6398 && GET_CODE (op1) == CONST_INT)
6402 if (reload_in_progress)
6403 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6404 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6406 new = gen_rtx_PLUS (Pmode, new, op1);
6407 new = gen_rtx_CONST (Pmode, new);
6408 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6412 emit_move_insn (reg, new);
6418 if (INTVAL (op1) < -16*1024*1024
6419 || INTVAL (op1) >= 16*1024*1024)
6420 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6425 base = legitimize_pic_address (XEXP (addr, 0), reg);
6426 new = legitimize_pic_address (XEXP (addr, 1),
6427 base == reg ? NULL_RTX : reg);
6429 if (GET_CODE (new) == CONST_INT)
6430 new = plus_constant (base, INTVAL (new));
6433 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6435 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6436 new = XEXP (new, 1);
6438 new = gen_rtx_PLUS (Pmode, base, new);
6446 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6449 get_thread_pointer (int to_reg)
6453 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6457 reg = gen_reg_rtx (Pmode);
6458 insn = gen_rtx_SET (VOIDmode, reg, tp);
6459 insn = emit_insn (insn);
6464 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6465 false if we expect this to be used for a memory address and true if
6466 we expect to load the address into a register. */
6469 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6471 rtx dest, base, off, pic;
6476 case TLS_MODEL_GLOBAL_DYNAMIC:
6477 dest = gen_reg_rtx (Pmode);
6480 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6483 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6484 insns = get_insns ();
6487 emit_libcall_block (insns, dest, rax, x);
6490 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6493 case TLS_MODEL_LOCAL_DYNAMIC:
6494 base = gen_reg_rtx (Pmode);
6497 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6500 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6501 insns = get_insns ();
6504 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6505 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6506 emit_libcall_block (insns, base, rax, note);
6509 emit_insn (gen_tls_local_dynamic_base_32 (base));
6511 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6512 off = gen_rtx_CONST (Pmode, off);
6514 return gen_rtx_PLUS (Pmode, base, off);
6516 case TLS_MODEL_INITIAL_EXEC:
6520 type = UNSPEC_GOTNTPOFF;
6524 if (reload_in_progress)
6525 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6526 pic = pic_offset_table_rtx;
6527 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6529 else if (!TARGET_GNU_TLS)
6531 pic = gen_reg_rtx (Pmode);
6532 emit_insn (gen_set_got (pic));
6533 type = UNSPEC_GOTTPOFF;
6538 type = UNSPEC_INDNTPOFF;
6541 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6542 off = gen_rtx_CONST (Pmode, off);
6544 off = gen_rtx_PLUS (Pmode, pic, off);
6545 off = gen_rtx_MEM (Pmode, off);
6546 RTX_UNCHANGING_P (off) = 1;
6547 set_mem_alias_set (off, ix86_GOT_alias_set ());
6549 if (TARGET_64BIT || TARGET_GNU_TLS)
6551 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6552 off = force_reg (Pmode, off);
6553 return gen_rtx_PLUS (Pmode, base, off);
6557 base = get_thread_pointer (true);
6558 dest = gen_reg_rtx (Pmode);
6559 emit_insn (gen_subsi3 (dest, base, off));
6563 case TLS_MODEL_LOCAL_EXEC:
6564 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6565 (TARGET_64BIT || TARGET_GNU_TLS)
6566 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6567 off = gen_rtx_CONST (Pmode, off);
6569 if (TARGET_64BIT || TARGET_GNU_TLS)
6571 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6572 return gen_rtx_PLUS (Pmode, base, off);
6576 base = get_thread_pointer (true);
6577 dest = gen_reg_rtx (Pmode);
6578 emit_insn (gen_subsi3 (dest, base, off));
6589 /* Try machine-dependent ways of modifying an illegitimate address
6590 to be legitimate. If we find one, return the new, valid address.
6591 This macro is used in only one place: `memory_address' in explow.c.
6593 OLDX is the address as it was before break_out_memory_refs was called.
6594 In some cases it is useful to look at this to decide what needs to be done.
6596 MODE and WIN are passed so that this macro can use
6597 GO_IF_LEGITIMATE_ADDRESS.
6599 It is always safe for this macro to do nothing. It exists to recognize
6600 opportunities to optimize the output.
6602 For the 80386, we handle X+REG by loading X into a register R and
6603 using R+REG. R will go in a general reg and indexing will be used.
6604 However, if REG is a broken-out memory address or multiplication,
6605 nothing needs to be done because REG can certainly go in a general reg.
6607 When -fpic is used, special handling is needed for symbolic references.
6608 See comments by legitimize_pic_address in i386.c for details. */
6611 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6616 if (TARGET_DEBUG_ADDR)
6618 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6619 GET_MODE_NAME (mode));
6623 log = tls_symbolic_operand (x, mode);
6625 return legitimize_tls_address (x, log, false);
6627 if (flag_pic && SYMBOLIC_CONST (x))
6628 return legitimize_pic_address (x, 0);
6630 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6631 if (GET_CODE (x) == ASHIFT
6632 && GET_CODE (XEXP (x, 1)) == CONST_INT
6633 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6636 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6637 GEN_INT (1 << log));
6640 if (GET_CODE (x) == PLUS)
6642 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6644 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6645 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6646 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6649 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6650 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6651 GEN_INT (1 << log));
6654 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6655 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6656 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6659 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6660 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6661 GEN_INT (1 << log));
6664 /* Put multiply first if it isn't already. */
6665 if (GET_CODE (XEXP (x, 1)) == MULT)
6667 rtx tmp = XEXP (x, 0);
6668 XEXP (x, 0) = XEXP (x, 1);
6673 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6674 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6675 created by virtual register instantiation, register elimination, and
6676 similar optimizations. */
6677 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6680 x = gen_rtx_PLUS (Pmode,
6681 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6682 XEXP (XEXP (x, 1), 0)),
6683 XEXP (XEXP (x, 1), 1));
6687 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6688 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6689 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6690 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6691 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6692 && CONSTANT_P (XEXP (x, 1)))
6695 rtx other = NULL_RTX;
6697 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6699 constant = XEXP (x, 1);
6700 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6702 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6704 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6705 other = XEXP (x, 1);
6713 x = gen_rtx_PLUS (Pmode,
6714 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6715 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6716 plus_constant (other, INTVAL (constant)));
6720 if (changed && legitimate_address_p (mode, x, FALSE))
6723 if (GET_CODE (XEXP (x, 0)) == MULT)
6726 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6729 if (GET_CODE (XEXP (x, 1)) == MULT)
6732 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6736 && GET_CODE (XEXP (x, 1)) == REG
6737 && GET_CODE (XEXP (x, 0)) == REG)
6740 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6743 x = legitimize_pic_address (x, 0);
6746 if (changed && legitimate_address_p (mode, x, FALSE))
6749 if (GET_CODE (XEXP (x, 0)) == REG)
6751 rtx temp = gen_reg_rtx (Pmode);
6752 rtx val = force_operand (XEXP (x, 1), temp);
6754 emit_move_insn (temp, val);
6760 else if (GET_CODE (XEXP (x, 1)) == REG)
6762 rtx temp = gen_reg_rtx (Pmode);
6763 rtx val = force_operand (XEXP (x, 0), temp);
6765 emit_move_insn (temp, val);
6775 /* Print an integer constant expression in assembler syntax. Addition
6776 and subtraction are the only arithmetic that may appear in these
6777 expressions. FILE is the stdio stream to write to, X is the rtx, and
6778 CODE is the operand print code from the output string. */
6781 output_pic_addr_const (FILE *file, rtx x, int code)
6785 switch (GET_CODE (x))
6795 /* Mark the decl as referenced so that cgraph will output the function. */
6796 if (SYMBOL_REF_DECL (x))
6797 mark_decl_referenced (SYMBOL_REF_DECL (x));
6799 assemble_name (file, XSTR (x, 0));
6800 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6801 fputs ("@PLT", file);
6808 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6809 assemble_name (asm_out_file, buf);
6813 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6817 /* This used to output parentheses around the expression,
6818 but that does not work on the 386 (either ATT or BSD assembler). */
6819 output_pic_addr_const (file, XEXP (x, 0), code);
6823 if (GET_MODE (x) == VOIDmode)
6825 /* We can use %d if the number is <32 bits and positive. */
6826 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6827 fprintf (file, "0x%lx%08lx",
6828 (unsigned long) CONST_DOUBLE_HIGH (x),
6829 (unsigned long) CONST_DOUBLE_LOW (x));
6831 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6834 /* We can't handle floating point constants;
6835 PRINT_OPERAND must handle them. */
6836 output_operand_lossage ("floating constant misused");
6840 /* Some assemblers need integer constants to appear first. */
6841 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6843 output_pic_addr_const (file, XEXP (x, 0), code);
6845 output_pic_addr_const (file, XEXP (x, 1), code);
6847 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6849 output_pic_addr_const (file, XEXP (x, 1), code);
6851 output_pic_addr_const (file, XEXP (x, 0), code);
6859 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6860 output_pic_addr_const (file, XEXP (x, 0), code);
6862 output_pic_addr_const (file, XEXP (x, 1), code);
6864 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6868 if (XVECLEN (x, 0) != 1)
6870 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6871 switch (XINT (x, 1))
6874 fputs ("@GOT", file);
6877 fputs ("@GOTOFF", file);
6879 case UNSPEC_GOTPCREL:
6880 fputs ("@GOTPCREL(%rip)", file);
6882 case UNSPEC_GOTTPOFF:
6883 /* FIXME: This might be @TPOFF in Sun ld too. */
6884 fputs ("@GOTTPOFF", file);
6887 fputs ("@TPOFF", file);
6891 fputs ("@TPOFF", file);
6893 fputs ("@NTPOFF", file);
6896 fputs ("@DTPOFF", file);
6898 case UNSPEC_GOTNTPOFF:
6900 fputs ("@GOTTPOFF(%rip)", file);
6902 fputs ("@GOTNTPOFF", file);
6904 case UNSPEC_INDNTPOFF:
6905 fputs ("@INDNTPOFF", file);
6908 output_operand_lossage ("invalid UNSPEC as operand");
6914 output_operand_lossage ("invalid expression as operand");
6918 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6919 We need to handle our special PIC relocations. */
6922 i386_dwarf_output_addr_const (FILE *file, rtx x)
6925 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6929 fprintf (file, "%s", ASM_LONG);
6932 output_pic_addr_const (file, x, '\0');
6934 output_addr_const (file, x);
6938 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6939 We need to emit DTP-relative relocations. */
6942 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6944 fputs (ASM_LONG, file);
6945 output_addr_const (file, x);
6946 fputs ("@DTPOFF", file);
6952 fputs (", 0", file);
6959 /* In the name of slightly smaller debug output, and to cater to
6960 general assembler losage, recognize PIC+GOTOFF and turn it back
6961 into a direct symbol reference. */
6964 ix86_delegitimize_address (rtx orig_x)
6968 if (GET_CODE (x) == MEM)
6973 if (GET_CODE (x) != CONST
6974 || GET_CODE (XEXP (x, 0)) != UNSPEC
6975 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6976 || GET_CODE (orig_x) != MEM)
6978 return XVECEXP (XEXP (x, 0), 0, 0);
6981 if (GET_CODE (x) != PLUS
6982 || GET_CODE (XEXP (x, 1)) != CONST)
6985 if (GET_CODE (XEXP (x, 0)) == REG
6986 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6987 /* %ebx + GOT/GOTOFF */
6989 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6991 /* %ebx + %reg * scale + GOT/GOTOFF */
6993 if (GET_CODE (XEXP (y, 0)) == REG
6994 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6996 else if (GET_CODE (XEXP (y, 1)) == REG
6997 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
7001 if (GET_CODE (y) != REG
7002 && GET_CODE (y) != MULT
7003 && GET_CODE (y) != ASHIFT)
7009 x = XEXP (XEXP (x, 1), 0);
7010 if (GET_CODE (x) == UNSPEC
7011 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7012 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7015 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
7016 return XVECEXP (x, 0, 0);
7019 if (GET_CODE (x) == PLUS
7020 && GET_CODE (XEXP (x, 0)) == UNSPEC
7021 && GET_CODE (XEXP (x, 1)) == CONST_INT
7022 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7023 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
7024 && GET_CODE (orig_x) != MEM)))
7026 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
7028 return gen_rtx_PLUS (Pmode, y, x);
7036 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7041 if (mode == CCFPmode || mode == CCFPUmode)
7043 enum rtx_code second_code, bypass_code;
7044 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7045 if (bypass_code != NIL || second_code != NIL)
7047 code = ix86_fp_compare_code_to_integer (code);
7051 code = reverse_condition (code);
7062 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
7067 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
7068 Those same assemblers have the same but opposite losage on cmov. */
7071 suffix = fp ? "nbe" : "a";
7074 if (mode == CCNOmode || mode == CCGOCmode)
7076 else if (mode == CCmode || mode == CCGCmode)
7087 if (mode == CCNOmode || mode == CCGOCmode)
7089 else if (mode == CCmode || mode == CCGCmode)
7098 suffix = fp ? "nb" : "ae";
7101 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
7111 suffix = fp ? "u" : "p";
7114 suffix = fp ? "nu" : "np";
7119 fputs (suffix, file);
7122 /* Print the name of register X to FILE based on its machine mode and number.
7123 If CODE is 'w', pretend the mode is HImode.
7124 If CODE is 'b', pretend the mode is QImode.
7125 If CODE is 'k', pretend the mode is SImode.
7126 If CODE is 'q', pretend the mode is DImode.
7127 If CODE is 'h', pretend the reg is the `high' byte register.
7128 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7131 print_reg (rtx x, int code, FILE *file)
7133 if (REGNO (x) == ARG_POINTER_REGNUM
7134 || REGNO (x) == FRAME_POINTER_REGNUM
7135 || REGNO (x) == FLAGS_REG
7136 || REGNO (x) == FPSR_REG)
7139 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7142 if (code == 'w' || MMX_REG_P (x))
7144 else if (code == 'b')
7146 else if (code == 'k')
7148 else if (code == 'q')
7150 else if (code == 'y')
7152 else if (code == 'h')
7155 code = GET_MODE_SIZE (GET_MODE (x));
7157 /* Irritatingly, AMD extended registers use different naming convention
7158 from the normal registers. */
7159 if (REX_INT_REG_P (x))
7166 error ("extended registers have no high halves");
7169 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7172 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7175 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7178 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7181 error ("unsupported operand size for extended register");
7189 if (STACK_TOP_P (x))
7191 fputs ("st(0)", file);
7198 if (! ANY_FP_REG_P (x))
7199 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7204 fputs (hi_reg_name[REGNO (x)], file);
7207 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7209 fputs (qi_reg_name[REGNO (x)], file);
7212 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7214 fputs (qi_high_reg_name[REGNO (x)], file);
7221 /* Locate some local-dynamic symbol still in use by this function
7222 so that we can print its name in some tls_local_dynamic_base
7226 get_some_local_dynamic_name (void)
7230 if (cfun->machine->some_ld_name)
7231 return cfun->machine->some_ld_name;
7233 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7235 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7236 return cfun->machine->some_ld_name;
7242 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7246 if (GET_CODE (x) == SYMBOL_REF
7247 && local_dynamic_symbolic_operand (x, Pmode))
7249 cfun->machine->some_ld_name = XSTR (x, 0);
7257 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7258 C -- print opcode suffix for set/cmov insn.
7259 c -- like C, but print reversed condition
7260 F,f -- likewise, but for floating-point.
7261 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7263 R -- print the prefix for register names.
7264 z -- print the opcode suffix for the size of the current operand.
7265 * -- print a star (in certain assembler syntax)
7266 A -- print an absolute memory reference.
7267 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7268 s -- print a shift double count, followed by the assemblers argument
7270 b -- print the QImode name of the register for the indicated operand.
7271 %b0 would print %al if operands[0] is reg 0.
7272 w -- likewise, print the HImode name of the register.
7273 k -- likewise, print the SImode name of the register.
7274 q -- likewise, print the DImode name of the register.
7275 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7276 y -- print "st(0)" instead of "st" as a register.
7277 D -- print condition for SSE cmp instruction.
7278 P -- if PIC, print an @PLT suffix.
7279 X -- don't print any sort of PIC '@' suffix for a symbol.
7280 & -- print some in-use local-dynamic symbol name.
7284 print_operand (FILE *file, rtx x, int code)
7291 if (ASSEMBLER_DIALECT == ASM_ATT)
7296 assemble_name (file, get_some_local_dynamic_name ());
7300 if (ASSEMBLER_DIALECT == ASM_ATT)
7302 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7304 /* Intel syntax. For absolute addresses, registers should not
7305 be surrounded by braces. */
7306 if (GET_CODE (x) != REG)
7309 PRINT_OPERAND (file, x, 0);
7317 PRINT_OPERAND (file, x, 0);
7322 if (ASSEMBLER_DIALECT == ASM_ATT)
7327 if (ASSEMBLER_DIALECT == ASM_ATT)
7332 if (ASSEMBLER_DIALECT == ASM_ATT)
7337 if (ASSEMBLER_DIALECT == ASM_ATT)
7342 if (ASSEMBLER_DIALECT == ASM_ATT)
7347 if (ASSEMBLER_DIALECT == ASM_ATT)
7352 /* 387 opcodes don't get size suffixes if the operands are
7354 if (STACK_REG_P (x))
7357 /* Likewise if using Intel opcodes. */
7358 if (ASSEMBLER_DIALECT == ASM_INTEL)
7361 /* This is the size of op from size of operand. */
7362 switch (GET_MODE_SIZE (GET_MODE (x)))
7365 #ifdef HAVE_GAS_FILDS_FISTS
7371 if (GET_MODE (x) == SFmode)
7386 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7388 #ifdef GAS_MNEMONICS
7414 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7416 PRINT_OPERAND (file, x, 0);
7422 /* Little bit of braindamage here. The SSE compare instructions
7423 does use completely different names for the comparisons that the
7424 fp conditional moves. */
7425 switch (GET_CODE (x))
7440 fputs ("unord", file);
7444 fputs ("neq", file);
7448 fputs ("nlt", file);
7452 fputs ("nle", file);
7455 fputs ("ord", file);
7463 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7464 if (ASSEMBLER_DIALECT == ASM_ATT)
7466 switch (GET_MODE (x))
7468 case HImode: putc ('w', file); break;
7470 case SFmode: putc ('l', file); break;
7472 case DFmode: putc ('q', file); break;
7480 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7483 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7484 if (ASSEMBLER_DIALECT == ASM_ATT)
7487 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7490 /* Like above, but reverse condition */
7492 /* Check to see if argument to %c is really a constant
7493 and not a condition code which needs to be reversed. */
7494 if (!COMPARISON_P (x))
7496 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7499 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7502 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7503 if (ASSEMBLER_DIALECT == ASM_ATT)
7506 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7512 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7515 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7518 int pred_val = INTVAL (XEXP (x, 0));
7520 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7521 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7523 int taken = pred_val > REG_BR_PROB_BASE / 2;
7524 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7526 /* Emit hints only in the case default branch prediction
7527 heuristics would fail. */
7528 if (taken != cputaken)
7530 /* We use 3e (DS) prefix for taken branches and
7531 2e (CS) prefix for not taken branches. */
7533 fputs ("ds ; ", file);
7535 fputs ("cs ; ", file);
7542 output_operand_lossage ("invalid operand code `%c'", code);
7546 if (GET_CODE (x) == REG)
7547 print_reg (x, code, file);
7549 else if (GET_CODE (x) == MEM)
7551 /* No `byte ptr' prefix for call instructions. */
7552 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7555 switch (GET_MODE_SIZE (GET_MODE (x)))
7557 case 1: size = "BYTE"; break;
7558 case 2: size = "WORD"; break;
7559 case 4: size = "DWORD"; break;
7560 case 8: size = "QWORD"; break;
7561 case 12: size = "XWORD"; break;
7562 case 16: size = "XMMWORD"; break;
7567 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7570 else if (code == 'w')
7572 else if (code == 'k')
7576 fputs (" PTR ", file);
7580 /* Avoid (%rip) for call operands. */
7581 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7582 && GET_CODE (x) != CONST_INT)
7583 output_addr_const (file, x);
7584 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7585 output_operand_lossage ("invalid constraints for operand");
7590 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7595 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7596 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7598 if (ASSEMBLER_DIALECT == ASM_ATT)
7600 fprintf (file, "0x%08lx", l);
7603 /* These float cases don't actually occur as immediate operands. */
7604 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7608 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7609 fprintf (file, "%s", dstr);
7612 else if (GET_CODE (x) == CONST_DOUBLE
7613 && GET_MODE (x) == XFmode)
7617 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7618 fprintf (file, "%s", dstr);
7625 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7627 if (ASSEMBLER_DIALECT == ASM_ATT)
7630 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7631 || GET_CODE (x) == LABEL_REF)
7633 if (ASSEMBLER_DIALECT == ASM_ATT)
7636 fputs ("OFFSET FLAT:", file);
7639 if (GET_CODE (x) == CONST_INT)
7640 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7642 output_pic_addr_const (file, x, code);
7644 output_addr_const (file, x);
7648 /* Print a memory operand whose address is ADDR. */
7651 print_operand_address (FILE *file, rtx addr)
7653 struct ix86_address parts;
7654 rtx base, index, disp;
7657 if (! ix86_decompose_address (addr, &parts))
7661 index = parts.index;
7663 scale = parts.scale;
7671 if (USER_LABEL_PREFIX[0] == 0)
7673 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7679 if (!base && !index)
7681 /* Displacement only requires special attention. */
7683 if (GET_CODE (disp) == CONST_INT)
7685 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7687 if (USER_LABEL_PREFIX[0] == 0)
7689 fputs ("ds:", file);
7691 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7694 output_pic_addr_const (file, disp, 0);
7696 output_addr_const (file, disp);
7698 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7700 && ((GET_CODE (disp) == SYMBOL_REF
7701 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7702 || GET_CODE (disp) == LABEL_REF
7703 || (GET_CODE (disp) == CONST
7704 && GET_CODE (XEXP (disp, 0)) == PLUS
7705 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7706 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7707 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7708 fputs ("(%rip)", file);
7712 if (ASSEMBLER_DIALECT == ASM_ATT)
7717 output_pic_addr_const (file, disp, 0);
7718 else if (GET_CODE (disp) == LABEL_REF)
7719 output_asm_label (disp);
7721 output_addr_const (file, disp);
7726 print_reg (base, 0, file);
7730 print_reg (index, 0, file);
7732 fprintf (file, ",%d", scale);
7738 rtx offset = NULL_RTX;
7742 /* Pull out the offset of a symbol; print any symbol itself. */
7743 if (GET_CODE (disp) == CONST
7744 && GET_CODE (XEXP (disp, 0)) == PLUS
7745 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7747 offset = XEXP (XEXP (disp, 0), 1);
7748 disp = gen_rtx_CONST (VOIDmode,
7749 XEXP (XEXP (disp, 0), 0));
7753 output_pic_addr_const (file, disp, 0);
7754 else if (GET_CODE (disp) == LABEL_REF)
7755 output_asm_label (disp);
7756 else if (GET_CODE (disp) == CONST_INT)
7759 output_addr_const (file, disp);
7765 print_reg (base, 0, file);
7768 if (INTVAL (offset) >= 0)
7770 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7774 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7781 print_reg (index, 0, file);
7783 fprintf (file, "*%d", scale);
7791 output_addr_const_extra (FILE *file, rtx x)
7795 if (GET_CODE (x) != UNSPEC)
7798 op = XVECEXP (x, 0, 0);
7799 switch (XINT (x, 1))
7801 case UNSPEC_GOTTPOFF:
7802 output_addr_const (file, op);
7803 /* FIXME: This might be @TPOFF in Sun ld. */
7804 fputs ("@GOTTPOFF", file);
7807 output_addr_const (file, op);
7808 fputs ("@TPOFF", file);
7811 output_addr_const (file, op);
7813 fputs ("@TPOFF", file);
7815 fputs ("@NTPOFF", file);
7818 output_addr_const (file, op);
7819 fputs ("@DTPOFF", file);
7821 case UNSPEC_GOTNTPOFF:
7822 output_addr_const (file, op);
7824 fputs ("@GOTTPOFF(%rip)", file);
7826 fputs ("@GOTNTPOFF", file);
7828 case UNSPEC_INDNTPOFF:
7829 output_addr_const (file, op);
7830 fputs ("@INDNTPOFF", file);
7840 /* Split one or more DImode RTL references into pairs of SImode
7841 references. The RTL can be REG, offsettable MEM, integer constant, or
7842 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7843 split and "num" is its length. lo_half and hi_half are output arrays
7844 that parallel "operands". */
7847 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7851 rtx op = operands[num];
7853 /* simplify_subreg refuse to split volatile memory addresses,
7854 but we still have to handle it. */
7855 if (GET_CODE (op) == MEM)
7857 lo_half[num] = adjust_address (op, SImode, 0);
7858 hi_half[num] = adjust_address (op, SImode, 4);
7862 lo_half[num] = simplify_gen_subreg (SImode, op,
7863 GET_MODE (op) == VOIDmode
7864 ? DImode : GET_MODE (op), 0);
7865 hi_half[num] = simplify_gen_subreg (SImode, op,
7866 GET_MODE (op) == VOIDmode
7867 ? DImode : GET_MODE (op), 4);
7871 /* Split one or more TImode RTL references into pairs of SImode
7872 references. The RTL can be REG, offsettable MEM, integer constant, or
7873 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7874 split and "num" is its length. lo_half and hi_half are output arrays
7875 that parallel "operands". */
7878 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7882 rtx op = operands[num];
7884 /* simplify_subreg refuse to split volatile memory addresses, but we
7885 still have to handle it. */
7886 if (GET_CODE (op) == MEM)
7888 lo_half[num] = adjust_address (op, DImode, 0);
7889 hi_half[num] = adjust_address (op, DImode, 8);
7893 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7894 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7899 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7900 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7901 is the expression of the binary operation. The output may either be
7902 emitted here, or returned to the caller, like all output_* functions.
7904 There is no guarantee that the operands are the same mode, as they
7905 might be within FLOAT or FLOAT_EXTEND expressions. */
7907 #ifndef SYSV386_COMPAT
7908 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7909 wants to fix the assemblers because that causes incompatibility
7910 with gcc. No-one wants to fix gcc because that causes
7911 incompatibility with assemblers... You can use the option of
7912 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7913 #define SYSV386_COMPAT 1
7917 output_387_binary_op (rtx insn, rtx *operands)
7919 static char buf[30];
7922 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7924 #ifdef ENABLE_CHECKING
7925 /* Even if we do not want to check the inputs, this documents input
7926 constraints. Which helps in understanding the following code. */
7927 if (STACK_REG_P (operands[0])
7928 && ((REG_P (operands[1])
7929 && REGNO (operands[0]) == REGNO (operands[1])
7930 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7931 || (REG_P (operands[2])
7932 && REGNO (operands[0]) == REGNO (operands[2])
7933 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7934 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7940 switch (GET_CODE (operands[3]))
7943 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7944 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7952 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7953 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7961 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7962 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7970 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7971 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7985 if (GET_MODE (operands[0]) == SFmode)
7986 strcat (buf, "ss\t{%2, %0|%0, %2}");
7988 strcat (buf, "sd\t{%2, %0|%0, %2}");
7993 switch (GET_CODE (operands[3]))
7997 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7999 rtx temp = operands[2];
8000 operands[2] = operands[1];
8004 /* know operands[0] == operands[1]. */
8006 if (GET_CODE (operands[2]) == MEM)
8012 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8014 if (STACK_TOP_P (operands[0]))
8015 /* How is it that we are storing to a dead operand[2]?
8016 Well, presumably operands[1] is dead too. We can't
8017 store the result to st(0) as st(0) gets popped on this
8018 instruction. Instead store to operands[2] (which I
8019 think has to be st(1)). st(1) will be popped later.
8020 gcc <= 2.8.1 didn't have this check and generated
8021 assembly code that the Unixware assembler rejected. */
8022 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8024 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8028 if (STACK_TOP_P (operands[0]))
8029 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8031 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8036 if (GET_CODE (operands[1]) == MEM)
8042 if (GET_CODE (operands[2]) == MEM)
8048 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8051 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8052 derived assemblers, confusingly reverse the direction of
8053 the operation for fsub{r} and fdiv{r} when the
8054 destination register is not st(0). The Intel assembler
8055 doesn't have this brain damage. Read !SYSV386_COMPAT to
8056 figure out what the hardware really does. */
8057 if (STACK_TOP_P (operands[0]))
8058 p = "{p\t%0, %2|rp\t%2, %0}";
8060 p = "{rp\t%2, %0|p\t%0, %2}";
8062 if (STACK_TOP_P (operands[0]))
8063 /* As above for fmul/fadd, we can't store to st(0). */
8064 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8066 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8071 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8074 if (STACK_TOP_P (operands[0]))
8075 p = "{rp\t%0, %1|p\t%1, %0}";
8077 p = "{p\t%1, %0|rp\t%0, %1}";
8079 if (STACK_TOP_P (operands[0]))
8080 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8082 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8087 if (STACK_TOP_P (operands[0]))
8089 if (STACK_TOP_P (operands[1]))
8090 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8092 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8095 else if (STACK_TOP_P (operands[1]))
8098 p = "{\t%1, %0|r\t%0, %1}";
8100 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8106 p = "{r\t%2, %0|\t%0, %2}";
8108 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8121 /* Output code to initialize control word copies used by
8122 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
8123 is set to control word rounding downwards. */
8125 emit_i387_cw_initialization (rtx normal, rtx round_down)
8127 rtx reg = gen_reg_rtx (HImode);
8129 emit_insn (gen_x86_fnstcw_1 (normal));
8130 emit_move_insn (reg, normal);
8131 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8133 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8135 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8136 emit_move_insn (round_down, reg);
8139 /* Output code for INSN to convert a float to a signed int. OPERANDS
8140 are the insn operands. The output may be [HSD]Imode and the input
8141 operand may be [SDX]Fmode. */
8144 output_fix_trunc (rtx insn, rtx *operands)
8146 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8147 int dimode_p = GET_MODE (operands[0]) == DImode;
8149 /* Jump through a hoop or two for DImode, since the hardware has no
8150 non-popping instruction. We used to do this a different way, but
8151 that was somewhat fragile and broke with post-reload splitters. */
8152 if (dimode_p && !stack_top_dies)
8153 output_asm_insn ("fld\t%y1", operands);
8155 if (!STACK_TOP_P (operands[1]))
8158 if (GET_CODE (operands[0]) != MEM)
8161 output_asm_insn ("fldcw\t%3", operands);
8162 if (stack_top_dies || dimode_p)
8163 output_asm_insn ("fistp%z0\t%0", operands);
8165 output_asm_insn ("fist%z0\t%0", operands);
8166 output_asm_insn ("fldcw\t%2", operands);
8171 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8172 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8173 when fucom should be used. */
8176 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8179 rtx cmp_op0 = operands[0];
8180 rtx cmp_op1 = operands[1];
8181 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8186 cmp_op1 = operands[2];
8190 if (GET_MODE (operands[0]) == SFmode)
8192 return "ucomiss\t{%1, %0|%0, %1}";
8194 return "comiss\t{%1, %0|%0, %1}";
8197 return "ucomisd\t{%1, %0|%0, %1}";
8199 return "comisd\t{%1, %0|%0, %1}";
8202 if (! STACK_TOP_P (cmp_op0))
8205 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8207 if (STACK_REG_P (cmp_op1)
8209 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8210 && REGNO (cmp_op1) != FIRST_STACK_REG)
8212 /* If both the top of the 387 stack dies, and the other operand
8213 is also a stack register that dies, then this must be a
8214 `fcompp' float compare */
8218 /* There is no double popping fcomi variant. Fortunately,
8219 eflags is immune from the fstp's cc clobbering. */
8221 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8223 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8231 return "fucompp\n\tfnstsw\t%0";
8233 return "fcompp\n\tfnstsw\t%0";
8246 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8248 static const char * const alt[24] =
8260 "fcomi\t{%y1, %0|%0, %y1}",
8261 "fcomip\t{%y1, %0|%0, %y1}",
8262 "fucomi\t{%y1, %0|%0, %y1}",
8263 "fucomip\t{%y1, %0|%0, %y1}",
8270 "fcom%z2\t%y2\n\tfnstsw\t%0",
8271 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8272 "fucom%z2\t%y2\n\tfnstsw\t%0",
8273 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8275 "ficom%z2\t%y2\n\tfnstsw\t%0",
8276 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8284 mask = eflags_p << 3;
8285 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8286 mask |= unordered_p << 1;
8287 mask |= stack_top_dies;
8300 ix86_output_addr_vec_elt (FILE *file, int value)
8302 const char *directive = ASM_LONG;
8307 directive = ASM_QUAD;
8313 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8317 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8320 fprintf (file, "%s%s%d-%s%d\n",
8321 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8322 else if (HAVE_AS_GOTOFF_IN_DATA)
8323 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8325 else if (TARGET_MACHO)
8327 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8328 machopic_output_function_base_name (file);
8329 fprintf(file, "\n");
8333 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8334 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8337 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8341 ix86_expand_clear (rtx dest)
8345 /* We play register width games, which are only valid after reload. */
8346 if (!reload_completed)
8349 /* Avoid HImode and its attendant prefix byte. */
8350 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8351 dest = gen_rtx_REG (SImode, REGNO (dest));
8353 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8355 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8356 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8358 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8359 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8365 /* X is an unchanging MEM. If it is a constant pool reference, return
8366 the constant pool rtx, else NULL. */
8369 maybe_get_pool_constant (rtx x)
8371 x = ix86_delegitimize_address (XEXP (x, 0));
8373 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8374 return get_pool_constant (x);
8380 ix86_expand_move (enum machine_mode mode, rtx operands[])
8382 int strict = (reload_in_progress || reload_completed);
8384 enum tls_model model;
8389 model = tls_symbolic_operand (op1, Pmode);
8392 op1 = legitimize_tls_address (op1, model, true);
8393 op1 = force_operand (op1, op0);
8398 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8403 rtx temp = ((reload_in_progress
8404 || ((op0 && GET_CODE (op0) == REG)
8406 ? op0 : gen_reg_rtx (Pmode));
8407 op1 = machopic_indirect_data_reference (op1, temp);
8408 op1 = machopic_legitimize_pic_address (op1, mode,
8409 temp == op1 ? 0 : temp);
8411 else if (MACHOPIC_INDIRECT)
8412 op1 = machopic_indirect_data_reference (op1, 0);
8416 if (GET_CODE (op0) == MEM)
8417 op1 = force_reg (Pmode, op1);
8421 if (GET_CODE (temp) != REG)
8422 temp = gen_reg_rtx (Pmode);
8423 temp = legitimize_pic_address (op1, temp);
8428 #endif /* TARGET_MACHO */
8432 if (GET_CODE (op0) == MEM
8433 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8434 || !push_operand (op0, mode))
8435 && GET_CODE (op1) == MEM)
8436 op1 = force_reg (mode, op1);
8438 if (push_operand (op0, mode)
8439 && ! general_no_elim_operand (op1, mode))
8440 op1 = copy_to_mode_reg (mode, op1);
8442 /* Force large constants in 64bit compilation into register
8443 to get them CSEed. */
8444 if (TARGET_64BIT && mode == DImode
8445 && immediate_operand (op1, mode)
8446 && !x86_64_zero_extended_value (op1)
8447 && !register_operand (op0, mode)
8448 && optimize && !reload_completed && !reload_in_progress)
8449 op1 = copy_to_mode_reg (mode, op1);
8451 if (FLOAT_MODE_P (mode))
8453 /* If we are loading a floating point constant to a register,
8454 force the value to memory now, since we'll get better code
8455 out the back end. */
8459 else if (GET_CODE (op1) == CONST_DOUBLE)
8461 op1 = validize_mem (force_const_mem (mode, op1));
8462 if (!register_operand (op0, mode))
8464 rtx temp = gen_reg_rtx (mode);
8465 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8466 emit_move_insn (op0, temp);
8473 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8477 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8479 /* Force constants other than zero into memory. We do not know how
8480 the instructions used to build constants modify the upper 64 bits
8481 of the register, once we have that information we may be able
8482 to handle some of them more efficiently. */
8483 if ((reload_in_progress | reload_completed) == 0
8484 && register_operand (operands[0], mode)
8485 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8486 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8488 /* Make operand1 a register if it isn't already. */
8490 && !register_operand (operands[0], mode)
8491 && !register_operand (operands[1], mode))
8493 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8494 emit_move_insn (operands[0], temp);
8498 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8501 /* Attempt to expand a binary operator. Make the expansion closer to the
8502 actual machine, then just general_operand, which will allow 3 separate
8503 memory references (one output, two input) in a single insn. */
8506 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8509 int matching_memory;
8510 rtx src1, src2, dst, op, clob;
8516 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8517 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8518 && (rtx_equal_p (dst, src2)
8519 || immediate_operand (src1, mode)))
8526 /* If the destination is memory, and we do not have matching source
8527 operands, do things in registers. */
8528 matching_memory = 0;
8529 if (GET_CODE (dst) == MEM)
8531 if (rtx_equal_p (dst, src1))
8532 matching_memory = 1;
8533 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8534 && rtx_equal_p (dst, src2))
8535 matching_memory = 2;
8537 dst = gen_reg_rtx (mode);
8540 /* Both source operands cannot be in memory. */
8541 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8543 if (matching_memory != 2)
8544 src2 = force_reg (mode, src2);
8546 src1 = force_reg (mode, src1);
8549 /* If the operation is not commutable, source 1 cannot be a constant
8550 or non-matching memory. */
8551 if ((CONSTANT_P (src1)
8552 || (!matching_memory && GET_CODE (src1) == MEM))
8553 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8554 src1 = force_reg (mode, src1);
8556 /* If optimizing, copy to regs to improve CSE */
8557 if (optimize && ! no_new_pseudos)
8559 if (GET_CODE (dst) == MEM)
8560 dst = gen_reg_rtx (mode);
8561 if (GET_CODE (src1) == MEM)
8562 src1 = force_reg (mode, src1);
8563 if (GET_CODE (src2) == MEM)
8564 src2 = force_reg (mode, src2);
8567 /* Emit the instruction. */
8569 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8570 if (reload_in_progress)
8572 /* Reload doesn't know about the flags register, and doesn't know that
8573 it doesn't want to clobber it. We can only do this with PLUS. */
8580 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8581 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8584 /* Fix up the destination if needed. */
8585 if (dst != operands[0])
8586 emit_move_insn (operands[0], dst);
8589 /* Return TRUE or FALSE depending on whether the binary operator meets the
8590 appropriate constraints. */
8593 ix86_binary_operator_ok (enum rtx_code code,
8594 enum machine_mode mode ATTRIBUTE_UNUSED,
8597 /* Both source operands cannot be in memory. */
8598 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8600 /* If the operation is not commutable, source 1 cannot be a constant. */
8601 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8603 /* If the destination is memory, we must have a matching source operand. */
8604 if (GET_CODE (operands[0]) == MEM
8605 && ! (rtx_equal_p (operands[0], operands[1])
8606 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8607 && rtx_equal_p (operands[0], operands[2]))))
8609 /* If the operation is not commutable and the source 1 is memory, we must
8610 have a matching destination. */
8611 if (GET_CODE (operands[1]) == MEM
8612 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
8613 && ! rtx_equal_p (operands[0], operands[1]))
8618 /* Attempt to expand a unary operator. Make the expansion closer to the
8619 actual machine, then just general_operand, which will allow 2 separate
8620 memory references (one output, one input) in a single insn. */
8623 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8626 int matching_memory;
8627 rtx src, dst, op, clob;
8632 /* If the destination is memory, and we do not have matching source
8633 operands, do things in registers. */
8634 matching_memory = 0;
8635 if (GET_CODE (dst) == MEM)
8637 if (rtx_equal_p (dst, src))
8638 matching_memory = 1;
8640 dst = gen_reg_rtx (mode);
8643 /* When source operand is memory, destination must match. */
8644 if (!matching_memory && GET_CODE (src) == MEM)
8645 src = force_reg (mode, src);
8647 /* If optimizing, copy to regs to improve CSE */
8648 if (optimize && ! no_new_pseudos)
8650 if (GET_CODE (dst) == MEM)
8651 dst = gen_reg_rtx (mode);
8652 if (GET_CODE (src) == MEM)
8653 src = force_reg (mode, src);
8656 /* Emit the instruction. */
8658 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8659 if (reload_in_progress || code == NOT)
8661 /* Reload doesn't know about the flags register, and doesn't know that
8662 it doesn't want to clobber it. */
8669 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8670 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8673 /* Fix up the destination if needed. */
8674 if (dst != operands[0])
8675 emit_move_insn (operands[0], dst);
8678 /* Return TRUE or FALSE depending on whether the unary operator meets the
8679 appropriate constraints. */
8682 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8683 enum machine_mode mode ATTRIBUTE_UNUSED,
8684 rtx operands[2] ATTRIBUTE_UNUSED)
8686 /* If one of operands is memory, source and destination must match. */
8687 if ((GET_CODE (operands[0]) == MEM
8688 || GET_CODE (operands[1]) == MEM)
8689 && ! rtx_equal_p (operands[0], operands[1]))
8694 /* Return TRUE or FALSE depending on whether the first SET in INSN
8695 has source and destination with matching CC modes, and that the
8696 CC mode is at least as constrained as REQ_MODE. */
8699 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8702 enum machine_mode set_mode;
8704 set = PATTERN (insn);
8705 if (GET_CODE (set) == PARALLEL)
8706 set = XVECEXP (set, 0, 0);
8707 if (GET_CODE (set) != SET)
8709 if (GET_CODE (SET_SRC (set)) != COMPARE)
8712 set_mode = GET_MODE (SET_DEST (set));
8716 if (req_mode != CCNOmode
8717 && (req_mode != CCmode
8718 || XEXP (SET_SRC (set), 1) != const0_rtx))
8722 if (req_mode == CCGCmode)
8726 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8730 if (req_mode == CCZmode)
8740 return (GET_MODE (SET_SRC (set)) == set_mode);
8743 /* Generate insn patterns to do an integer compare of OPERANDS. */
8746 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8748 enum machine_mode cmpmode;
8751 cmpmode = SELECT_CC_MODE (code, op0, op1);
8752 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8754 /* This is very simple, but making the interface the same as in the
8755 FP case makes the rest of the code easier. */
8756 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8757 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8759 /* Return the test that should be put into the flags user, i.e.
8760 the bcc, scc, or cmov instruction. */
8761 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8764 /* Figure out whether to use ordered or unordered fp comparisons.
8765 Return the appropriate mode to use. */
8768 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8770 /* ??? In order to make all comparisons reversible, we do all comparisons
8771 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8772 all forms trapping and nontrapping comparisons, we can make inequality
8773 comparisons trapping again, since it results in better code when using
8774 FCOM based compares. */
8775 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8779 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8781 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8782 return ix86_fp_compare_mode (code);
8785 /* Only zero flag is needed. */
8787 case NE: /* ZF!=0 */
8789 /* Codes needing carry flag. */
8790 case GEU: /* CF=0 */
8791 case GTU: /* CF=0 & ZF=0 */
8792 case LTU: /* CF=1 */
8793 case LEU: /* CF=1 | ZF=1 */
8795 /* Codes possibly doable only with sign flag when
8796 comparing against zero. */
8797 case GE: /* SF=OF or SF=0 */
8798 case LT: /* SF<>OF or SF=1 */
8799 if (op1 == const0_rtx)
8802 /* For other cases Carry flag is not required. */
8804 /* Codes doable only with sign flag when comparing
8805 against zero, but we miss jump instruction for it
8806 so we need to use relational tests against overflow
8807 that thus needs to be zero. */
8808 case GT: /* ZF=0 & SF=OF */
8809 case LE: /* ZF=1 | SF<>OF */
8810 if (op1 == const0_rtx)
8814 /* strcmp pattern do (use flags) and combine may ask us for proper
8823 /* Return the fixed registers used for condition codes. */
8826 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8833 /* If two condition code modes are compatible, return a condition code
8834 mode which is compatible with both. Otherwise, return
8837 static enum machine_mode
8838 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8843 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8846 if ((m1 == CCGCmode && m2 == CCGOCmode)
8847 || (m1 == CCGOCmode && m2 == CCGCmode))
8875 /* These are only compatible with themselves, which we already
8881 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8884 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8886 enum rtx_code swapped_code = swap_condition (code);
8887 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8888 || (ix86_fp_comparison_cost (swapped_code)
8889 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8892 /* Swap, force into registers, or otherwise massage the two operands
8893 to a fp comparison. The operands are updated in place; the new
8894 comparison code is returned. */
8896 static enum rtx_code
8897 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8899 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8900 rtx op0 = *pop0, op1 = *pop1;
8901 enum machine_mode op_mode = GET_MODE (op0);
8902 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8904 /* All of the unordered compare instructions only work on registers.
8905 The same is true of the XFmode compare instructions. The same is
8906 true of the fcomi compare instructions. */
8909 && (fpcmp_mode == CCFPUmode
8910 || op_mode == XFmode
8911 || ix86_use_fcomi_compare (code)))
8913 op0 = force_reg (op_mode, op0);
8914 op1 = force_reg (op_mode, op1);
8918 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8919 things around if they appear profitable, otherwise force op0
8922 if (standard_80387_constant_p (op0) == 0
8923 || (GET_CODE (op0) == MEM
8924 && ! (standard_80387_constant_p (op1) == 0
8925 || GET_CODE (op1) == MEM)))
8928 tmp = op0, op0 = op1, op1 = tmp;
8929 code = swap_condition (code);
8932 if (GET_CODE (op0) != REG)
8933 op0 = force_reg (op_mode, op0);
8935 if (CONSTANT_P (op1))
8937 if (standard_80387_constant_p (op1))
8938 op1 = force_reg (op_mode, op1);
8940 op1 = validize_mem (force_const_mem (op_mode, op1));
8944 /* Try to rearrange the comparison to make it cheaper. */
8945 if (ix86_fp_comparison_cost (code)
8946 > ix86_fp_comparison_cost (swap_condition (code))
8947 && (GET_CODE (op1) == REG || !no_new_pseudos))
8950 tmp = op0, op0 = op1, op1 = tmp;
8951 code = swap_condition (code);
8952 if (GET_CODE (op0) != REG)
8953 op0 = force_reg (op_mode, op0);
8961 /* Convert comparison codes we use to represent FP comparison to integer
8962 code that will result in proper branch. Return UNKNOWN if no such code
8964 static enum rtx_code
8965 ix86_fp_compare_code_to_integer (enum rtx_code code)
8994 /* Split comparison code CODE into comparisons we can do using branch
8995 instructions. BYPASS_CODE is comparison code for branch that will
8996 branch around FIRST_CODE and SECOND_CODE. If some of branches
8997 is not required, set value to NIL.
8998 We never require more than two branches. */
9000 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
9001 enum rtx_code *first_code,
9002 enum rtx_code *second_code)
9008 /* The fcomi comparison sets flags as follows:
9018 case GT: /* GTU - CF=0 & ZF=0 */
9019 case GE: /* GEU - CF=0 */
9020 case ORDERED: /* PF=0 */
9021 case UNORDERED: /* PF=1 */
9022 case UNEQ: /* EQ - ZF=1 */
9023 case UNLT: /* LTU - CF=1 */
9024 case UNLE: /* LEU - CF=1 | ZF=1 */
9025 case LTGT: /* EQ - ZF=0 */
9027 case LT: /* LTU - CF=1 - fails on unordered */
9029 *bypass_code = UNORDERED;
9031 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
9033 *bypass_code = UNORDERED;
9035 case EQ: /* EQ - ZF=1 - fails on unordered */
9037 *bypass_code = UNORDERED;
9039 case NE: /* NE - ZF=0 - fails on unordered */
9041 *second_code = UNORDERED;
9043 case UNGE: /* GEU - CF=0 - fails on unordered */
9045 *second_code = UNORDERED;
9047 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
9049 *second_code = UNORDERED;
9054 if (!TARGET_IEEE_FP)
9061 /* Return cost of comparison done fcom + arithmetics operations on AX.
9062 All following functions do use number of instructions as a cost metrics.
9063 In future this should be tweaked to compute bytes for optimize_size and
9064 take into account performance of various instructions on various CPUs. */
9066 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9068 if (!TARGET_IEEE_FP)
9070 /* The cost of code output by ix86_expand_fp_compare. */
9098 /* Return cost of comparison done using fcomi operation.
9099 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9101 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9103 enum rtx_code bypass_code, first_code, second_code;
9104 /* Return arbitrarily high cost when instruction is not supported - this
9105 prevents gcc from using it. */
9108 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9109 return (bypass_code != NIL || second_code != NIL) + 2;
9112 /* Return cost of comparison done using sahf operation.
9113 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9115 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9117 enum rtx_code bypass_code, first_code, second_code;
9118 /* Return arbitrarily high cost when instruction is not preferred - this
9119 avoids gcc from using it. */
9120 if (!TARGET_USE_SAHF && !optimize_size)
9122 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9123 return (bypass_code != NIL || second_code != NIL) + 3;
9126 /* Compute cost of the comparison done using any method.
9127 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9129 ix86_fp_comparison_cost (enum rtx_code code)
9131 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9134 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9135 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9137 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9138 if (min > sahf_cost)
9140 if (min > fcomi_cost)
9145 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9148 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9149 rtx *second_test, rtx *bypass_test)
9151 enum machine_mode fpcmp_mode, intcmp_mode;
9153 int cost = ix86_fp_comparison_cost (code);
9154 enum rtx_code bypass_code, first_code, second_code;
9156 fpcmp_mode = ix86_fp_compare_mode (code);
9157 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9160 *second_test = NULL_RTX;
9162 *bypass_test = NULL_RTX;
9164 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9166 /* Do fcomi/sahf based test when profitable. */
9167 if ((bypass_code == NIL || bypass_test)
9168 && (second_code == NIL || second_test)
9169 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9173 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9174 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9180 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9181 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9183 scratch = gen_reg_rtx (HImode);
9184 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9185 emit_insn (gen_x86_sahf_1 (scratch));
9188 /* The FP codes work out to act like unsigned. */
9189 intcmp_mode = fpcmp_mode;
9191 if (bypass_code != NIL)
9192 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9193 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9195 if (second_code != NIL)
9196 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9197 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9202 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9203 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9204 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9206 scratch = gen_reg_rtx (HImode);
9207 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9209 /* In the unordered case, we have to check C2 for NaN's, which
9210 doesn't happen to work out to anything nice combination-wise.
9211 So do some bit twiddling on the value we've got in AH to come
9212 up with an appropriate set of condition codes. */
9214 intcmp_mode = CCNOmode;
9219 if (code == GT || !TARGET_IEEE_FP)
9221 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9226 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9227 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9228 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9229 intcmp_mode = CCmode;
9235 if (code == LT && TARGET_IEEE_FP)
9237 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9238 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9239 intcmp_mode = CCmode;
9244 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9250 if (code == GE || !TARGET_IEEE_FP)
9252 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9257 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9258 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9265 if (code == LE && TARGET_IEEE_FP)
9267 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9268 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9269 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9270 intcmp_mode = CCmode;
9275 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9281 if (code == EQ && TARGET_IEEE_FP)
9283 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9284 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9285 intcmp_mode = CCmode;
9290 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9297 if (code == NE && TARGET_IEEE_FP)
9299 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9300 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9306 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9312 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9316 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9325 /* Return the test that should be put into the flags user, i.e.
9326 the bcc, scc, or cmov instruction. */
9327 return gen_rtx_fmt_ee (code, VOIDmode,
9328 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9333 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9336 op0 = ix86_compare_op0;
9337 op1 = ix86_compare_op1;
9340 *second_test = NULL_RTX;
9342 *bypass_test = NULL_RTX;
9344 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9345 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9346 second_test, bypass_test);
9348 ret = ix86_expand_int_compare (code, op0, op1);
9353 /* Return true if the CODE will result in nontrivial jump sequence. */
9355 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9357 enum rtx_code bypass_code, first_code, second_code;
9360 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9361 return bypass_code != NIL || second_code != NIL;
9365 ix86_expand_branch (enum rtx_code code, rtx label)
9369 switch (GET_MODE (ix86_compare_op0))
9375 tmp = ix86_expand_compare (code, NULL, NULL);
9376 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9377 gen_rtx_LABEL_REF (VOIDmode, label),
9379 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9388 enum rtx_code bypass_code, first_code, second_code;
9390 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9393 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9395 /* Check whether we will use the natural sequence with one jump. If
9396 so, we can expand jump early. Otherwise delay expansion by
9397 creating compound insn to not confuse optimizers. */
9398 if (bypass_code == NIL && second_code == NIL
9401 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9402 gen_rtx_LABEL_REF (VOIDmode, label),
9407 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9408 ix86_compare_op0, ix86_compare_op1);
9409 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9410 gen_rtx_LABEL_REF (VOIDmode, label),
9412 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9414 use_fcomi = ix86_use_fcomi_compare (code);
9415 vec = rtvec_alloc (3 + !use_fcomi);
9416 RTVEC_ELT (vec, 0) = tmp;
9418 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9420 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9423 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9425 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9433 /* Expand DImode branch into multiple compare+branch. */
9435 rtx lo[2], hi[2], label2;
9436 enum rtx_code code1, code2, code3;
9438 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9440 tmp = ix86_compare_op0;
9441 ix86_compare_op0 = ix86_compare_op1;
9442 ix86_compare_op1 = tmp;
9443 code = swap_condition (code);
9445 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9446 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9448 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9449 avoid two branches. This costs one extra insn, so disable when
9450 optimizing for size. */
9452 if ((code == EQ || code == NE)
9454 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9459 if (hi[1] != const0_rtx)
9460 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9461 NULL_RTX, 0, OPTAB_WIDEN);
9464 if (lo[1] != const0_rtx)
9465 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9466 NULL_RTX, 0, OPTAB_WIDEN);
9468 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9469 NULL_RTX, 0, OPTAB_WIDEN);
9471 ix86_compare_op0 = tmp;
9472 ix86_compare_op1 = const0_rtx;
9473 ix86_expand_branch (code, label);
9477 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9478 op1 is a constant and the low word is zero, then we can just
9479 examine the high word. */
9481 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9484 case LT: case LTU: case GE: case GEU:
9485 ix86_compare_op0 = hi[0];
9486 ix86_compare_op1 = hi[1];
9487 ix86_expand_branch (code, label);
9493 /* Otherwise, we need two or three jumps. */
9495 label2 = gen_label_rtx ();
9498 code2 = swap_condition (code);
9499 code3 = unsigned_condition (code);
9503 case LT: case GT: case LTU: case GTU:
9506 case LE: code1 = LT; code2 = GT; break;
9507 case GE: code1 = GT; code2 = LT; break;
9508 case LEU: code1 = LTU; code2 = GTU; break;
9509 case GEU: code1 = GTU; code2 = LTU; break;
9511 case EQ: code1 = NIL; code2 = NE; break;
9512 case NE: code2 = NIL; break;
9520 * if (hi(a) < hi(b)) goto true;
9521 * if (hi(a) > hi(b)) goto false;
9522 * if (lo(a) < lo(b)) goto true;
9526 ix86_compare_op0 = hi[0];
9527 ix86_compare_op1 = hi[1];
9530 ix86_expand_branch (code1, label);
9532 ix86_expand_branch (code2, label2);
9534 ix86_compare_op0 = lo[0];
9535 ix86_compare_op1 = lo[1];
9536 ix86_expand_branch (code3, label);
9539 emit_label (label2);
9548 /* Split branch based on floating point condition. */
9550 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9551 rtx target1, rtx target2, rtx tmp)
9554 rtx label = NULL_RTX;
9556 int bypass_probability = -1, second_probability = -1, probability = -1;
9559 if (target2 != pc_rtx)
9562 code = reverse_condition_maybe_unordered (code);
9567 condition = ix86_expand_fp_compare (code, op1, op2,
9568 tmp, &second, &bypass);
9570 if (split_branch_probability >= 0)
9572 /* Distribute the probabilities across the jumps.
9573 Assume the BYPASS and SECOND to be always test
9575 probability = split_branch_probability;
9577 /* Value of 1 is low enough to make no need for probability
9578 to be updated. Later we may run some experiments and see
9579 if unordered values are more frequent in practice. */
9581 bypass_probability = 1;
9583 second_probability = 1;
9585 if (bypass != NULL_RTX)
9587 label = gen_label_rtx ();
9588 i = emit_jump_insn (gen_rtx_SET
9590 gen_rtx_IF_THEN_ELSE (VOIDmode,
9592 gen_rtx_LABEL_REF (VOIDmode,
9595 if (bypass_probability >= 0)
9597 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9598 GEN_INT (bypass_probability),
9601 i = emit_jump_insn (gen_rtx_SET
9603 gen_rtx_IF_THEN_ELSE (VOIDmode,
9604 condition, target1, target2)));
9605 if (probability >= 0)
9607 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9608 GEN_INT (probability),
9610 if (second != NULL_RTX)
9612 i = emit_jump_insn (gen_rtx_SET
9614 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9616 if (second_probability >= 0)
9618 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9619 GEN_INT (second_probability),
9622 if (label != NULL_RTX)
9627 ix86_expand_setcc (enum rtx_code code, rtx dest)
9629 rtx ret, tmp, tmpreg, equiv;
9630 rtx second_test, bypass_test;
9632 if (GET_MODE (ix86_compare_op0) == DImode
9634 return 0; /* FAIL */
9636 if (GET_MODE (dest) != QImode)
9639 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9640 PUT_MODE (ret, QImode);
9645 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9646 if (bypass_test || second_test)
9648 rtx test = second_test;
9650 rtx tmp2 = gen_reg_rtx (QImode);
9657 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9659 PUT_MODE (test, QImode);
9660 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9663 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9665 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9668 /* Attach a REG_EQUAL note describing the comparison result. */
9669 equiv = simplify_gen_relational (code, QImode,
9670 GET_MODE (ix86_compare_op0),
9671 ix86_compare_op0, ix86_compare_op1);
9672 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9674 return 1; /* DONE */
9677 /* Expand comparison setting or clearing carry flag. Return true when
9678 successful and set pop for the operation. */
9680 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9682 enum machine_mode mode =
9683 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9685 /* Do not handle DImode compares that go trought special path. Also we can't
9686 deal with FP compares yet. This is possible to add. */
9687 if ((mode == DImode && !TARGET_64BIT))
9689 if (FLOAT_MODE_P (mode))
9691 rtx second_test = NULL, bypass_test = NULL;
9692 rtx compare_op, compare_seq;
9694 /* Shortcut: following common codes never translate into carry flag compares. */
9695 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9696 || code == ORDERED || code == UNORDERED)
9699 /* These comparisons require zero flag; swap operands so they won't. */
9700 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9706 code = swap_condition (code);
9709 /* Try to expand the comparison and verify that we end up with carry flag
9710 based comparison. This is fails to be true only when we decide to expand
9711 comparison using arithmetic that is not too common scenario. */
9713 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9714 &second_test, &bypass_test);
9715 compare_seq = get_insns ();
9718 if (second_test || bypass_test)
9720 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9721 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9722 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9724 code = GET_CODE (compare_op);
9725 if (code != LTU && code != GEU)
9727 emit_insn (compare_seq);
9731 if (!INTEGRAL_MODE_P (mode))
9739 /* Convert a==0 into (unsigned)a<1. */
9742 if (op1 != const0_rtx)
9745 code = (code == EQ ? LTU : GEU);
9748 /* Convert a>b into b<a or a>=b-1. */
9751 if (GET_CODE (op1) == CONST_INT)
9753 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9754 /* Bail out on overflow. We still can swap operands but that
9755 would force loading of the constant into register. */
9756 if (op1 == const0_rtx
9757 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9759 code = (code == GTU ? GEU : LTU);
9766 code = (code == GTU ? LTU : GEU);
9770 /* Convert a>=0 into (unsigned)a<0x80000000. */
9773 if (mode == DImode || op1 != const0_rtx)
9775 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9776 code = (code == LT ? GEU : LTU);
9780 if (mode == DImode || op1 != constm1_rtx)
9782 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9783 code = (code == LE ? GEU : LTU);
9789 /* Swapping operands may cause constant to appear as first operand. */
9790 if (!nonimmediate_operand (op0, VOIDmode))
9794 op0 = force_reg (mode, op0);
9796 ix86_compare_op0 = op0;
9797 ix86_compare_op1 = op1;
9798 *pop = ix86_expand_compare (code, NULL, NULL);
9799 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9805 ix86_expand_int_movcc (rtx operands[])
9807 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9808 rtx compare_seq, compare_op;
9809 rtx second_test, bypass_test;
9810 enum machine_mode mode = GET_MODE (operands[0]);
9811 bool sign_bit_compare_p = false;;
9814 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9815 compare_seq = get_insns ();
9818 compare_code = GET_CODE (compare_op);
9820 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9821 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9822 sign_bit_compare_p = true;
9824 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9825 HImode insns, we'd be swallowed in word prefix ops. */
9827 if ((mode != HImode || TARGET_FAST_PREFIX)
9828 && (mode != DImode || TARGET_64BIT)
9829 && GET_CODE (operands[2]) == CONST_INT
9830 && GET_CODE (operands[3]) == CONST_INT)
9832 rtx out = operands[0];
9833 HOST_WIDE_INT ct = INTVAL (operands[2]);
9834 HOST_WIDE_INT cf = INTVAL (operands[3]);
9838 /* Sign bit compares are better done using shifts than we do by using
9840 if (sign_bit_compare_p
9841 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9842 ix86_compare_op1, &compare_op))
9844 /* Detect overlap between destination and compare sources. */
9847 if (!sign_bit_compare_p)
9851 compare_code = GET_CODE (compare_op);
9853 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9854 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9857 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9860 /* To simplify rest of code, restrict to the GEU case. */
9861 if (compare_code == LTU)
9863 HOST_WIDE_INT tmp = ct;
9866 compare_code = reverse_condition (compare_code);
9867 code = reverse_condition (code);
9872 PUT_CODE (compare_op,
9873 reverse_condition_maybe_unordered
9874 (GET_CODE (compare_op)));
9876 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9880 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9881 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9882 tmp = gen_reg_rtx (mode);
9885 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9887 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9891 if (code == GT || code == GE)
9892 code = reverse_condition (code);
9895 HOST_WIDE_INT tmp = ct;
9900 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9901 ix86_compare_op1, VOIDmode, 0, -1);
9914 tmp = expand_simple_binop (mode, PLUS,
9916 copy_rtx (tmp), 1, OPTAB_DIRECT);
9927 tmp = expand_simple_binop (mode, IOR,
9929 copy_rtx (tmp), 1, OPTAB_DIRECT);
9931 else if (diff == -1 && ct)
9941 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9943 tmp = expand_simple_binop (mode, PLUS,
9944 copy_rtx (tmp), GEN_INT (cf),
9945 copy_rtx (tmp), 1, OPTAB_DIRECT);
9953 * andl cf - ct, dest
9963 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9966 tmp = expand_simple_binop (mode, AND,
9968 gen_int_mode (cf - ct, mode),
9969 copy_rtx (tmp), 1, OPTAB_DIRECT);
9971 tmp = expand_simple_binop (mode, PLUS,
9972 copy_rtx (tmp), GEN_INT (ct),
9973 copy_rtx (tmp), 1, OPTAB_DIRECT);
9976 if (!rtx_equal_p (tmp, out))
9977 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9979 return 1; /* DONE */
9985 tmp = ct, ct = cf, cf = tmp;
9987 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9989 /* We may be reversing unordered compare to normal compare, that
9990 is not valid in general (we may convert non-trapping condition
9991 to trapping one), however on i386 we currently emit all
9992 comparisons unordered. */
9993 compare_code = reverse_condition_maybe_unordered (compare_code);
9994 code = reverse_condition_maybe_unordered (code);
9998 compare_code = reverse_condition (compare_code);
9999 code = reverse_condition (code);
10003 compare_code = NIL;
10004 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
10005 && GET_CODE (ix86_compare_op1) == CONST_INT)
10007 if (ix86_compare_op1 == const0_rtx
10008 && (code == LT || code == GE))
10009 compare_code = code;
10010 else if (ix86_compare_op1 == constm1_rtx)
10014 else if (code == GT)
10019 /* Optimize dest = (op0 < 0) ? -1 : cf. */
10020 if (compare_code != NIL
10021 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
10022 && (cf == -1 || ct == -1))
10024 /* If lea code below could be used, only optimize
10025 if it results in a 2 insn sequence. */
10027 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
10028 || diff == 3 || diff == 5 || diff == 9)
10029 || (compare_code == LT && ct == -1)
10030 || (compare_code == GE && cf == -1))
10033 * notl op1 (if necessary)
10041 code = reverse_condition (code);
10044 out = emit_store_flag (out, code, ix86_compare_op0,
10045 ix86_compare_op1, VOIDmode, 0, -1);
10047 out = expand_simple_binop (mode, IOR,
10049 out, 1, OPTAB_DIRECT);
10050 if (out != operands[0])
10051 emit_move_insn (operands[0], out);
10053 return 1; /* DONE */
10058 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
10059 || diff == 3 || diff == 5 || diff == 9)
10060 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
10061 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
10067 * lea cf(dest*(ct-cf)),dest
10071 * This also catches the degenerate setcc-only case.
10077 out = emit_store_flag (out, code, ix86_compare_op0,
10078 ix86_compare_op1, VOIDmode, 0, 1);
10081 /* On x86_64 the lea instruction operates on Pmode, so we need
10082 to get arithmetics done in proper mode to match. */
10084 tmp = copy_rtx (out);
10088 out1 = copy_rtx (out);
10089 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10093 tmp = gen_rtx_PLUS (mode, tmp, out1);
10099 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10102 if (!rtx_equal_p (tmp, out))
10105 out = force_operand (tmp, copy_rtx (out));
10107 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10109 if (!rtx_equal_p (out, operands[0]))
10110 emit_move_insn (operands[0], copy_rtx (out));
10112 return 1; /* DONE */
10116 * General case: Jumpful:
10117 * xorl dest,dest cmpl op1, op2
10118 * cmpl op1, op2 movl ct, dest
10119 * setcc dest jcc 1f
10120 * decl dest movl cf, dest
10121 * andl (cf-ct),dest 1:
10124 * Size 20. Size 14.
10126 * This is reasonably steep, but branch mispredict costs are
10127 * high on modern cpus, so consider failing only if optimizing
10131 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10132 && BRANCH_COST >= 2)
10138 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10139 /* We may be reversing unordered compare to normal compare,
10140 that is not valid in general (we may convert non-trapping
10141 condition to trapping one), however on i386 we currently
10142 emit all comparisons unordered. */
10143 code = reverse_condition_maybe_unordered (code);
10146 code = reverse_condition (code);
10147 if (compare_code != NIL)
10148 compare_code = reverse_condition (compare_code);
10152 if (compare_code != NIL)
10154 /* notl op1 (if needed)
10159 For x < 0 (resp. x <= -1) there will be no notl,
10160 so if possible swap the constants to get rid of the
10162 True/false will be -1/0 while code below (store flag
10163 followed by decrement) is 0/-1, so the constants need
10164 to be exchanged once more. */
10166 if (compare_code == GE || !cf)
10168 code = reverse_condition (code);
10173 HOST_WIDE_INT tmp = cf;
10178 out = emit_store_flag (out, code, ix86_compare_op0,
10179 ix86_compare_op1, VOIDmode, 0, -1);
10183 out = emit_store_flag (out, code, ix86_compare_op0,
10184 ix86_compare_op1, VOIDmode, 0, 1);
10186 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10187 copy_rtx (out), 1, OPTAB_DIRECT);
10190 out = expand_simple_binop (mode, AND, copy_rtx (out),
10191 gen_int_mode (cf - ct, mode),
10192 copy_rtx (out), 1, OPTAB_DIRECT);
10194 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10195 copy_rtx (out), 1, OPTAB_DIRECT);
10196 if (!rtx_equal_p (out, operands[0]))
10197 emit_move_insn (operands[0], copy_rtx (out));
10199 return 1; /* DONE */
10203 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10205 /* Try a few things more with specific constants and a variable. */
10208 rtx var, orig_out, out, tmp;
10210 if (BRANCH_COST <= 2)
10211 return 0; /* FAIL */
10213 /* If one of the two operands is an interesting constant, load a
10214 constant with the above and mask it in with a logical operation. */
10216 if (GET_CODE (operands[2]) == CONST_INT)
10219 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10220 operands[3] = constm1_rtx, op = and_optab;
10221 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10222 operands[3] = const0_rtx, op = ior_optab;
10224 return 0; /* FAIL */
10226 else if (GET_CODE (operands[3]) == CONST_INT)
10229 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10230 operands[2] = constm1_rtx, op = and_optab;
10231 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
10232 operands[2] = const0_rtx, op = ior_optab;
10234 return 0; /* FAIL */
10237 return 0; /* FAIL */
10239 orig_out = operands[0];
10240 tmp = gen_reg_rtx (mode);
10243 /* Recurse to get the constant loaded. */
10244 if (ix86_expand_int_movcc (operands) == 0)
10245 return 0; /* FAIL */
10247 /* Mask in the interesting variable. */
10248 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10250 if (!rtx_equal_p (out, orig_out))
10251 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10253 return 1; /* DONE */
10257 * For comparison with above,
10267 if (! nonimmediate_operand (operands[2], mode))
10268 operands[2] = force_reg (mode, operands[2]);
10269 if (! nonimmediate_operand (operands[3], mode))
10270 operands[3] = force_reg (mode, operands[3]);
10272 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10274 rtx tmp = gen_reg_rtx (mode);
10275 emit_move_insn (tmp, operands[3]);
10278 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10280 rtx tmp = gen_reg_rtx (mode);
10281 emit_move_insn (tmp, operands[2]);
10285 if (! register_operand (operands[2], VOIDmode)
10287 || ! register_operand (operands[3], VOIDmode)))
10288 operands[2] = force_reg (mode, operands[2]);
10291 && ! register_operand (operands[3], VOIDmode))
10292 operands[3] = force_reg (mode, operands[3]);
10294 emit_insn (compare_seq);
10295 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10296 gen_rtx_IF_THEN_ELSE (mode,
10297 compare_op, operands[2],
10300 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10301 gen_rtx_IF_THEN_ELSE (mode,
10303 copy_rtx (operands[3]),
10304 copy_rtx (operands[0]))));
10306 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10307 gen_rtx_IF_THEN_ELSE (mode,
10309 copy_rtx (operands[2]),
10310 copy_rtx (operands[0]))));
10312 return 1; /* DONE */
10316 ix86_expand_fp_movcc (rtx operands[])
10318 enum rtx_code code;
10320 rtx compare_op, second_test, bypass_test;
10322 /* For SF/DFmode conditional moves based on comparisons
10323 in same mode, we may want to use SSE min/max instructions. */
10324 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10325 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10326 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10327 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
10328 && (!TARGET_IEEE_FP
10329 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10330 /* We may be called from the post-reload splitter. */
10331 && (!REG_P (operands[0])
10332 || SSE_REG_P (operands[0])
10333 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10335 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10336 code = GET_CODE (operands[1]);
10338 /* See if we have (cross) match between comparison operands and
10339 conditional move operands. */
10340 if (rtx_equal_p (operands[2], op1))
10345 code = reverse_condition_maybe_unordered (code);
10347 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10349 /* Check for min operation. */
10350 if (code == LT || code == UNLE)
10358 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10359 if (memory_operand (op0, VOIDmode))
10360 op0 = force_reg (GET_MODE (operands[0]), op0);
10361 if (GET_MODE (operands[0]) == SFmode)
10362 emit_insn (gen_minsf3 (operands[0], op0, op1));
10364 emit_insn (gen_mindf3 (operands[0], op0, op1));
10367 /* Check for max operation. */
10368 if (code == GT || code == UNGE)
10376 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10377 if (memory_operand (op0, VOIDmode))
10378 op0 = force_reg (GET_MODE (operands[0]), op0);
10379 if (GET_MODE (operands[0]) == SFmode)
10380 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10382 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10386 /* Manage condition to be sse_comparison_operator. In case we are
10387 in non-ieee mode, try to canonicalize the destination operand
10388 to be first in the comparison - this helps reload to avoid extra
10390 if (!sse_comparison_operator (operands[1], VOIDmode)
10391 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10393 rtx tmp = ix86_compare_op0;
10394 ix86_compare_op0 = ix86_compare_op1;
10395 ix86_compare_op1 = tmp;
10396 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10397 VOIDmode, ix86_compare_op0,
10400 /* Similarly try to manage result to be first operand of conditional
10401 move. We also don't support the NE comparison on SSE, so try to
10403 if ((rtx_equal_p (operands[0], operands[3])
10404 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10405 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10407 rtx tmp = operands[2];
10408 operands[2] = operands[3];
10410 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10411 (GET_CODE (operands[1])),
10412 VOIDmode, ix86_compare_op0,
10415 if (GET_MODE (operands[0]) == SFmode)
10416 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10417 operands[2], operands[3],
10418 ix86_compare_op0, ix86_compare_op1));
10420 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10421 operands[2], operands[3],
10422 ix86_compare_op0, ix86_compare_op1));
10426 /* The floating point conditional move instructions don't directly
10427 support conditions resulting from a signed integer comparison. */
10429 code = GET_CODE (operands[1]);
10430 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10432 /* The floating point conditional move instructions don't directly
10433 support signed integer comparisons. */
10435 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10437 if (second_test != NULL || bypass_test != NULL)
10439 tmp = gen_reg_rtx (QImode);
10440 ix86_expand_setcc (code, tmp);
10442 ix86_compare_op0 = tmp;
10443 ix86_compare_op1 = const0_rtx;
10444 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10446 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10448 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10449 emit_move_insn (tmp, operands[3]);
10452 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10454 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10455 emit_move_insn (tmp, operands[2]);
10459 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10460 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10465 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10466 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10471 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10472 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10480 /* Expand conditional increment or decrement using adb/sbb instructions.
10481 The default case using setcc followed by the conditional move can be
10482 done by generic code. */
10484 ix86_expand_int_addcc (rtx operands[])
10486 enum rtx_code code = GET_CODE (operands[1]);
10488 rtx val = const0_rtx;
10489 bool fpcmp = false;
10490 enum machine_mode mode = GET_MODE (operands[0]);
10492 if (operands[3] != const1_rtx
10493 && operands[3] != constm1_rtx)
10495 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10496 ix86_compare_op1, &compare_op))
10498 code = GET_CODE (compare_op);
10500 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10501 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10504 code = ix86_fp_compare_code_to_integer (code);
10511 PUT_CODE (compare_op,
10512 reverse_condition_maybe_unordered
10513 (GET_CODE (compare_op)));
10515 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10517 PUT_MODE (compare_op, mode);
10519 /* Construct either adc or sbb insn. */
10520 if ((code == LTU) == (operands[3] == constm1_rtx))
10522 switch (GET_MODE (operands[0]))
10525 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10528 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10531 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10534 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10542 switch (GET_MODE (operands[0]))
10545 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10548 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10551 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10554 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10560 return 1; /* DONE */
10564 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10565 works for floating pointer parameters and nonoffsetable memories.
10566 For pushes, it returns just stack offsets; the values will be saved
10567 in the right order. Maximally three parts are generated. */
10570 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10575 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10577 size = (GET_MODE_SIZE (mode) + 4) / 8;
10579 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10581 if (size < 2 || size > 3)
10584 /* Optimize constant pool reference to immediates. This is used by fp
10585 moves, that force all constants to memory to allow combining. */
10586 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10588 rtx tmp = maybe_get_pool_constant (operand);
10593 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10595 /* The only non-offsetable memories we handle are pushes. */
10596 if (! push_operand (operand, VOIDmode))
10599 operand = copy_rtx (operand);
10600 PUT_MODE (operand, Pmode);
10601 parts[0] = parts[1] = parts[2] = operand;
10603 else if (!TARGET_64BIT)
10605 if (mode == DImode)
10606 split_di (&operand, 1, &parts[0], &parts[1]);
10609 if (REG_P (operand))
10611 if (!reload_completed)
10613 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10614 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10616 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10618 else if (offsettable_memref_p (operand))
10620 operand = adjust_address (operand, SImode, 0);
10621 parts[0] = operand;
10622 parts[1] = adjust_address (operand, SImode, 4);
10624 parts[2] = adjust_address (operand, SImode, 8);
10626 else if (GET_CODE (operand) == CONST_DOUBLE)
10631 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10635 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10636 parts[2] = gen_int_mode (l[2], SImode);
10639 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10644 parts[1] = gen_int_mode (l[1], SImode);
10645 parts[0] = gen_int_mode (l[0], SImode);
10653 if (mode == TImode)
10654 split_ti (&operand, 1, &parts[0], &parts[1]);
10655 if (mode == XFmode || mode == TFmode)
10657 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10658 if (REG_P (operand))
10660 if (!reload_completed)
10662 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10663 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10665 else if (offsettable_memref_p (operand))
10667 operand = adjust_address (operand, DImode, 0);
10668 parts[0] = operand;
10669 parts[1] = adjust_address (operand, upper_mode, 8);
10671 else if (GET_CODE (operand) == CONST_DOUBLE)
10676 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10677 real_to_target (l, &r, mode);
10678 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10679 if (HOST_BITS_PER_WIDE_INT >= 64)
10682 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10683 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10686 parts[0] = immed_double_const (l[0], l[1], DImode);
10687 if (upper_mode == SImode)
10688 parts[1] = gen_int_mode (l[2], SImode);
10689 else if (HOST_BITS_PER_WIDE_INT >= 64)
10692 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10693 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10696 parts[1] = immed_double_const (l[2], l[3], DImode);
10706 /* Emit insns to perform a move or push of DI, DF, and XF values.
10707 Return false when normal moves are needed; true when all required
10708 insns have been emitted. Operands 2-4 contain the input values
10709 int the correct order; operands 5-7 contain the output values. */
10712 ix86_split_long_move (rtx operands[])
10717 int collisions = 0;
10718 enum machine_mode mode = GET_MODE (operands[0]);
10720 /* The DFmode expanders may ask us to move double.
10721 For 64bit target this is single move. By hiding the fact
10722 here we simplify i386.md splitters. */
10723 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10725 /* Optimize constant pool reference to immediates. This is used by
10726 fp moves, that force all constants to memory to allow combining. */
10728 if (GET_CODE (operands[1]) == MEM
10729 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10730 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10731 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10732 if (push_operand (operands[0], VOIDmode))
10734 operands[0] = copy_rtx (operands[0]);
10735 PUT_MODE (operands[0], Pmode);
10738 operands[0] = gen_lowpart (DImode, operands[0]);
10739 operands[1] = gen_lowpart (DImode, operands[1]);
10740 emit_move_insn (operands[0], operands[1]);
10744 /* The only non-offsettable memory we handle is push. */
10745 if (push_operand (operands[0], VOIDmode))
10747 else if (GET_CODE (operands[0]) == MEM
10748 && ! offsettable_memref_p (operands[0]))
10751 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10752 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10754 /* When emitting push, take care for source operands on the stack. */
10755 if (push && GET_CODE (operands[1]) == MEM
10756 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10759 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10760 XEXP (part[1][2], 0));
10761 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10762 XEXP (part[1][1], 0));
10765 /* We need to do copy in the right order in case an address register
10766 of the source overlaps the destination. */
10767 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10769 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10771 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10774 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10777 /* Collision in the middle part can be handled by reordering. */
10778 if (collisions == 1 && nparts == 3
10779 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10782 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10783 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10786 /* If there are more collisions, we can't handle it by reordering.
10787 Do an lea to the last part and use only one colliding move. */
10788 else if (collisions > 1)
10794 base = part[0][nparts - 1];
10796 /* Handle the case when the last part isn't valid for lea.
10797 Happens in 64-bit mode storing the 12-byte XFmode. */
10798 if (GET_MODE (base) != Pmode)
10799 base = gen_rtx_REG (Pmode, REGNO (base));
10801 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10802 part[1][0] = replace_equiv_address (part[1][0], base);
10803 part[1][1] = replace_equiv_address (part[1][1],
10804 plus_constant (base, UNITS_PER_WORD));
10806 part[1][2] = replace_equiv_address (part[1][2],
10807 plus_constant (base, 8));
10817 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10818 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10819 emit_move_insn (part[0][2], part[1][2]);
10824 /* In 64bit mode we don't have 32bit push available. In case this is
10825 register, it is OK - we will just use larger counterpart. We also
10826 retype memory - these comes from attempt to avoid REX prefix on
10827 moving of second half of TFmode value. */
10828 if (GET_MODE (part[1][1]) == SImode)
10830 if (GET_CODE (part[1][1]) == MEM)
10831 part[1][1] = adjust_address (part[1][1], DImode, 0);
10832 else if (REG_P (part[1][1]))
10833 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10836 if (GET_MODE (part[1][0]) == SImode)
10837 part[1][0] = part[1][1];
10840 emit_move_insn (part[0][1], part[1][1]);
10841 emit_move_insn (part[0][0], part[1][0]);
10845 /* Choose correct order to not overwrite the source before it is copied. */
10846 if ((REG_P (part[0][0])
10847 && REG_P (part[1][1])
10848 && (REGNO (part[0][0]) == REGNO (part[1][1])
10850 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10852 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10856 operands[2] = part[0][2];
10857 operands[3] = part[0][1];
10858 operands[4] = part[0][0];
10859 operands[5] = part[1][2];
10860 operands[6] = part[1][1];
10861 operands[7] = part[1][0];
10865 operands[2] = part[0][1];
10866 operands[3] = part[0][0];
10867 operands[5] = part[1][1];
10868 operands[6] = part[1][0];
10875 operands[2] = part[0][0];
10876 operands[3] = part[0][1];
10877 operands[4] = part[0][2];
10878 operands[5] = part[1][0];
10879 operands[6] = part[1][1];
10880 operands[7] = part[1][2];
10884 operands[2] = part[0][0];
10885 operands[3] = part[0][1];
10886 operands[5] = part[1][0];
10887 operands[6] = part[1][1];
10890 emit_move_insn (operands[2], operands[5]);
10891 emit_move_insn (operands[3], operands[6]);
10893 emit_move_insn (operands[4], operands[7]);
10899 ix86_split_ashldi (rtx *operands, rtx scratch)
10901 rtx low[2], high[2];
10904 if (GET_CODE (operands[2]) == CONST_INT)
10906 split_di (operands, 2, low, high);
10907 count = INTVAL (operands[2]) & 63;
10911 emit_move_insn (high[0], low[1]);
10912 emit_move_insn (low[0], const0_rtx);
10915 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10919 if (!rtx_equal_p (operands[0], operands[1]))
10920 emit_move_insn (operands[0], operands[1]);
10921 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10922 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10927 if (!rtx_equal_p (operands[0], operands[1]))
10928 emit_move_insn (operands[0], operands[1]);
10930 split_di (operands, 1, low, high);
10932 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10933 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10935 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10937 if (! no_new_pseudos)
10938 scratch = force_reg (SImode, const0_rtx);
10940 emit_move_insn (scratch, const0_rtx);
10942 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10946 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10951 ix86_split_ashrdi (rtx *operands, rtx scratch)
10953 rtx low[2], high[2];
10956 if (GET_CODE (operands[2]) == CONST_INT)
10958 split_di (operands, 2, low, high);
10959 count = INTVAL (operands[2]) & 63;
10963 emit_move_insn (high[0], high[1]);
10964 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10965 emit_move_insn (low[0], high[0]);
10968 else if (count >= 32)
10970 emit_move_insn (low[0], high[1]);
10972 if (! reload_completed)
10973 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10976 emit_move_insn (high[0], low[0]);
10977 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10981 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10985 if (!rtx_equal_p (operands[0], operands[1]))
10986 emit_move_insn (operands[0], operands[1]);
10987 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10988 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10993 if (!rtx_equal_p (operands[0], operands[1]))
10994 emit_move_insn (operands[0], operands[1]);
10996 split_di (operands, 1, low, high);
10998 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10999 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
11001 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
11003 if (! no_new_pseudos)
11004 scratch = gen_reg_rtx (SImode);
11005 emit_move_insn (scratch, high[0]);
11006 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
11007 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11011 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
11016 ix86_split_lshrdi (rtx *operands, rtx scratch)
11018 rtx low[2], high[2];
11021 if (GET_CODE (operands[2]) == CONST_INT)
11023 split_di (operands, 2, low, high);
11024 count = INTVAL (operands[2]) & 63;
11028 emit_move_insn (low[0], high[1]);
11029 emit_move_insn (high[0], const0_rtx);
11032 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
11036 if (!rtx_equal_p (operands[0], operands[1]))
11037 emit_move_insn (operands[0], operands[1]);
11038 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11039 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
11044 if (!rtx_equal_p (operands[0], operands[1]))
11045 emit_move_insn (operands[0], operands[1]);
11047 split_di (operands, 1, low, high);
11049 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11050 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
11052 /* Heh. By reversing the arguments, we can reuse this pattern. */
11053 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
11055 if (! no_new_pseudos)
11056 scratch = force_reg (SImode, const0_rtx);
11058 emit_move_insn (scratch, const0_rtx);
11060 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11064 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
11068 /* Helper function for the string operations below. Dest VARIABLE whether
11069 it is aligned to VALUE bytes. If true, jump to the label. */
11071 ix86_expand_aligntest (rtx variable, int value)
11073 rtx label = gen_label_rtx ();
11074 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
11075 if (GET_MODE (variable) == DImode)
11076 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
11078 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11079 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
11084 /* Adjust COUNTER by the VALUE. */
11086 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
11088 if (GET_MODE (countreg) == DImode)
11089 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11091 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11094 /* Zero extend possibly SImode EXP to Pmode register. */
11096 ix86_zero_extend_to_Pmode (rtx exp)
11099 if (GET_MODE (exp) == VOIDmode)
11100 return force_reg (Pmode, exp);
11101 if (GET_MODE (exp) == Pmode)
11102 return copy_to_mode_reg (Pmode, exp);
11103 r = gen_reg_rtx (Pmode);
11104 emit_insn (gen_zero_extendsidi2 (r, exp));
11108 /* Expand string move (memcpy) operation. Use i386 string operations when
11109 profitable. expand_clrstr contains similar code. */
11111 ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11113 rtx srcreg, destreg, countreg, srcexp, destexp;
11114 enum machine_mode counter_mode;
11115 HOST_WIDE_INT align = 0;
11116 unsigned HOST_WIDE_INT count = 0;
11118 if (GET_CODE (align_exp) == CONST_INT)
11119 align = INTVAL (align_exp);
11121 /* Can't use any of this if the user has appropriated esi or edi. */
11122 if (global_regs[4] || global_regs[5])
11125 /* This simple hack avoids all inlining code and simplifies code below. */
11126 if (!TARGET_ALIGN_STRINGOPS)
11129 if (GET_CODE (count_exp) == CONST_INT)
11131 count = INTVAL (count_exp);
11132 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11136 /* Figure out proper mode for counter. For 32bits it is always SImode,
11137 for 64bits use SImode when possible, otherwise DImode.
11138 Set count to number of bytes copied when known at compile time. */
11139 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11140 || x86_64_zero_extended_value (count_exp))
11141 counter_mode = SImode;
11143 counter_mode = DImode;
11145 if (counter_mode != SImode && counter_mode != DImode)
11148 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11149 if (destreg != XEXP (dst, 0))
11150 dst = replace_equiv_address_nv (dst, destreg);
11151 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11152 if (srcreg != XEXP (src, 0))
11153 src = replace_equiv_address_nv (src, srcreg);
11155 /* When optimizing for size emit simple rep ; movsb instruction for
11156 counts not divisible by 4. */
11158 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11160 emit_insn (gen_cld ());
11161 countreg = ix86_zero_extend_to_Pmode (count_exp);
11162 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11163 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11164 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11168 /* For constant aligned (or small unaligned) copies use rep movsl
11169 followed by code copying the rest. For PentiumPro ensure 8 byte
11170 alignment to allow rep movsl acceleration. */
11172 else if (count != 0
11174 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11175 || optimize_size || count < (unsigned int) 64))
11177 unsigned HOST_WIDE_INT offset = 0;
11178 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11179 rtx srcmem, dstmem;
11181 emit_insn (gen_cld ());
11182 if (count & ~(size - 1))
11184 countreg = copy_to_mode_reg (counter_mode,
11185 GEN_INT ((count >> (size == 4 ? 2 : 3))
11186 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11187 countreg = ix86_zero_extend_to_Pmode (countreg);
11189 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11190 GEN_INT (size == 4 ? 2 : 3));
11191 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11192 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11194 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11195 countreg, destexp, srcexp));
11196 offset = count & ~(size - 1);
11198 if (size == 8 && (count & 0x04))
11200 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11202 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11204 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11209 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11211 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11213 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11218 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11220 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11222 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11225 /* The generic code based on the glibc implementation:
11226 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11227 allowing accelerated copying there)
11228 - copy the data using rep movsl
11229 - copy the rest. */
11234 rtx srcmem, dstmem;
11235 int desired_alignment = (TARGET_PENTIUMPRO
11236 && (count == 0 || count >= (unsigned int) 260)
11237 ? 8 : UNITS_PER_WORD);
11238 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11239 dst = change_address (dst, BLKmode, destreg);
11240 src = change_address (src, BLKmode, srcreg);
11242 /* In case we don't know anything about the alignment, default to
11243 library version, since it is usually equally fast and result in
11246 Also emit call when we know that the count is large and call overhead
11247 will not be important. */
11248 if (!TARGET_INLINE_ALL_STRINGOPS
11249 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11252 if (TARGET_SINGLE_STRINGOP)
11253 emit_insn (gen_cld ());
11255 countreg2 = gen_reg_rtx (Pmode);
11256 countreg = copy_to_mode_reg (counter_mode, count_exp);
11258 /* We don't use loops to align destination and to copy parts smaller
11259 than 4 bytes, because gcc is able to optimize such code better (in
11260 the case the destination or the count really is aligned, gcc is often
11261 able to predict the branches) and also it is friendlier to the
11262 hardware branch prediction.
11264 Using loops is beneficial for generic case, because we can
11265 handle small counts using the loops. Many CPUs (such as Athlon)
11266 have large REP prefix setup costs.
11268 This is quite costly. Maybe we can revisit this decision later or
11269 add some customizability to this code. */
11271 if (count == 0 && align < desired_alignment)
11273 label = gen_label_rtx ();
11274 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11275 LEU, 0, counter_mode, 1, label);
11279 rtx label = ix86_expand_aligntest (destreg, 1);
11280 srcmem = change_address (src, QImode, srcreg);
11281 dstmem = change_address (dst, QImode, destreg);
11282 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11283 ix86_adjust_counter (countreg, 1);
11284 emit_label (label);
11285 LABEL_NUSES (label) = 1;
11289 rtx label = ix86_expand_aligntest (destreg, 2);
11290 srcmem = change_address (src, HImode, srcreg);
11291 dstmem = change_address (dst, HImode, destreg);
11292 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11293 ix86_adjust_counter (countreg, 2);
11294 emit_label (label);
11295 LABEL_NUSES (label) = 1;
11297 if (align <= 4 && desired_alignment > 4)
11299 rtx label = ix86_expand_aligntest (destreg, 4);
11300 srcmem = change_address (src, SImode, srcreg);
11301 dstmem = change_address (dst, SImode, destreg);
11302 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11303 ix86_adjust_counter (countreg, 4);
11304 emit_label (label);
11305 LABEL_NUSES (label) = 1;
11308 if (label && desired_alignment > 4 && !TARGET_64BIT)
11310 emit_label (label);
11311 LABEL_NUSES (label) = 1;
11314 if (!TARGET_SINGLE_STRINGOP)
11315 emit_insn (gen_cld ());
11318 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11320 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11324 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11325 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11327 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11328 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11329 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11330 countreg2, destexp, srcexp));
11334 emit_label (label);
11335 LABEL_NUSES (label) = 1;
11337 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11339 srcmem = change_address (src, SImode, srcreg);
11340 dstmem = change_address (dst, SImode, destreg);
11341 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11343 if ((align <= 4 || count == 0) && TARGET_64BIT)
11345 rtx label = ix86_expand_aligntest (countreg, 4);
11346 srcmem = change_address (src, SImode, srcreg);
11347 dstmem = change_address (dst, SImode, destreg);
11348 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11349 emit_label (label);
11350 LABEL_NUSES (label) = 1;
11352 if (align > 2 && count != 0 && (count & 2))
11354 srcmem = change_address (src, HImode, srcreg);
11355 dstmem = change_address (dst, HImode, destreg);
11356 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11358 if (align <= 2 || count == 0)
11360 rtx label = ix86_expand_aligntest (countreg, 2);
11361 srcmem = change_address (src, HImode, srcreg);
11362 dstmem = change_address (dst, HImode, destreg);
11363 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11364 emit_label (label);
11365 LABEL_NUSES (label) = 1;
11367 if (align > 1 && count != 0 && (count & 1))
11369 srcmem = change_address (src, QImode, srcreg);
11370 dstmem = change_address (dst, QImode, destreg);
11371 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11373 if (align <= 1 || count == 0)
11375 rtx label = ix86_expand_aligntest (countreg, 1);
11376 srcmem = change_address (src, QImode, srcreg);
11377 dstmem = change_address (dst, QImode, destreg);
11378 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11379 emit_label (label);
11380 LABEL_NUSES (label) = 1;
11387 /* Expand string clear operation (bzero). Use i386 string operations when
11388 profitable. expand_movstr contains similar code. */
11390 ix86_expand_clrstr (rtx dst, rtx count_exp, rtx align_exp)
11392 rtx destreg, zeroreg, countreg, destexp;
11393 enum machine_mode counter_mode;
11394 HOST_WIDE_INT align = 0;
11395 unsigned HOST_WIDE_INT count = 0;
11397 if (GET_CODE (align_exp) == CONST_INT)
11398 align = INTVAL (align_exp);
11400 /* Can't use any of this if the user has appropriated esi. */
11401 if (global_regs[4])
11404 /* This simple hack avoids all inlining code and simplifies code below. */
11405 if (!TARGET_ALIGN_STRINGOPS)
11408 if (GET_CODE (count_exp) == CONST_INT)
11410 count = INTVAL (count_exp);
11411 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11414 /* Figure out proper mode for counter. For 32bits it is always SImode,
11415 for 64bits use SImode when possible, otherwise DImode.
11416 Set count to number of bytes copied when known at compile time. */
11417 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11418 || x86_64_zero_extended_value (count_exp))
11419 counter_mode = SImode;
11421 counter_mode = DImode;
11423 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11424 if (destreg != XEXP (dst, 0))
11425 dst = replace_equiv_address_nv (dst, destreg);
11427 emit_insn (gen_cld ());
11429 /* When optimizing for size emit simple rep ; movsb instruction for
11430 counts not divisible by 4. */
11432 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11434 countreg = ix86_zero_extend_to_Pmode (count_exp);
11435 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11436 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11437 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11439 else if (count != 0
11441 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11442 || optimize_size || count < (unsigned int) 64))
11444 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11445 unsigned HOST_WIDE_INT offset = 0;
11447 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11448 if (count & ~(size - 1))
11450 countreg = copy_to_mode_reg (counter_mode,
11451 GEN_INT ((count >> (size == 4 ? 2 : 3))
11452 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11453 countreg = ix86_zero_extend_to_Pmode (countreg);
11454 destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
11455 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11456 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11457 offset = count & ~(size - 1);
11459 if (size == 8 && (count & 0x04))
11461 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11463 emit_insn (gen_strset (destreg, mem,
11464 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11469 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11471 emit_insn (gen_strset (destreg, mem,
11472 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11477 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11479 emit_insn (gen_strset (destreg, mem,
11480 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11487 /* Compute desired alignment of the string operation. */
11488 int desired_alignment = (TARGET_PENTIUMPRO
11489 && (count == 0 || count >= (unsigned int) 260)
11490 ? 8 : UNITS_PER_WORD);
11492 /* In case we don't know anything about the alignment, default to
11493 library version, since it is usually equally fast and result in
11496 Also emit call when we know that the count is large and call overhead
11497 will not be important. */
11498 if (!TARGET_INLINE_ALL_STRINGOPS
11499 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11502 if (TARGET_SINGLE_STRINGOP)
11503 emit_insn (gen_cld ());
11505 countreg2 = gen_reg_rtx (Pmode);
11506 countreg = copy_to_mode_reg (counter_mode, count_exp);
11507 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11508 /* Get rid of MEM_OFFSET, it won't be accurate. */
11509 dst = change_address (dst, BLKmode, destreg);
11511 if (count == 0 && align < desired_alignment)
11513 label = gen_label_rtx ();
11514 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11515 LEU, 0, counter_mode, 1, label);
11519 rtx label = ix86_expand_aligntest (destreg, 1);
11520 emit_insn (gen_strset (destreg, dst,
11521 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11522 ix86_adjust_counter (countreg, 1);
11523 emit_label (label);
11524 LABEL_NUSES (label) = 1;
11528 rtx label = ix86_expand_aligntest (destreg, 2);
11529 emit_insn (gen_strset (destreg, dst,
11530 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11531 ix86_adjust_counter (countreg, 2);
11532 emit_label (label);
11533 LABEL_NUSES (label) = 1;
11535 if (align <= 4 && desired_alignment > 4)
11537 rtx label = ix86_expand_aligntest (destreg, 4);
11538 emit_insn (gen_strset (destreg, dst,
11540 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11542 ix86_adjust_counter (countreg, 4);
11543 emit_label (label);
11544 LABEL_NUSES (label) = 1;
11547 if (label && desired_alignment > 4 && !TARGET_64BIT)
11549 emit_label (label);
11550 LABEL_NUSES (label) = 1;
11554 if (!TARGET_SINGLE_STRINGOP)
11555 emit_insn (gen_cld ());
11558 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11560 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11564 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11565 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11567 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11568 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11572 emit_label (label);
11573 LABEL_NUSES (label) = 1;
11576 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11577 emit_insn (gen_strset (destreg, dst,
11578 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11579 if (TARGET_64BIT && (align <= 4 || count == 0))
11581 rtx label = ix86_expand_aligntest (countreg, 4);
11582 emit_insn (gen_strset (destreg, dst,
11583 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11584 emit_label (label);
11585 LABEL_NUSES (label) = 1;
11587 if (align > 2 && count != 0 && (count & 2))
11588 emit_insn (gen_strset (destreg, dst,
11589 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11590 if (align <= 2 || count == 0)
11592 rtx label = ix86_expand_aligntest (countreg, 2);
11593 emit_insn (gen_strset (destreg, dst,
11594 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11595 emit_label (label);
11596 LABEL_NUSES (label) = 1;
11598 if (align > 1 && count != 0 && (count & 1))
11599 emit_insn (gen_strset (destreg, dst,
11600 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11601 if (align <= 1 || count == 0)
11603 rtx label = ix86_expand_aligntest (countreg, 1);
11604 emit_insn (gen_strset (destreg, dst,
11605 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11606 emit_label (label);
11607 LABEL_NUSES (label) = 1;
11613 /* Expand strlen. */
11615 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11617 rtx addr, scratch1, scratch2, scratch3, scratch4;
11619 /* The generic case of strlen expander is long. Avoid it's
11620 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11622 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11623 && !TARGET_INLINE_ALL_STRINGOPS
11625 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11628 addr = force_reg (Pmode, XEXP (src, 0));
11629 scratch1 = gen_reg_rtx (Pmode);
11631 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11634 /* Well it seems that some optimizer does not combine a call like
11635 foo(strlen(bar), strlen(bar));
11636 when the move and the subtraction is done here. It does calculate
11637 the length just once when these instructions are done inside of
11638 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11639 often used and I use one fewer register for the lifetime of
11640 output_strlen_unroll() this is better. */
11642 emit_move_insn (out, addr);
11644 ix86_expand_strlensi_unroll_1 (out, src, align);
11646 /* strlensi_unroll_1 returns the address of the zero at the end of
11647 the string, like memchr(), so compute the length by subtracting
11648 the start address. */
11650 emit_insn (gen_subdi3 (out, out, addr));
11652 emit_insn (gen_subsi3 (out, out, addr));
11657 scratch2 = gen_reg_rtx (Pmode);
11658 scratch3 = gen_reg_rtx (Pmode);
11659 scratch4 = force_reg (Pmode, constm1_rtx);
11661 emit_move_insn (scratch3, addr);
11662 eoschar = force_reg (QImode, eoschar);
11664 emit_insn (gen_cld ());
11665 src = replace_equiv_address_nv (src, scratch3);
11667 /* If .md starts supporting :P, this can be done in .md. */
11668 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11669 scratch4), UNSPEC_SCAS);
11670 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
11673 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11674 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11678 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11679 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11685 /* Expand the appropriate insns for doing strlen if not just doing
11688 out = result, initialized with the start address
11689 align_rtx = alignment of the address.
11690 scratch = scratch register, initialized with the startaddress when
11691 not aligned, otherwise undefined
11693 This is just the body. It needs the initializations mentioned above and
11694 some address computing at the end. These things are done in i386.md. */
11697 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11701 rtx align_2_label = NULL_RTX;
11702 rtx align_3_label = NULL_RTX;
11703 rtx align_4_label = gen_label_rtx ();
11704 rtx end_0_label = gen_label_rtx ();
11706 rtx tmpreg = gen_reg_rtx (SImode);
11707 rtx scratch = gen_reg_rtx (SImode);
11711 if (GET_CODE (align_rtx) == CONST_INT)
11712 align = INTVAL (align_rtx);
11714 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11716 /* Is there a known alignment and is it less than 4? */
11719 rtx scratch1 = gen_reg_rtx (Pmode);
11720 emit_move_insn (scratch1, out);
11721 /* Is there a known alignment and is it not 2? */
11724 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11725 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11727 /* Leave just the 3 lower bits. */
11728 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11729 NULL_RTX, 0, OPTAB_WIDEN);
11731 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11732 Pmode, 1, align_4_label);
11733 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
11734 Pmode, 1, align_2_label);
11735 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
11736 Pmode, 1, align_3_label);
11740 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11741 check if is aligned to 4 - byte. */
11743 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
11744 NULL_RTX, 0, OPTAB_WIDEN);
11746 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11747 Pmode, 1, align_4_label);
11750 mem = change_address (src, QImode, out);
11752 /* Now compare the bytes. */
11754 /* Compare the first n unaligned byte on a byte per byte basis. */
11755 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11756 QImode, 1, end_0_label);
11758 /* Increment the address. */
11760 emit_insn (gen_adddi3 (out, out, const1_rtx));
11762 emit_insn (gen_addsi3 (out, out, const1_rtx));
11764 /* Not needed with an alignment of 2 */
11767 emit_label (align_2_label);
11769 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11773 emit_insn (gen_adddi3 (out, out, const1_rtx));
11775 emit_insn (gen_addsi3 (out, out, const1_rtx));
11777 emit_label (align_3_label);
11780 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11784 emit_insn (gen_adddi3 (out, out, const1_rtx));
11786 emit_insn (gen_addsi3 (out, out, const1_rtx));
11789 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11790 align this loop. It gives only huge programs, but does not help to
11792 emit_label (align_4_label);
11794 mem = change_address (src, SImode, out);
11795 emit_move_insn (scratch, mem);
11797 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11799 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11801 /* This formula yields a nonzero result iff one of the bytes is zero.
11802 This saves three branches inside loop and many cycles. */
11804 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11805 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11806 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11807 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11808 gen_int_mode (0x80808080, SImode)));
11809 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11814 rtx reg = gen_reg_rtx (SImode);
11815 rtx reg2 = gen_reg_rtx (Pmode);
11816 emit_move_insn (reg, tmpreg);
11817 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11819 /* If zero is not in the first two bytes, move two bytes forward. */
11820 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11821 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11822 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11823 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11824 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11827 /* Emit lea manually to avoid clobbering of flags. */
11828 emit_insn (gen_rtx_SET (SImode, reg2,
11829 gen_rtx_PLUS (Pmode, out, const2_rtx)));
11831 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11832 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11833 emit_insn (gen_rtx_SET (VOIDmode, out,
11834 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11841 rtx end_2_label = gen_label_rtx ();
11842 /* Is zero in the first two bytes? */
11844 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11845 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11846 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11847 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11848 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11850 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11851 JUMP_LABEL (tmp) = end_2_label;
11853 /* Not in the first two. Move two bytes forward. */
11854 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11856 emit_insn (gen_adddi3 (out, out, const2_rtx));
11858 emit_insn (gen_addsi3 (out, out, const2_rtx));
11860 emit_label (end_2_label);
11864 /* Avoid branch in fixing the byte. */
11865 tmpreg = gen_lowpart (QImode, tmpreg);
11866 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11867 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11869 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11871 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11873 emit_label (end_0_label);
11877 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11878 rtx callarg2 ATTRIBUTE_UNUSED,
11879 rtx pop, int sibcall)
11881 rtx use = NULL, call;
11883 if (pop == const0_rtx)
11885 if (TARGET_64BIT && pop)
11889 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11890 fnaddr = machopic_indirect_call_target (fnaddr);
11892 /* Static functions and indirect calls don't need the pic register. */
11893 if (! TARGET_64BIT && flag_pic
11894 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11895 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11896 use_reg (&use, pic_offset_table_rtx);
11898 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11900 rtx al = gen_rtx_REG (QImode, 0);
11901 emit_move_insn (al, callarg2);
11902 use_reg (&use, al);
11904 #endif /* TARGET_MACHO */
11906 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11908 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11909 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11911 if (sibcall && TARGET_64BIT
11912 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11915 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11916 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11917 emit_move_insn (fnaddr, addr);
11918 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11921 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11923 call = gen_rtx_SET (VOIDmode, retval, call);
11926 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11927 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11928 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11931 call = emit_call_insn (call);
11933 CALL_INSN_FUNCTION_USAGE (call) = use;
11937 /* Clear stack slot assignments remembered from previous functions.
11938 This is called from INIT_EXPANDERS once before RTL is emitted for each
11941 static struct machine_function *
11942 ix86_init_machine_status (void)
11944 struct machine_function *f;
11946 f = ggc_alloc_cleared (sizeof (struct machine_function));
11947 f->use_fast_prologue_epilogue_nregs = -1;
11952 /* Return a MEM corresponding to a stack slot with mode MODE.
11953 Allocate a new slot if necessary.
11955 The RTL for a function can have several slots available: N is
11956 which slot to use. */
11959 assign_386_stack_local (enum machine_mode mode, int n)
11961 struct stack_local_entry *s;
11963 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11966 for (s = ix86_stack_locals; s; s = s->next)
11967 if (s->mode == mode && s->n == n)
11970 s = (struct stack_local_entry *)
11971 ggc_alloc (sizeof (struct stack_local_entry));
11974 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11976 s->next = ix86_stack_locals;
11977 ix86_stack_locals = s;
11981 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11983 static GTY(()) rtx ix86_tls_symbol;
11985 ix86_tls_get_addr (void)
11988 if (!ix86_tls_symbol)
11990 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11991 (TARGET_GNU_TLS && !TARGET_64BIT)
11992 ? "___tls_get_addr"
11993 : "__tls_get_addr");
11996 return ix86_tls_symbol;
11999 /* Calculate the length of the memory address in the instruction
12000 encoding. Does not include the one-byte modrm, opcode, or prefix. */
12003 memory_address_length (rtx addr)
12005 struct ix86_address parts;
12006 rtx base, index, disp;
12009 if (GET_CODE (addr) == PRE_DEC
12010 || GET_CODE (addr) == POST_INC
12011 || GET_CODE (addr) == PRE_MODIFY
12012 || GET_CODE (addr) == POST_MODIFY)
12015 if (! ix86_decompose_address (addr, &parts))
12019 index = parts.index;
12024 - esp as the base always wants an index,
12025 - ebp as the base always wants a displacement. */
12027 /* Register Indirect. */
12028 if (base && !index && !disp)
12030 /* esp (for its index) and ebp (for its displacement) need
12031 the two-byte modrm form. */
12032 if (addr == stack_pointer_rtx
12033 || addr == arg_pointer_rtx
12034 || addr == frame_pointer_rtx
12035 || addr == hard_frame_pointer_rtx)
12039 /* Direct Addressing. */
12040 else if (disp && !base && !index)
12045 /* Find the length of the displacement constant. */
12048 if (GET_CODE (disp) == CONST_INT
12049 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
12055 /* ebp always wants a displacement. */
12056 else if (base == hard_frame_pointer_rtx)
12059 /* An index requires the two-byte modrm form.... */
12061 /* ...like esp, which always wants an index. */
12062 || base == stack_pointer_rtx
12063 || base == arg_pointer_rtx
12064 || base == frame_pointer_rtx)
12071 /* Compute default value for "length_immediate" attribute. When SHORTFORM
12072 is set, expect that insn have 8bit immediate alternative. */
12074 ix86_attr_length_immediate_default (rtx insn, int shortform)
12078 extract_insn_cached (insn);
12079 for (i = recog_data.n_operands - 1; i >= 0; --i)
12080 if (CONSTANT_P (recog_data.operand[i]))
12085 && GET_CODE (recog_data.operand[i]) == CONST_INT
12086 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
12090 switch (get_attr_mode (insn))
12101 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
12106 fatal_insn ("unknown insn mode", insn);
12112 /* Compute default value for "length_address" attribute. */
12114 ix86_attr_length_address_default (rtx insn)
12118 if (get_attr_type (insn) == TYPE_LEA)
12120 rtx set = PATTERN (insn);
12121 if (GET_CODE (set) == SET)
12123 else if (GET_CODE (set) == PARALLEL
12124 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12125 set = XVECEXP (set, 0, 0);
12128 #ifdef ENABLE_CHECKING
12134 return memory_address_length (SET_SRC (set));
12137 extract_insn_cached (insn);
12138 for (i = recog_data.n_operands - 1; i >= 0; --i)
12139 if (GET_CODE (recog_data.operand[i]) == MEM)
12141 return memory_address_length (XEXP (recog_data.operand[i], 0));
12147 /* Return the maximum number of instructions a cpu can issue. */
12150 ix86_issue_rate (void)
12154 case PROCESSOR_PENTIUM:
12158 case PROCESSOR_PENTIUMPRO:
12159 case PROCESSOR_PENTIUM4:
12160 case PROCESSOR_ATHLON:
12162 case PROCESSOR_NOCONA:
12170 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12171 by DEP_INSN and nothing set by DEP_INSN. */
12174 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12178 /* Simplify the test for uninteresting insns. */
12179 if (insn_type != TYPE_SETCC
12180 && insn_type != TYPE_ICMOV
12181 && insn_type != TYPE_FCMOV
12182 && insn_type != TYPE_IBR)
12185 if ((set = single_set (dep_insn)) != 0)
12187 set = SET_DEST (set);
12190 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12191 && XVECLEN (PATTERN (dep_insn), 0) == 2
12192 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12193 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12195 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12196 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12201 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12204 /* This test is true if the dependent insn reads the flags but
12205 not any other potentially set register. */
12206 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12209 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12215 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12216 address with operands set by DEP_INSN. */
12219 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12223 if (insn_type == TYPE_LEA
12226 addr = PATTERN (insn);
12227 if (GET_CODE (addr) == SET)
12229 else if (GET_CODE (addr) == PARALLEL
12230 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12231 addr = XVECEXP (addr, 0, 0);
12234 addr = SET_SRC (addr);
12239 extract_insn_cached (insn);
12240 for (i = recog_data.n_operands - 1; i >= 0; --i)
12241 if (GET_CODE (recog_data.operand[i]) == MEM)
12243 addr = XEXP (recog_data.operand[i], 0);
12250 return modified_in_p (addr, dep_insn);
12254 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12256 enum attr_type insn_type, dep_insn_type;
12257 enum attr_memory memory;
12259 int dep_insn_code_number;
12261 /* Anti and output dependencies have zero cost on all CPUs. */
12262 if (REG_NOTE_KIND (link) != 0)
12265 dep_insn_code_number = recog_memoized (dep_insn);
12267 /* If we can't recognize the insns, we can't really do anything. */
12268 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12271 insn_type = get_attr_type (insn);
12272 dep_insn_type = get_attr_type (dep_insn);
12276 case PROCESSOR_PENTIUM:
12277 /* Address Generation Interlock adds a cycle of latency. */
12278 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12281 /* ??? Compares pair with jump/setcc. */
12282 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12285 /* Floating point stores require value to be ready one cycle earlier. */
12286 if (insn_type == TYPE_FMOV
12287 && get_attr_memory (insn) == MEMORY_STORE
12288 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12292 case PROCESSOR_PENTIUMPRO:
12293 memory = get_attr_memory (insn);
12295 /* INT->FP conversion is expensive. */
12296 if (get_attr_fp_int_src (dep_insn))
12299 /* There is one cycle extra latency between an FP op and a store. */
12300 if (insn_type == TYPE_FMOV
12301 && (set = single_set (dep_insn)) != NULL_RTX
12302 && (set2 = single_set (insn)) != NULL_RTX
12303 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12304 && GET_CODE (SET_DEST (set2)) == MEM)
12307 /* Show ability of reorder buffer to hide latency of load by executing
12308 in parallel with previous instruction in case
12309 previous instruction is not needed to compute the address. */
12310 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12311 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12313 /* Claim moves to take one cycle, as core can issue one load
12314 at time and the next load can start cycle later. */
12315 if (dep_insn_type == TYPE_IMOV
12316 || dep_insn_type == TYPE_FMOV)
12324 memory = get_attr_memory (insn);
12326 /* The esp dependency is resolved before the instruction is really
12328 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12329 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12332 /* INT->FP conversion is expensive. */
12333 if (get_attr_fp_int_src (dep_insn))
12336 /* Show ability of reorder buffer to hide latency of load by executing
12337 in parallel with previous instruction in case
12338 previous instruction is not needed to compute the address. */
12339 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12340 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12342 /* Claim moves to take one cycle, as core can issue one load
12343 at time and the next load can start cycle later. */
12344 if (dep_insn_type == TYPE_IMOV
12345 || dep_insn_type == TYPE_FMOV)
12354 case PROCESSOR_ATHLON:
12356 memory = get_attr_memory (insn);
12358 /* Show ability of reorder buffer to hide latency of load by executing
12359 in parallel with previous instruction in case
12360 previous instruction is not needed to compute the address. */
12361 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12362 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12364 enum attr_unit unit = get_attr_unit (insn);
12367 /* Because of the difference between the length of integer and
12368 floating unit pipeline preparation stages, the memory operands
12369 for floating point are cheaper.
12371 ??? For Athlon it the difference is most probably 2. */
12372 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12375 loadcost = TARGET_ATHLON ? 2 : 0;
12377 if (cost >= loadcost)
12391 ia32_use_dfa_pipeline_interface (void)
12394 || TARGET_PENTIUMPRO
12396 || TARGET_ATHLON_K8)
12401 /* How many alternative schedules to try. This should be as wide as the
12402 scheduling freedom in the DFA, but no wider. Making this value too
12403 large results extra work for the scheduler. */
12406 ia32_multipass_dfa_lookahead (void)
12408 if (ix86_tune == PROCESSOR_PENTIUM)
12411 if (ix86_tune == PROCESSOR_PENTIUMPRO
12412 || ix86_tune == PROCESSOR_K6)
12420 /* Compute the alignment given to a constant that is being placed in memory.
12421 EXP is the constant and ALIGN is the alignment that the object would
12423 The value of this function is used instead of that alignment to align
12427 ix86_constant_alignment (tree exp, int align)
12429 if (TREE_CODE (exp) == REAL_CST)
12431 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12433 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12436 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12437 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12438 return BITS_PER_WORD;
12443 /* Compute the alignment for a static variable.
12444 TYPE is the data type, and ALIGN is the alignment that
12445 the object would ordinarily have. The value of this function is used
12446 instead of that alignment to align the object. */
12449 ix86_data_alignment (tree type, int align)
12451 if (AGGREGATE_TYPE_P (type)
12452 && TYPE_SIZE (type)
12453 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12454 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12455 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12458 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12459 to 16byte boundary. */
12462 if (AGGREGATE_TYPE_P (type)
12463 && TYPE_SIZE (type)
12464 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12465 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12466 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12470 if (TREE_CODE (type) == ARRAY_TYPE)
12472 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12474 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12477 else if (TREE_CODE (type) == COMPLEX_TYPE)
12480 if (TYPE_MODE (type) == DCmode && align < 64)
12482 if (TYPE_MODE (type) == XCmode && align < 128)
12485 else if ((TREE_CODE (type) == RECORD_TYPE
12486 || TREE_CODE (type) == UNION_TYPE
12487 || TREE_CODE (type) == QUAL_UNION_TYPE)
12488 && TYPE_FIELDS (type))
12490 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12492 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12495 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12496 || TREE_CODE (type) == INTEGER_TYPE)
12498 if (TYPE_MODE (type) == DFmode && align < 64)
12500 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12507 /* Compute the alignment for a local variable.
12508 TYPE is the data type, and ALIGN is the alignment that
12509 the object would ordinarily have. The value of this macro is used
12510 instead of that alignment to align the object. */
12513 ix86_local_alignment (tree type, int align)
12515 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12516 to 16byte boundary. */
12519 if (AGGREGATE_TYPE_P (type)
12520 && TYPE_SIZE (type)
12521 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12522 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12523 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12526 if (TREE_CODE (type) == ARRAY_TYPE)
12528 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12530 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12533 else if (TREE_CODE (type) == COMPLEX_TYPE)
12535 if (TYPE_MODE (type) == DCmode && align < 64)
12537 if (TYPE_MODE (type) == XCmode && align < 128)
12540 else if ((TREE_CODE (type) == RECORD_TYPE
12541 || TREE_CODE (type) == UNION_TYPE
12542 || TREE_CODE (type) == QUAL_UNION_TYPE)
12543 && TYPE_FIELDS (type))
12545 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12547 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12550 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12551 || TREE_CODE (type) == INTEGER_TYPE)
12554 if (TYPE_MODE (type) == DFmode && align < 64)
12556 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12562 /* Emit RTL insns to initialize the variable parts of a trampoline.
12563 FNADDR is an RTX for the address of the function's pure code.
12564 CXT is an RTX for the static chain value for the function. */
12566 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12570 /* Compute offset from the end of the jmp to the target function. */
12571 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12572 plus_constant (tramp, 10),
12573 NULL_RTX, 1, OPTAB_DIRECT);
12574 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12575 gen_int_mode (0xb9, QImode));
12576 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12577 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12578 gen_int_mode (0xe9, QImode));
12579 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12584 /* Try to load address using shorter movl instead of movabs.
12585 We may want to support movq for kernel mode, but kernel does not use
12586 trampolines at the moment. */
12587 if (x86_64_zero_extended_value (fnaddr))
12589 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12590 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12591 gen_int_mode (0xbb41, HImode));
12592 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12593 gen_lowpart (SImode, fnaddr));
12598 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12599 gen_int_mode (0xbb49, HImode));
12600 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12604 /* Load static chain using movabs to r10. */
12605 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12606 gen_int_mode (0xba49, HImode));
12607 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12610 /* Jump to the r11 */
12611 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12612 gen_int_mode (0xff49, HImode));
12613 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12614 gen_int_mode (0xe3, QImode));
12616 if (offset > TRAMPOLINE_SIZE)
12620 #ifdef TRANSFER_FROM_TRAMPOLINE
12621 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
12622 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12626 #define def_builtin(MASK, NAME, TYPE, CODE) \
12628 if ((MASK) & target_flags \
12629 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12630 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12631 NULL, NULL_TREE); \
12634 struct builtin_description
12636 const unsigned int mask;
12637 const enum insn_code icode;
12638 const char *const name;
12639 const enum ix86_builtins code;
12640 const enum rtx_code comparison;
12641 const unsigned int flag;
12644 static const struct builtin_description bdesc_comi[] =
12646 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12647 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12648 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12649 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12650 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12651 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12652 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12653 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12654 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12655 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12656 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12657 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12658 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12659 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12660 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12661 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12662 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12663 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12664 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12665 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12666 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12667 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12668 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12669 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12672 static const struct builtin_description bdesc_2arg[] =
12675 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12676 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12677 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12678 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12679 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12680 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12681 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12682 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12684 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12685 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12686 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12687 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12688 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12689 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12690 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12691 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12692 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12693 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12694 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12695 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12696 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12697 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12698 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12699 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12700 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12701 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12702 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12703 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12705 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12706 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12707 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12708 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12710 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12711 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12712 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12713 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12715 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12716 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12717 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12718 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12719 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12722 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12723 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12724 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12725 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12726 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12727 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12728 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12729 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12731 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12732 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12733 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12734 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12735 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12736 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12737 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12738 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12740 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12741 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12742 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12744 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12745 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12746 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12747 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12749 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12750 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12752 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12753 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12754 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12755 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12756 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12757 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12759 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12760 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12761 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12762 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12764 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12765 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12766 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12767 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12768 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12769 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12772 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12773 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12774 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12776 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12777 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12778 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12780 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12781 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12782 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12783 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12784 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12785 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12787 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12788 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12789 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12790 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12791 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12792 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12794 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12795 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12796 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12797 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12799 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12800 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12803 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12804 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12805 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12806 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12807 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12808 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12809 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12810 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12812 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12813 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12814 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12815 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12816 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12817 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12818 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12819 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12820 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12821 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12822 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12823 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12824 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12825 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12826 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12827 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12828 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12829 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12830 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12831 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12833 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12834 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12835 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12836 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12838 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12839 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12840 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12841 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12843 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12844 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12845 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12848 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12849 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12850 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12851 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12852 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12853 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12854 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12855 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12857 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12858 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12859 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12860 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12861 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12862 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12863 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12864 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12866 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12867 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12868 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12869 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12871 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12872 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12873 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12874 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12876 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12877 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12879 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12880 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12881 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12882 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12883 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12884 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12886 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12887 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12888 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12889 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12891 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12892 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12893 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12894 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12895 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12896 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12897 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12898 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12900 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12901 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12902 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12904 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12905 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12907 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12908 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12909 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12910 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12911 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12912 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12914 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12915 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12916 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12917 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12918 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12919 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12921 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12922 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12923 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12924 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12926 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12928 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12929 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12930 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12931 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12934 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12935 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12936 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12937 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12938 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12939 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
12942 static const struct builtin_description bdesc_1arg[] =
12944 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12945 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12947 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12948 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12949 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12951 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12952 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12953 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12954 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12955 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12956 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
12958 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12959 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12960 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12961 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12963 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12965 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12966 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12968 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12969 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12970 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12971 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12972 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12974 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12976 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12977 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12978 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12979 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
12981 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12982 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12983 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12985 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
12988 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
12989 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
12990 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
12994 ix86_init_builtins (void)
12997 ix86_init_mmx_sse_builtins ();
13000 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13001 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13004 ix86_init_mmx_sse_builtins (void)
13006 const struct builtin_description * d;
13009 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
13010 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
13011 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
13012 tree V2DI_type_node = build_vector_type_for_mode (intDI_type_node, V2DImode);
13013 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
13014 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
13015 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
13016 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
13017 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
13018 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
13020 tree pchar_type_node = build_pointer_type (char_type_node);
13021 tree pcchar_type_node = build_pointer_type (
13022 build_type_variant (char_type_node, 1, 0));
13023 tree pfloat_type_node = build_pointer_type (float_type_node);
13024 tree pcfloat_type_node = build_pointer_type (
13025 build_type_variant (float_type_node, 1, 0));
13026 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13027 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13028 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13031 tree int_ftype_v4sf_v4sf
13032 = build_function_type_list (integer_type_node,
13033 V4SF_type_node, V4SF_type_node, NULL_TREE);
13034 tree v4si_ftype_v4sf_v4sf
13035 = build_function_type_list (V4SI_type_node,
13036 V4SF_type_node, V4SF_type_node, NULL_TREE);
13037 /* MMX/SSE/integer conversions. */
13038 tree int_ftype_v4sf
13039 = build_function_type_list (integer_type_node,
13040 V4SF_type_node, NULL_TREE);
13041 tree int64_ftype_v4sf
13042 = build_function_type_list (long_long_integer_type_node,
13043 V4SF_type_node, NULL_TREE);
13044 tree int_ftype_v8qi
13045 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13046 tree v4sf_ftype_v4sf_int
13047 = build_function_type_list (V4SF_type_node,
13048 V4SF_type_node, integer_type_node, NULL_TREE);
13049 tree v4sf_ftype_v4sf_int64
13050 = build_function_type_list (V4SF_type_node,
13051 V4SF_type_node, long_long_integer_type_node,
13053 tree v4sf_ftype_v4sf_v2si
13054 = build_function_type_list (V4SF_type_node,
13055 V4SF_type_node, V2SI_type_node, NULL_TREE);
13056 tree int_ftype_v4hi_int
13057 = build_function_type_list (integer_type_node,
13058 V4HI_type_node, integer_type_node, NULL_TREE);
13059 tree v4hi_ftype_v4hi_int_int
13060 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13061 integer_type_node, integer_type_node,
13063 /* Miscellaneous. */
13064 tree v8qi_ftype_v4hi_v4hi
13065 = build_function_type_list (V8QI_type_node,
13066 V4HI_type_node, V4HI_type_node, NULL_TREE);
13067 tree v4hi_ftype_v2si_v2si
13068 = build_function_type_list (V4HI_type_node,
13069 V2SI_type_node, V2SI_type_node, NULL_TREE);
13070 tree v4sf_ftype_v4sf_v4sf_int
13071 = build_function_type_list (V4SF_type_node,
13072 V4SF_type_node, V4SF_type_node,
13073 integer_type_node, NULL_TREE);
13074 tree v2si_ftype_v4hi_v4hi
13075 = build_function_type_list (V2SI_type_node,
13076 V4HI_type_node, V4HI_type_node, NULL_TREE);
13077 tree v4hi_ftype_v4hi_int
13078 = build_function_type_list (V4HI_type_node,
13079 V4HI_type_node, integer_type_node, NULL_TREE);
13080 tree v4hi_ftype_v4hi_di
13081 = build_function_type_list (V4HI_type_node,
13082 V4HI_type_node, long_long_unsigned_type_node,
13084 tree v2si_ftype_v2si_di
13085 = build_function_type_list (V2SI_type_node,
13086 V2SI_type_node, long_long_unsigned_type_node,
13088 tree void_ftype_void
13089 = build_function_type (void_type_node, void_list_node);
13090 tree void_ftype_unsigned
13091 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13092 tree void_ftype_unsigned_unsigned
13093 = build_function_type_list (void_type_node, unsigned_type_node,
13094 unsigned_type_node, NULL_TREE);
13095 tree void_ftype_pcvoid_unsigned_unsigned
13096 = build_function_type_list (void_type_node, const_ptr_type_node,
13097 unsigned_type_node, unsigned_type_node,
13099 tree unsigned_ftype_void
13100 = build_function_type (unsigned_type_node, void_list_node);
13102 = build_function_type (long_long_unsigned_type_node, void_list_node);
13103 tree v4sf_ftype_void
13104 = build_function_type (V4SF_type_node, void_list_node);
13105 tree v2si_ftype_v4sf
13106 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13107 /* Loads/stores. */
13108 tree void_ftype_v8qi_v8qi_pchar
13109 = build_function_type_list (void_type_node,
13110 V8QI_type_node, V8QI_type_node,
13111 pchar_type_node, NULL_TREE);
13112 tree v4sf_ftype_pcfloat
13113 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13114 /* @@@ the type is bogus */
13115 tree v4sf_ftype_v4sf_pv2si
13116 = build_function_type_list (V4SF_type_node,
13117 V4SF_type_node, pv2si_type_node, NULL_TREE);
13118 tree void_ftype_pv2si_v4sf
13119 = build_function_type_list (void_type_node,
13120 pv2si_type_node, V4SF_type_node, NULL_TREE);
13121 tree void_ftype_pfloat_v4sf
13122 = build_function_type_list (void_type_node,
13123 pfloat_type_node, V4SF_type_node, NULL_TREE);
13124 tree void_ftype_pdi_di
13125 = build_function_type_list (void_type_node,
13126 pdi_type_node, long_long_unsigned_type_node,
13128 tree void_ftype_pv2di_v2di
13129 = build_function_type_list (void_type_node,
13130 pv2di_type_node, V2DI_type_node, NULL_TREE);
13131 /* Normal vector unops. */
13132 tree v4sf_ftype_v4sf
13133 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13135 /* Normal vector binops. */
13136 tree v4sf_ftype_v4sf_v4sf
13137 = build_function_type_list (V4SF_type_node,
13138 V4SF_type_node, V4SF_type_node, NULL_TREE);
13139 tree v8qi_ftype_v8qi_v8qi
13140 = build_function_type_list (V8QI_type_node,
13141 V8QI_type_node, V8QI_type_node, NULL_TREE);
13142 tree v4hi_ftype_v4hi_v4hi
13143 = build_function_type_list (V4HI_type_node,
13144 V4HI_type_node, V4HI_type_node, NULL_TREE);
13145 tree v2si_ftype_v2si_v2si
13146 = build_function_type_list (V2SI_type_node,
13147 V2SI_type_node, V2SI_type_node, NULL_TREE);
13148 tree di_ftype_di_di
13149 = build_function_type_list (long_long_unsigned_type_node,
13150 long_long_unsigned_type_node,
13151 long_long_unsigned_type_node, NULL_TREE);
13153 tree v2si_ftype_v2sf
13154 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13155 tree v2sf_ftype_v2si
13156 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13157 tree v2si_ftype_v2si
13158 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13159 tree v2sf_ftype_v2sf
13160 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13161 tree v2sf_ftype_v2sf_v2sf
13162 = build_function_type_list (V2SF_type_node,
13163 V2SF_type_node, V2SF_type_node, NULL_TREE);
13164 tree v2si_ftype_v2sf_v2sf
13165 = build_function_type_list (V2SI_type_node,
13166 V2SF_type_node, V2SF_type_node, NULL_TREE);
13167 tree pint_type_node = build_pointer_type (integer_type_node);
13168 tree pcint_type_node = build_pointer_type (
13169 build_type_variant (integer_type_node, 1, 0));
13170 tree pdouble_type_node = build_pointer_type (double_type_node);
13171 tree pcdouble_type_node = build_pointer_type (
13172 build_type_variant (double_type_node, 1, 0));
13173 tree int_ftype_v2df_v2df
13174 = build_function_type_list (integer_type_node,
13175 V2DF_type_node, V2DF_type_node, NULL_TREE);
13178 = build_function_type (intTI_type_node, void_list_node);
13179 tree v2di_ftype_void
13180 = build_function_type (V2DI_type_node, void_list_node);
13181 tree ti_ftype_ti_ti
13182 = build_function_type_list (intTI_type_node,
13183 intTI_type_node, intTI_type_node, NULL_TREE);
13184 tree void_ftype_pcvoid
13185 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13187 = build_function_type_list (V2DI_type_node,
13188 long_long_unsigned_type_node, NULL_TREE);
13190 = build_function_type_list (long_long_unsigned_type_node,
13191 V2DI_type_node, NULL_TREE);
13192 tree v4sf_ftype_v4si
13193 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13194 tree v4si_ftype_v4sf
13195 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13196 tree v2df_ftype_v4si
13197 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13198 tree v4si_ftype_v2df
13199 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13200 tree v2si_ftype_v2df
13201 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13202 tree v4sf_ftype_v2df
13203 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13204 tree v2df_ftype_v2si
13205 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13206 tree v2df_ftype_v4sf
13207 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13208 tree int_ftype_v2df
13209 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13210 tree int64_ftype_v2df
13211 = build_function_type_list (long_long_integer_type_node,
13212 V2DF_type_node, NULL_TREE);
13213 tree v2df_ftype_v2df_int
13214 = build_function_type_list (V2DF_type_node,
13215 V2DF_type_node, integer_type_node, NULL_TREE);
13216 tree v2df_ftype_v2df_int64
13217 = build_function_type_list (V2DF_type_node,
13218 V2DF_type_node, long_long_integer_type_node,
13220 tree v4sf_ftype_v4sf_v2df
13221 = build_function_type_list (V4SF_type_node,
13222 V4SF_type_node, V2DF_type_node, NULL_TREE);
13223 tree v2df_ftype_v2df_v4sf
13224 = build_function_type_list (V2DF_type_node,
13225 V2DF_type_node, V4SF_type_node, NULL_TREE);
13226 tree v2df_ftype_v2df_v2df_int
13227 = build_function_type_list (V2DF_type_node,
13228 V2DF_type_node, V2DF_type_node,
13231 tree v2df_ftype_v2df_pv2si
13232 = build_function_type_list (V2DF_type_node,
13233 V2DF_type_node, pv2si_type_node, NULL_TREE);
13234 tree void_ftype_pv2si_v2df
13235 = build_function_type_list (void_type_node,
13236 pv2si_type_node, V2DF_type_node, NULL_TREE);
13237 tree void_ftype_pdouble_v2df
13238 = build_function_type_list (void_type_node,
13239 pdouble_type_node, V2DF_type_node, NULL_TREE);
13240 tree void_ftype_pint_int
13241 = build_function_type_list (void_type_node,
13242 pint_type_node, integer_type_node, NULL_TREE);
13243 tree void_ftype_v16qi_v16qi_pchar
13244 = build_function_type_list (void_type_node,
13245 V16QI_type_node, V16QI_type_node,
13246 pchar_type_node, NULL_TREE);
13247 tree v2df_ftype_pcdouble
13248 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13249 tree v2df_ftype_v2df_v2df
13250 = build_function_type_list (V2DF_type_node,
13251 V2DF_type_node, V2DF_type_node, NULL_TREE);
13252 tree v16qi_ftype_v16qi_v16qi
13253 = build_function_type_list (V16QI_type_node,
13254 V16QI_type_node, V16QI_type_node, NULL_TREE);
13255 tree v8hi_ftype_v8hi_v8hi
13256 = build_function_type_list (V8HI_type_node,
13257 V8HI_type_node, V8HI_type_node, NULL_TREE);
13258 tree v4si_ftype_v4si_v4si
13259 = build_function_type_list (V4SI_type_node,
13260 V4SI_type_node, V4SI_type_node, NULL_TREE);
13261 tree v2di_ftype_v2di_v2di
13262 = build_function_type_list (V2DI_type_node,
13263 V2DI_type_node, V2DI_type_node, NULL_TREE);
13264 tree v2di_ftype_v2df_v2df
13265 = build_function_type_list (V2DI_type_node,
13266 V2DF_type_node, V2DF_type_node, NULL_TREE);
13267 tree v2df_ftype_v2df
13268 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13269 tree v2df_ftype_double
13270 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13271 tree v2df_ftype_double_double
13272 = build_function_type_list (V2DF_type_node,
13273 double_type_node, double_type_node, NULL_TREE);
13274 tree int_ftype_v8hi_int
13275 = build_function_type_list (integer_type_node,
13276 V8HI_type_node, integer_type_node, NULL_TREE);
13277 tree v8hi_ftype_v8hi_int_int
13278 = build_function_type_list (V8HI_type_node,
13279 V8HI_type_node, integer_type_node,
13280 integer_type_node, NULL_TREE);
13281 tree v2di_ftype_v2di_int
13282 = build_function_type_list (V2DI_type_node,
13283 V2DI_type_node, integer_type_node, NULL_TREE);
13284 tree v4si_ftype_v4si_int
13285 = build_function_type_list (V4SI_type_node,
13286 V4SI_type_node, integer_type_node, NULL_TREE);
13287 tree v8hi_ftype_v8hi_int
13288 = build_function_type_list (V8HI_type_node,
13289 V8HI_type_node, integer_type_node, NULL_TREE);
13290 tree v8hi_ftype_v8hi_v2di
13291 = build_function_type_list (V8HI_type_node,
13292 V8HI_type_node, V2DI_type_node, NULL_TREE);
13293 tree v4si_ftype_v4si_v2di
13294 = build_function_type_list (V4SI_type_node,
13295 V4SI_type_node, V2DI_type_node, NULL_TREE);
13296 tree v4si_ftype_v8hi_v8hi
13297 = build_function_type_list (V4SI_type_node,
13298 V8HI_type_node, V8HI_type_node, NULL_TREE);
13299 tree di_ftype_v8qi_v8qi
13300 = build_function_type_list (long_long_unsigned_type_node,
13301 V8QI_type_node, V8QI_type_node, NULL_TREE);
13302 tree v2di_ftype_v16qi_v16qi
13303 = build_function_type_list (V2DI_type_node,
13304 V16QI_type_node, V16QI_type_node, NULL_TREE);
13305 tree int_ftype_v16qi
13306 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13307 tree v16qi_ftype_pcchar
13308 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13309 tree void_ftype_pchar_v16qi
13310 = build_function_type_list (void_type_node,
13311 pchar_type_node, V16QI_type_node, NULL_TREE);
13312 tree v4si_ftype_pcint
13313 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13314 tree void_ftype_pcint_v4si
13315 = build_function_type_list (void_type_node,
13316 pcint_type_node, V4SI_type_node, NULL_TREE);
13317 tree v2di_ftype_v2di
13318 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13321 tree float128_type;
13323 /* The __float80 type. */
13324 if (TYPE_MODE (long_double_type_node) == XFmode)
13325 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13329 /* The __float80 type. */
13330 float80_type = make_node (REAL_TYPE);
13331 TYPE_PRECISION (float80_type) = 96;
13332 layout_type (float80_type);
13333 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13336 float128_type = make_node (REAL_TYPE);
13337 TYPE_PRECISION (float128_type) = 128;
13338 layout_type (float128_type);
13339 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13341 /* Add all builtins that are more or less simple operations on two
13343 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13345 /* Use one of the operands; the target can have a different mode for
13346 mask-generating compares. */
13347 enum machine_mode mode;
13352 mode = insn_data[d->icode].operand[1].mode;
13357 type = v16qi_ftype_v16qi_v16qi;
13360 type = v8hi_ftype_v8hi_v8hi;
13363 type = v4si_ftype_v4si_v4si;
13366 type = v2di_ftype_v2di_v2di;
13369 type = v2df_ftype_v2df_v2df;
13372 type = ti_ftype_ti_ti;
13375 type = v4sf_ftype_v4sf_v4sf;
13378 type = v8qi_ftype_v8qi_v8qi;
13381 type = v4hi_ftype_v4hi_v4hi;
13384 type = v2si_ftype_v2si_v2si;
13387 type = di_ftype_di_di;
13394 /* Override for comparisons. */
13395 if (d->icode == CODE_FOR_maskcmpv4sf3
13396 || d->icode == CODE_FOR_maskncmpv4sf3
13397 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13398 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13399 type = v4si_ftype_v4sf_v4sf;
13401 if (d->icode == CODE_FOR_maskcmpv2df3
13402 || d->icode == CODE_FOR_maskncmpv2df3
13403 || d->icode == CODE_FOR_vmmaskcmpv2df3
13404 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13405 type = v2di_ftype_v2df_v2df;
13407 def_builtin (d->mask, d->name, type, d->code);
13410 /* Add the remaining MMX insns with somewhat more complicated types. */
13411 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13412 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13413 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13414 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13415 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13417 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13418 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13419 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13421 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13422 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13424 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13425 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13427 /* comi/ucomi insns. */
13428 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13429 if (d->mask == MASK_SSE2)
13430 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13432 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13434 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13435 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13436 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13438 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13439 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13440 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13441 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13442 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13443 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13444 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13445 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13446 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13447 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13448 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13450 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13451 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13453 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13455 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13456 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13457 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13458 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13459 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13460 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13462 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13463 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13464 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13465 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13467 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13468 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13469 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13470 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13472 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13474 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13476 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13477 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13478 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13479 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13480 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13481 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13483 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13485 /* Original 3DNow! */
13486 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13487 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13488 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13489 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13490 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13491 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13492 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13493 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13494 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13495 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13496 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13497 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13498 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13499 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13500 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13501 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13502 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13503 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13504 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13505 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13507 /* 3DNow! extension as used in the Athlon CPU. */
13508 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13509 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13510 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13511 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13512 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13513 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13515 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13518 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13519 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13521 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13522 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13523 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13525 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13526 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13527 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13528 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13529 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13530 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13532 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13533 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13534 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13535 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13537 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13538 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13539 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13540 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13541 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13543 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13544 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13545 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13546 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13548 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13549 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13551 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13553 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13554 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13556 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13557 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13558 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13559 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13560 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13562 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13564 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13565 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13566 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13567 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13569 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13570 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13571 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13573 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13574 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13575 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13576 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13578 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13579 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13580 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13581 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13582 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13583 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13584 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13586 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13587 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13588 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13590 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13591 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13592 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13593 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13594 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13595 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13596 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13598 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13600 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13601 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13602 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13604 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13605 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13606 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13608 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13609 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13611 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13612 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13613 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13614 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13616 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13617 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13618 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13619 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13621 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13622 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13624 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13626 /* Prescott New Instructions. */
13627 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
13628 void_ftype_pcvoid_unsigned_unsigned,
13629 IX86_BUILTIN_MONITOR);
13630 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
13631 void_ftype_unsigned_unsigned,
13632 IX86_BUILTIN_MWAIT);
13633 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
13635 IX86_BUILTIN_MOVSHDUP);
13636 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
13638 IX86_BUILTIN_MOVSLDUP);
13639 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
13640 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13641 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
13642 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13643 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
13644 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13647 /* Errors in the source file can cause expand_expr to return const0_rtx
13648 where we expect a vector. To avoid crashing, use one of the vector
13649 clear instructions. */
13651 safe_vector_operand (rtx x, enum machine_mode mode)
13653 if (x != const0_rtx)
13655 x = gen_reg_rtx (mode);
13657 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13658 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13659 : gen_rtx_SUBREG (DImode, x, 0)));
13661 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13662 : gen_rtx_SUBREG (V4SFmode, x, 0),
13663 CONST0_RTX (V4SFmode)));
13667 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13670 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13673 tree arg0 = TREE_VALUE (arglist);
13674 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13675 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13676 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13677 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13678 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13679 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13681 if (VECTOR_MODE_P (mode0))
13682 op0 = safe_vector_operand (op0, mode0);
13683 if (VECTOR_MODE_P (mode1))
13684 op1 = safe_vector_operand (op1, mode1);
13687 || GET_MODE (target) != tmode
13688 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13689 target = gen_reg_rtx (tmode);
13691 if (GET_MODE (op1) == SImode && mode1 == TImode)
13693 rtx x = gen_reg_rtx (V4SImode);
13694 emit_insn (gen_sse2_loadd (x, op1));
13695 op1 = gen_lowpart (TImode, x);
13698 /* In case the insn wants input operands in modes different from
13699 the result, abort. */
13700 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13701 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
13704 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13705 op0 = copy_to_mode_reg (mode0, op0);
13706 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13707 op1 = copy_to_mode_reg (mode1, op1);
13709 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13710 yet one of the two must not be a memory. This is normally enforced
13711 by expanders, but we didn't bother to create one here. */
13712 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13713 op0 = copy_to_mode_reg (mode0, op0);
13715 pat = GEN_FCN (icode) (target, op0, op1);
13722 /* Subroutine of ix86_expand_builtin to take care of stores. */
13725 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13728 tree arg0 = TREE_VALUE (arglist);
13729 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13730 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13731 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13732 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13733 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13735 if (VECTOR_MODE_P (mode1))
13736 op1 = safe_vector_operand (op1, mode1);
13738 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13739 op1 = copy_to_mode_reg (mode1, op1);
13741 pat = GEN_FCN (icode) (op0, op1);
13747 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13750 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13751 rtx target, int do_load)
13754 tree arg0 = TREE_VALUE (arglist);
13755 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13756 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13757 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13760 || GET_MODE (target) != tmode
13761 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13762 target = gen_reg_rtx (tmode);
13764 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13767 if (VECTOR_MODE_P (mode0))
13768 op0 = safe_vector_operand (op0, mode0);
13770 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13771 op0 = copy_to_mode_reg (mode0, op0);
13774 pat = GEN_FCN (icode) (target, op0);
13781 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13782 sqrtss, rsqrtss, rcpss. */
13785 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13788 tree arg0 = TREE_VALUE (arglist);
13789 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13790 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13791 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13794 || GET_MODE (target) != tmode
13795 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13796 target = gen_reg_rtx (tmode);
13798 if (VECTOR_MODE_P (mode0))
13799 op0 = safe_vector_operand (op0, mode0);
13801 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13802 op0 = copy_to_mode_reg (mode0, op0);
13805 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13806 op1 = copy_to_mode_reg (mode0, op1);
13808 pat = GEN_FCN (icode) (target, op0, op1);
13815 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13818 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13822 tree arg0 = TREE_VALUE (arglist);
13823 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13824 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13825 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13827 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13828 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13829 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13830 enum rtx_code comparison = d->comparison;
13832 if (VECTOR_MODE_P (mode0))
13833 op0 = safe_vector_operand (op0, mode0);
13834 if (VECTOR_MODE_P (mode1))
13835 op1 = safe_vector_operand (op1, mode1);
13837 /* Swap operands if we have a comparison that isn't available in
13841 rtx tmp = gen_reg_rtx (mode1);
13842 emit_move_insn (tmp, op1);
13848 || GET_MODE (target) != tmode
13849 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13850 target = gen_reg_rtx (tmode);
13852 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13853 op0 = copy_to_mode_reg (mode0, op0);
13854 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13855 op1 = copy_to_mode_reg (mode1, op1);
13857 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13858 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13865 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13868 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13872 tree arg0 = TREE_VALUE (arglist);
13873 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13874 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13875 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13877 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13878 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13879 enum rtx_code comparison = d->comparison;
13881 if (VECTOR_MODE_P (mode0))
13882 op0 = safe_vector_operand (op0, mode0);
13883 if (VECTOR_MODE_P (mode1))
13884 op1 = safe_vector_operand (op1, mode1);
13886 /* Swap operands if we have a comparison that isn't available in
13895 target = gen_reg_rtx (SImode);
13896 emit_move_insn (target, const0_rtx);
13897 target = gen_rtx_SUBREG (QImode, target, 0);
13899 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13900 op0 = copy_to_mode_reg (mode0, op0);
13901 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13902 op1 = copy_to_mode_reg (mode1, op1);
13904 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13905 pat = GEN_FCN (d->icode) (op0, op1);
13909 emit_insn (gen_rtx_SET (VOIDmode,
13910 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13911 gen_rtx_fmt_ee (comparison, QImode,
13915 return SUBREG_REG (target);
13918 /* Expand an expression EXP that calls a built-in function,
13919 with result going to TARGET if that's convenient
13920 (and in mode MODE if that's convenient).
13921 SUBTARGET may be used as the target for computing one of EXP's operands.
13922 IGNORE is nonzero if the value is to be ignored. */
13925 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13926 enum machine_mode mode ATTRIBUTE_UNUSED,
13927 int ignore ATTRIBUTE_UNUSED)
13929 const struct builtin_description *d;
13931 enum insn_code icode;
13932 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13933 tree arglist = TREE_OPERAND (exp, 1);
13934 tree arg0, arg1, arg2;
13935 rtx op0, op1, op2, pat;
13936 enum machine_mode tmode, mode0, mode1, mode2;
13937 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13941 case IX86_BUILTIN_EMMS:
13942 emit_insn (gen_emms ());
13945 case IX86_BUILTIN_SFENCE:
13946 emit_insn (gen_sfence ());
13949 case IX86_BUILTIN_PEXTRW:
13950 case IX86_BUILTIN_PEXTRW128:
13951 icode = (fcode == IX86_BUILTIN_PEXTRW
13952 ? CODE_FOR_mmx_pextrw
13953 : CODE_FOR_sse2_pextrw);
13954 arg0 = TREE_VALUE (arglist);
13955 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13956 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13957 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13958 tmode = insn_data[icode].operand[0].mode;
13959 mode0 = insn_data[icode].operand[1].mode;
13960 mode1 = insn_data[icode].operand[2].mode;
13962 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13963 op0 = copy_to_mode_reg (mode0, op0);
13964 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13966 error ("selector must be an integer constant in the range 0..%i",
13967 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
13968 return gen_reg_rtx (tmode);
13971 || GET_MODE (target) != tmode
13972 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13973 target = gen_reg_rtx (tmode);
13974 pat = GEN_FCN (icode) (target, op0, op1);
13980 case IX86_BUILTIN_PINSRW:
13981 case IX86_BUILTIN_PINSRW128:
13982 icode = (fcode == IX86_BUILTIN_PINSRW
13983 ? CODE_FOR_mmx_pinsrw
13984 : CODE_FOR_sse2_pinsrw);
13985 arg0 = TREE_VALUE (arglist);
13986 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13987 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13988 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13989 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13990 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13991 tmode = insn_data[icode].operand[0].mode;
13992 mode0 = insn_data[icode].operand[1].mode;
13993 mode1 = insn_data[icode].operand[2].mode;
13994 mode2 = insn_data[icode].operand[3].mode;
13996 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13997 op0 = copy_to_mode_reg (mode0, op0);
13998 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13999 op1 = copy_to_mode_reg (mode1, op1);
14000 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14002 error ("selector must be an integer constant in the range 0..%i",
14003 fcode == IX86_BUILTIN_PINSRW ? 15:255);
14007 || GET_MODE (target) != tmode
14008 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14009 target = gen_reg_rtx (tmode);
14010 pat = GEN_FCN (icode) (target, op0, op1, op2);
14016 case IX86_BUILTIN_MASKMOVQ:
14017 case IX86_BUILTIN_MASKMOVDQU:
14018 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14019 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14020 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14021 : CODE_FOR_sse2_maskmovdqu));
14022 /* Note the arg order is different from the operand order. */
14023 arg1 = TREE_VALUE (arglist);
14024 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14025 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14026 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14027 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14028 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14029 mode0 = insn_data[icode].operand[0].mode;
14030 mode1 = insn_data[icode].operand[1].mode;
14031 mode2 = insn_data[icode].operand[2].mode;
14033 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14034 op0 = copy_to_mode_reg (mode0, op0);
14035 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14036 op1 = copy_to_mode_reg (mode1, op1);
14037 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14038 op2 = copy_to_mode_reg (mode2, op2);
14039 pat = GEN_FCN (icode) (op0, op1, op2);
14045 case IX86_BUILTIN_SQRTSS:
14046 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14047 case IX86_BUILTIN_RSQRTSS:
14048 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14049 case IX86_BUILTIN_RCPSS:
14050 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14052 case IX86_BUILTIN_LOADAPS:
14053 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14055 case IX86_BUILTIN_LOADUPS:
14056 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14058 case IX86_BUILTIN_STOREAPS:
14059 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14061 case IX86_BUILTIN_STOREUPS:
14062 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14064 case IX86_BUILTIN_LOADSS:
14065 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14067 case IX86_BUILTIN_STORESS:
14068 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14070 case IX86_BUILTIN_LOADHPS:
14071 case IX86_BUILTIN_LOADLPS:
14072 case IX86_BUILTIN_LOADHPD:
14073 case IX86_BUILTIN_LOADLPD:
14074 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14075 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14076 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14077 : CODE_FOR_sse2_movsd);
14078 arg0 = TREE_VALUE (arglist);
14079 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14080 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14081 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14082 tmode = insn_data[icode].operand[0].mode;
14083 mode0 = insn_data[icode].operand[1].mode;
14084 mode1 = insn_data[icode].operand[2].mode;
14086 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14087 op0 = copy_to_mode_reg (mode0, op0);
14088 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14090 || GET_MODE (target) != tmode
14091 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14092 target = gen_reg_rtx (tmode);
14093 pat = GEN_FCN (icode) (target, op0, op1);
14099 case IX86_BUILTIN_STOREHPS:
14100 case IX86_BUILTIN_STORELPS:
14101 case IX86_BUILTIN_STOREHPD:
14102 case IX86_BUILTIN_STORELPD:
14103 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14104 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14105 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14106 : CODE_FOR_sse2_movsd);
14107 arg0 = TREE_VALUE (arglist);
14108 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14109 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14110 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14111 mode0 = insn_data[icode].operand[1].mode;
14112 mode1 = insn_data[icode].operand[2].mode;
14114 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14115 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14116 op1 = copy_to_mode_reg (mode1, op1);
14118 pat = GEN_FCN (icode) (op0, op0, op1);
14124 case IX86_BUILTIN_MOVNTPS:
14125 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14126 case IX86_BUILTIN_MOVNTQ:
14127 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14129 case IX86_BUILTIN_LDMXCSR:
14130 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14131 target = assign_386_stack_local (SImode, 0);
14132 emit_move_insn (target, op0);
14133 emit_insn (gen_ldmxcsr (target));
14136 case IX86_BUILTIN_STMXCSR:
14137 target = assign_386_stack_local (SImode, 0);
14138 emit_insn (gen_stmxcsr (target));
14139 return copy_to_mode_reg (SImode, target);
14141 case IX86_BUILTIN_SHUFPS:
14142 case IX86_BUILTIN_SHUFPD:
14143 icode = (fcode == IX86_BUILTIN_SHUFPS
14144 ? CODE_FOR_sse_shufps
14145 : CODE_FOR_sse2_shufpd);
14146 arg0 = TREE_VALUE (arglist);
14147 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14148 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14149 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14150 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14151 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14152 tmode = insn_data[icode].operand[0].mode;
14153 mode0 = insn_data[icode].operand[1].mode;
14154 mode1 = insn_data[icode].operand[2].mode;
14155 mode2 = insn_data[icode].operand[3].mode;
14157 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14158 op0 = copy_to_mode_reg (mode0, op0);
14159 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14160 op1 = copy_to_mode_reg (mode1, op1);
14161 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14163 /* @@@ better error message */
14164 error ("mask must be an immediate");
14165 return gen_reg_rtx (tmode);
14168 || GET_MODE (target) != tmode
14169 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14170 target = gen_reg_rtx (tmode);
14171 pat = GEN_FCN (icode) (target, op0, op1, op2);
14177 case IX86_BUILTIN_PSHUFW:
14178 case IX86_BUILTIN_PSHUFD:
14179 case IX86_BUILTIN_PSHUFHW:
14180 case IX86_BUILTIN_PSHUFLW:
14181 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14182 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14183 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14184 : CODE_FOR_mmx_pshufw);
14185 arg0 = TREE_VALUE (arglist);
14186 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14187 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14188 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14189 tmode = insn_data[icode].operand[0].mode;
14190 mode1 = insn_data[icode].operand[1].mode;
14191 mode2 = insn_data[icode].operand[2].mode;
14193 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14194 op0 = copy_to_mode_reg (mode1, op0);
14195 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14197 /* @@@ better error message */
14198 error ("mask must be an immediate");
14202 || GET_MODE (target) != tmode
14203 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14204 target = gen_reg_rtx (tmode);
14205 pat = GEN_FCN (icode) (target, op0, op1);
14211 case IX86_BUILTIN_PSLLDQI128:
14212 case IX86_BUILTIN_PSRLDQI128:
14213 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14214 : CODE_FOR_sse2_lshrti3);
14215 arg0 = TREE_VALUE (arglist);
14216 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14217 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14218 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14219 tmode = insn_data[icode].operand[0].mode;
14220 mode1 = insn_data[icode].operand[1].mode;
14221 mode2 = insn_data[icode].operand[2].mode;
14223 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14225 op0 = copy_to_reg (op0);
14226 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14228 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14230 error ("shift must be an immediate");
14233 target = gen_reg_rtx (V2DImode);
14234 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14240 case IX86_BUILTIN_FEMMS:
14241 emit_insn (gen_femms ());
14244 case IX86_BUILTIN_PAVGUSB:
14245 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14247 case IX86_BUILTIN_PF2ID:
14248 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14250 case IX86_BUILTIN_PFACC:
14251 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14253 case IX86_BUILTIN_PFADD:
14254 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14256 case IX86_BUILTIN_PFCMPEQ:
14257 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14259 case IX86_BUILTIN_PFCMPGE:
14260 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14262 case IX86_BUILTIN_PFCMPGT:
14263 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14265 case IX86_BUILTIN_PFMAX:
14266 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14268 case IX86_BUILTIN_PFMIN:
14269 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14271 case IX86_BUILTIN_PFMUL:
14272 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14274 case IX86_BUILTIN_PFRCP:
14275 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14277 case IX86_BUILTIN_PFRCPIT1:
14278 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14280 case IX86_BUILTIN_PFRCPIT2:
14281 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14283 case IX86_BUILTIN_PFRSQIT1:
14284 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14286 case IX86_BUILTIN_PFRSQRT:
14287 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14289 case IX86_BUILTIN_PFSUB:
14290 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14292 case IX86_BUILTIN_PFSUBR:
14293 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14295 case IX86_BUILTIN_PI2FD:
14296 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14298 case IX86_BUILTIN_PMULHRW:
14299 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14301 case IX86_BUILTIN_PF2IW:
14302 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14304 case IX86_BUILTIN_PFNACC:
14305 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14307 case IX86_BUILTIN_PFPNACC:
14308 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14310 case IX86_BUILTIN_PI2FW:
14311 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14313 case IX86_BUILTIN_PSWAPDSI:
14314 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14316 case IX86_BUILTIN_PSWAPDSF:
14317 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14319 case IX86_BUILTIN_SSE_ZERO:
14320 target = gen_reg_rtx (V4SFmode);
14321 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14324 case IX86_BUILTIN_MMX_ZERO:
14325 target = gen_reg_rtx (DImode);
14326 emit_insn (gen_mmx_clrdi (target));
14329 case IX86_BUILTIN_CLRTI:
14330 target = gen_reg_rtx (V2DImode);
14331 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14335 case IX86_BUILTIN_SQRTSD:
14336 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14337 case IX86_BUILTIN_LOADAPD:
14338 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14339 case IX86_BUILTIN_LOADUPD:
14340 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14342 case IX86_BUILTIN_STOREAPD:
14343 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14344 case IX86_BUILTIN_STOREUPD:
14345 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14347 case IX86_BUILTIN_LOADSD:
14348 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14350 case IX86_BUILTIN_STORESD:
14351 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14353 case IX86_BUILTIN_SETPD1:
14354 target = assign_386_stack_local (DFmode, 0);
14355 arg0 = TREE_VALUE (arglist);
14356 emit_move_insn (adjust_address (target, DFmode, 0),
14357 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14358 op0 = gen_reg_rtx (V2DFmode);
14359 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14360 emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
14363 case IX86_BUILTIN_SETPD:
14364 target = assign_386_stack_local (V2DFmode, 0);
14365 arg0 = TREE_VALUE (arglist);
14366 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14367 emit_move_insn (adjust_address (target, DFmode, 0),
14368 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14369 emit_move_insn (adjust_address (target, DFmode, 8),
14370 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14371 op0 = gen_reg_rtx (V2DFmode);
14372 emit_insn (gen_sse2_movapd (op0, target));
14375 case IX86_BUILTIN_LOADRPD:
14376 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14377 gen_reg_rtx (V2DFmode), 1);
14378 emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
14381 case IX86_BUILTIN_LOADPD1:
14382 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14383 gen_reg_rtx (V2DFmode), 1);
14384 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14387 case IX86_BUILTIN_STOREPD1:
14388 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14389 case IX86_BUILTIN_STORERPD:
14390 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14392 case IX86_BUILTIN_CLRPD:
14393 target = gen_reg_rtx (V2DFmode);
14394 emit_insn (gen_sse_clrv2df (target));
14397 case IX86_BUILTIN_MFENCE:
14398 emit_insn (gen_sse2_mfence ());
14400 case IX86_BUILTIN_LFENCE:
14401 emit_insn (gen_sse2_lfence ());
14404 case IX86_BUILTIN_CLFLUSH:
14405 arg0 = TREE_VALUE (arglist);
14406 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14407 icode = CODE_FOR_sse2_clflush;
14408 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14409 op0 = copy_to_mode_reg (Pmode, op0);
14411 emit_insn (gen_sse2_clflush (op0));
14414 case IX86_BUILTIN_MOVNTPD:
14415 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14416 case IX86_BUILTIN_MOVNTDQ:
14417 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14418 case IX86_BUILTIN_MOVNTI:
14419 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14421 case IX86_BUILTIN_LOADDQA:
14422 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14423 case IX86_BUILTIN_LOADDQU:
14424 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14425 case IX86_BUILTIN_LOADD:
14426 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14428 case IX86_BUILTIN_STOREDQA:
14429 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14430 case IX86_BUILTIN_STOREDQU:
14431 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14432 case IX86_BUILTIN_STORED:
14433 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14435 case IX86_BUILTIN_MONITOR:
14436 arg0 = TREE_VALUE (arglist);
14437 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14438 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14439 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14440 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14441 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14443 op0 = copy_to_mode_reg (SImode, op0);
14445 op1 = copy_to_mode_reg (SImode, op1);
14447 op2 = copy_to_mode_reg (SImode, op2);
14448 emit_insn (gen_monitor (op0, op1, op2));
14451 case IX86_BUILTIN_MWAIT:
14452 arg0 = TREE_VALUE (arglist);
14453 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14454 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14455 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14457 op0 = copy_to_mode_reg (SImode, op0);
14459 op1 = copy_to_mode_reg (SImode, op1);
14460 emit_insn (gen_mwait (op0, op1));
14463 case IX86_BUILTIN_LOADDDUP:
14464 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14466 case IX86_BUILTIN_LDDQU:
14467 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14474 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14475 if (d->code == fcode)
14477 /* Compares are treated specially. */
14478 if (d->icode == CODE_FOR_maskcmpv4sf3
14479 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14480 || d->icode == CODE_FOR_maskncmpv4sf3
14481 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14482 || d->icode == CODE_FOR_maskcmpv2df3
14483 || d->icode == CODE_FOR_vmmaskcmpv2df3
14484 || d->icode == CODE_FOR_maskncmpv2df3
14485 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14486 return ix86_expand_sse_compare (d, arglist, target);
14488 return ix86_expand_binop_builtin (d->icode, arglist, target);
14491 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14492 if (d->code == fcode)
14493 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14495 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14496 if (d->code == fcode)
14497 return ix86_expand_sse_comi (d, arglist, target);
14499 /* @@@ Should really do something sensible here. */
14503 /* Store OPERAND to the memory after reload is completed. This means
14504 that we can't easily use assign_stack_local. */
14506 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14509 if (!reload_completed)
14511 if (TARGET_RED_ZONE)
14513 result = gen_rtx_MEM (mode,
14514 gen_rtx_PLUS (Pmode,
14516 GEN_INT (-RED_ZONE_SIZE)));
14517 emit_move_insn (result, operand);
14519 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14525 operand = gen_lowpart (DImode, operand);
14529 gen_rtx_SET (VOIDmode,
14530 gen_rtx_MEM (DImode,
14531 gen_rtx_PRE_DEC (DImode,
14532 stack_pointer_rtx)),
14538 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14547 split_di (&operand, 1, operands, operands + 1);
14549 gen_rtx_SET (VOIDmode,
14550 gen_rtx_MEM (SImode,
14551 gen_rtx_PRE_DEC (Pmode,
14552 stack_pointer_rtx)),
14555 gen_rtx_SET (VOIDmode,
14556 gen_rtx_MEM (SImode,
14557 gen_rtx_PRE_DEC (Pmode,
14558 stack_pointer_rtx)),
14563 /* It is better to store HImodes as SImodes. */
14564 if (!TARGET_PARTIAL_REG_STALL)
14565 operand = gen_lowpart (SImode, operand);
14569 gen_rtx_SET (VOIDmode,
14570 gen_rtx_MEM (GET_MODE (operand),
14571 gen_rtx_PRE_DEC (SImode,
14572 stack_pointer_rtx)),
14578 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14583 /* Free operand from the memory. */
14585 ix86_free_from_memory (enum machine_mode mode)
14587 if (!TARGET_RED_ZONE)
14591 if (mode == DImode || TARGET_64BIT)
14593 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14597 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14598 to pop or add instruction if registers are available. */
14599 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14600 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14605 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14606 QImode must go into class Q_REGS.
14607 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14608 movdf to do mem-to-mem moves through integer regs. */
14610 ix86_preferred_reload_class (rtx x, enum reg_class class)
14612 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14614 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14616 /* SSE can't load any constant directly yet. */
14617 if (SSE_CLASS_P (class))
14619 /* Floats can load 0 and 1. */
14620 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14622 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14623 if (MAYBE_SSE_CLASS_P (class))
14624 return (reg_class_subset_p (class, GENERAL_REGS)
14625 ? GENERAL_REGS : FLOAT_REGS);
14629 /* General regs can load everything. */
14630 if (reg_class_subset_p (class, GENERAL_REGS))
14631 return GENERAL_REGS;
14632 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14633 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14636 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14638 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14643 /* If we are copying between general and FP registers, we need a memory
14644 location. The same is true for SSE and MMX registers.
14646 The macro can't work reliably when one of the CLASSES is class containing
14647 registers from multiple units (SSE, MMX, integer). We avoid this by never
14648 combining those units in single alternative in the machine description.
14649 Ensure that this constraint holds to avoid unexpected surprises.
14651 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14652 enforce these sanity checks. */
14654 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14655 enum machine_mode mode, int strict)
14657 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14658 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14659 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14660 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14661 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14662 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14669 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14670 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14671 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14672 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14673 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14675 /* Return the cost of moving data from a register in class CLASS1 to
14676 one in class CLASS2.
14678 It is not required that the cost always equal 2 when FROM is the same as TO;
14679 on some machines it is expensive to move between registers if they are not
14680 general registers. */
14682 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14683 enum reg_class class2)
14685 /* In case we require secondary memory, compute cost of the store followed
14686 by load. In order to avoid bad register allocation choices, we need
14687 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14689 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14693 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14694 MEMORY_MOVE_COST (mode, class1, 1));
14695 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14696 MEMORY_MOVE_COST (mode, class2, 1));
14698 /* In case of copying from general_purpose_register we may emit multiple
14699 stores followed by single load causing memory size mismatch stall.
14700 Count this as arbitrarily high cost of 20. */
14701 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14704 /* In the case of FP/MMX moves, the registers actually overlap, and we
14705 have to switch modes in order to treat them differently. */
14706 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14707 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14713 /* Moves between SSE/MMX and integer unit are expensive. */
14714 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14715 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14716 return ix86_cost->mmxsse_to_integer;
14717 if (MAYBE_FLOAT_CLASS_P (class1))
14718 return ix86_cost->fp_move;
14719 if (MAYBE_SSE_CLASS_P (class1))
14720 return ix86_cost->sse_move;
14721 if (MAYBE_MMX_CLASS_P (class1))
14722 return ix86_cost->mmx_move;
14726 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14728 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14730 /* Flags and only flags can only hold CCmode values. */
14731 if (CC_REGNO_P (regno))
14732 return GET_MODE_CLASS (mode) == MODE_CC;
14733 if (GET_MODE_CLASS (mode) == MODE_CC
14734 || GET_MODE_CLASS (mode) == MODE_RANDOM
14735 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14737 if (FP_REGNO_P (regno))
14738 return VALID_FP_MODE_P (mode);
14739 if (SSE_REGNO_P (regno))
14740 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14741 if (MMX_REGNO_P (regno))
14743 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14744 /* We handle both integer and floats in the general purpose registers.
14745 In future we should be able to handle vector modes as well. */
14746 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14748 /* Take care for QImode values - they can be in non-QI regs, but then
14749 they do cause partial register stalls. */
14750 if (regno < 4 || mode != QImode || TARGET_64BIT)
14752 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14755 /* Return the cost of moving data of mode M between a
14756 register and memory. A value of 2 is the default; this cost is
14757 relative to those in `REGISTER_MOVE_COST'.
14759 If moving between registers and memory is more expensive than
14760 between two registers, you should define this macro to express the
14763 Model also increased moving costs of QImode registers in non
14767 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14769 if (FLOAT_CLASS_P (class))
14786 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14788 if (SSE_CLASS_P (class))
14791 switch (GET_MODE_SIZE (mode))
14805 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14807 if (MMX_CLASS_P (class))
14810 switch (GET_MODE_SIZE (mode))
14821 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14823 switch (GET_MODE_SIZE (mode))
14827 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14828 : ix86_cost->movzbl_load);
14830 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14831 : ix86_cost->int_store[0] + 4);
14834 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14836 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14837 if (mode == TFmode)
14839 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14840 * (((int) GET_MODE_SIZE (mode)
14841 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14845 /* Compute a (partial) cost for rtx X. Return true if the complete
14846 cost has been computed, and false if subexpressions should be
14847 scanned. In either case, *TOTAL contains the cost result. */
14850 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14852 enum machine_mode mode = GET_MODE (x);
14860 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14862 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14864 else if (flag_pic && SYMBOLIC_CONST (x)
14866 || (!GET_CODE (x) != LABEL_REF
14867 && (GET_CODE (x) != SYMBOL_REF
14868 || !SYMBOL_REF_LOCAL_P (x)))))
14875 if (mode == VOIDmode)
14878 switch (standard_80387_constant_p (x))
14883 default: /* Other constants */
14888 /* Start with (MEM (SYMBOL_REF)), since that's where
14889 it'll probably end up. Add a penalty for size. */
14890 *total = (COSTS_N_INSNS (1)
14891 + (flag_pic != 0 && !TARGET_64BIT)
14892 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14898 /* The zero extensions is often completely free on x86_64, so make
14899 it as cheap as possible. */
14900 if (TARGET_64BIT && mode == DImode
14901 && GET_MODE (XEXP (x, 0)) == SImode)
14903 else if (TARGET_ZERO_EXTEND_WITH_AND)
14904 *total = COSTS_N_INSNS (ix86_cost->add);
14906 *total = COSTS_N_INSNS (ix86_cost->movzx);
14910 *total = COSTS_N_INSNS (ix86_cost->movsx);
14914 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14915 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14917 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14920 *total = COSTS_N_INSNS (ix86_cost->add);
14923 if ((value == 2 || value == 3)
14924 && ix86_cost->lea <= ix86_cost->shift_const)
14926 *total = COSTS_N_INSNS (ix86_cost->lea);
14936 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14938 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14940 if (INTVAL (XEXP (x, 1)) > 32)
14941 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14943 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14947 if (GET_CODE (XEXP (x, 1)) == AND)
14948 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14950 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14955 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14956 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14958 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14963 if (FLOAT_MODE_P (mode))
14965 *total = COSTS_N_INSNS (ix86_cost->fmul);
14970 rtx op0 = XEXP (x, 0);
14971 rtx op1 = XEXP (x, 1);
14973 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14975 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14976 for (nbits = 0; value != 0; value &= value - 1)
14980 /* This is arbitrary. */
14983 /* Compute costs correctly for widening multiplication. */
14984 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
14985 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
14986 == GET_MODE_SIZE (mode))
14988 int is_mulwiden = 0;
14989 enum machine_mode inner_mode = GET_MODE (op0);
14991 if (GET_CODE (op0) == GET_CODE (op1))
14992 is_mulwiden = 1, op1 = XEXP (op1, 0);
14993 else if (GET_CODE (op1) == CONST_INT)
14995 if (GET_CODE (op0) == SIGN_EXTEND)
14996 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
14999 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
15003 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
15006 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15007 + nbits * ix86_cost->mult_bit)
15008 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
15017 if (FLOAT_MODE_P (mode))
15018 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15020 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15024 if (FLOAT_MODE_P (mode))
15025 *total = COSTS_N_INSNS (ix86_cost->fadd);
15026 else if (GET_MODE_CLASS (mode) == MODE_INT
15027 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15029 if (GET_CODE (XEXP (x, 0)) == PLUS
15030 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15031 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15032 && CONSTANT_P (XEXP (x, 1)))
15034 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15035 if (val == 2 || val == 4 || val == 8)
15037 *total = COSTS_N_INSNS (ix86_cost->lea);
15038 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15039 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15041 *total += rtx_cost (XEXP (x, 1), outer_code);
15045 else if (GET_CODE (XEXP (x, 0)) == MULT
15046 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15048 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15049 if (val == 2 || val == 4 || val == 8)
15051 *total = COSTS_N_INSNS (ix86_cost->lea);
15052 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15053 *total += rtx_cost (XEXP (x, 1), outer_code);
15057 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15059 *total = COSTS_N_INSNS (ix86_cost->lea);
15060 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15061 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15062 *total += rtx_cost (XEXP (x, 1), outer_code);
15069 if (FLOAT_MODE_P (mode))
15071 *total = COSTS_N_INSNS (ix86_cost->fadd);
15079 if (!TARGET_64BIT && mode == DImode)
15081 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15082 + (rtx_cost (XEXP (x, 0), outer_code)
15083 << (GET_MODE (XEXP (x, 0)) != DImode))
15084 + (rtx_cost (XEXP (x, 1), outer_code)
15085 << (GET_MODE (XEXP (x, 1)) != DImode)));
15091 if (FLOAT_MODE_P (mode))
15093 *total = COSTS_N_INSNS (ix86_cost->fchs);
15099 if (!TARGET_64BIT && mode == DImode)
15100 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15102 *total = COSTS_N_INSNS (ix86_cost->add);
15106 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15111 if (FLOAT_MODE_P (mode))
15112 *total = COSTS_N_INSNS (ix86_cost->fabs);
15116 if (FLOAT_MODE_P (mode))
15117 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15121 if (XINT (x, 1) == UNSPEC_TP)
15130 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15132 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
15135 fputs ("\tpushl $", asm_out_file);
15136 assemble_name (asm_out_file, XSTR (symbol, 0));
15137 fputc ('\n', asm_out_file);
15143 static int current_machopic_label_num;
15145 /* Given a symbol name and its associated stub, write out the
15146 definition of the stub. */
15149 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15151 unsigned int length;
15152 char *binder_name, *symbol_name, lazy_ptr_name[32];
15153 int label = ++current_machopic_label_num;
15155 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15156 symb = (*targetm.strip_name_encoding) (symb);
15158 length = strlen (stub);
15159 binder_name = alloca (length + 32);
15160 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15162 length = strlen (symb);
15163 symbol_name = alloca (length + 32);
15164 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15166 sprintf (lazy_ptr_name, "L%d$lz", label);
15169 machopic_picsymbol_stub_section ();
15171 machopic_symbol_stub_section ();
15173 fprintf (file, "%s:\n", stub);
15174 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15178 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15179 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15180 fprintf (file, "\tjmp %%edx\n");
15183 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15185 fprintf (file, "%s:\n", binder_name);
15189 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15190 fprintf (file, "\tpushl %%eax\n");
15193 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15195 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15197 machopic_lazy_symbol_ptr_section ();
15198 fprintf (file, "%s:\n", lazy_ptr_name);
15199 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15200 fprintf (file, "\t.long %s\n", binder_name);
15202 #endif /* TARGET_MACHO */
15204 /* Order the registers for register allocator. */
15207 x86_order_regs_for_local_alloc (void)
15212 /* First allocate the local general purpose registers. */
15213 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15214 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15215 reg_alloc_order [pos++] = i;
15217 /* Global general purpose registers. */
15218 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15219 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15220 reg_alloc_order [pos++] = i;
15222 /* x87 registers come first in case we are doing FP math
15224 if (!TARGET_SSE_MATH)
15225 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15226 reg_alloc_order [pos++] = i;
15228 /* SSE registers. */
15229 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15230 reg_alloc_order [pos++] = i;
15231 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15232 reg_alloc_order [pos++] = i;
15234 /* x87 registers. */
15235 if (TARGET_SSE_MATH)
15236 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15237 reg_alloc_order [pos++] = i;
15239 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15240 reg_alloc_order [pos++] = i;
15242 /* Initialize the rest of array as we do not allocate some registers
15244 while (pos < FIRST_PSEUDO_REGISTER)
15245 reg_alloc_order [pos++] = 0;
15248 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15249 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15252 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15253 struct attribute_spec.handler. */
15255 ix86_handle_struct_attribute (tree *node, tree name,
15256 tree args ATTRIBUTE_UNUSED,
15257 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15260 if (DECL_P (*node))
15262 if (TREE_CODE (*node) == TYPE_DECL)
15263 type = &TREE_TYPE (*node);
15268 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15269 || TREE_CODE (*type) == UNION_TYPE)))
15271 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15272 *no_add_attrs = true;
15275 else if ((is_attribute_p ("ms_struct", name)
15276 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15277 || ((is_attribute_p ("gcc_struct", name)
15278 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15280 warning ("`%s' incompatible attribute ignored",
15281 IDENTIFIER_POINTER (name));
15282 *no_add_attrs = true;
15289 ix86_ms_bitfield_layout_p (tree record_type)
15291 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15292 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15293 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15296 /* Returns an expression indicating where the this parameter is
15297 located on entry to the FUNCTION. */
15300 x86_this_parameter (tree function)
15302 tree type = TREE_TYPE (function);
15306 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15307 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15310 if (ix86_function_regparm (type, function) > 0)
15314 parm = TYPE_ARG_TYPES (type);
15315 /* Figure out whether or not the function has a variable number of
15317 for (; parm; parm = TREE_CHAIN (parm))
15318 if (TREE_VALUE (parm) == void_type_node)
15320 /* If not, the this parameter is in the first argument. */
15324 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15326 return gen_rtx_REG (SImode, regno);
15330 if (aggregate_value_p (TREE_TYPE (type), type))
15331 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15333 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15336 /* Determine whether x86_output_mi_thunk can succeed. */
15339 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15340 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15341 HOST_WIDE_INT vcall_offset, tree function)
15343 /* 64-bit can handle anything. */
15347 /* For 32-bit, everything's fine if we have one free register. */
15348 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15351 /* Need a free register for vcall_offset. */
15355 /* Need a free register for GOT references. */
15356 if (flag_pic && !(*targetm.binds_local_p) (function))
15359 /* Otherwise ok. */
15363 /* Output the assembler code for a thunk function. THUNK_DECL is the
15364 declaration for the thunk function itself, FUNCTION is the decl for
15365 the target function. DELTA is an immediate constant offset to be
15366 added to THIS. If VCALL_OFFSET is nonzero, the word at
15367 *(*this + vcall_offset) should be added to THIS. */
15370 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15371 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15372 HOST_WIDE_INT vcall_offset, tree function)
15375 rtx this = x86_this_parameter (function);
15378 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15379 pull it in now and let DELTA benefit. */
15382 else if (vcall_offset)
15384 /* Put the this parameter into %eax. */
15386 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15387 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15390 this_reg = NULL_RTX;
15392 /* Adjust the this parameter by a fixed constant. */
15395 xops[0] = GEN_INT (delta);
15396 xops[1] = this_reg ? this_reg : this;
15399 if (!x86_64_general_operand (xops[0], DImode))
15401 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15403 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15407 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15410 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15413 /* Adjust the this parameter by a value stored in the vtable. */
15417 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15420 int tmp_regno = 2 /* ECX */;
15421 if (lookup_attribute ("fastcall",
15422 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15423 tmp_regno = 0 /* EAX */;
15424 tmp = gen_rtx_REG (SImode, tmp_regno);
15427 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15430 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15432 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15434 /* Adjust the this parameter. */
15435 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15436 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15438 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15439 xops[0] = GEN_INT (vcall_offset);
15441 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15442 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15444 xops[1] = this_reg;
15446 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15448 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15451 /* If necessary, drop THIS back to its stack slot. */
15452 if (this_reg && this_reg != this)
15454 xops[0] = this_reg;
15456 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15459 xops[0] = XEXP (DECL_RTL (function), 0);
15462 if (!flag_pic || (*targetm.binds_local_p) (function))
15463 output_asm_insn ("jmp\t%P0", xops);
15466 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15467 tmp = gen_rtx_CONST (Pmode, tmp);
15468 tmp = gen_rtx_MEM (QImode, tmp);
15470 output_asm_insn ("jmp\t%A0", xops);
15475 if (!flag_pic || (*targetm.binds_local_p) (function))
15476 output_asm_insn ("jmp\t%P0", xops);
15481 const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15482 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15483 tmp = gen_rtx_MEM (QImode, tmp);
15485 output_asm_insn ("jmp\t%0", xops);
15488 #endif /* TARGET_MACHO */
15490 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15491 output_set_got (tmp);
15494 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15495 output_asm_insn ("jmp\t{*}%1", xops);
15501 x86_file_start (void)
15503 default_file_start ();
15504 if (X86_FILE_START_VERSION_DIRECTIVE)
15505 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15506 if (X86_FILE_START_FLTUSED)
15507 fputs ("\t.global\t__fltused\n", asm_out_file);
15508 if (ix86_asm_dialect == ASM_INTEL)
15509 fputs ("\t.intel_syntax\n", asm_out_file);
15513 x86_field_alignment (tree field, int computed)
15515 enum machine_mode mode;
15516 tree type = TREE_TYPE (field);
15518 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15520 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15521 ? get_inner_array_type (type) : type);
15522 if (mode == DFmode || mode == DCmode
15523 || GET_MODE_CLASS (mode) == MODE_INT
15524 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15525 return MIN (32, computed);
15529 /* Output assembler code to FILE to increment profiler label # LABELNO
15530 for profiling a function entry. */
15532 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15537 #ifndef NO_PROFILE_COUNTERS
15538 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15540 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15544 #ifndef NO_PROFILE_COUNTERS
15545 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15547 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15551 #ifndef NO_PROFILE_COUNTERS
15552 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15553 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15555 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15559 #ifndef NO_PROFILE_COUNTERS
15560 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15561 PROFILE_COUNT_REGISTER);
15563 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15567 /* We don't have exact information about the insn sizes, but we may assume
15568 quite safely that we are informed about all 1 byte insns and memory
15569 address sizes. This is enough to eliminate unnecessary padding in
15573 min_insn_size (rtx insn)
15577 if (!INSN_P (insn) || !active_insn_p (insn))
15580 /* Discard alignments we've emit and jump instructions. */
15581 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15582 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15584 if (GET_CODE (insn) == JUMP_INSN
15585 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15586 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15589 /* Important case - calls are always 5 bytes.
15590 It is common to have many calls in the row. */
15591 if (GET_CODE (insn) == CALL_INSN
15592 && symbolic_reference_mentioned_p (PATTERN (insn))
15593 && !SIBLING_CALL_P (insn))
15595 if (get_attr_length (insn) <= 1)
15598 /* For normal instructions we may rely on the sizes of addresses
15599 and the presence of symbol to require 4 bytes of encoding.
15600 This is not the case for jumps where references are PC relative. */
15601 if (GET_CODE (insn) != JUMP_INSN)
15603 l = get_attr_length_address (insn);
15604 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15613 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
15617 ix86_avoid_jump_misspredicts (void)
15619 rtx insn, start = get_insns ();
15620 int nbytes = 0, njumps = 0;
15623 /* Look for all minimal intervals of instructions containing 4 jumps.
15624 The intervals are bounded by START and INSN. NBYTES is the total
15625 size of instructions in the interval including INSN and not including
15626 START. When the NBYTES is smaller than 16 bytes, it is possible
15627 that the end of START and INSN ends up in the same 16byte page.
15629 The smallest offset in the page INSN can start is the case where START
15630 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15631 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15633 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15636 nbytes += min_insn_size (insn);
15638 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
15639 INSN_UID (insn), min_insn_size (insn));
15640 if ((GET_CODE (insn) == JUMP_INSN
15641 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15642 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15643 || GET_CODE (insn) == CALL_INSN)
15650 start = NEXT_INSN (start);
15651 if ((GET_CODE (start) == JUMP_INSN
15652 && GET_CODE (PATTERN (start)) != ADDR_VEC
15653 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15654 || GET_CODE (start) == CALL_INSN)
15655 njumps--, isjump = 1;
15658 nbytes -= min_insn_size (start);
15663 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
15664 INSN_UID (start), INSN_UID (insn), nbytes);
15666 if (njumps == 3 && isjump && nbytes < 16)
15668 int padsize = 15 - nbytes + min_insn_size (insn);
15671 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
15672 INSN_UID (insn), padsize);
15673 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15678 /* AMD Athlon works faster
15679 when RET is not destination of conditional jump or directly preceded
15680 by other jump instruction. We avoid the penalty by inserting NOP just
15681 before the RET instructions in such cases. */
15683 ix86_pad_returns (void)
15687 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15689 basic_block bb = e->src;
15690 rtx ret = BB_END (bb);
15692 bool replace = false;
15694 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15695 || !maybe_hot_bb_p (bb))
15697 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15698 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15700 if (prev && GET_CODE (prev) == CODE_LABEL)
15703 for (e = bb->pred; e; e = e->pred_next)
15704 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15705 && !(e->flags & EDGE_FALLTHRU))
15710 prev = prev_active_insn (ret);
15712 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15713 || GET_CODE (prev) == CALL_INSN))
15715 /* Empty functions get branch mispredict even when the jump destination
15716 is not visible to us. */
15717 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15722 emit_insn_before (gen_return_internal_long (), ret);
15728 /* Implement machine specific optimizations. We implement padding of returns
15729 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
15733 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
15734 ix86_pad_returns ();
15735 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
15736 ix86_avoid_jump_misspredicts ();
15739 /* Return nonzero when QImode register that must be represented via REX prefix
15742 x86_extended_QIreg_mentioned_p (rtx insn)
15745 extract_insn_cached (insn);
15746 for (i = 0; i < recog_data.n_operands; i++)
15747 if (REG_P (recog_data.operand[i])
15748 && REGNO (recog_data.operand[i]) >= 4)
15753 /* Return nonzero when P points to register encoded via REX prefix.
15754 Called via for_each_rtx. */
15756 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15758 unsigned int regno;
15761 regno = REGNO (*p);
15762 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15765 /* Return true when INSN mentions register that must be encoded using REX
15768 x86_extended_reg_mentioned_p (rtx insn)
15770 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15773 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15774 optabs would emit if we didn't have TFmode patterns. */
15777 x86_emit_floatuns (rtx operands[2])
15779 rtx neglab, donelab, i0, i1, f0, in, out;
15780 enum machine_mode mode, inmode;
15782 inmode = GET_MODE (operands[1]);
15783 if (inmode != SImode
15784 && inmode != DImode)
15788 in = force_reg (inmode, operands[1]);
15789 mode = GET_MODE (out);
15790 neglab = gen_label_rtx ();
15791 donelab = gen_label_rtx ();
15792 i1 = gen_reg_rtx (Pmode);
15793 f0 = gen_reg_rtx (mode);
15795 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15797 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15798 emit_jump_insn (gen_jump (donelab));
15801 emit_label (neglab);
15803 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15804 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15805 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15806 expand_float (f0, i0, 0);
15807 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15809 emit_label (donelab);
15812 /* Return if we do not know how to pass TYPE solely in registers. */
15814 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
15816 if (default_must_pass_in_stack (mode, type))
15818 return (!TARGET_64BIT && type && mode == TImode);
15821 /* Initialize vector TARGET via VALS. */
15823 ix86_expand_vector_init (rtx target, rtx vals)
15825 enum machine_mode mode = GET_MODE (target);
15826 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15827 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
15830 for (i = n_elts - 1; i >= 0; i--)
15831 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
15832 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
15835 /* Few special cases first...
15836 ... constants are best loaded from constant pool. */
15839 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15843 /* ... values where only first field is non-constant are best loaded
15844 from the pool and overwritten via move later. */
15847 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
15848 GET_MODE_INNER (mode), 0);
15850 op = force_reg (mode, op);
15851 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
15852 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15853 switch (GET_MODE (target))
15856 emit_insn (gen_sse2_movsd (target, target, op));
15859 emit_insn (gen_sse_movss (target, target, op));
15867 /* And the busy sequence doing rotations. */
15868 switch (GET_MODE (target))
15873 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
15875 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
15877 vecop0 = force_reg (V2DFmode, vecop0);
15878 vecop1 = force_reg (V2DFmode, vecop1);
15879 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
15885 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
15887 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
15889 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
15891 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
15892 rtx tmp1 = gen_reg_rtx (V4SFmode);
15893 rtx tmp2 = gen_reg_rtx (V4SFmode);
15895 vecop0 = force_reg (V4SFmode, vecop0);
15896 vecop1 = force_reg (V4SFmode, vecop1);
15897 vecop2 = force_reg (V4SFmode, vecop2);
15898 vecop3 = force_reg (V4SFmode, vecop3);
15899 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
15900 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
15901 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
15909 /* Worker function for TARGET_MD_ASM_CLOBBERS.
15911 We do this in the new i386 backend to maintain source compatibility
15912 with the old cc0-based compiler. */
15915 ix86_md_asm_clobbers (tree clobbers)
15917 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
15919 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
15921 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
15926 /* Worker function for REVERSE_CONDITION. */
15929 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
15931 return (mode != CCFPmode && mode != CCFPUmode
15932 ? reverse_condition (code)
15933 : reverse_condition_maybe_unordered (code));
15936 /* Output code to perform an x87 FP register move, from OPERANDS[1]
15940 output_387_reg_move (rtx insn, rtx *operands)
15942 if (REG_P (operands[1])
15943 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15945 if (REGNO (operands[0]) == FIRST_STACK_REG
15946 && TARGET_USE_FFREEP)
15947 return "ffreep\t%y0";
15948 return "fstp\t%y0";
15950 if (STACK_TOP_P (operands[0]))
15951 return "fld%z1\t%y1";
15955 /* Output code to perform a conditional jump to LABEL, if C2 flag in
15956 FP status register is set. */
15959 ix86_emit_fp_unordered_jump (rtx label)
15961 rtx reg = gen_reg_rtx (HImode);
15964 emit_insn (gen_x86_fnstsw_1 (reg));
15966 if (TARGET_USE_SAHF)
15968 emit_insn (gen_x86_sahf_1 (reg));
15970 temp = gen_rtx_REG (CCmode, FLAGS_REG);
15971 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
15975 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
15977 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15978 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
15981 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
15982 gen_rtx_LABEL_REF (VOIDmode, label),
15984 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
15985 emit_jump_insn (temp);
15988 /* Output code to perform a log1p XFmode calculation. */
15990 void ix86_emit_i387_log1p (rtx op0, rtx op1)
15992 rtx label1 = gen_label_rtx ();
15993 rtx label2 = gen_label_rtx ();
15995 rtx tmp = gen_reg_rtx (XFmode);
15996 rtx tmp2 = gen_reg_rtx (XFmode);
15998 emit_insn (gen_absxf2 (tmp, op1));
15999 emit_insn (gen_cmpxf (tmp,
16000 CONST_DOUBLE_FROM_REAL_VALUE (
16001 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
16003 emit_jump_insn (gen_bge (label1));
16005 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
16006 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
16007 emit_jump (label2);
16009 emit_label (label1);
16010 emit_move_insn (tmp, CONST1_RTX (XFmode));
16011 emit_insn (gen_addxf3 (tmp, op1, tmp));
16012 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
16013 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
16015 emit_label (label2);
16018 #include "gt-i386.h"