1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
/* Default stack-probe limit when the target configuration headers did
   not already provide one.  (Restored the missing "#endif" that was
   dropped from this listing, and removed the stray listing line
   numbers.)  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   QI/HI/SI/DImode map to slots 0-3; anything else falls through to
   slot 4 -- the cost arrays below all have five entries.
   NOTE(review): the final "': 4)'" alternative was truncated in this
   listing and has been restored; confirm against upstream.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
64 /* Processor costs (relative to an add) */
/* Cost table used when tuning for code size: nearly every operation
   gets a small flat cost.
   NOTE(review): this initializer is truncated in this listing -- the
   "large insn"/MOVE_RATIO entries after movzx, the branch-cost entry
   after the prefetch fields, and the closing "};" are missing (the
   embedded upstream line numbers jump).  Restore from upstream before
   compiling.  */
66 struct processor_costs size_cost = { /* costs for tuning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
86 {2, 2, 2}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
109 /* Processor costs (relative to an add) */
/* NOTE(review): truncated in this listing -- the MOVE_RATIO entry
   (after "large" insn), the branch-cost entry (after the prefetch
   fields) and the closing "};" are missing (upstream line numbers
   jump).  */
111 struct processor_costs i386_cost = { /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {8, 8, 8}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
/* NOTE(review): truncated in this listing -- the MOVE_RATIO entry, the
   branch-cost entry and the closing "};" are missing (upstream line
   numbers jump).  */
155 struct processor_costs i486_cost = { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
/* NOTE(review): truncated in this listing -- the MOVE_RATIO entry, the
   branch-cost entry and the closing "};" are missing (upstream line
   numbers jump).  */
199 struct processor_costs pentium_cost = {
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {4, 4, 6}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
/* NOTE(review): truncated in this listing -- the MOVE_RATIO entry, the
   branch-cost entry and the closing "};" are missing (upstream line
   numbers jump).  */
243 struct processor_costs pentiumpro_cost = {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
258 Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
263 {4, 4, 6}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
/* NOTE(review): truncated in this listing -- the MOVE_RATIO entry, the
   branch-cost entry and the closing "};" are missing (upstream line
   numbers jump).  */
287 struct processor_costs k6_cost = {
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 4}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
/* NOTE(review): truncated in this listing -- the MOVE_RATIO entry, the
   branch-cost entry and the closing "};" are missing (upstream line
   numbers jump).  */
331 struct processor_costs athlon_cost = {
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
345 in QImode, HImode and SImode.
346 Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
350 in SFmode, DFmode and XFmode */
351 {6, 6, 8}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
354 in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
359 in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
361 in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
/* NOTE(review): truncated in this listing -- the MOVE_RATIO entry, the
   branch-cost entry and the closing "};" are missing (upstream line
   numbers jump).  */
375 struct processor_costs k8_cost = {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {6, 6, 8}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
/* NOTE(review): truncated in this listing -- the MOVE_RATIO entry, the
   branch-cost entry and the closing "};" are missing (upstream line
   numbers jump).  */
419 struct processor_costs pentium4_cost = {
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
439 {4, 4, 6}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
/* NOTE(review): truncated in this listing -- the MOVE_RATIO entry, the
   branch-cost entry and the closing "};" are missing (upstream line
   numbers jump).  */
463 struct processor_costs nocona_cost = {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
483 {4, 4, 4}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
/* Cost table currently in effect; starts as the Pentium table
   (presumably re-pointed during option processing -- not visible in
   this chunk; confirm against override_options).  */
506 const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  Each m_* constant is the
   bit corresponding to one enum processor_type value, so the x86_*
   tuning words below can describe sets of processors.  (Stray listing
   line numbers removed; content otherwise unchanged.)  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
520 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
521 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
522 const int x86_zero_extend_with_and = m_486 | m_PENT;
523 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
524 const int x86_double_with_add = ~m_386;
525 const int x86_use_bit_test = m_386;
526 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
527 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
528 const int x86_3dnow_a = m_ATHLON_K8;
529 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
530 const int x86_branch_hints = m_PENT4 | m_NOCONA;
531 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
532 const int x86_partial_reg_stall = m_PPRO;
533 const int x86_use_loop = m_K6;
534 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
535 const int x86_use_mov0 = m_K6;
536 const int x86_use_cltd = ~(m_PENT | m_K6);
537 const int x86_read_modify_write = ~m_PENT;
538 const int x86_read_modify = ~(m_PENT | m_PPRO);
539 const int x86_split_long_moves = m_PPRO;
540 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
541 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
542 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
543 const int x86_qimode_math = ~(0);
544 const int x86_promote_qi_regs = 0;
545 const int x86_himode_math = ~(m_PPRO);
546 const int x86_promote_hi_regs = m_PPRO;
547 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
548 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
549 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
550 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
551 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
552 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
553 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
554 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
555 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
556 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
557 const int x86_decompose_lea = m_PENT4 | m_NOCONA;
558 const int x86_shift1 = ~m_486;
559 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
560 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
561 /* Set for machines where the type and dependencies are resolved on SSE register
562 parts instead of whole registers, so we may maintain just lower part of
563 scalar values in proper format leaving the upper part undefined. */
564 const int x86_sse_partial_regs = m_ATHLON_K8;
565 /* Athlon optimizes partial-register FPS special case, thus avoiding the
566 need for extra instructions beforehand */
567 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
568 const int x86_sse_typeless_stores = m_ATHLON_K8;
569 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
570 const int x86_use_ffreep = m_ATHLON_K8;
571 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
572 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
573 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
574 /* Some CPU cores are not able to predict more than 4 branch instructions in
575 the 16 byte window. */
576 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  (NOTE(review): this comment's closing line was lost
   in the listing, which left the comment unterminated; restored.
   Stray listing line numbers removed.)  */
#define FAST_PROLOGUE_INSN_COUNT 20
583 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
584 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
585 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
586 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
588 /* Array of the smallest class containing reg number REGNO, indexed by
589 REGNO. Used by REGNO_REG_CLASS in i386.h. */
/* NOTE(review): this initializer is incomplete in this listing -- the
   opening "{", several comment/entry rows and the closing "};" are
   missing (the embedded upstream line numbers jump).  Restore from
   upstream before compiling.  */
591 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
594 AREG, DREG, CREG, BREG,
596 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
598 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
599 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
602 /* flags, fpsr, dirflag, frame */
603 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
604 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
606 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
608 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
609 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
610 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
614 /* The "default" register map used in 32bit mode. */
/* NOTE(review): the opening "{" and closing "};" of this initializer
   are missing from this listing (upstream line numbers jump).  */
616 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
618 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
619 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
620 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
621 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
622 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
623 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
624 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
627 static int const x86_64_int_parameter_registers[6] =
629 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
630 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
633 static int const x86_64_int_return_registers[4] =
635 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
638 /* The "default" register map used in 64bit mode. */
/* NOTE(review): the opening "{" and closing "};" of this initializer
   are missing from this listing (upstream line numbers jump).  */
639 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
641 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
642 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
643 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
644 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
645 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
646 8,9,10,11,12,13,14,15, /* extended integer registers */
647 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
650 /* Define the register numbers to be used in Dwarf debugging information.
651 The SVR4 reference port C compiler uses the following register numbers
652 in its Dwarf output code:
653 0 for %eax (gcc regno = 0)
654 1 for %ecx (gcc regno = 2)
655 2 for %edx (gcc regno = 1)
656 3 for %ebx (gcc regno = 3)
657 4 for %esp (gcc regno = 7)
658 5 for %ebp (gcc regno = 6)
659 6 for %esi (gcc regno = 4)
660 7 for %edi (gcc regno = 5)
661 The following three DWARF register numbers are never generated by
662 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
663 believes these numbers have these meanings.
664 8 for %eip (no gcc equivalent)
665 9 for %eflags (gcc regno = 17)
666 10 for %trapno (no gcc equivalent)
667 It is not at all clear how we should number the FP stack registers
668 for the x86 architecture. If the version of SDB on x86/svr4 were
669 a bit less brain dead with respect to floating-point then we would
670 have a precedent to follow with respect to DWARF register numbers
671 for x86 FP registers, but the SDB on x86/svr4 is so completely
672 broken with respect to FP registers that it is hardly worth thinking
673 of it as something to strive for compatibility with.
674 The version of x86/svr4 SDB I have at the moment does (partially)
675 seem to believe that DWARF register number 11 is associated with
676 the x86 register %st(0), but that's about all. Higher DWARF
677 register numbers don't seem to be associated with anything in
678 particular, and even for DWARF regno 11, SDB only seems to under-
679 stand that it should say that a variable lives in %st(0) (when
680 asked via an `=' command) if we said it was in DWARF regno 11,
681 but SDB still prints garbage when asked for the value of the
682 variable in question (via a `/' command).
683 (Also note that the labels SDB prints for various FP stack regs
684 when doing an `x' command are all wrong.)
685 Note that these problems generally don't affect the native SVR4
686 C compiler because it doesn't allow the use of -O with -g and
687 because when it is *not* optimizing, it allocates a memory
688 location for each floating-point variable, and the memory
689 location is what gets described in the DWARF AT_location
690 attribute for the variable in question.
691 Regardless of the severe mental illness of the x86/svr4 SDB, we
692 do something sensible here and we use the following DWARF
693 register numbers. Note that these are all stack-top-relative
695 11 for %st(0) (gcc regno = 8)
696 12 for %st(1) (gcc regno = 9)
697 13 for %st(2) (gcc regno = 10)
698 14 for %st(3) (gcc regno = 11)
699 15 for %st(4) (gcc regno = 12)
700 16 for %st(5) (gcc regno = 13)
701 17 for %st(6) (gcc regno = 14)
702 18 for %st(7) (gcc regno = 15)
   (NOTE(review): the terminator of this comment was dropped from the
   listing and has been restored here.)  */
/* NOTE(review): the initializer's opening "{" and closing "};" are
   missing from this listing (upstream line numbers jump).  */
704 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
706 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
707 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
708 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
709 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
710 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
711 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
712 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
715 /* Test and compare insns in i386.md store the information needed to
716 generate branch and scc insns here. */
718 rtx ix86_compare_op0 = NULL_RTX;
719 rtx ix86_compare_op1 = NULL_RTX;
/* Number of stack-local scratch slots tracked per function.  (Stray
   listing line numbers removed.)  */
#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
725 /* Define the structure for the machine field in struct function. */
/* NOTE(review): struct stack_local_entry is gutted in this listing --
   the body's opening "{", its other members and the closing "};" are
   missing (upstream line numbers jump from 727 to 732).  */
727 struct stack_local_entry GTY(())
732 struct stack_local_entry *next;
735 /* Structure describing stack frame layout.
736 Stack grows downward:
742 saved frame pointer if frame_pointer_needed
743 <- HARD_FRAME_POINTER
749 > to_allocate <- FRAME_POINTER
   (NOTE(review): most of this layout diagram and the comment's
   terminator were lost in the listing; terminator restored.)  */
/* NOTE(review): the "struct ix86_frame" header, several of its fields
   and its closing "};" are also missing from this listing.  */
761 int outgoing_arguments_size;
764 HOST_WIDE_INT to_allocate;
765 /* The offsets relative to ARG_POINTER. */
766 HOST_WIDE_INT frame_pointer_offset;
767 HOST_WIDE_INT hard_frame_pointer_offset;
768 HOST_WIDE_INT stack_pointer_offset;
770 /* When save_regs_using_mov is set, emit prologue using
771 move instead of push instructions. */
772 bool save_regs_using_mov;
775 /* Used to enable/disable debugging features. */
776 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
777 /* Code model option as passed by user. */
778 const char *ix86_cmodel_string;
780 enum cmodel ix86_cmodel;
782 const char *ix86_asm_string;
783 enum asm_dialect ix86_asm_dialect = ASM_ATT;
785 const char *ix86_tls_dialect_string;
786 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
788 /* Which unit we are generating floating point math for. */
789 enum fpmath_unit ix86_fpmath;
791 /* Which cpu are we scheduling for. */
792 enum processor_type ix86_tune;
793 /* Which instruction set architecture to use. */
794 enum processor_type ix86_arch;
796 /* Strings to hold which cpu and instruction set architecture to use. */
797 const char *ix86_tune_string; /* for -mtune=<xxx> */
798 const char *ix86_arch_string; /* for -march=<xxx> */
799 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
801 /* # of registers to use to pass arguments. */
802 const char *ix86_regparm_string;
804 /* true if sse prefetch instruction is not NOOP. */
805 int x86_prefetch_sse;
807 /* ix86_regparm_string as a number */
810 /* Alignment to use for loops and jumps: */
812 /* Power of two alignment for loops. */
813 const char *ix86_align_loops_string;
815 /* Power of two alignment for non-loop jumps. */
816 const char *ix86_align_jumps_string;
818 /* Power of two alignment for stack boundary in bytes. */
819 const char *ix86_preferred_stack_boundary_string;
821 /* Preferred alignment for stack boundary in bits. */
822 int ix86_preferred_stack_boundary;
824 /* Values 1-5: see jump.c */
825 int ix86_branch_cost;
826 const char *ix86_branch_cost_string;
828 /* Power of two alignment for functions. */
829 const char *ix86_align_funcs_string;
831 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
832 static char internal_label_prefix[16];
833 static int internal_label_prefix_len;
/* Forward declarations of the static helpers defined later in this
   file.
   NOTE(review): several prototypes in this run are cut mid-signature
   (lines ending in "," whose continuation lines were lost), and the
   "struct ix86_address" definition has lost its header line and
   closing brace (upstream line numbers jump).  Restore from upstream
   before compiling.  */
835 static int local_symbolic_operand (rtx, enum machine_mode);
836 static int tls_symbolic_operand_1 (rtx, enum tls_model);
837 static void output_pic_addr_const (FILE *, rtx, int);
838 static void put_condition_code (enum rtx_code, enum machine_mode,
840 static const char *get_some_local_dynamic_name (void);
841 static int get_some_local_dynamic_name_1 (rtx *, void *);
842 static rtx maybe_get_pool_constant (rtx);
843 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
844 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
846 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
847 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
849 static rtx get_thread_pointer (int);
850 static rtx legitimize_tls_address (rtx, enum tls_model, int);
851 static void get_pc_thunk_name (char [32], unsigned int);
852 static rtx gen_push (rtx);
853 static int memory_address_length (rtx addr);
854 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
855 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
856 static struct machine_function * ix86_init_machine_status (void);
857 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
858 static int ix86_nsaved_regs (void);
859 static void ix86_emit_save_regs (void);
860 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
861 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
862 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
863 static HOST_WIDE_INT ix86_GOT_alias_set (void);
864 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
865 static rtx ix86_expand_aligntest (rtx, int);
866 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
867 static int ix86_issue_rate (void);
868 static int ix86_adjust_cost (rtx, rtx, rtx, int);
869 static int ia32_multipass_dfa_lookahead (void);
870 static void ix86_init_mmx_sse_builtins (void);
871 static rtx x86_this_parameter (tree);
872 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
873 HOST_WIDE_INT, tree);
874 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
875 static void x86_file_start (void);
876 static void ix86_reorg (void);
877 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
878 static tree ix86_build_builtin_va_list (void);
879 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
881 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
/* NOTE(review): the next two lines are members of struct ix86_address,
   whose "struct ix86_address {" header was lost from this listing.  */
885 rtx base, index, disp;
887 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
890 static int ix86_decompose_address (rtx, struct ix86_address *);
891 static int ix86_address_cost (rtx);
892 static bool ix86_cannot_force_const_mem (rtx);
893 static rtx ix86_delegitimize_address (rtx);
895 struct builtin_description;
896 static rtx ix86_expand_sse_comi (const struct builtin_description *,
898 static rtx ix86_expand_sse_compare (const struct builtin_description *,
900 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
901 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
902 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
903 static rtx ix86_expand_store_builtin (enum insn_code, tree);
904 static rtx safe_vector_operand (rtx, enum machine_mode);
905 static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
906 static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
907 enum rtx_code *, enum rtx_code *);
908 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
909 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
910 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
911 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
912 static int ix86_fp_comparison_cost (enum rtx_code code);
913 static unsigned int ix86_select_alt_pic_regnum (void);
914 static int ix86_save_reg (unsigned int, int);
915 static void ix86_compute_frame_layout (struct ix86_frame *);
916 static int ix86_comp_type_attributes (tree, tree);
917 static int ix86_function_regparm (tree, tree);
918 const struct attribute_spec ix86_attribute_table[];
919 static bool ix86_function_ok_for_sibcall (tree, tree);
920 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
921 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
922 static int ix86_value_regno (enum machine_mode);
923 static bool contains_128bit_aligned_vector_p (tree);
924 static rtx ix86_struct_value_rtx (tree, int);
925 static bool ix86_ms_bitfield_layout_p (tree);
926 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
927 static int extended_reg_mentioned_1 (rtx *, void *);
928 static bool ix86_rtx_costs (rtx, int, int, int *);
929 static int min_insn_size (rtx);
930 static tree ix86_md_asm_clobbers (tree clobbers);
932 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
933 static void ix86_svr3_asm_out_constructor (rtx, int);
936 /* Register class used for passing given 64bit part of the argument.
937 These represent classes as documented by the PS ABI, with the exception
938 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
939 use SF or DFmode move instead of DImode to avoid reformatting penalties.
941 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
942 whenever possible (upper half does contain padding).
944 enum x86_64_reg_class
947 X86_64_INTEGER_CLASS,
948 X86_64_INTEGERSI_CLASS,
957 static const char * const x86_64_reg_class_name[] =
958 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
960 #define MAX_CLASSES 4
961 static int classify_argument (enum machine_mode, tree,
962 enum x86_64_reg_class [MAX_CLASSES], int);
963 static int examine_argument (enum machine_mode, tree, int, int *, int *);
964 static rtx construct_container (enum machine_mode, tree, int, int, int,
966 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
967 enum x86_64_reg_class);
969 /* Table of constants used by fldpi, fldln2, etc.... */
970 static REAL_VALUE_TYPE ext_80387_constants_table [5];
971 static bool ext_80387_constants_init = 0;
972 static void init_ext_80387_constants (void);
974 /* Initialize the GCC target structure. */
/* Each pair below overrides a default target hook with the i386
   implementation before TARGET_INITIALIZER builds `targetm'.  */
975 #undef TARGET_ATTRIBUTE_TABLE
976 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
977 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
978 # undef TARGET_MERGE_DECL_ATTRIBUTES
979 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
982 #undef TARGET_COMP_TYPE_ATTRIBUTES
983 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
985 #undef TARGET_INIT_BUILTINS
986 #define TARGET_INIT_BUILTINS ix86_init_builtins
988 #undef TARGET_EXPAND_BUILTIN
989 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
991 #undef TARGET_ASM_FUNCTION_EPILOGUE
992 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
994 #undef TARGET_ASM_OPEN_PAREN
995 #define TARGET_ASM_OPEN_PAREN ""
996 #undef TARGET_ASM_CLOSE_PAREN
997 #define TARGET_ASM_CLOSE_PAREN ""
998 /* Aligned/unaligned data emission directives for HI/SI/DI modes. */
999 #undef TARGET_ASM_ALIGNED_HI_OP
1000 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1001 #undef TARGET_ASM_ALIGNED_SI_OP
1002 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1004 #undef TARGET_ASM_ALIGNED_DI_OP
1005 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1008 #undef TARGET_ASM_UNALIGNED_HI_OP
1009 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1010 #undef TARGET_ASM_UNALIGNED_SI_OP
1011 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1012 #undef TARGET_ASM_UNALIGNED_DI_OP
1013 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1014 /* Scheduler hooks. */
1015 #undef TARGET_SCHED_ADJUST_COST
1016 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1017 #undef TARGET_SCHED_ISSUE_RATE
1018 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1019 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
1020 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE hook_int_void_1
1021 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1022 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1023 ia32_multipass_dfa_lookahead
1025 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1026 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1029 #undef TARGET_HAVE_TLS
1030 #define TARGET_HAVE_TLS true
1032 #undef TARGET_CANNOT_FORCE_CONST_MEM
1033 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1035 #undef TARGET_DELEGITIMIZE_ADDRESS
1036 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1038 #undef TARGET_MS_BITFIELD_LAYOUT_P
1039 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1041 #undef TARGET_ASM_OUTPUT_MI_THUNK
1042 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1043 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1044 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1046 #undef TARGET_ASM_FILE_START
1047 #define TARGET_ASM_FILE_START x86_file_start
1049 #undef TARGET_RTX_COSTS
1050 #define TARGET_RTX_COSTS ix86_rtx_costs
1051 #undef TARGET_ADDRESS_COST
1052 #define TARGET_ADDRESS_COST ix86_address_cost
1054 #undef TARGET_FIXED_CONDITION_CODE_REGS
1055 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1056 #undef TARGET_CC_MODES_COMPATIBLE
1057 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1059 #undef TARGET_MACHINE_DEPENDENT_REORG
1060 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1062 #undef TARGET_BUILD_BUILTIN_VA_LIST
1063 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1065 #undef TARGET_MD_ASM_CLOBBERS
1066 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1068 #undef TARGET_PROMOTE_PROTOTYPES
1069 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1070 #undef TARGET_STRUCT_VALUE_RTX
1071 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1072 #undef TARGET_SETUP_INCOMING_VARARGS
1073 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1075 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1076 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1077 /* The single global target-hook vector, built from the macros above. */
1078 struct gcc_target targetm = TARGET_INITIALIZER;
1081 /* The svr4 ABI for the i386 says that records and unions are returned
1083 #ifndef DEFAULT_PCC_STRUCT_RETURN
1084 #define DEFAULT_PCC_STRUCT_RETURN 1
1087 /* Sometimes certain combinations of command options do not make
1088 sense on a particular target machine. You can define a macro
1089 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1090 defined, is executed once just after all the command options have
1093 Don't use this macro to turn on various extra optimizations for
1094 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1097 override_options (void)
1100 /* Comes from final.c -- no real reason to change it. */
1101 #define MAX_CODE_ALIGN 16
1105 const struct processor_costs *cost; /* Processor costs */
1106 const int target_enable; /* Target flags to enable. */
1107 const int target_disable; /* Target flags to disable. */
1108 const int align_loop; /* Default alignments. */
1109 const int align_loop_max_skip;
1110 const int align_jump;
1111 const int align_jump_max_skip;
1112 const int align_func;
1114 const processor_target_table[PROCESSOR_max] =
1116 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1117 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1118 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1119 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1120 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1121 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1122 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1123 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1124 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
1127 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1130 const char *const name; /* processor name or nickname. */
1131 const enum processor_type processor;
1132 const enum pta_flags
1138 PTA_PREFETCH_SSE = 16,
1144 const processor_alias_table[] =
1146 {"i386", PROCESSOR_I386, 0},
1147 {"i486", PROCESSOR_I486, 0},
1148 {"i586", PROCESSOR_PENTIUM, 0},
1149 {"pentium", PROCESSOR_PENTIUM, 0},
1150 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1151 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1152 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1153 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1154 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1155 {"i686", PROCESSOR_PENTIUMPRO, 0},
1156 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1157 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1158 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1159 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1160 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1161 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1162 | PTA_MMX | PTA_PREFETCH_SSE},
1163 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1164 | PTA_MMX | PTA_PREFETCH_SSE},
1165 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1166 | PTA_MMX | PTA_PREFETCH_SSE},
1167 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1168 | PTA_MMX | PTA_PREFETCH_SSE},
1169 {"k6", PROCESSOR_K6, PTA_MMX},
1170 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1171 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1172 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1174 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1175 | PTA_3DNOW | PTA_3DNOW_A},
1176 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1177 | PTA_3DNOW_A | PTA_SSE},
1178 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1179 | PTA_3DNOW_A | PTA_SSE},
1180 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1181 | PTA_3DNOW_A | PTA_SSE},
1182 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1183 | PTA_SSE | PTA_SSE2 },
1184 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1185 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1186 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1187 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1188 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1189 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1190 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1191 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1194 int const pta_size = ARRAY_SIZE (processor_alias_table);
1196 /* Set the default values for switches whose default depends on TARGET_64BIT
1197 in case they weren't overwritten by command line options. */
1200 if (flag_omit_frame_pointer == 2)
1201 flag_omit_frame_pointer = 1;
1202 if (flag_asynchronous_unwind_tables == 2)
1203 flag_asynchronous_unwind_tables = 1;
1204 if (flag_pcc_struct_return == 2)
1205 flag_pcc_struct_return = 0;
1209 if (flag_omit_frame_pointer == 2)
1210 flag_omit_frame_pointer = 0;
1211 if (flag_asynchronous_unwind_tables == 2)
1212 flag_asynchronous_unwind_tables = 0;
1213 if (flag_pcc_struct_return == 2)
1214 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1217 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1218 SUBTARGET_OVERRIDE_OPTIONS;
1221 if (!ix86_tune_string && ix86_arch_string)
1222 ix86_tune_string = ix86_arch_string;
1223 if (!ix86_tune_string)
1224 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1225 if (!ix86_arch_string)
1226 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1228 if (ix86_cmodel_string != 0)
1230 if (!strcmp (ix86_cmodel_string, "small"))
1231 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1233 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1234 else if (!strcmp (ix86_cmodel_string, "32"))
1235 ix86_cmodel = CM_32;
1236 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1237 ix86_cmodel = CM_KERNEL;
1238 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1239 ix86_cmodel = CM_MEDIUM;
1240 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1241 ix86_cmodel = CM_LARGE;
1243 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1247 ix86_cmodel = CM_32;
1249 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1251 if (ix86_asm_string != 0)
1253 if (!strcmp (ix86_asm_string, "intel"))
1254 ix86_asm_dialect = ASM_INTEL;
1255 else if (!strcmp (ix86_asm_string, "att"))
1256 ix86_asm_dialect = ASM_ATT;
1258 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1260 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1261 error ("code model `%s' not supported in the %s bit mode",
1262 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1263 if (ix86_cmodel == CM_LARGE)
1264 sorry ("code model `large' not supported yet");
1265 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1266 sorry ("%i-bit mode not compiled in",
1267 (target_flags & MASK_64BIT) ? 64 : 32);
1269 for (i = 0; i < pta_size; i++)
1270 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1272 ix86_arch = processor_alias_table[i].processor;
1273 /* Default cpu tuning to the architecture. */
1274 ix86_tune = ix86_arch;
1275 if (processor_alias_table[i].flags & PTA_MMX
1276 && !(target_flags_explicit & MASK_MMX))
1277 target_flags |= MASK_MMX;
1278 if (processor_alias_table[i].flags & PTA_3DNOW
1279 && !(target_flags_explicit & MASK_3DNOW))
1280 target_flags |= MASK_3DNOW;
1281 if (processor_alias_table[i].flags & PTA_3DNOW_A
1282 && !(target_flags_explicit & MASK_3DNOW_A))
1283 target_flags |= MASK_3DNOW_A;
1284 if (processor_alias_table[i].flags & PTA_SSE
1285 && !(target_flags_explicit & MASK_SSE))
1286 target_flags |= MASK_SSE;
1287 if (processor_alias_table[i].flags & PTA_SSE2
1288 && !(target_flags_explicit & MASK_SSE2))
1289 target_flags |= MASK_SSE2;
1290 if (processor_alias_table[i].flags & PTA_SSE3
1291 && !(target_flags_explicit & MASK_SSE3))
1292 target_flags |= MASK_SSE3;
1293 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1294 x86_prefetch_sse = true;
1295 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1296 error ("CPU you selected does not support x86-64 instruction set");
1301 error ("bad value (%s) for -march= switch", ix86_arch_string);
1303 for (i = 0; i < pta_size; i++)
1304 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1306 ix86_tune = processor_alias_table[i].processor;
1307 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1308 error ("CPU you selected does not support x86-64 instruction set");
1311 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1312 x86_prefetch_sse = true;
1314 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1317 ix86_cost = &size_cost;
1319 ix86_cost = processor_target_table[ix86_tune].cost;
1320 target_flags |= processor_target_table[ix86_tune].target_enable;
1321 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1323 /* Arrange to set up i386_stack_locals for all functions. */
1324 init_machine_status = ix86_init_machine_status;
1326 /* Validate -mregparm= value. */
1327 if (ix86_regparm_string)
1329 i = atoi (ix86_regparm_string);
1330 if (i < 0 || i > REGPARM_MAX)
1331 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1337 ix86_regparm = REGPARM_MAX;
1339 /* If the user has provided any of the -malign-* options,
1340 warn and use that value only if -falign-* is not set.
1341 Remove this code in GCC 3.2 or later. */
1342 if (ix86_align_loops_string)
1344 warning ("-malign-loops is obsolete, use -falign-loops");
1345 if (align_loops == 0)
1347 i = atoi (ix86_align_loops_string);
1348 if (i < 0 || i > MAX_CODE_ALIGN)
1349 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1351 align_loops = 1 << i;
1355 if (ix86_align_jumps_string)
1357 warning ("-malign-jumps is obsolete, use -falign-jumps");
1358 if (align_jumps == 0)
1360 i = atoi (ix86_align_jumps_string);
1361 if (i < 0 || i > MAX_CODE_ALIGN)
1362 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1364 align_jumps = 1 << i;
1368 if (ix86_align_funcs_string)
1370 warning ("-malign-functions is obsolete, use -falign-functions");
1371 if (align_functions == 0)
1373 i = atoi (ix86_align_funcs_string);
1374 if (i < 0 || i > MAX_CODE_ALIGN)
1375 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1377 align_functions = 1 << i;
1381 /* Default align_* from the processor table. */
1382 if (align_loops == 0)
1384 align_loops = processor_target_table[ix86_tune].align_loop;
1385 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1387 if (align_jumps == 0)
1389 align_jumps = processor_target_table[ix86_tune].align_jump;
1390 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1392 if (align_functions == 0)
1394 align_functions = processor_target_table[ix86_tune].align_func;
1397 /* Validate -mpreferred-stack-boundary= value, or provide default.
1398 The default of 128 bits is for Pentium III's SSE __m128, but we
1399 don't want additional code to keep the stack aligned when
1400 optimizing for code size. */
1401 ix86_preferred_stack_boundary = (optimize_size
1402 ? TARGET_64BIT ? 128 : 32
1404 if (ix86_preferred_stack_boundary_string)
1406 i = atoi (ix86_preferred_stack_boundary_string);
1407 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1408 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1409 TARGET_64BIT ? 4 : 2);
1411 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1414 /* Validate -mbranch-cost= value, or provide default. */
1415 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1416 if (ix86_branch_cost_string)
1418 i = atoi (ix86_branch_cost_string);
1420 error ("-mbranch-cost=%d is not between 0 and 5", i);
1422 ix86_branch_cost = i;
1425 if (ix86_tls_dialect_string)
1427 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1428 ix86_tls_dialect = TLS_DIALECT_GNU;
1429 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1430 ix86_tls_dialect = TLS_DIALECT_SUN;
1432 error ("bad value (%s) for -mtls-dialect= switch",
1433 ix86_tls_dialect_string);
1436 /* Keep nonleaf frame pointers. */
1437 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1438 flag_omit_frame_pointer = 1;
1440 /* If we're doing fast math, we don't care about comparison order
1441 wrt NaNs. This lets us use a shorter comparison sequence. */
1442 if (flag_unsafe_math_optimizations)
1443 target_flags &= ~MASK_IEEE_FP;
1445 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1446 since the insns won't need emulation. */
1447 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1448 target_flags &= ~MASK_NO_FANCY_MATH_387;
1450 /* Turn on SSE2 builtins for -msse3. */
1452 target_flags |= MASK_SSE2;
1454 /* Turn on SSE builtins for -msse2. */
1456 target_flags |= MASK_SSE;
1460 if (TARGET_ALIGN_DOUBLE)
1461 error ("-malign-double makes no sense in the 64bit mode");
1463 error ("-mrtd calling convention not supported in the 64bit mode");
1464 /* Enable by default the SSE and MMX builtins. */
1465 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1466 ix86_fpmath = FPMATH_SSE;
1470 ix86_fpmath = FPMATH_387;
1471 /* i386 ABI does not specify red zone. It still makes sense to use it
1472 when programmer takes care to stack from being destroyed. */
1473 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1474 target_flags |= MASK_NO_RED_ZONE;
1477 if (ix86_fpmath_string != 0)
1479 if (! strcmp (ix86_fpmath_string, "387"))
1480 ix86_fpmath = FPMATH_387;
1481 else if (! strcmp (ix86_fpmath_string, "sse"))
1485 warning ("SSE instruction set disabled, using 387 arithmetics");
1486 ix86_fpmath = FPMATH_387;
1489 ix86_fpmath = FPMATH_SSE;
1491 else if (! strcmp (ix86_fpmath_string, "387,sse")
1492 || ! strcmp (ix86_fpmath_string, "sse,387"))
1496 warning ("SSE instruction set disabled, using 387 arithmetics");
1497 ix86_fpmath = FPMATH_387;
1499 else if (!TARGET_80387)
1501 warning ("387 instruction set disabled, using SSE arithmetics");
1502 ix86_fpmath = FPMATH_SSE;
1505 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1508 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1511 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1515 target_flags |= MASK_MMX;
1516 x86_prefetch_sse = true;
1519 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1522 target_flags |= MASK_MMX;
1523 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1524 extensions it adds. */
1525 if (x86_3dnow_a & (1 << ix86_arch))
1526 target_flags |= MASK_3DNOW_A;
1528 if ((x86_accumulate_outgoing_args & TUNEMASK)
1529 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1531 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1533 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1536 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1537 p = strchr (internal_label_prefix, 'X');
1538 internal_label_prefix_len = p - internal_label_prefix;
/* Implementation of OPTIMIZATION_OPTIONS: set i386 defaults that depend
   on the optimization LEVEL, before target options are parsed.  */
1544 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1546 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1547 make the problem with not enough registers even worse. */
1548 #ifdef INSN_SCHEDULING
1550 flag_schedule_insns = 0;
1553 /* The default values of these switches depend on TARGET_64BIT,
1554 which is not known at this moment. Mark these values with 2 and
1555 let the user override them. In case there is no command line option
1556 specifying them, we will set the defaults in override_options. */
1558 flag_omit_frame_pointer = 2;
1559 flag_pcc_struct_return = 2;
1560 flag_asynchronous_unwind_tables = 2;
1563 /* Table of valid machine attributes. */
1564 const struct attribute_spec ix86_attribute_table[] =
1566 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1567 /* Stdcall attribute says callee is responsible for popping arguments
1568 if they are not variable. */
1569 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1570 /* Fastcall attribute says callee is responsible for popping arguments
1571 if they are not variable. */
1572 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1573 /* Cdecl attribute says the callee is a normal C declaration */
1574 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1575 /* Regparm attribute specifies how many integer arguments are to be
1576 passed in registers. */
1577 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1578 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1579 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1580 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1581 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1583 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1584 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
/* Sentinel entry terminating the table. */
1585 { NULL, 0, 0, false, false, false, NULL }
1588 /* Decide whether we can make a sibling call to a function. DECL is the
1589 declaration of the function being targeted by the call and EXP is the
1590 CALL_EXPR representing the call.  Returns false when PIC/PLT, x87
1591 return-register, or register-passing constraints forbid the sibcall. */
1593 ix86_function_ok_for_sibcall (tree decl, tree exp)
1595 /* If we are generating position-independent code, we cannot sibcall
1596 optimize any indirect call, or a direct call to a global function,
1597 as the PLT requires %ebx be live. */
1598 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1601 /* If we are returning floats on the 80387 register stack, we cannot
1602 make a sibcall from a function that doesn't return a float to a
1603 function that does or, conversely, from a function that does return
1604 a float to a function that doesn't; the necessary stack adjustment
1605 would not be executed. */
1606 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1607 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1610 /* If this call is indirect, we'll need to be able to use a call-clobbered
1611 register for the address of the target function. Make sure that all
1612 such registers are not used for passing parameters. */
1613 if (!decl && !TARGET_64BIT)
1617 /* We're looking at the CALL_EXPR, we need the type of the function. */
1618 type = TREE_OPERAND (exp, 0); /* pointer expression */
1619 type = TREE_TYPE (type); /* pointer type */
1620 type = TREE_TYPE (type); /* function type */
/* regparm >= 3 would leave no call-clobbered register free to hold
   the target address of an indirect sibcall. */
1622 if (ix86_function_regparm (type, NULL) >= 3)
1624 /* ??? Need to count the actual number of registers to be used,
1625 not the possible number of registers. Fix later. */
1630 /* Otherwise okay. That also includes certain types of indirect calls. */
1634 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1635 arguments as in struct attribute_spec.handler.  Rejects the attribute
1636 (via *no_add_attrs) on non-function nodes and diagnoses incompatible
   combinations of fastcall/stdcall/regparm. */
1637 ix86_handle_cdecl_attribute (tree *node, tree name,
1638 tree args ATTRIBUTE_UNUSED,
1639 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1641 if (TREE_CODE (*node) != FUNCTION_TYPE
1642 && TREE_CODE (*node) != METHOD_TYPE
1643 && TREE_CODE (*node) != FIELD_DECL
1644 && TREE_CODE (*node) != TYPE_DECL)
1646 warning ("`%s' attribute only applies to functions",
1647 IDENTIFIER_POINTER (name));
1648 *no_add_attrs = true;
/* fastcall conflicts with both stdcall and regparm. */
1652 if (is_attribute_p ("fastcall", name))
1654 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1656 error ("fastcall and stdcall attributes are not compatible");
1658 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1660 error ("fastcall and regparm attributes are not compatible");
1663 else if (is_attribute_p ("stdcall", name))
1665 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1667 error ("fastcall and stdcall attributes are not compatible");
/* NOTE(review): this warning path handles the TARGET_64BIT case per the
   surrounding (elided) conditional -- confirm against full file. */
1674 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1675 *no_add_attrs = true;
1681 /* Handle a "regparm" attribute;
1682 arguments as in struct attribute_spec.handler.  Validates that the
   argument is an integer constant no larger than REGPARM_MAX and that
   the type is a function type without a conflicting fastcall attribute. */
1684 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1685 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1687 if (TREE_CODE (*node) != FUNCTION_TYPE
1688 && TREE_CODE (*node) != METHOD_TYPE
1689 && TREE_CODE (*node) != FIELD_DECL
1690 && TREE_CODE (*node) != TYPE_DECL)
1692 warning ("`%s' attribute only applies to functions",
1693 IDENTIFIER_POINTER (name));
1694 *no_add_attrs = true;
/* Validate the attribute's single argument. */
1700 cst = TREE_VALUE (args);
1701 if (TREE_CODE (cst) != INTEGER_CST)
1703 warning ("`%s' attribute requires an integer constant argument",
1704 IDENTIFIER_POINTER (name));
1705 *no_add_attrs = true;
1707 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1709 warning ("argument to `%s' attribute larger than %d",
1710 IDENTIFIER_POINTER (name), REGPARM_MAX);
1711 *no_add_attrs = true;
1714 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1716 error ("fastcall and regparm attributes are not compatible");
1723 /* Return 0 if the attributes for two types are incompatible, 1 if they
1724 are compatible, and 2 if they are nearly compatible (which causes a
1725 warning to be generated).  Compatibility here means matching calling
   conventions: fastcall, cdecl/stdcall, and regparm counts. */
1728 ix86_comp_type_attributes (tree type1, tree type2)
1730 /* Check for mismatch of non-default calling convention.  Under -mrtd
   the default is stdcall, so the non-default one to look for flips. */
1731 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1733 if (TREE_CODE (type1) != FUNCTION_TYPE)
1736 /* Check for mismatched fastcall types */
1737 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1738 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1741 /* Check for mismatched return types (cdecl vs stdcall). */
1742 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1743 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1745 if (ix86_function_regparm (type1, NULL)
1746 != ix86_function_regparm (type2, NULL))
1751 /* Return the regparm value for a function with the indicated TYPE and DECL.
1752 DECL may be NULL when calling function indirectly
1753 or considering a libcall.  Starts from the global -mregparm setting and
   overrides it from regparm/fastcall attributes or, for local functions
   compiled with -funit-at-a-time, from cgraph information. */
1756 ix86_function_regparm (tree type, tree decl)
1759 int regparm = ix86_regparm;
1760 bool user_convention = false;
/* An explicit regparm attribute wins over the command-line default. */
1764 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1767 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1768 user_convention = true;
1771 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1774 user_convention = true;
1777 /* Use register calling convention for local functions when possible. */
1778 if (!TARGET_64BIT && !user_convention && decl
1779 && flag_unit_at_a_time && !profile_flag)
1781 struct cgraph_local_info *i = cgraph_local_info (decl);
1784 /* We can't use regparm(3) for nested functions as these use
1785 static chain pointer in third argument. */
1786 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1796 /* Return true if EAX is live at the start of the function. Used by
1797 ix86_expand_prologue to determine if we need special help before
1798 calling allocate_stack_worker. */
1801 ix86_eax_live_at_start_p (void)
1803 /* Cheat. Don't bother working forward from ix86_function_regparm
1804 to the function type to whether an actual argument is located in
1805 eax. Instead just look at cfg info, which is still close enough
1806 to correct at this point. This gives false positives for broken
1807 functions that might use uninitialized data that happens to be
1808 allocated in eax, but who cares? */
/* Register 0 is %eax in the i386 register numbering. */
1809 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1812 /* Value is the number of bytes of arguments automatically
1813 popped when returning from a subroutine call.
1814 FUNDECL is the declaration node of the function (as a tree),
1815 FUNTYPE is the data type of the function (as a tree),
1816 or for a library call it is an identifier node for the subroutine name.
1817 SIZE is the number of bytes of arguments passed on the stack.
1819 On the 80386, the RTD insn may be used to pop them if the number
1820 of args is fixed, but if the number is variable then the caller
1821 must pop them all. RTD can't be used for library calls now
1822 because the library is compiled with the Unix compiler.
1823 Use of RTD is a selectable option, since it is incompatible with
1824 standard Unix calling sequences. If the option is not selected,
1825 the caller must always pop the args.
1827 The attribute stdcall is equivalent to RTD on a per module basis. */
1830 ix86_return_pops_args (tree fundecl, tree funtype, int size)
/* -mrtd applies only to real function types, never to libcall names. */
1832 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1834 /* Cdecl functions override -mrtd, and never pop the stack. */
1835 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1837 /* Stdcall and fastcall functions will pop the stack if not
1839 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1840 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
/* Callee pops only when the argument list is fixed (no trailing
   ellipsis, i.e. terminated by void). */
1844 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1845 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1846 == void_type_node)))
1850 /* Lose any fake structure return argument if it is passed on the stack. */
1851 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1854 int nregs = ix86_function_regparm (funtype, fundecl);
1857 return GET_MODE_SIZE (Pmode);
1863 /* Argument support functions. */
1865 /* Return true when register may be used to pass function parameters. */
/* NOTE(review): the visible text is truncated — the return type line and
   the 32-bit/64-bit branch structure are partly missing; verify upstream. */
1867 ix86_function_arg_regno_p (int regno)
/* 32-bit case: integer regparm registers plus SSE regs when available. */
1871 return (regno < REGPARM_MAX
1872 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1873 if (SSE_REGNO_P (regno) && TARGET_SSE)
1875 /* RAX is used as hidden argument to va_arg functions. */
/* 64-bit case: check against the x86-64 integer parameter register list. */
1878 for (i = 0; i < REGPARM_MAX; i++)
1879 if (regno == x86_64_int_parameter_registers[i])
1884 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1885 for a call to a function whose data type is FNTYPE.
1886 For a library call, FNTYPE is 0. */
/* NOTE(review): parameter list and several statements are truncated in this
   chunk (e.g. the fndecl parameter line is missing); verify full source. */
1889 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1890 tree fntype, /* tree ptr for function decl */
1891 rtx libname, /* SYMBOL_REF of library name or 0 */
1894 static CUMULATIVE_ARGS zero_cum;
1895 tree param, next_param;
1897 if (TARGET_DEBUG_ARG)
1899 fprintf (stderr, "\ninit_cumulative_args (");
1901 fprintf (stderr, "fntype code = %s, ret code = %s",
1902 tree_code_name[(int) TREE_CODE (fntype)],
1903 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1905 fprintf (stderr, "no fntype");
1908 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1913 /* Set up the number of registers to use for passing arguments. */
1915 cum->nregs = ix86_function_regparm (fntype, fndecl);
/* Library call: fall back to the command-line -mregparm value. */
1917 cum->nregs = ix86_regparm;
1918 cum->sse_nregs = SSE_REGPARM_MAX;
1919 cum->mmx_nregs = MMX_REGPARM_MAX;
1920 cum->warn_sse = true;
1921 cum->warn_mmx = true;
1922 cum->maybe_vaarg = false;
1924 /* Use ecx and edx registers if function has fastcall attribute */
1925 if (fntype && !TARGET_64BIT)
1927 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1935 /* Determine if this function has variable arguments. This is
1936 indicated by the last argument being 'void_type_mode' if there
1937 are no variable arguments. If there are variable arguments, then
1938 we won't pass anything in registers */
1940 if (cum->nregs || !TARGET_MMX || !TARGET_SSE)
/* Walk the parameter-type list looking for a trailing non-void entry,
   which marks a varargs prototype. */
1942 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1943 param != 0; param = next_param)
1945 next_param = TREE_CHAIN (param);
1946 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1957 cum->maybe_vaarg = true;
/* No prototype information at all — conservatively assume varargs. */
1961 if ((!fntype && !libname)
1962 || (fntype && !TYPE_ARG_TYPES (fntype)))
1963 cum->maybe_vaarg = 1;
1965 if (TARGET_DEBUG_ARG)
1966 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1971 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1972 of this code is to classify each 8bytes of incoming argument by the register
1973 class and assign registers accordingly. */
1975 /* Return the union class of CLASS1 and CLASS2.
1976 See the x86-64 PS ABI for details. */
/* NOTE(review): a few lines (opening brace, the return after rule #1/#2)
   are missing from this chunk; the visible logic follows the psABI
   classification merge rules in order. */
1978 static enum x86_64_reg_class
1979 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1981 /* Rule #1: If both classes are equal, this is the resulting class. */
1982 if (class1 == class2)
1985 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1987 if (class1 == X86_64_NO_CLASS)
1989 if (class2 == X86_64_NO_CLASS)
1992 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1993 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1994 return X86_64_MEMORY_CLASS;
1996 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI + SSESF still fits in 32 bits, so keep the narrow class. */
1997 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1998 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS)
1999 return X86_64_INTEGERSI_CLASS;
2000 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2001 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2002 return X86_64_INTEGER_CLASS;
2004 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
2005 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
2006 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
2007 return X86_64_MEMORY_CLASS;
2009 /* Rule #6: Otherwise class SSE is used. */
2010 return X86_64_SSE_CLASS;
2013 /* Classify the argument of type TYPE and mode MODE.
2014 CLASSES will be filled by the register class used to pass each word
2015 of the operand. The number of words is returned. In case the parameter
2016 should be passed in memory, 0 is returned. As a special case for zero
2017 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2019 BIT_OFFSET is used internally for handling records and specifies offset
2020 of the offset in bits modulo 256 to avoid overflow cases.
2022 See the x86-64 PS ABI for details.
/* NOTE(review): this function is heavily truncated in the visible chunk —
   many braces, returns, and switch/case labels are missing. Comments below
   describe only what the visible lines establish; confirm against the
   complete i386.c before relying on any of them. */
2026 classify_argument (enum machine_mode mode, tree type,
2027 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
/* Size in bytes: for BLKmode use the tree's layout size, else the mode. */
2029 HOST_WIDE_INT bytes =
2030 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
/* Number of 8-byte "eightbytes", accounting for the sub-word bit offset. */
2031 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2033 /* Variable sized entities are always passed/returned in memory. */
2037 if (mode != VOIDmode
2038 && MUST_PASS_IN_STACK (mode, type))
2041 if (type && AGGREGATE_TYPE_P (type))
2045 enum x86_64_reg_class subclasses[MAX_CLASSES];
2047 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
/* Start every eightbyte as NO_CLASS; fields are merged in below. */
2051 for (i = 0; i < words; i++)
2052 classes[i] = X86_64_NO_CLASS;
2054 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2055 signalize memory class, so handle it as special case. */
2058 classes[0] = X86_64_NO_CLASS;
2062 /* Classify each field of record and merge classes. */
2063 if (TREE_CODE (type) == RECORD_TYPE)
2065 /* For classes first merge in the field of the subclasses. */
2066 if (TYPE_BINFO (type) && BINFO_BASE_BINFOS (TYPE_BINFO (type)))
2068 tree bases = BINFO_BASE_BINFOS (TYPE_BINFO (type));
2069 int n_bases = BINFO_N_BASE_BINFOS (TYPE_BINFO (type));
2072 for (i = 0; i < n_bases; ++i)
2074 tree binfo = TREE_VEC_ELT (bases, i);
2076 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2077 tree type = BINFO_TYPE (binfo);
/* Recursively classify the base class at its bit offset (mod 256). */
2079 num = classify_argument (TYPE_MODE (type),
2081 (offset + bit_offset) % 256);
2084 for (i = 0; i < num; i++)
2086 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2088 merge_classes (subclasses[i], classes[i + pos]);
2092 /* And now merge the fields of structure. */
2093 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2095 if (TREE_CODE (field) == FIELD_DECL)
2099 /* Bitfields are always classified as integer. Handle them
2100 early, since later code would consider them to be
2101 misaligned integers. */
2102 if (DECL_BIT_FIELD (field))
2104 for (i = int_bit_position (field) / 8 / 8;
2105 i < (int_bit_position (field)
2106 + tree_low_cst (DECL_SIZE (field), 0)
2109 merge_classes (X86_64_INTEGER_CLASS,
/* Non-bitfield: classify the field's own type, then merge eightbytes. */
2114 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2115 TREE_TYPE (field), subclasses,
2116 (int_bit_position (field)
2117 + bit_offset) % 256);
2120 for (i = 0; i < num; i++)
2123 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2125 merge_classes (subclasses[i], classes[i + pos]);
2131 /* Arrays are handled as small records. */
2132 else if (TREE_CODE (type) == ARRAY_TYPE)
2135 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2136 TREE_TYPE (type), subclasses, bit_offset);
2140 /* The partial classes are now full classes. */
2141 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2142 subclasses[0] = X86_64_SSE_CLASS;
2143 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2144 subclasses[0] = X86_64_INTEGER_CLASS;
/* Replicate the element classification across all array eightbytes. */
2146 for (i = 0; i < words; i++)
2147 classes[i] = subclasses[i % num];
2149 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2150 else if (TREE_CODE (type) == UNION_TYPE
2151 || TREE_CODE (type) == QUAL_UNION_TYPE)
2153 /* For classes first merge in the field of the subclasses. */
2154 if (TYPE_BINFO (type) && BINFO_BASE_BINFOS (TYPE_BINFO (type)))
2156 tree bases = BINFO_BASE_BINFOS (TYPE_BINFO (type));
2157 int n_bases = BINFO_N_BASE_BINFOS (TYPE_BINFO (type));
2160 for (i = 0; i < n_bases; ++i)
2162 tree binfo = TREE_VEC_ELT (bases, i);
2164 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2165 tree type = BINFO_TYPE (binfo);
2167 num = classify_argument (TYPE_MODE (type),
2169 (offset + (bit_offset % 64)) % 256);
2172 for (i = 0; i < num; i++)
2174 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2176 merge_classes (subclasses[i], classes[i + pos]);
/* Union members all start at offset 0, so merge in place. */
2180 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2182 if (TREE_CODE (field) == FIELD_DECL)
2185 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2186 TREE_TYPE (field), subclasses,
2190 for (i = 0; i < num; i++)
2191 classes[i] = merge_classes (subclasses[i], classes[i]);
2195 else if (TREE_CODE (type) == SET_TYPE)
2199 classes[0] = X86_64_INTEGERSI_CLASS;
2202 else if (bytes <= 8)
2204 classes[0] = X86_64_INTEGER_CLASS;
2207 else if (bytes <= 12)
2209 classes[0] = X86_64_INTEGER_CLASS;
2210 classes[1] = X86_64_INTEGERSI_CLASS;
2215 classes[0] = X86_64_INTEGER_CLASS;
2216 classes[1] = X86_64_INTEGER_CLASS;
2223 /* Final merger cleanup. */
2224 for (i = 0; i < words; i++)
2226 /* If one class is MEMORY, everything should be passed in
2228 if (classes[i] == X86_64_MEMORY_CLASS)
2231 /* The X86_64_SSEUP_CLASS should be always preceded by
2232 X86_64_SSE_CLASS. */
2233 if (classes[i] == X86_64_SSEUP_CLASS
2234 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2235 classes[i] = X86_64_SSE_CLASS;
2237 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2238 if (classes[i] == X86_64_X87UP_CLASS
2239 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2240 classes[i] = X86_64_SSE_CLASS;
2245 /* Compute alignment needed. We align all types to natural boundaries with
2246 exception of XFmode that is aligned to 64bits. */
2247 if (mode != VOIDmode && mode != BLKmode)
2249 int mode_alignment = GET_MODE_BITSIZE (mode);
2252 mode_alignment = 128;
2253 else if (mode == XCmode)
2254 mode_alignment = 256;
2255 if (COMPLEX_MODE_P (mode))
2256 mode_alignment /= 2;
2257 /* Misaligned fields are always returned in memory. */
2258 if (bit_offset % mode_alignment)
2262 /* Classification of atomic types. */
/* The switch cases below are truncated; visible lines show the per-mode
   classification: small integers, TImode pairs, SF/DF/XF floats, vectors. */
2272 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2273 classes[0] = X86_64_INTEGERSI_CLASS;
2275 classes[0] = X86_64_INTEGER_CLASS;
2279 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2282 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2283 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2286 if (!(bit_offset % 64))
2287 classes[0] = X86_64_SSESF_CLASS;
2289 classes[0] = X86_64_SSE_CLASS;
2292 classes[0] = X86_64_SSEDF_CLASS;
2295 classes[0] = X86_64_X87_CLASS;
2296 classes[1] = X86_64_X87UP_CLASS;
2302 classes[0] = X86_64_X87_CLASS;
2303 classes[1] = X86_64_X87UP_CLASS;
2304 classes[2] = X86_64_X87_CLASS;
2305 classes[3] = X86_64_X87UP_CLASS;
2308 classes[0] = X86_64_SSEDF_CLASS;
2309 classes[1] = X86_64_SSEDF_CLASS;
2312 classes[0] = X86_64_SSE_CLASS;
2320 classes[0] = X86_64_SSE_CLASS;
2321 classes[1] = X86_64_SSEUP_CLASS;
2336 /* Examine the argument and return set number of register required in each
2337 class. Return 0 iff parameter should be passed in memory. */
/* NOTE(review): switch body is truncated (increments of *int_nregs /
   *sse_nregs and the return statements are not visible); verify upstream. */
2339 examine_argument (enum machine_mode mode, tree type, int in_return,
2340 int *int_nregs, int *sse_nregs)
2342 enum x86_64_reg_class class[MAX_CLASSES];
2343 int n = classify_argument (mode, type, class, 0);
/* Tally one register per eightbyte, by class. */
2349 for (n--; n >= 0; n--)
2352 case X86_64_INTEGER_CLASS:
2353 case X86_64_INTEGERSI_CLASS:
2356 case X86_64_SSE_CLASS:
2357 case X86_64_SSESF_CLASS:
2358 case X86_64_SSEDF_CLASS:
2361 case X86_64_NO_CLASS:
2362 case X86_64_SSEUP_CLASS:
2364 case X86_64_X87_CLASS:
2365 case X86_64_X87UP_CLASS:
2369 case X86_64_MEMORY_CLASS:
2374 /* Construct container for the argument used by GCC interface. See
2375 FUNCTION_ARG for the detailed description. */
/* NOTE(review): several declarations and branch lines are missing from
   this chunk (return type, some if-conditions, brace lines). Comments
   below only annotate what the visible lines show. */
2377 construct_container (enum machine_mode mode, tree type, int in_return,
2378 int nintregs, int nsseregs, const int * intreg,
2381 enum machine_mode tmpmode;
2383 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2384 enum x86_64_reg_class class[MAX_CLASSES];
2388 int needed_sseregs, needed_intregs;
2389 rtx exp[MAX_CLASSES];
2392 n = classify_argument (mode, type, class, 0);
2393 if (TARGET_DEBUG_ARG)
2396 fprintf (stderr, "Memory class\n");
2399 fprintf (stderr, "Classes:");
2400 for (i = 0; i < n; i++)
2402 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2404 fprintf (stderr, "\n");
/* Bail out (pass in memory) when register demand exceeds what is left. */
2409 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2411 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2414 /* First construct simple cases. Avoid SCmode, since we want to use
2415 single register to pass this type. */
2416 if (n == 1 && mode != SCmode)
2419 case X86_64_INTEGER_CLASS:
2420 case X86_64_INTEGERSI_CLASS:
2421 return gen_rtx_REG (mode, intreg[0]);
2422 case X86_64_SSE_CLASS:
2423 case X86_64_SSESF_CLASS:
2424 case X86_64_SSEDF_CLASS:
2425 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2426 case X86_64_X87_CLASS:
2427 return gen_rtx_REG (mode, FIRST_STACK_REG);
2428 case X86_64_NO_CLASS:
2429 /* Zero sized array, struct or class. */
/* Two-eightbyte fast paths: full SSE pairs, x87 long double, and aligned
   integer register pairs avoid building a PARALLEL. */
2434 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2436 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2438 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2439 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2440 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2441 && class[1] == X86_64_INTEGER_CLASS
2442 && (mode == CDImode || mode == TImode || mode == TFmode)
2443 && intreg[0] + 1 == intreg[1])
2444 return gen_rtx_REG (mode, intreg[0]);
2446 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2447 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
2449 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2451 /* Otherwise figure out the entries of the PARALLEL. */
2452 for (i = 0; i < n; i++)
2456 case X86_64_NO_CLASS:
2458 case X86_64_INTEGER_CLASS:
2459 case X86_64_INTEGERSI_CLASS:
2460 /* Merge TImodes on aligned occasions here too. */
2461 if (i * 8 + 8 > bytes)
2462 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2463 else if (class[i] == X86_64_INTEGERSI_CLASS)
2467 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2468 if (tmpmode == BLKmode)
2470 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2471 gen_rtx_REG (tmpmode, *intreg),
2475 case X86_64_SSESF_CLASS:
2476 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2477 gen_rtx_REG (SFmode,
2478 SSE_REGNO (sse_regno)),
2482 case X86_64_SSEDF_CLASS:
2483 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2484 gen_rtx_REG (DFmode,
2485 SSE_REGNO (sse_regno)),
2489 case X86_64_SSE_CLASS:
2490 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2494 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2495 gen_rtx_REG (tmpmode,
2496 SSE_REGNO (sse_regno)),
2498 if (tmpmode == TImode)
/* Wrap all collected EXPR_LIST entries into one PARALLEL rtx. */
2506 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2507 for (i = 0; i < nexps; i++)
2508 XVECEXP (ret, 0, i) = exp [i];
2512 /* Update the data in CUM to advance over an argument
2513 of mode MODE and data type TYPE.
2514 (TYPE is null for libcalls where that information may not be available.) */
/* NOTE(review): the TARGET_64BIT branch header and several brace lines are
   missing from this chunk; confirm control flow against the full file. */
2517 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2518 enum machine_mode mode, /* current arg mode */
2519 tree type, /* type of the argument or 0 if lib support */
2520 int named) /* whether or not the argument was named */
2523 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2524 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2526 if (TARGET_DEBUG_ARG)
2528 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2529 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
/* x86-64 path: consume integer and SSE registers per the ABI classification,
   falling back to stack words when registers run out. */
2532 int int_nregs, sse_nregs;
2533 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2534 cum->words += words;
2535 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2537 cum->nregs -= int_nregs;
2538 cum->sse_nregs -= sse_nregs;
2539 cum->regno += int_nregs;
2540 cum->sse_regno += sse_nregs;
2543 cum->words += words;
/* 32-bit path: SSE-mode scalars/vectors consume XMM argument slots. */
2547 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2548 && (!type || !AGGREGATE_TYPE_P (type)))
2550 cum->sse_words += words;
2551 cum->sse_nregs -= 1;
2552 cum->sse_regno += 1;
2553 if (cum->sse_nregs <= 0)
2559 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2560 && (!type || !AGGREGATE_TYPE_P (type)))
2562 cum->mmx_words += words;
2563 cum->mmx_nregs -= 1;
2564 cum->mmx_regno += 1;
2565 if (cum->mmx_nregs <= 0)
/* Plain integer arguments: advance by whole words. */
2573 cum->words += words;
2574 cum->nregs -= words;
2575 cum->regno += words;
2577 if (cum->nregs <= 0)
2587 /* Define where to put the arguments to a function.
2588 Value is zero to push the argument on the stack,
2589 or a hard register in which to store the argument.
2591 MODE is the argument's machine mode.
2592 TYPE is the data type of the argument (as a tree).
2593 This is null for libcalls where that information may
2595 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2596 the preceding args and about the function being called.
2597 NAMED is nonzero if this argument is a named parameter
2598 (otherwise it is an extra parameter matching an ellipsis). */
/* NOTE(review): several branch headers (TARGET_64BIT split, mode switch
   cases) are missing from this chunk; verify against the full file. */
2601 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2602 enum machine_mode mode, /* current arg mode */
2603 tree type, /* type of the argument or 0 if lib support */
2604 int named) /* != 0 for normal args, == 0 for ... args */
2608 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2609 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2610 static bool warnedsse, warnedmmx;
2612 /* Handle a hidden AL argument containing number of registers for varargs
2613 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2615 if (mode == VOIDmode)
2618 return GEN_INT (cum->maybe_vaarg
2619 ? (cum->sse_nregs < 0
/* x86-64: let the ABI classifier build the register container. */
2627 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2628 &x86_64_int_parameter_registers [cum->regno],
2633 /* For now, pass fp/complex values on the stack. */
2645 if (words <= cum->nregs)
2647 int regno = cum->regno;
2649 /* Fastcall allocates the first two DWORD (SImode) or
2650 smaller arguments to ECX and EDX. */
2653 if (mode == BLKmode || mode == DImode)
2656 /* ECX not EAX is the first allocated register. */
2660 ret = gen_rtx_REG (mode, regno);
/* SSE vector argument on 32-bit: warn once if SSE is disabled
   (ABI changes silently otherwise). */
2670 if (!type || !AGGREGATE_TYPE_P (type))
2672 if (!TARGET_SSE && !warnedmmx && cum->warn_sse)
2675 warning ("SSE vector argument without SSE enabled "
2679 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2686 if (!type || !AGGREGATE_TYPE_P (type))
2688 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2691 warning ("MMX vector argument without MMX enabled "
2695 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2700 if (TARGET_DEBUG_ARG)
2703 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2704 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2707 print_simple_rtl (stderr, ret);
2709 fprintf (stderr, ", stack");
2711 fprintf (stderr, " )\n");
2717 /* A C expression that indicates when an argument must be passed by
2718 reference. If nonzero for an argument, a copy of that argument is
2719 made in memory and a pointer to the argument is passed instead of
2720 the argument itself. The pointer is passed in whatever way is
2721 appropriate for passing a pointer to that type. */
/* NOTE(review): return type and return statements are not visible here;
   only the variable-size check (int_size_in_bytes == -1) is shown. */
2724 function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2725 enum machine_mode mode ATTRIBUTE_UNUSED,
2726 tree type, int named ATTRIBUTE_UNUSED)
/* -1 from int_size_in_bytes means a variable-sized type. */
2731 if (type && int_size_in_bytes (type) == -1)
2733 if (TARGET_DEBUG_ARG)
2734 fprintf (stderr, "function_arg_pass_by_reference\n");
2741 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
/* NOTE(review): the return type line and several returns/braces are
   truncated; the visible logic recursively scans aggregates for a
   128-bit-aligned SSE vector member. */
2744 contains_128bit_aligned_vector_p (tree type)
2746 enum machine_mode mode = TYPE_MODE (type);
2747 if (SSE_REG_MODE_P (mode)
2748 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2750 if (TYPE_ALIGN (type) < 128)
2753 if (AGGREGATE_TYPE_P (type))
2755 /* Walk the aggregates recursively. */
2756 if (TREE_CODE (type) == RECORD_TYPE
2757 || TREE_CODE (type) == UNION_TYPE
2758 || TREE_CODE (type) == QUAL_UNION_TYPE)
/* C++ base classes are stored as binfos; check each base type. */
2762 if (TYPE_BINFO (type) && BINFO_BASE_BINFOS (TYPE_BINFO (type)))
2764 tree bases = BINFO_BASE_BINFOS (TYPE_BINFO (type));
2765 int n_bases = BINFO_N_BASE_BINFOS (TYPE_BINFO (type));
2768 for (i = 0; i < n_bases; ++i)
2770 tree binfo = TREE_VEC_ELT (bases, i);
2771 tree type = BINFO_TYPE (binfo);
2773 if (contains_128bit_aligned_vector_p (type))
2777 /* And now merge the fields of structure. */
2778 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2780 if (TREE_CODE (field) == FIELD_DECL
2781 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2785 /* Just for use if some languages passes arrays by value. */
2786 else if (TREE_CODE (type) == ARRAY_TYPE)
2788 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2797 /* Gives the alignment boundary, in bits, of an argument with the
2798 specified mode and type. */
/* NOTE(review): the 64-bit/32-bit split and some returns are truncated. */
2801 ix86_function_arg_boundary (enum machine_mode mode, tree type)
/* Prefer the type's declared alignment; fall back to the mode's. */
2805 align = TYPE_ALIGN (type);
2807 align = GET_MODE_ALIGNMENT (mode);
2808 if (align < PARM_BOUNDARY)
2809 align = PARM_BOUNDARY;
2812 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2813 make an exception for SSE modes since these require 128bit
2816 The handling here differs from field_alignment. ICC aligns MMX
2817 arguments to 4 byte boundaries, while structure fields are aligned
2818 to 8 byte boundaries. */
2821 if (!SSE_REG_MODE_P (mode))
2822 align = PARM_BOUNDARY;
/* BLKmode aggregates keep 128-bit alignment only when they actually
   contain a 128-bit aligned vector. */
2826 if (!contains_128bit_aligned_vector_p (type))
2827 align = PARM_BOUNDARY;
2835 /* Return true if N is a possible register number of function value. */
/* NOTE(review): the return type and the TARGET_64BIT branch split are
   truncated; reg 0 is EAX/RAX, plus st(0) and xmm0 where enabled. */
2837 ix86_function_value_regno_p (int regno)
2841 return ((regno) == 0
2842 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2843 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2845 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2846 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2847 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2850 /* Define how to find the value returned by a function.
2851 VALTYPE is the data type of the value (as a tree).
2852 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2853 otherwise, FUNC is 0. */
/* NOTE(review): return type line and the TARGET_64BIT guard are missing
   from the visible text. */
2855 ix86_function_value (tree valtype)
/* x86-64: build the ABI return-value container (may be a PARALLEL). */
2859 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2860 REGPARM_MAX, SSE_REGPARM_MAX,
2861 x86_64_int_return_registers, 0);
2862 /* For zero sized structures, construct_container return NULL, but we need
2863 to keep rest of compiler happy by returning meaningful value. */
2865 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
/* 32-bit: pick the return register by mode (eax / st(0) / xmm0). */
2869 return gen_rtx_REG (TYPE_MODE (valtype),
2870 ix86_value_regno (TYPE_MODE (valtype)));
2873 /* Return false iff type is returned in memory. */
/* NOTE(review): several returns and size comparisons are truncated here;
   comment claims below are limited to the visible lines. */
2875 ix86_return_in_memory (tree type)
2877 int needed_intregs, needed_sseregs, size;
2878 enum machine_mode mode = TYPE_MODE (type);
/* x86-64: in memory exactly when the psABI classifier says MEMORY. */
2881 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2883 if (mode == BLKmode)
2886 size = int_size_in_bytes (type);
2888 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2891 if (VECTOR_MODE_P (mode) || mode == TImode)
2893 /* User-created vectors small enough to fit in EAX. */
2897 /* MMX/3dNow values are returned on the stack, since we've
2898 got to EMMS/FEMMS before returning. */
2902 /* SSE values are returned in XMM0, except when it doesn't exist. */
2904 return (TARGET_SSE ? 0 : 1);
2915 /* When returning SSE vector types, we have a choice of either
2916 (1) being abi incompatible with a -march switch, or
2917 (2) generating an error.
2918 Given no good solution, I think the safest thing is one warning.
2919 The user won't be able to use -Werror, but....
2921 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
2922 called in response to actually generating a caller or callee that
2923 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
2924 via aggregate_value_p for general type probing from tree-ssa. */
/* NOTE(review): return type line, the `warned` static, and the final
   return are missing from the visible text. */
2927 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
/* Warn only once per compilation (guarded by the `warned` flag). */
2931 if (!TARGET_SSE && type && !warned)
2933 /* Look at the return type of the function, not the function type. */
2934 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
2937 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2940 warning ("SSE vector return without SSE enabled changes the ABI");
2947 /* Define how to find the value returned by a library function
2948 assuming the value has mode MODE. */
/* NOTE(review): the mode switch labels are missing; visible returns map
   SSE modes -> xmm0, x87 modes -> st(0), integers -> eax/rax, and the
   32-bit path defers to ix86_value_regno. */
2950 ix86_libcall_value (enum machine_mode mode)
2960 return gen_rtx_REG (mode, FIRST_SSE_REG);
2963 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2968 return gen_rtx_REG (mode, 0);
2972 return gen_rtx_REG (mode, ix86_value_regno (mode));
2975 /* Given a mode, return the register to use for a return value. */
/* NOTE(review): return type line and the final `return 0;` (eax) are not
   visible in this chunk. */
2978 ix86_value_regno (enum machine_mode mode)
2980 /* Floating point return values in %st(0). */
2981 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2982 return FIRST_FLOAT_REG;
2983 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2984 we prevent this case when sse is not available. */
2985 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2986 return FIRST_SSE_REG;
2987 /* Everything else in %eax. */
2991 /* Create the va_list data type. */
/* NOTE(review): field type lines for ovf/sav (pointer types) are partly
   truncated; structure matches the x86-64 psABI __va_list_tag layout:
   gp_offset, fp_offset, overflow_arg_area, reg_save_area. */
2994 ix86_build_builtin_va_list (void)
2996 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2998 /* For i386 we use plain pointer to argument area. */
3000 return build_pointer_type (char_type_node);
3002 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3003 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3005 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3006 unsigned_type_node);
3007 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3008 unsigned_type_node);
3009 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3011 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3014 DECL_FIELD_CONTEXT (f_gpr) = record;
3015 DECL_FIELD_CONTEXT (f_fpr) = record;
3016 DECL_FIELD_CONTEXT (f_ovf) = record;
3017 DECL_FIELD_CONTEXT (f_sav) = record;
3019 TREE_CHAIN (record) = type_decl;
3020 TYPE_NAME (record) = type_decl;
3021 TYPE_FIELDS (record) = f_gpr;
3022 TREE_CHAIN (f_gpr) = f_fpr;
3023 TREE_CHAIN (f_fpr) = f_ovf;
3024 TREE_CHAIN (f_ovf) = f_sav;
3026 layout_type (record);
3028 /* The correct type is an array type of one element. */
3029 return build_array_type (record, build_index_type (size_zero_node));
3032 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
/* NOTE(review): many declarations (fntype, stdarg_p, label, tmp_reg,
   nsse_reg, set, i) and several guards are missing from this chunk;
   verify control flow against the complete function. */
3035 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3036 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3039 CUMULATIVE_ARGS next_cum;
3040 rtx save_area = NULL_RTX, mem;
3053 /* Indicate to allocate space on the stack for varargs save area. */
3054 ix86_save_varrargs_registers = 1;
/* SSE register saves need 16-byte stack alignment. */
3056 cfun->stack_alignment_needed = 128;
3058 fntype = TREE_TYPE (current_function_decl);
3059 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3060 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3061 != void_type_node));
3063 /* For varargs, we do not want to skip the dummy va_dcl argument.
3064 For stdargs, we do want to skip the last named argument. */
3067 function_arg_advance (&next_cum, mode, type, 1);
3070 save_area = frame_pointer_rtx;
3072 set = get_varargs_alias_set ();
/* Spill the remaining unnamed integer argument registers to the save
   area, one word each. */
3074 for (i = next_cum.regno; i < ix86_regparm; i++)
3076 mem = gen_rtx_MEM (Pmode,
3077 plus_constant (save_area, i * UNITS_PER_WORD));
3078 set_mem_alias_set (mem, set);
3079 emit_move_insn (mem, gen_rtx_REG (Pmode,
3080 x86_64_int_parameter_registers[i]));
3083 if (next_cum.sse_nregs)
3085 /* Now emit code to save SSE registers. The AX parameter contains number
3086 of SSE parameter registers used to call this function. We use
3087 sse_prologue_save insn template that produces computed jump across
3088 SSE saves. We need some preparation work to get this working. */
3090 label = gen_label_rtx ();
3091 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3093 /* Compute address to jump to :
3094 label - 5*eax + nnamed_sse_arguments*5 */
3095 tmp_reg = gen_reg_rtx (Pmode);
3096 nsse_reg = gen_reg_rtx (Pmode);
/* AL holds the number of SSE registers actually used by the caller. */
3097 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3098 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3099 gen_rtx_MULT (Pmode, nsse_reg,
3101 if (next_cum.sse_regno)
3104 gen_rtx_CONST (DImode,
3105 gen_rtx_PLUS (DImode,
3107 GEN_INT (next_cum.sse_regno * 4))));
3109 emit_move_insn (nsse_reg, label_ref);
3110 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3112 /* Compute address of memory block we save into. We always use pointer
3113 pointing 127 bytes after first byte to store - this is needed to keep
3114 instruction size limited by 4 bytes. */
3115 tmp_reg = gen_reg_rtx (Pmode);
3116 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3117 plus_constant (save_area,
3118 8 * REGPARM_MAX + 127)));
3119 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3120 set_mem_alias_set (mem, set);
3121 set_mem_align (mem, BITS_PER_WORD);
3123 /* And finally do the dirty job! */
3124 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3125 GEN_INT (next_cum.sse_regno), label));
3130 /* Implement va_start. */
/* NOTE(review): return type line and a few guards/braces are missing in
   this chunk; verify against the full file. */
3133 ix86_va_start (tree valist, rtx nextarg)
3135 HOST_WIDE_INT words, n_gpr, n_fpr;
3136 tree f_gpr, f_fpr, f_ovf, f_sav;
3137 tree gpr, fpr, ovf, sav, t;
3139 /* Only 64bit target needs something special. */
3142 std_expand_builtin_va_start (valist, nextarg);
/* Pull apart the four __va_list_tag fields built in
   ix86_build_builtin_va_list (gp_offset, fp_offset, ovf, sav). */
3146 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3147 f_fpr = TREE_CHAIN (f_gpr);
3148 f_ovf = TREE_CHAIN (f_fpr);
3149 f_sav = TREE_CHAIN (f_ovf);
3151 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3152 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3153 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3154 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3155 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3157 /* Count number of gp and fp argument registers used. */
3158 words = current_function_args_info.words;
3159 n_gpr = current_function_args_info.regno;
3160 n_fpr = current_function_args_info.sse_regno;
3162 if (TARGET_DEBUG_ARG)
3163 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3164 (int) words, (int) n_gpr, (int) n_fpr);
/* gp_offset: 8 bytes per consumed integer register. */
3166 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3167 build_int_2 (n_gpr * 8, 0));
3168 TREE_SIDE_EFFECTS (t) = 1;
3169 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset: SSE save slots are 16 bytes, placed after the
   REGPARM_MAX * 8 bytes of integer register saves. */
3171 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3172 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3173 TREE_SIDE_EFFECTS (t) = 1;
3174 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3176 /* Find the overflow area. */
3177 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3179 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3180 build_int_2 (words * UNITS_PER_WORD, 0));
3181 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3182 TREE_SIDE_EFFECTS (t) = 1;
3183 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3185 /* Find the register save area.
3186 Prologue of the function save it right above stack frame. */
3187 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3188 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3189 TREE_SIDE_EFFECTS (t) = 1;
3190 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3193 /* Implement va_arg. */
3196 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3198 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3199 tree f_gpr, f_fpr, f_ovf, f_sav;
3200 tree gpr, fpr, ovf, sav, t;
3202 tree lab_false, lab_over = NULL_TREE;
3208 /* Only 64bit target needs something special. */
3210 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3212 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3213 f_fpr = TREE_CHAIN (f_gpr);
3214 f_ovf = TREE_CHAIN (f_fpr);
3215 f_sav = TREE_CHAIN (f_ovf);
3217 valist = build_fold_indirect_ref (valist);
3218 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3219 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3220 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3221 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3223 size = int_size_in_bytes (type);
3226 /* Variable-size types are passed by reference. */
3228 type = build_pointer_type (type);
3229 size = int_size_in_bytes (type);
3231 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3233 container = construct_container (TYPE_MODE (type), type, 0,
3234 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3236 * Pull the value out of the saved registers ...
3239 addr = create_tmp_var (ptr_type_node, "addr");
3240 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3244 int needed_intregs, needed_sseregs;
3246 tree int_addr, sse_addr;
3248 lab_false = create_artificial_label ();
3249 lab_over = create_artificial_label ();
3251 examine_argument (TYPE_MODE (type), type, 0,
3252 &needed_intregs, &needed_sseregs);
3255 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3256 || TYPE_ALIGN (type) > 128);
3258 /* In case we are passing structure, verify that it is consecutive block
3259 on the register save area. If not we need to do moves. */
3260 if (!need_temp && !REG_P (container))
3262 /* Verify that all registers are strictly consecutive */
3263 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3267 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3269 rtx slot = XVECEXP (container, 0, i);
3270 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3271 || INTVAL (XEXP (slot, 1)) != i * 16)
3279 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3281 rtx slot = XVECEXP (container, 0, i);
3282 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3283 || INTVAL (XEXP (slot, 1)) != i * 8)
3295 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3296 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3297 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3298 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3300 /* First ensure that we fit completely in registers. */
3303 t = build_int_2 ((REGPARM_MAX - needed_intregs + 1) * 8, 0);
3304 TREE_TYPE (t) = TREE_TYPE (gpr);
3305 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3306 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3307 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3308 gimplify_and_add (t, pre_p);
3312 t = build_int_2 ((SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3313 + REGPARM_MAX * 8, 0);
3314 TREE_TYPE (t) = TREE_TYPE (fpr);
3315 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3316 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3317 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3318 gimplify_and_add (t, pre_p);
3321 /* Compute index to start of area used for integer regs. */
3324 /* int_addr = gpr + sav; */
3325 t = build2 (PLUS_EXPR, ptr_type_node, sav, gpr);
3326 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3327 gimplify_and_add (t, pre_p);
3331 /* sse_addr = fpr + sav; */
3332 t = build2 (PLUS_EXPR, ptr_type_node, sav, fpr);
3333 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3334 gimplify_and_add (t, pre_p);
3339 tree temp = create_tmp_var (type, "va_arg_tmp");
3342 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3343 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3344 gimplify_and_add (t, pre_p);
3346 for (i = 0; i < XVECLEN (container, 0); i++)
3348 rtx slot = XVECEXP (container, 0, i);
3349 rtx reg = XEXP (slot, 0);
3350 enum machine_mode mode = GET_MODE (reg);
3351 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3352 tree addr_type = build_pointer_type (piece_type);
3355 tree dest_addr, dest;
3357 if (SSE_REGNO_P (REGNO (reg)))
3359 src_addr = sse_addr;
3360 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3364 src_addr = int_addr;
3365 src_offset = REGNO (reg) * 8;
3367 src_addr = fold_convert (addr_type, src_addr);
3368 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3369 size_int (src_offset)));
3370 src = build_fold_indirect_ref (src_addr);
3372 dest_addr = fold_convert (addr_type, addr);
3373 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3374 size_int (INTVAL (XEXP (slot, 1)))));
3375 dest = build_fold_indirect_ref (dest_addr);
3377 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3378 gimplify_and_add (t, pre_p);
3384 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3385 build_int_2 (needed_intregs * 8, 0));
3386 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3387 gimplify_and_add (t, pre_p);
3392 build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3393 build_int_2 (needed_sseregs * 16, 0));
3394 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3395 gimplify_and_add (t, pre_p);
3398 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3399 gimplify_and_add (t, pre_p);
3401 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3402 append_to_statement_list (t, pre_p);
3405 /* ... otherwise out of the overflow area. */
3407 /* Care for on-stack alignment if needed. */
3408 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3412 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3413 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3414 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3416 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3418 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3419 gimplify_and_add (t2, pre_p);
3421 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3422 build_int_2 (rsize * UNITS_PER_WORD, 0));
3423 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3424 gimplify_and_add (t, pre_p);
3428 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3429 append_to_statement_list (t, pre_p);
3432 ptrtype = build_pointer_type (type);
3433 addr = fold_convert (ptrtype, addr);
3436 addr = build_fold_indirect_ref (addr);
3437 return build_fold_indirect_ref (addr);
3440 /* Return nonzero if OP is either a i387 or SSE fp register. */
3442 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3444 return ANY_FP_REG_P (op);
3447 /* Return nonzero if OP is an i387 fp register. */
3449 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3451 return FP_REG_P (op);
3454 /* Return nonzero if OP is a non-fp register_operand. */
3456 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3458 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3461 /* Return nonzero if OP is a register operand other than an
3462 i387 fp register. */
3464 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3466 return register_operand (op, mode) && !FP_REG_P (op);
3469 /* Return nonzero if OP is general operand representable on x86_64. */
3472 x86_64_general_operand (rtx op, enum machine_mode mode)
3475 return general_operand (op, mode);
3476 if (nonimmediate_operand (op, mode))
3478 return x86_64_sign_extended_value (op);
3481 /* Return nonzero if OP is general operand representable on x86_64
3482 as either sign extended or zero extended constant. */
3485 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3488 return general_operand (op, mode);
3489 if (nonimmediate_operand (op, mode))
3491 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3494 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3497 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3500 return nonmemory_operand (op, mode);
3501 if (register_operand (op, mode))
3503 return x86_64_sign_extended_value (op);
3506 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3509 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3511 if (!TARGET_64BIT || !flag_pic)
3512 return nonmemory_operand (op, mode);
3513 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3515 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3520 /* Return nonzero if OPNUM's MEM should be matched
3521 in movabs* patterns. */
3524 ix86_check_movabs (rtx insn, int opnum)
3528 set = PATTERN (insn);
3529 if (GET_CODE (set) == PARALLEL)
3530 set = XVECEXP (set, 0, 0);
3531 if (GET_CODE (set) != SET)
3533 mem = XEXP (set, opnum);
3534 while (GET_CODE (mem) == SUBREG)
3535 mem = SUBREG_REG (mem);
3536 if (GET_CODE (mem) != MEM)
3538 return (volatile_ok || !MEM_VOLATILE_P (mem));
3541 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3544 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3547 return nonmemory_operand (op, mode);
3548 if (register_operand (op, mode))
3550 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3553 /* Return nonzero if OP is immediate operand representable on x86_64. */
3556 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3559 return immediate_operand (op, mode);
3560 return x86_64_sign_extended_value (op);
3563 /* Return nonzero if OP is immediate operand representable on x86_64. */
3566 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3568 return x86_64_zero_extended_value (op);
3571 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3572 for shift & compare patterns, as shifting by 0 does not change flags),
3573 else return zero. */
3576 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3578 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3581 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3582 reference and a constant. */
3585 symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3587 switch (GET_CODE (op))
3595 if (GET_CODE (op) == SYMBOL_REF
3596 || GET_CODE (op) == LABEL_REF
3597 || (GET_CODE (op) == UNSPEC
3598 && (XINT (op, 1) == UNSPEC_GOT
3599 || XINT (op, 1) == UNSPEC_GOTOFF
3600 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3602 if (GET_CODE (op) != PLUS
3603 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3607 if (GET_CODE (op) == SYMBOL_REF
3608 || GET_CODE (op) == LABEL_REF)
3610 /* Only @GOTOFF gets offsets. */
3611 if (GET_CODE (op) != UNSPEC
3612 || XINT (op, 1) != UNSPEC_GOTOFF)
3615 op = XVECEXP (op, 0, 0);
3616 if (GET_CODE (op) == SYMBOL_REF
3617 || GET_CODE (op) == LABEL_REF)
3626 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3629 pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3631 if (GET_CODE (op) != CONST)
3636 if (GET_CODE (op) == UNSPEC
3637 && XINT (op, 1) == UNSPEC_GOTPCREL)
3639 if (GET_CODE (op) == PLUS
3640 && GET_CODE (XEXP (op, 0)) == UNSPEC
3641 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3646 if (GET_CODE (op) == UNSPEC)
3648 if (GET_CODE (op) != PLUS
3649 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3652 if (GET_CODE (op) == UNSPEC)
3658 /* Return true if OP is a symbolic operand that resolves locally. */
3661 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3663 if (GET_CODE (op) == CONST
3664 && GET_CODE (XEXP (op, 0)) == PLUS
3665 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3666 op = XEXP (XEXP (op, 0), 0);
3668 if (GET_CODE (op) == LABEL_REF)
3671 if (GET_CODE (op) != SYMBOL_REF)
3674 if (SYMBOL_REF_LOCAL_P (op))
3677 /* There is, however, a not insubstantial body of code in the rest of
3678 the compiler that assumes it can just stick the results of
3679 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3680 /* ??? This is a hack. Should update the body of the compiler to
3681 always create a DECL an invoke targetm.encode_section_info. */
3682 if (strncmp (XSTR (op, 0), internal_label_prefix,
3683 internal_label_prefix_len) == 0)
3689 /* Test for various thread-local symbols. */
3692 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3694 if (GET_CODE (op) != SYMBOL_REF)
3696 return SYMBOL_REF_TLS_MODEL (op);
3700 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3702 if (GET_CODE (op) != SYMBOL_REF)
3704 return SYMBOL_REF_TLS_MODEL (op) == kind;
3708 global_dynamic_symbolic_operand (rtx op,
3709 enum machine_mode mode ATTRIBUTE_UNUSED)
3711 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3715 local_dynamic_symbolic_operand (rtx op,
3716 enum machine_mode mode ATTRIBUTE_UNUSED)
3718 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3722 initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3724 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3728 local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3730 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3733 /* Test for a valid operand for a call instruction. Don't allow the
3734 arg pointer register or virtual regs since they may decay into
3735 reg + const, which the patterns can't handle. */
3738 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3740 /* Disallow indirect through a virtual register. This leads to
3741 compiler aborts when trying to eliminate them. */
3742 if (GET_CODE (op) == REG
3743 && (op == arg_pointer_rtx
3744 || op == frame_pointer_rtx
3745 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3746 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3749 /* Disallow `call 1234'. Due to varying assembler lameness this
3750 gets either rejected or translated to `call .+1234'. */
3751 if (GET_CODE (op) == CONST_INT)
3754 /* Explicitly allow SYMBOL_REF even if pic. */
3755 if (GET_CODE (op) == SYMBOL_REF)
3758 /* Otherwise we can allow any general_operand in the address. */
3759 return general_operand (op, Pmode);
3762 /* Test for a valid operand for a call instruction. Don't allow the
3763 arg pointer register or virtual regs since they may decay into
3764 reg + const, which the patterns can't handle. */
3767 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3769 /* Disallow indirect through a virtual register. This leads to
3770 compiler aborts when trying to eliminate them. */
3771 if (GET_CODE (op) == REG
3772 && (op == arg_pointer_rtx
3773 || op == frame_pointer_rtx
3774 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3775 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3778 /* Explicitly allow SYMBOL_REF even if pic. */
3779 if (GET_CODE (op) == SYMBOL_REF)
3782 /* Otherwise we can only allow register operands. */
3783 return register_operand (op, Pmode);
3787 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3789 if (GET_CODE (op) == CONST
3790 && GET_CODE (XEXP (op, 0)) == PLUS
3791 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3792 op = XEXP (XEXP (op, 0), 0);
3793 return GET_CODE (op) == SYMBOL_REF;
3796 /* Match exactly zero and one. */
3799 const0_operand (rtx op, enum machine_mode mode)
3801 return op == CONST0_RTX (mode);
3805 const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3807 return op == const1_rtx;
3810 /* Match 2, 4, or 8. Used for leal multiplicands. */
3813 const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3815 return (GET_CODE (op) == CONST_INT
3816 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3820 const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3822 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3826 const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3828 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3832 const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3834 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3838 const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3840 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3844 /* True if this is a constant appropriate for an increment or decrement. */
3847 incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3849 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3850 registers, since carry flag is not set. */
3851 if ((TARGET_PENTIUM4 || TARGET_NOCONA) && !optimize_size)
3853 return op == const1_rtx || op == constm1_rtx;
3856 /* Return nonzero if OP is acceptable as operand of DImode shift
3860 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3863 return nonimmediate_operand (op, mode);
3865 return register_operand (op, mode);
3868 /* Return false if this is the stack pointer, or any other fake
3869 register eliminable to the stack pointer. Otherwise, this is
3872 This is used to prevent esp from being used as an index reg.
3873 Which would only happen in pathological cases. */
3876 reg_no_sp_operand (rtx op, enum machine_mode mode)
3879 if (GET_CODE (t) == SUBREG)
3881 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3884 return register_operand (op, mode);
3888 mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3890 return MMX_REG_P (op);
3893 /* Return false if this is any eliminable register. Otherwise
3897 general_no_elim_operand (rtx op, enum machine_mode mode)
3900 if (GET_CODE (t) == SUBREG)
3902 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3903 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3904 || t == virtual_stack_dynamic_rtx)
3907 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3908 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3911 return general_operand (op, mode);
3914 /* Return false if this is any eliminable register. Otherwise
3915 register_operand or const_int. */
3918 nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
3921 if (GET_CODE (t) == SUBREG)
3923 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3924 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3925 || t == virtual_stack_dynamic_rtx)
3928 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3931 /* Return false if this is any eliminable register or stack register,
3932 otherwise work like register_operand. */
3935 index_register_operand (rtx op, enum machine_mode mode)
3938 if (GET_CODE (t) == SUBREG)
3942 if (t == arg_pointer_rtx
3943 || t == frame_pointer_rtx
3944 || t == virtual_incoming_args_rtx
3945 || t == virtual_stack_vars_rtx
3946 || t == virtual_stack_dynamic_rtx
3947 || REGNO (t) == STACK_POINTER_REGNUM)
3950 return general_operand (op, mode);
3953 /* Return true if op is a Q_REGS class register. */
3956 q_regs_operand (rtx op, enum machine_mode mode)
3958 if (mode != VOIDmode && GET_MODE (op) != mode)
3960 if (GET_CODE (op) == SUBREG)
3961 op = SUBREG_REG (op);
3962 return ANY_QI_REG_P (op);
3965 /* Return true if op is an flags register. */
3968 flags_reg_operand (rtx op, enum machine_mode mode)
3970 if (mode != VOIDmode && GET_MODE (op) != mode)
3972 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3975 /* Return true if op is a NON_Q_REGS class register. */
3978 non_q_regs_operand (rtx op, enum machine_mode mode)
3980 if (mode != VOIDmode && GET_MODE (op) != mode)
3982 if (GET_CODE (op) == SUBREG)
3983 op = SUBREG_REG (op);
3984 return NON_QI_REG_P (op);
3988 zero_extended_scalar_load_operand (rtx op,
3989 enum machine_mode mode ATTRIBUTE_UNUSED)
3992 if (GET_CODE (op) != MEM)
3994 op = maybe_get_pool_constant (op);
3997 if (GET_CODE (op) != CONST_VECTOR)
4000 (GET_MODE_SIZE (GET_MODE (op)) /
4001 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
4002 for (n_elts--; n_elts > 0; n_elts--)
4004 rtx elt = CONST_VECTOR_ELT (op, n_elts);
4005 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
4011 /* Return 1 when OP is operand acceptable for standard SSE move. */
4013 vector_move_operand (rtx op, enum machine_mode mode)
4015 if (nonimmediate_operand (op, mode))
4017 if (GET_MODE (op) != mode && mode != VOIDmode)
4019 return (op == CONST0_RTX (GET_MODE (op)));
4022 /* Return true if op if a valid address, and does not contain
4023 a segment override. */
4026 no_seg_address_operand (rtx op, enum machine_mode mode)
4028 struct ix86_address parts;
4030 if (! address_operand (op, mode))
4033 if (! ix86_decompose_address (op, &parts))
4036 return parts.seg == SEG_DEFAULT;
4039 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
4042 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4044 enum rtx_code code = GET_CODE (op);
4047 /* Operations supported directly. */
4057 /* These are equivalent to ones above in non-IEEE comparisons. */
4064 return !TARGET_IEEE_FP;
4069 /* Return 1 if OP is a valid comparison operator in valid mode. */
4071 ix86_comparison_operator (rtx op, enum machine_mode mode)
4073 enum machine_mode inmode;
4074 enum rtx_code code = GET_CODE (op);
4075 if (mode != VOIDmode && GET_MODE (op) != mode)
4077 if (!COMPARISON_P (op))
4079 inmode = GET_MODE (XEXP (op, 0));
4081 if (inmode == CCFPmode || inmode == CCFPUmode)
4083 enum rtx_code second_code, bypass_code;
4084 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4085 return (bypass_code == NIL && second_code == NIL);
4092 if (inmode == CCmode || inmode == CCGCmode
4093 || inmode == CCGOCmode || inmode == CCNOmode)
4096 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
4097 if (inmode == CCmode)
4101 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
4109 /* Return 1 if OP is a valid comparison operator testing carry flag
4112 ix86_carry_flag_operator (rtx op, enum machine_mode mode)
4114 enum machine_mode inmode;
4115 enum rtx_code code = GET_CODE (op);
4117 if (mode != VOIDmode && GET_MODE (op) != mode)
4119 if (!COMPARISON_P (op))
4121 inmode = GET_MODE (XEXP (op, 0));
4122 if (GET_CODE (XEXP (op, 0)) != REG
4123 || REGNO (XEXP (op, 0)) != 17
4124 || XEXP (op, 1) != const0_rtx)
4127 if (inmode == CCFPmode || inmode == CCFPUmode)
4129 enum rtx_code second_code, bypass_code;
4131 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4132 if (bypass_code != NIL || second_code != NIL)
4134 code = ix86_fp_compare_code_to_integer (code);
4136 else if (inmode != CCmode)
4141 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4144 fcmov_comparison_operator (rtx op, enum machine_mode mode)
4146 enum machine_mode inmode;
4147 enum rtx_code code = GET_CODE (op);
4149 if (mode != VOIDmode && GET_MODE (op) != mode)
4151 if (!COMPARISON_P (op))
4153 inmode = GET_MODE (XEXP (op, 0));
4154 if (inmode == CCFPmode || inmode == CCFPUmode)
4156 enum rtx_code second_code, bypass_code;
4158 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4159 if (bypass_code != NIL || second_code != NIL)
4161 code = ix86_fp_compare_code_to_integer (code);
4163 /* i387 supports just limited amount of conditional codes. */
4166 case LTU: case GTU: case LEU: case GEU:
4167 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4170 case ORDERED: case UNORDERED:
4178 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4181 promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4183 switch (GET_CODE (op))
4186 /* Modern CPUs have same latency for HImode and SImode multiply,
4187 but 386 and 486 do HImode multiply faster. */
4188 return ix86_tune > PROCESSOR_I486;
4200 /* Nearly general operand, but accept any const_double, since we wish
4201 to be able to drop them into memory rather than have them get pulled
4205 cmp_fp_expander_operand (rtx op, enum machine_mode mode)
4207 if (mode != VOIDmode && mode != GET_MODE (op))
4209 if (GET_CODE (op) == CONST_DOUBLE)
4211 return general_operand (op, mode);
4214 /* Match an SI or HImode register for a zero_extract. */
4217 ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4220 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4221 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4224 if (!register_operand (op, VOIDmode))
4227 /* Be careful to accept only registers having upper parts. */
4228 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4229 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4232 /* Return 1 if this is a valid binary floating-point operation.
4233 OP is the expression matched, and MODE is its mode. */
4236 binary_fp_operator (rtx op, enum machine_mode mode)
4238 if (mode != VOIDmode && mode != GET_MODE (op))
4241 switch (GET_CODE (op))
4247 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4255 mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4257 return GET_CODE (op) == MULT;
4261 div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4263 return GET_CODE (op) == DIV;
4267 arith_or_logical_operator (rtx op, enum machine_mode mode)
4269 return ((mode == VOIDmode || GET_MODE (op) == mode)
4270 && ARITHMETIC_P (op));
4273 /* Returns 1 if OP is memory operand with a displacement. */
4276 memory_displacement_operand (rtx op, enum machine_mode mode)
4278 struct ix86_address parts;
4280 if (! memory_operand (op, mode))
4283 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4286 return parts.disp != NULL_RTX;
4289 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4290 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4292 ??? It seems likely that this will only work because cmpsi is an
4293 expander, and no actual insns use this. */
4296 cmpsi_operand (rtx op, enum machine_mode mode)
4298 if (nonimmediate_operand (op, mode))
4301 if (GET_CODE (op) == AND
4302 && GET_MODE (op) == SImode
4303 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4304 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4305 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4306 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4307 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4308 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4314 /* Returns 1 if OP is memory operand that can not be represented by the
4318 long_memory_operand (rtx op, enum machine_mode mode)
4320 if (! memory_operand (op, mode))
4323 return memory_address_length (op) != 0;
4326 /* Return nonzero if the rtx is known aligned. */
4329 aligned_operand (rtx op, enum machine_mode mode)
4331 struct ix86_address parts;
4333 if (!general_operand (op, mode))
4336 /* Registers and immediate operands are always "aligned". */
4337 if (GET_CODE (op) != MEM)
4340 /* Don't even try to do any aligned optimizations with volatiles. */
4341 if (MEM_VOLATILE_P (op))
4346 /* Pushes and pops are only valid on the stack pointer. */
4347 if (GET_CODE (op) == PRE_DEC
4348 || GET_CODE (op) == POST_INC)
4351 /* Decode the address. */
4352 if (! ix86_decompose_address (op, &parts))
4355 /* Look for some component that isn't known to be aligned. */
4359 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4364 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4369 if (GET_CODE (parts.disp) != CONST_INT
4370 || (INTVAL (parts.disp) & 3) != 0)
4374 /* Didn't find one -- this must be an aligned address. */
4378 /* Initialize the table of extra 80387 mathematical constants. */
4381 init_ext_80387_constants (void)
4383 static const char * cst[5] =
4385 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4386 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4387 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4388 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4389 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4393 for (i = 0; i < 5; i++)
4395 real_from_string (&ext_80387_constants_table[i], cst[i]);
4396 /* Ensure each constant is rounded to XFmode precision. */
4397 real_convert (&ext_80387_constants_table[i],
4398 XFmode, &ext_80387_constants_table[i]);
4401 ext_80387_constants_init = 1;
4404 /* Return true if the constant is something that can be loaded with
4405 a special instruction. */
4408 standard_80387_constant_p (rtx x)
4410 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4413 if (x == CONST0_RTX (GET_MODE (x)))
4415 if (x == CONST1_RTX (GET_MODE (x)))
4418 /* For XFmode constants, try to find a special 80387 instruction when
4419 optimizing for size or on those CPUs that benefit from them. */
4420 if (GET_MODE (x) == XFmode
4421 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4426 if (! ext_80387_constants_init)
4427 init_ext_80387_constants ();
4429 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4430 for (i = 0; i < 5; i++)
4431 if (real_identical (&r, &ext_80387_constants_table[i]))
4438 /* Return the opcode of the special instruction to be used to load
4442 standard_80387_constant_opcode (rtx x)
4444 switch (standard_80387_constant_p (x))
4464 /* Return the CONST_DOUBLE representing the 80387 constant that is
4465 loaded by the specified special instruction. The argument IDX
4466 matches the return value from standard_80387_constant_p. */
4469 standard_80387_constant_rtx (int idx)
4473 if (! ext_80387_constants_init)
4474 init_ext_80387_constants ();
4490 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4494 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4497 standard_sse_constant_p (rtx x)
4499 if (x == const0_rtx)
4501 return (x == CONST0_RTX (GET_MODE (x)));
4504 /* Returns 1 if OP contains a symbol reference */
4507 symbolic_reference_mentioned_p (rtx op)
4512 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4515 fmt = GET_RTX_FORMAT (GET_CODE (op));
4516 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4522 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4523 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4527 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4534 /* Return 1 if it is appropriate to emit `ret' instructions in the
4535 body of a function. Do this only if the epilogue is simple, needing a
4536 couple of insns. Prior to reloading, we can't tell how many registers
4537 must be saved, so return 0 then. Return 0 if there is no frame
4538 marker to de-allocate.
4540 If NON_SAVING_SETJMP is defined and true, then it is not possible
4541 for the epilogue to be simple, so return 0. This is a special case
4542 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4543 until final, but jump_optimize may need to know sooner if a
4547 ix86_can_use_return_insn_p (void)
4549 struct ix86_frame frame;
4551 #ifdef NON_SAVING_SETJMP
4552 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4556 if (! reload_completed || frame_pointer_needed)
4559 /* Don't allow more than 32 pop, since that's all we can do
4560 with one instruction. */
4561 if (current_function_pops_args
4562 && current_function_args_size >= 32768)
4565 ix86_compute_frame_layout (&frame);
4566 return frame.to_allocate == 0 && frame.nregs == 0;
4569 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4571 x86_64_sign_extended_value (rtx value)
4573 switch (GET_CODE (value))
4575 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4576 to be at least 32 and this all acceptable constants are
4577 represented as CONST_INT. */
4579 if (HOST_BITS_PER_WIDE_INT == 32)
/* Value fits iff truncating it to 32 bits and sign-extending back
   reproduces the original 64-bit value.  */
4583 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4584 return trunc_int_for_mode (val, SImode) == val;
4588 /* For certain code models, the symbolic references are known to fit.
4589 in CM_SMALL_PIC model we know it fits if it is local to the shared
4590 library. Don't count TLS SYMBOL_REFs here, since they should fit
4591 only if inside of UNSPEC handled below. */
4593 /* TLS symbols are not constant. */
4594 if (tls_symbolic_operand (value, Pmode))
4596 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4598 /* For certain code models, the code is near as well. */
4600 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4601 || ix86_cmodel == CM_KERNEL);
4603 /* We also may accept the offsetted memory references in certain special
4606 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4607 switch (XINT (XEXP (value, 0), 1))
4609 case UNSPEC_GOTPCREL:
4611 case UNSPEC_GOTNTPOFF:
/* symbol/label + constant offset: check the combination per code model.  */
4617 if (GET_CODE (XEXP (value, 0)) == PLUS)
4619 rtx op1 = XEXP (XEXP (value, 0), 0);
4620 rtx op2 = XEXP (XEXP (value, 0), 1);
4621 HOST_WIDE_INT offset;
4623 if (ix86_cmodel == CM_LARGE)
4625 if (GET_CODE (op2) != CONST_INT)
4627 offset = trunc_int_for_mode (INTVAL (op2), DImode)
4628 switch (GET_CODE (op1))
4631 /* For CM_SMALL assume that latest object is 16MB before
4632 end of 31bits boundary. We may also accept pretty
4633 large negative constants knowing that all objects are
4634 in the positive half of address space. */
4635 if (ix86_cmodel == CM_SMALL
4636 && offset < 16*1024*1024
4637 && trunc_int_for_mode (offset, SImode) == offset)
4639 /* For CM_KERNEL we know that all object resist in the
4640 negative half of 32bits address space. We may not
4641 accept negative offsets, since they may be just off
4642 and we may accept pretty large positive ones. */
4643 if (ix86_cmodel == CM_KERNEL
4645 && trunc_int_for_mode (offset, SImode) == offset)
4649 /* These conditions are similar to SYMBOL_REF ones, just the
4650 constraints for code models differ. */
4651 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4652 && offset < 16*1024*1024
4653 && trunc_int_for_mode (offset, SImode) == offset)
4655 if (ix86_cmodel == CM_KERNEL
4657 && trunc_int_for_mode (offset, SImode) == offset)
4661 switch (XINT (op1, 1))
4666 && trunc_int_for_mode (offset, SImode) == offset)
4680 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4682 x86_64_zero_extended_value (rtx value)
4684 switch (GET_CODE (value))
/* On a 32-bit host a zero-extendable 64-bit constant is a VOIDmode
   CONST_DOUBLE whose high word is zero.  */
4687 if (HOST_BITS_PER_WIDE_INT == 32)
4688 return (GET_MODE (value) == VOIDmode
4689 && !CONST_DOUBLE_HIGH (value));
4693 if (HOST_BITS_PER_WIDE_INT == 32)
4694 return INTVAL (value) >= 0;
/* On a 64-bit host: accept iff the top 32 bits are clear.  */
4696 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4699 /* For certain code models, the symbolic references are known to fit. */
4701 /* TLS symbols are not constant. */
4702 if (tls_symbolic_operand (value, Pmode))
4704 return ix86_cmodel == CM_SMALL;
4706 /* For certain code models, the code is near as well. */
4708 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4710 /* We also may accept the offsetted memory references in certain special
4713 if (GET_CODE (XEXP (value, 0)) == PLUS)
4715 rtx op1 = XEXP (XEXP (value, 0), 0);
4716 rtx op2 = XEXP (XEXP (value, 0), 1);
4718 if (ix86_cmodel == CM_LARGE)
4720 switch (GET_CODE (op1))
4724 /* For small code model we may accept pretty large positive
4725 offsets, since one bit is available for free. Negative
4726 offsets are limited by the size of NULL pointer area
4727 specified by the ABI. */
4728 if (ix86_cmodel == CM_SMALL
4729 && GET_CODE (op2) == CONST_INT
4730 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4731 && (trunc_int_for_mode (INTVAL (op2), SImode)
4734 /* ??? For the kernel, we may accept adjustment of
4735 -0x10000000, since we know that it will just convert
4736 negative address space to positive, but perhaps this
4737 is not worthwhile. */
4740 /* These conditions are similar to SYMBOL_REF ones, just the
4741 constraints for code models differ. */
4742 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4743 && GET_CODE (op2) == CONST_INT
4744 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4745 && (trunc_int_for_mode (INTVAL (op2), SImode)
4759 /* Value should be nonzero if functions must have frame pointers.
4760 Zero means the frame pointer need not be set up (and parms may
4761 be accessed via the stack pointer) in functions that seem suitable. */
4764 ix86_frame_pointer_required (void)
4766 /* If we accessed previous frames, then the generated code expects
4767 to be able to access the saved ebp value in our frame. */
4768 if (cfun->machine->accesses_prev_frame)
4771 /* Several x86 os'es need a frame pointer for other reasons,
4772 usually pertaining to setjmp. */
4773 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4776 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4777 the frame pointer by default. Turn it back on now if we've not
4778 got a leaf function. */
4779 if (TARGET_OMIT_LEAF_FRAME_POINTER
4780 && (!current_function_is_leaf))
/* Profiling (mcount) also forces a frame pointer here.  */
4783 if (current_function_profile)
4789 /* Record that the current function accesses previous call frames. */
4792 ix86_setup_frame_addresses (void)
4794 cfun->machine->accesses_prev_frame = 1;
/* USE_HIDDEN_LINKONCE: emit the PC-thunk as a hidden COMDAT function
   when the assembler/linker support it; otherwise use a local label.
   pic_labels_used is a bitmask of registers that need a thunk emitted
   at end of file (consumed by ix86_file_end below).  */
4797 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4798 # define USE_HIDDEN_LINKONCE 1
4800 # define USE_HIDDEN_LINKONCE 0
4803 static int pic_labels_used;
4805 /* Fills in the label name that should be used for a pc thunk for
4806 the given register. */
4809 get_pc_thunk_name (char name[32], unsigned int regno)
4811 if (USE_HIDDEN_LINKONCE)
4812 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4814 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4818 /* This function generates code for -fpic that loads %ebx with
4819 the return address of the caller and then returns. */
4822 ix86_file_end (void)
/* Emit one get_pc thunk per integer register that was flagged in
   pic_labels_used during compilation.  */
4827 for (regno = 0; regno < 8; ++regno)
4831 if (! ((pic_labels_used >> regno) & 1))
4834 get_pc_thunk_name (name, regno);
4836 if (USE_HIDDEN_LINKONCE)
/* Build a public, one-only FUNCTION_DECL so duplicate thunks from
   different objects are merged by the linker; mark it .hidden.  */
4840 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4842 TREE_PUBLIC (decl) = 1;
4843 TREE_STATIC (decl) = 1;
4844 DECL_ONE_ONLY (decl) = 1;
4846 (*targetm.asm_out.unique_section) (decl, 0);
4847 named_section (decl, NULL, 0);
4849 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4850 fputs ("\t.hidden\t", asm_out_file);
4851 assemble_name (asm_out_file, name);
4852 fputc ('\n', asm_out_file);
4853 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4858 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: load the return address (at the top of stack) into the
   target register, then return.  */
4861 xops[0] = gen_rtx_REG (SImode, regno);
4862 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4863 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4864 output_asm_insn ("ret", xops);
4867 if (NEED_INDICATE_EXEC_STACK)
4868 file_end_indicate_exec_stack ();
4871 /* Emit code for the SET_GOT patterns. */
4874 output_set_got (rtx dest)
4879 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
/* Classic form: call/pop (or mov) a local label to obtain the PC.  */
4881 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4883 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4886 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4888 output_asm_insn ("call\t%a2", xops);
4891 /* Output the "canonical" label name ("Lxx$pb") here too. This
4892 is what will be referred to by the Mach-O PIC subsystem. */
4893 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4895 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4896 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4899 output_asm_insn ("pop{l}\t%0", xops);
/* Deep-branch-prediction form: call a shared get_pc thunk instead of
   a call/pop pair, which would unbalance the return-address stack.  */
4904 get_pc_thunk_name (name, REGNO (dest));
4905 pic_labels_used |= 1 << REGNO (dest);
4907 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4908 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4909 output_asm_insn ("call\t%X2", xops);
4912 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4913 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4914 else if (!TARGET_MACHO)
4915 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
/* NOTE(review): the function header line is missing from this excerpt;
   the SET below models a push: store ARG at the pre-decremented
   stack pointer.  */
4920 /* Generate an "push" pattern for input ARG. */
4925 return gen_rtx_SET (VOIDmode,
4927 gen_rtx_PRE_DEC (Pmode,
4928 stack_pointer_rtx)),
4932 /* Return >= 0 if there is an unused call-clobbered register available
4933 for the entire function. */
4936 ix86_select_alt_pic_regnum (void)
/* Only safe in a leaf, non-profiled function; scan eax/ecx/edx
   (regnos 2..0) for one that is never live.  */
4938 if (current_function_is_leaf && !current_function_profile)
4941 for (i = 2; i >= 0; --i)
4942 if (!regs_ever_live[i])
4946 return INVALID_REGNUM;
4949 /* Return 1 if we need to save REGNO. */
4951 ix86_save_reg (unsigned int regno, int maybe_eh_return)
/* The PIC register needs saving when it is actually used (or the
   function profiles / calls eh_return / uses the constant pool) —
   unless an alternate call-clobbered register can hold it instead.  */
4953 if (pic_offset_table_rtx
4954 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4955 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4956 || current_function_profile
4957 || current_function_calls_eh_return
4958 || current_function_uses_const_pool))
4960 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
/* For eh_return, the EH data registers also count as saved when
   MAYBE_EH_RETURN is set.  */
4965 if (current_function_calls_eh_return && maybe_eh_return)
4970 unsigned test = EH_RETURN_DATA_REGNO (i);
4971 if (test == INVALID_REGNUM)
4978 return (regs_ever_live[regno]
4979 && !call_used_regs[regno]
4980 && !fixed_regs[regno]
4981 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4984 /* Return number of registers to be saved on the stack. */
4987 ix86_nsaved_regs (void)
4992 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4993 if (ix86_save_reg (regno, true))
4998 /* Return the offset between two registers, one to be eliminated, and the other
4999 its replacement, at the start of a routine. */
5002 ix86_initial_elimination_offset (int from, int to)
5004 struct ix86_frame frame;
5005 ix86_compute_frame_layout (&frame);
/* The four supported eliminations are derived from the three offsets
   recorded in the frame layout.  */
5007 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5008 return frame.hard_frame_pointer_offset;
5009 else if (from == FRAME_POINTER_REGNUM
5010 && to == HARD_FRAME_POINTER_REGNUM)
5011 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5014 if (to != STACK_POINTER_REGNUM)
5016 else if (from == ARG_POINTER_REGNUM)
5017 return frame.stack_pointer_offset;
5018 else if (from != FRAME_POINTER_REGNUM)
5021 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5025 /* Fill structure ix86_frame about frame of currently computed function. */
5028 ix86_compute_frame_layout (struct ix86_frame *frame)
5030 HOST_WIDE_INT total_size;
5031 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5032 HOST_WIDE_INT offset;
5033 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5034 HOST_WIDE_INT size = get_frame_size ();
5036 frame->nregs = ix86_nsaved_regs ();
5039 /* During reload iteration the amount of registers saved can change.
5040 Recompute the value as needed. Do not recompute when amount of registers
5041 didn't change as reload does mutiple calls to the function and does not
5042 expect the decision to change within single iteration. */
5044 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5046 int count = frame->nregs;
5048 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5049 /* The fast prologue uses move instead of push to save registers. This
5050 is significantly longer, but also executes faster as modern hardware
5051 can execute the moves in parallel, but can't do that for push/pop.
5053 Be careful about choosing what prologue to emit: When function takes
5054 many instructions to execute we may use slow version as well as in
5055 case function is known to be outside hot spot (this is known with
5056 feedback only). Weight the size of function by number of registers
5057 to save as it is cheap to use one or two push instructions but very
5058 slow to use many of them. */
5060 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5061 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5062 || (flag_branch_probabilities
5063 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5064 cfun->machine->use_fast_prologue_epilogue = false;
5066 cfun->machine->use_fast_prologue_epilogue
5067 = !expensive_function_p (count);
5069 if (TARGET_PROLOGUE_USING_MOVE
5070 && cfun->machine->use_fast_prologue_epilogue)
5071 frame->save_regs_using_mov = true;
5073 frame->save_regs_using_mov = false;
5076 /* Skip return address and saved base pointer. */
5077 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5079 frame->hard_frame_pointer_offset = offset;
5081 /* Do some sanity checking of stack_alignment_needed and
5082 preferred_alignment, since i386 port is the only using those features
5083 that may break easily. */
5085 if (size && !stack_alignment_needed)
5087 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
5089 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5091 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5094 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5095 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5097 /* Register save area */
5098 offset += frame->nregs * UNITS_PER_WORD;
/* NOTE(review): varargs register-save area (x86-64 only per the
   X86_64_VARARGS_SIZE name — confirm against missing context).  */
5101 if (ix86_save_varrargs_registers)
5103 offset += X86_64_VARARGS_SIZE;
5104 frame->va_arg_size = X86_64_VARARGS_SIZE;
5107 frame->va_arg_size = 0;
5109 /* Align start of frame for local function. */
5110 frame->padding1 = ((offset + stack_alignment_needed - 1)
5111 & -stack_alignment_needed) - offset;
5113 offset += frame->padding1;
5115 /* Frame pointer points here. */
5116 frame->frame_pointer_offset = offset;
5120 /* Add outgoing arguments area. Can be skipped if we eliminated
5121 all the function calls as dead code.
5122 Skipping is however impossible when function calls alloca. Alloca
5123 expander assumes that last current_function_outgoing_args_size
5124 of stack frame are unused. */
5125 if (ACCUMULATE_OUTGOING_ARGS
5126 && (!current_function_is_leaf || current_function_calls_alloca))
5128 offset += current_function_outgoing_args_size;
5129 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5132 frame->outgoing_arguments_size = 0;
5134 /* Align stack boundary. Only needed if we're calling another function
5136 if (!current_function_is_leaf || current_function_calls_alloca)
5137 frame->padding2 = ((offset + preferred_alignment - 1)
5138 & -preferred_alignment) - offset;
5140 frame->padding2 = 0;
5142 offset += frame->padding2;
5144 /* We've reached end of stack frame. */
5145 frame->stack_pointer_offset = offset;
5147 /* Size prologue needs to allocate. */
5148 frame->to_allocate =
5149 (size + frame->padding1 + frame->padding2
5150 + frame->outgoing_arguments_size + frame->va_arg_size);
/* Moves need addressable slots; with nothing to allocate and at most
   one register, or a 64-bit frame too large for a 32-bit displacement,
   fall back to push/pop.  */
5152 if ((!frame->to_allocate && frame->nregs <= 1)
5153 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5154 frame->save_regs_using_mov = false;
/* Red zone: in a leaf function whose sp never moves, part of the frame
   can live below the stack pointer and need not be allocated.  */
5156 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5157 && current_function_is_leaf)
5159 frame->red_zone_size = frame->to_allocate;
5160 if (frame->save_regs_using_mov)
5161 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5162 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5163 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5166 frame->red_zone_size = 0;
5167 frame->to_allocate -= frame->red_zone_size;
5168 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout (presumably under #if 0 or a debug
   flag in the full source — the guard line is missing here).  */
5170 fprintf (stderr, "nregs: %i\n", frame->nregs);
5171 fprintf (stderr, "size: %i\n", size);
5172 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5173 fprintf (stderr, "padding1: %i\n", frame->padding1);
5174 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5175 fprintf (stderr, "padding2: %i\n", frame->padding2);
5176 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5177 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5178 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5179 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5180 frame->hard_frame_pointer_offset);
5181 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5185 /* Emit code to save registers in the prologue. */
5188 ix86_emit_save_regs (void)
/* Push saved registers from highest regno down; each push is marked
   frame-related for unwind info.  */
5193 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5194 if (ix86_save_reg (regno, true))
5196 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5197 RTX_FRAME_RELATED_P (insn) = 1;
5201 /* Emit code to save registers using MOV insns. First register
5202 is restored from POINTER + OFFSET. */
5204 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5209 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5210 if (ix86_save_reg (regno, true))
5212 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5214 gen_rtx_REG (Pmode, regno));
5215 RTX_FRAME_RELATED_P (insn) = 1;
5216 offset += UNITS_PER_WORD;
5220 /* Expand prologue or epilogue stack adjustment.
5221 The pattern exist to put a dependency on all ebp-based memory accesses.
5222 STYLE should be negative if instructions should be marked as frame related,
5223 zero if %r11 register is live and cannot be freely used and positive
5227 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5232 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5233 else if (x86_64_immediate_operand (offset, DImode))
5234 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5238 /* r11 is used by indirect sibcall return as well, set before the
5239 epilogue and used after the epilogue. ATM indirect sibcall
5240 shouldn't be used together with huge frame sizes in one
5241 function because of the frame_size check in sibcall.c. */
/* Offset doesn't fit a 64-bit immediate: stage it through %r11.  */
5244 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5245 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5247 RTX_FRAME_RELATED_P (insn) = 1;
5248 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5252 RTX_FRAME_RELATED_P (insn) = 1;
5255 /* Expand the prologue into a bunch of separate insns. */
5258 ix86_expand_prologue (void)
5262 struct ix86_frame frame;
5263 HOST_WIDE_INT allocate;
5265 ix86_compute_frame_layout (&frame);
5267 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5268 slower on all targets. Also sdb doesn't like it. */
/* Standard push %ebp / mov %esp,%ebp pair when a frame pointer is
   needed; both insns are frame-related for the unwinder.  */
5270 if (frame_pointer_needed)
5272 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5273 RTX_FRAME_RELATED_P (insn) = 1;
5275 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5276 RTX_FRAME_RELATED_P (insn) = 1;
5279 allocate = frame.to_allocate;
5281 if (!frame.save_regs_using_mov)
5282 ix86_emit_save_regs ();
5284 allocate += frame.nregs * UNITS_PER_WORD;
5286 /* When using red zone we may start register saving before allocating
5287 the stack frame saving one cycle of the prologue. */
5288 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5289 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5290 : stack_pointer_rtx,
5291 -frame.nregs * UNITS_PER_WORD);
/* Small allocations (or no stack-probe target) use a plain sp adjust.  */
5295 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5296 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5297 GEN_INT (-allocate), -1);
5300 /* Only valid for Win32. */
5301 rtx eax = gen_rtx_REG (SImode, 0);
5302 bool eax_live = ix86_eax_live_at_start_p ();
/* Large allocation with stack probing: pass the size in %eax to the
   allocate_stack_worker; preserve a live %eax around the call.  */
5309 emit_insn (gen_push (eax));
5313 insn = emit_move_insn (eax, GEN_INT (allocate));
5314 RTX_FRAME_RELATED_P (insn) = 1;
5316 insn = emit_insn (gen_allocate_stack_worker (eax));
5317 RTX_FRAME_RELATED_P (insn) = 1;
5321 rtx t = plus_constant (stack_pointer_rtx, allocate);
5322 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5326 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5328 if (!frame_pointer_needed || !frame.to_allocate)
5329 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5331 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5332 -frame.nregs * UNITS_PER_WORD);
/* Set up the PIC register when it is actually needed, possibly in an
   alternate unused call-clobbered register.  */
5335 pic_reg_used = false;
5336 if (pic_offset_table_rtx
5337 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5338 || current_function_profile))
5340 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5342 if (alt_pic_reg_used != INVALID_REGNUM)
5343 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5345 pic_reg_used = true;
5350 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5352 /* Even with accurate pre-reload life analysis, we can wind up
5353 deleting all references to the pic register after reload.
5354 Consider if cross-jumping unifies two sides of a branch
5355 controlled by a comparison vs the only read from a global.
5356 In which case, allow the set_got to be deleted, though we're
5357 too late to do anything about the ebx save in the prologue. */
5358 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5361 /* Prevent function calls from be scheduled before the call to mcount.
5362 In the pic_reg_used case, make sure that the got load isn't deleted. */
5363 if (current_function_profile)
5364 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5367 /* Emit code to restore saved registers using MOV insns. First register
5368 is restored from POINTER + OFFSET. */
5370 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5371 int maybe_eh_return)
5374 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5376 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5377 if (ix86_save_reg (regno, maybe_eh_return))
5379 /* Ensure that adjust_address won't be forced to produce pointer
5380 out of range allowed by x86-64 instruction set. */
5381 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Offset exceeds a 32-bit displacement: rebase through %r11.  */
5385 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5386 emit_move_insn (r11, GEN_INT (offset));
5387 emit_insn (gen_adddi3 (r11, r11, pointer));
5388 base_address = gen_rtx_MEM (Pmode, r11);
5391 emit_move_insn (gen_rtx_REG (Pmode, regno),
5392 adjust_address (base_address, Pmode, offset));
5393 offset += UNITS_PER_WORD;
5397 /* Restore function stack, frame, and registers. */
5400 ix86_expand_epilogue (int style)
5403 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5404 struct ix86_frame frame;
5405 HOST_WIDE_INT offset;
5407 ix86_compute_frame_layout (&frame);
5409 /* Calculate start of saved registers relative to ebp. Special care
5410 must be taken for the normal return case of a function using
5411 eh_return: the eax and edx registers are marked as saved, but not
5412 restored along this path. */
5413 offset = frame.nregs;
5414 if (current_function_calls_eh_return && style != 2)
5416 offset *= -UNITS_PER_WORD;
5418 /* If we're only restoring one register and sp is not valid then
5419 using a move instruction to restore the register since it's
5420 less work than reloading sp and popping the register.
5422 The default code result in stack adjustment using add/lea instruction,
5423 while this code results in LEAVE instruction (or discrete equivalent),
5424 so it is profitable in some other cases as well. Especially when there
5425 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5426 and there is exactly one register to pop. This heuristic may need some
5427 tuning in future. */
5428 if ((!sp_valid && frame.nregs <= 1)
5429 || (TARGET_EPILOGUE_USING_MOVE
5430 && cfun->machine->use_fast_prologue_epilogue
5431 && (frame.nregs > 1 || frame.to_allocate))
5432 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5433 || (frame_pointer_needed && TARGET_USE_LEAVE
5434 && cfun->machine->use_fast_prologue_epilogue
5435 && frame.nregs == 1)
5436 || current_function_calls_eh_return)
5438 /* Restore registers. We can use ebp or esp to address the memory
5439 locations. If both are available, default to ebp, since offsets
5440 are known to be small. Only exception is esp pointing directly to the
5441 end of block of saved registers, where we may simplify addressing
5444 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5445 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5446 frame.to_allocate, style == 2)
5448 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5449 offset, style == 2);
5451 /* eh_return epilogues need %ecx added to the stack pointer. */
5454 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5456 if (frame_pointer_needed)
/* With a frame pointer: compute sp from ebp+adjust, restore ebp
   from its slot, then move sp past the frame.  */
5458 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5459 tmp = plus_constant (tmp, UNITS_PER_WORD);
5460 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5462 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5463 emit_move_insn (hard_frame_pointer_rtx, tmp);
5465 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
/* Without a frame pointer: add the whole frame size plus the EH
   stack adjustment to sp directly.  */
5470 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5471 tmp = plus_constant (tmp, (frame.to_allocate
5472 + frame.nregs * UNITS_PER_WORD));
5473 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5476 else if (!frame_pointer_needed)
5477 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5478 GEN_INT (frame.to_allocate
5479 + frame.nregs * UNITS_PER_WORD),
5481 /* If not an i386, mov & pop is faster than "leave". */
5482 else if (TARGET_USE_LEAVE || optimize_size
5483 || !cfun->machine->use_fast_prologue_epilogue)
5484 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5487 pro_epilogue_adjust_stack (stack_pointer_rtx,
5488 hard_frame_pointer_rtx,
5491 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5493 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5498 /* First step is to deallocate the stack frame so that we can
5499 pop the registers. */
5502 if (!frame_pointer_needed)
5504 pro_epilogue_adjust_stack (stack_pointer_rtx,
5505 hard_frame_pointer_rtx,
5506 GEN_INT (offset), style);
5508 else if (frame.to_allocate)
5509 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5510 GEN_INT (frame.to_allocate), style);
/* Pop callee-saved registers in ascending regno order.  */
5512 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5513 if (ix86_save_reg (regno, false))
5516 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5518 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5520 if (frame_pointer_needed)
5522 /* Leave results in shorter dependency chains on CPUs that are
5523 able to grok it fast. */
5524 if (TARGET_USE_LEAVE)
5525 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5526 else if (TARGET_64BIT)
5527 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5529 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5533 /* Sibcall epilogues don't want a return instruction. */
/* Emit the return: callee-pop functions use `ret $n'; pops of 64K or
   more need an explicit add plus an indirect jump through %ecx.  */
5537 if (current_function_pops_args && current_function_args_size)
5539 rtx popc = GEN_INT (current_function_pops_args);
5541 /* i386 can only pop 64K bytes. If asked to pop more, pop
5542 return address, do explicit add, and jump indirectly to the
5545 if (current_function_pops_args >= 65536)
5547 rtx ecx = gen_rtx_REG (SImode, 2);
5549 /* There is no "pascal" calling convention in 64bit ABI. */
5553 emit_insn (gen_popsi1 (ecx));
5554 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5555 emit_jump_insn (gen_return_indirect_internal (ecx));
5558 emit_jump_insn (gen_return_pop_internal (popc));
5561 emit_jump_insn (gen_return_internal ());
5564 /* Reset from the function's potential modifications. */
5567 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5568 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
/* Undo any alternate-PIC-register renaming done by the prologue.  */
5570 if (pic_offset_table_rtx)
5571 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5574 /* Extract the parts of an RTL expression that is a valid memory address
5575 for an instruction. Return 0 if the structure of the address is
5576 grossly off. Return -1 if the address contains ASHIFT, so it is not
5577 strictly valid, but still used for computing length of lea instruction. */
5580 ix86_decompose_address (rtx addr, struct ix86_address *out)
5582 rtx base = NULL_RTX;
5583 rtx index = NULL_RTX;
5584 rtx disp = NULL_RTX;
5585 HOST_WIDE_INT scale = 1;
5586 rtx scale_rtx = NULL_RTX;
5588 enum ix86_address_seg seg = SEG_DEFAULT;
5590 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
/* PLUS: flatten the addend tree into `addends', then classify each
   operand as base, index*scale, segment unspec, or displacement.  */
5592 else if (GET_CODE (addr) == PLUS)
5602 addends[n++] = XEXP (op, 1);
5605 while (GET_CODE (op) == PLUS);
5610 for (i = n; i >= 0; --i)
5613 switch (GET_CODE (op))
5618 index = XEXP (op, 0);
5619 scale_rtx = XEXP (op, 1);
/* UNSPEC_TP marks a thread-pointer reference; encode it as an
   %fs/%gs segment override when direct TLS seg refs are enabled.  */
5623 if (XINT (op, 1) == UNSPEC_TP
5624 && TARGET_TLS_DIRECT_SEG_REFS
5625 && seg == SEG_DEFAULT)
5626 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5655 else if (GET_CODE (addr) == MULT)
5657 index = XEXP (addr, 0); /* index*scale */
5658 scale_rtx = XEXP (addr, 1);
5660 else if (GET_CODE (addr) == ASHIFT)
5664 /* We're called for lea too, which implements ashift on occasion. */
5665 index = XEXP (addr, 0);
5666 tmp = XEXP (addr, 1);
5667 if (GET_CODE (tmp) != CONST_INT)
5669 scale = INTVAL (tmp);
5670 if ((unsigned HOST_WIDE_INT) scale > 3)
5676 disp = addr; /* displacement */
5678 /* Extract the integral value of scale. */
5681 if (GET_CODE (scale_rtx) != CONST_INT)
5683 scale = INTVAL (scale_rtx);
5686 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5687 if (base && index && scale == 1
5688 && (index == arg_pointer_rtx
5689 || index == frame_pointer_rtx
5690 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5697 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5698 if ((base == hard_frame_pointer_rtx
5699 || base == frame_pointer_rtx
5700 || base == arg_pointer_rtx) && !disp)
5703 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5704 Avoid this by transforming to [%esi+0]. */
5705 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5706 && base && !index && !disp
5708 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5711 /* Special case: encode reg+reg instead of reg*2. */
5712 if (!base && index && scale && scale == 2)
5713 base = index, scale = 1;
5715 /* Special case: scaling cannot be encoded without base or displacement. */
5716 if (!base && !disp && index && scale != 1)
5728 /* Return cost of the memory address x.
5729 For i386, it is better to use a complex address than let gcc copy
5730 the address into a reg and make a new pseudo. But not if the address
5731 requires to two regs - that would mean more pseudos with longer
5734 ix86_address_cost (rtx x)
5736 struct ix86_address parts;
5739 if (!ix86_decompose_address (x, &parts))
5742 /* More complex memory references are better. */
5743 if (parts.disp && parts.disp != const0_rtx)
5745 if (parts.seg != SEG_DEFAULT)
5748 /* Attempt to minimize number of registers in the address. */
5750 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5752 && (!REG_P (parts.index)
5753 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5757 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5759 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5760 && parts.base != parts.index)
5763 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5764 since it's predecode logic can't detect the length of instructions
5765 and it degenerates to vector decoded. Increase cost of such
5766 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5767 to split such addresses or even refuse such addresses at all.
5769 Following addressing modes are affected:
5774 The first and last case may be avoidable by explicitly coding the zero in
5775 memory address, but I don't have AMD-K6 machine handy to check this
5779 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5780 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5781 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5787 /* If X is a machine specific address (i.e. a symbol or label being
5788 referenced as a displacement from the GOT implemented using an
5789 UNSPEC), then return the base term. Otherwise return X. */
5792 ix86_find_base_term (rtx x)
/* 64-bit path: peel CONST, an optional constant addend, and the
   UNSPEC_GOTPCREL wrapper to reach the underlying symbol/label.  */
5798 if (GET_CODE (x) != CONST)
5801 if (GET_CODE (term) == PLUS
5802 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5803 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5804 term = XEXP (term, 0);
5805 if (GET_CODE (term) != UNSPEC
5806 || XINT (term, 1) != UNSPEC_GOTPCREL)
5809 term = XVECEXP (term, 0, 0);
5811 if (GET_CODE (term) != SYMBOL_REF
5812 && GET_CODE (term) != LABEL_REF)
/* 32-bit path: delegitimize and keep the result only if it is a
   plain symbol or label.  */
5818 term = ix86_delegitimize_address (x);
5820 if (GET_CODE (term) != SYMBOL_REF
5821 && GET_CODE (term) != LABEL_REF)
5827 /* Determine if a given RTX is a valid constant. We already know this
5828 satisfies CONSTANT_P. */
5831 legitimate_constant_p (rtx x)
5835 switch (GET_CODE (x))
5838 /* TLS symbols are not constant. */
5839 if (tls_symbolic_operand (x, Pmode))
5844 inner = XEXP (x, 0);
5846 /* Offsets of TLS symbols are never valid.
5847 Discourage CSE from creating them. */
5848 if (GET_CODE (inner) == PLUS
5849 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5852 if (GET_CODE (inner) == PLUS
5853 || GET_CODE (inner) == MINUS)
5855 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5857 inner = XEXP (inner, 0);
5860 /* Only some unspecs are valid as "constants". */
5861 if (GET_CODE (inner) == UNSPEC)
5862 switch (XINT (inner, 1))
5866 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5868 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5878 /* Otherwise we handle everything else in the move patterns. */
5882 /* Determine if it's legal to put X into the constant pool. This
5883 is not possible for the address of thread-local symbols, which
5884 is checked above. */
5887 ix86_cannot_force_const_mem (rtx x)
5889 return !legitimate_constant_p (x);
5892 /* Determine if a given RTX is a valid constant address. */
5895 constant_address_p (rtx x)
5897 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5900 /* Nonzero if the constant value X is a legitimate general operand
5901 when generating PIC code. It is given that flag_pic is on and
5902 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5905 legitimate_pic_operand_p (rtx x)
5909 switch (GET_CODE (x))
5912 inner = XEXP (x, 0);
5914 /* Only some unspecs are valid as "constants". */
5915 if (GET_CODE (inner) == UNSPEC)
5916 switch (XINT (inner, 1))
5919 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5927 return legitimate_pic_address_disp_p (x);
5934 /* Determine if a given CONST RTX is a valid memory displacement
5938 legitimate_pic_address_disp_p (rtx disp)
5942 /* In 64bit mode we can allow direct addresses of symbols and labels
5943 when they are not dynamic symbols. */
5946 /* TLS references should always be enclosed in UNSPEC. */
5947 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5949 if (GET_CODE (disp) == SYMBOL_REF
5950 && ix86_cmodel == CM_SMALL_PIC
5951 && SYMBOL_REF_LOCAL_P (disp))
5953 if (GET_CODE (disp) == LABEL_REF)
5955 if (GET_CODE (disp) == CONST
5956 && GET_CODE (XEXP (disp, 0)) == PLUS)
5958 rtx op0 = XEXP (XEXP (disp, 0), 0);
5959 rtx op1 = XEXP (XEXP (disp, 0), 1);
5961 /* TLS references should always be enclosed in UNSPEC. */
5962 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5964 if (((GET_CODE (op0) == SYMBOL_REF
5965 && ix86_cmodel == CM_SMALL_PIC
5966 && SYMBOL_REF_LOCAL_P (op0))
5967 || GET_CODE (op0) == LABEL_REF)
5968 && GET_CODE (op1) == CONST_INT
5969 && INTVAL (op1) < 16*1024*1024
5970 && INTVAL (op1) >= -16*1024*1024)
5974 if (GET_CODE (disp) != CONST)
5976 disp = XEXP (disp, 0);
5980 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5981 of GOT tables. We should not need these anyway. */
5982 if (GET_CODE (disp) != UNSPEC
5983 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5986 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5987 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5993 if (GET_CODE (disp) == PLUS)
5995 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5997 disp = XEXP (disp, 0);
6001 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
6002 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
6004 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6005 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6006 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6008 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6009 if (! strcmp (sym_name, "<pic base>"))
6014 if (GET_CODE (disp) != UNSPEC)
6017 switch (XINT (disp, 1))
6022 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6024 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6025 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6026 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6028 case UNSPEC_GOTTPOFF:
6029 case UNSPEC_GOTNTPOFF:
6030 case UNSPEC_INDNTPOFF:
6033 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6035 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6037 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6043 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6044 memory address for an instruction. The MODE argument is the machine mode
6045 for the MEM expression that wants to use this address.
6047 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6048 convert common non-canonical forms to canonical form so that they will
6052 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6054 struct ix86_address parts;
6055 rtx base, index, disp;
6056 HOST_WIDE_INT scale;
6057 const char *reason = NULL;
6058 rtx reason_rtx = NULL_RTX;
6060 if (TARGET_DEBUG_ADDR)
6063 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6064 GET_MODE_NAME (mode), strict);
6068 if (ix86_decompose_address (addr, &parts) <= 0)
6070 reason = "decomposition failed";
6075 index = parts.index;
6077 scale = parts.scale;
6079 /* Validate base register.
6081 Don't allow SUBREG's here, it can lead to spill failures when the base
6082 is one word out of a two word structure, which is represented internally
6089 if (GET_CODE (base) != REG)
6091 reason = "base is not a register";
6095 if (GET_MODE (base) != Pmode)
6097 reason = "base is not in Pmode";
6101 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
6102 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
6104 reason = "base is not valid";
6109 /* Validate index register.
6111 Don't allow SUBREG's here, it can lead to spill failures when the index
6112 is one word out of a two word structure, which is represented internally
6119 if (GET_CODE (index) != REG)
6121 reason = "index is not a register";
6125 if (GET_MODE (index) != Pmode)
6127 reason = "index is not in Pmode";
6131 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
6132 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
6134 reason = "index is not valid";
6139 /* Validate scale factor. */
6142 reason_rtx = GEN_INT (scale);
6145 reason = "scale without index";
6149 if (scale != 2 && scale != 4 && scale != 8)
6151 reason = "scale is not a valid multiplier";
6156 /* Validate displacement. */
6161 if (GET_CODE (disp) == CONST
6162 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6163 switch (XINT (XEXP (disp, 0), 1))
6167 case UNSPEC_GOTPCREL:
6170 goto is_legitimate_pic;
6172 case UNSPEC_GOTTPOFF:
6173 case UNSPEC_GOTNTPOFF:
6174 case UNSPEC_INDNTPOFF:
6180 reason = "invalid address unspec";
6184 else if (flag_pic && (SYMBOLIC_CONST (disp)
6186 && !machopic_operand_p (disp)
6191 if (TARGET_64BIT && (index || base))
6193 /* foo@dtpoff(%rX) is ok. */
6194 if (GET_CODE (disp) != CONST
6195 || GET_CODE (XEXP (disp, 0)) != PLUS
6196 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6197 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6198 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6199 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6201 reason = "non-constant pic memory reference";
6205 else if (! legitimate_pic_address_disp_p (disp))
6207 reason = "displacement is an invalid pic construct";
6211 /* This code used to verify that a symbolic pic displacement
6212 includes the pic_offset_table_rtx register.
6214 While this is good idea, unfortunately these constructs may
6215 be created by "adds using lea" optimization for incorrect
6224 This code is nonsensical, but results in addressing
6225 GOT table with pic_offset_table_rtx base. We can't
6226 just refuse it easily, since it gets matched by
6227 "addsi3" pattern, that later gets split to lea in the
6228 case output register differs from input. While this
6229 can be handled by separate addsi pattern for this case
6230 that never results in lea, this seems to be easier and
6231 correct fix for crash to disable this test. */
6233 else if (GET_CODE (disp) != LABEL_REF
6234 && GET_CODE (disp) != CONST_INT
6235 && (GET_CODE (disp) != CONST
6236 || !legitimate_constant_p (disp))
6237 && (GET_CODE (disp) != SYMBOL_REF
6238 || !legitimate_constant_p (disp)))
6240 reason = "displacement is not constant";
6243 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6245 reason = "displacement is out of range";
6250 /* Everything looks valid. */
6251 if (TARGET_DEBUG_ADDR)
6252 fprintf (stderr, "Success.\n");
6256 if (TARGET_DEBUG_ADDR)
6258 fprintf (stderr, "Error: %s\n", reason);
6259 debug_rtx (reason_rtx);
6264 /* Return an unique alias set for the GOT. */
6266 static HOST_WIDE_INT
6267 ix86_GOT_alias_set (void)
6269 static HOST_WIDE_INT set = -1;
6271 set = new_alias_set ();
6275 /* Return a legitimate reference for ORIG (an address) using the
6276 register REG. If REG is 0, a new pseudo is generated.
6278 There are two types of references that must be handled:
6280 1. Global data references must load the address from the GOT, via
6281 the PIC reg. An insn is emitted to do this load, and the reg is
6284 2. Static data references, constant pool addresses, and code labels
6285 compute the address as an offset from the GOT, whose base is in
6286 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6287 differentiate them from global data objects. The returned
6288 address is the PIC reg + an unspec constant.
6290 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6291 reg also appears in the address. */
6294 legitimize_pic_address (rtx orig, rtx reg)
6302 reg = gen_reg_rtx (Pmode);
6303 /* Use the generic Mach-O PIC machinery. */
6304 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6307 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6309 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6311 /* This symbol may be referenced via a displacement from the PIC
6312 base address (@GOTOFF). */
6314 if (reload_in_progress)
6315 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6316 if (GET_CODE (addr) == CONST)
6317 addr = XEXP (addr, 0);
6318 if (GET_CODE (addr) == PLUS)
6320 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6321 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6324 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6325 new = gen_rtx_CONST (Pmode, new);
6326 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6330 emit_move_insn (reg, new);
6334 else if (GET_CODE (addr) == SYMBOL_REF)
6338 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6339 new = gen_rtx_CONST (Pmode, new);
6340 new = gen_rtx_MEM (Pmode, new);
6341 RTX_UNCHANGING_P (new) = 1;
6342 set_mem_alias_set (new, ix86_GOT_alias_set ());
6345 reg = gen_reg_rtx (Pmode);
6346 /* Use directly gen_movsi, otherwise the address is loaded
6347 into register for CSE. We don't want to CSE this addresses,
6348 instead we CSE addresses from the GOT table, so skip this. */
6349 emit_insn (gen_movsi (reg, new));
6354 /* This symbol must be referenced via a load from the
6355 Global Offset Table (@GOT). */
6357 if (reload_in_progress)
6358 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6359 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6360 new = gen_rtx_CONST (Pmode, new);
6361 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6362 new = gen_rtx_MEM (Pmode, new);
6363 RTX_UNCHANGING_P (new) = 1;
6364 set_mem_alias_set (new, ix86_GOT_alias_set ());
6367 reg = gen_reg_rtx (Pmode);
6368 emit_move_insn (reg, new);
6374 if (GET_CODE (addr) == CONST)
6376 addr = XEXP (addr, 0);
6378 /* We must match stuff we generate before. Assume the only
6379 unspecs that can get here are ours. Not that we could do
6380 anything with them anyway.... */
6381 if (GET_CODE (addr) == UNSPEC
6382 || (GET_CODE (addr) == PLUS
6383 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6385 if (GET_CODE (addr) != PLUS)
6388 if (GET_CODE (addr) == PLUS)
6390 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6392 /* Check first to see if this is a constant offset from a @GOTOFF
6393 symbol reference. */
6394 if (local_symbolic_operand (op0, Pmode)
6395 && GET_CODE (op1) == CONST_INT)
6399 if (reload_in_progress)
6400 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6401 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6403 new = gen_rtx_PLUS (Pmode, new, op1);
6404 new = gen_rtx_CONST (Pmode, new);
6405 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6409 emit_move_insn (reg, new);
6415 if (INTVAL (op1) < -16*1024*1024
6416 || INTVAL (op1) >= 16*1024*1024)
6417 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6422 base = legitimize_pic_address (XEXP (addr, 0), reg);
6423 new = legitimize_pic_address (XEXP (addr, 1),
6424 base == reg ? NULL_RTX : reg);
6426 if (GET_CODE (new) == CONST_INT)
6427 new = plus_constant (base, INTVAL (new));
6430 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6432 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6433 new = XEXP (new, 1);
6435 new = gen_rtx_PLUS (Pmode, base, new);
6443 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6446 get_thread_pointer (int to_reg)
6450 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6454 reg = gen_reg_rtx (Pmode);
6455 insn = gen_rtx_SET (VOIDmode, reg, tp);
6456 insn = emit_insn (insn);
6461 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6462 false if we expect this to be used for a memory address and true if
6463 we expect to load the address into a register. */
6466 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6468 rtx dest, base, off, pic;
6473 case TLS_MODEL_GLOBAL_DYNAMIC:
6474 dest = gen_reg_rtx (Pmode);
6477 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6480 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6481 insns = get_insns ();
6484 emit_libcall_block (insns, dest, rax, x);
6487 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6490 case TLS_MODEL_LOCAL_DYNAMIC:
6491 base = gen_reg_rtx (Pmode);
6494 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6497 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6498 insns = get_insns ();
6501 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6502 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6503 emit_libcall_block (insns, base, rax, note);
6506 emit_insn (gen_tls_local_dynamic_base_32 (base));
6508 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6509 off = gen_rtx_CONST (Pmode, off);
6511 return gen_rtx_PLUS (Pmode, base, off);
6513 case TLS_MODEL_INITIAL_EXEC:
6517 type = UNSPEC_GOTNTPOFF;
6521 if (reload_in_progress)
6522 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6523 pic = pic_offset_table_rtx;
6524 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6526 else if (!TARGET_GNU_TLS)
6528 pic = gen_reg_rtx (Pmode);
6529 emit_insn (gen_set_got (pic));
6530 type = UNSPEC_GOTTPOFF;
6535 type = UNSPEC_INDNTPOFF;
6538 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6539 off = gen_rtx_CONST (Pmode, off);
6541 off = gen_rtx_PLUS (Pmode, pic, off);
6542 off = gen_rtx_MEM (Pmode, off);
6543 RTX_UNCHANGING_P (off) = 1;
6544 set_mem_alias_set (off, ix86_GOT_alias_set ());
6546 if (TARGET_64BIT || TARGET_GNU_TLS)
6548 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6549 off = force_reg (Pmode, off);
6550 return gen_rtx_PLUS (Pmode, base, off);
6554 base = get_thread_pointer (true);
6555 dest = gen_reg_rtx (Pmode);
6556 emit_insn (gen_subsi3 (dest, base, off));
6560 case TLS_MODEL_LOCAL_EXEC:
6561 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6562 (TARGET_64BIT || TARGET_GNU_TLS)
6563 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6564 off = gen_rtx_CONST (Pmode, off);
6566 if (TARGET_64BIT || TARGET_GNU_TLS)
6568 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6569 return gen_rtx_PLUS (Pmode, base, off);
6573 base = get_thread_pointer (true);
6574 dest = gen_reg_rtx (Pmode);
6575 emit_insn (gen_subsi3 (dest, base, off));
6586 /* Try machine-dependent ways of modifying an illegitimate address
6587 to be legitimate. If we find one, return the new, valid address.
6588 This macro is used in only one place: `memory_address' in explow.c.
6590 OLDX is the address as it was before break_out_memory_refs was called.
6591 In some cases it is useful to look at this to decide what needs to be done.
6593 MODE and WIN are passed so that this macro can use
6594 GO_IF_LEGITIMATE_ADDRESS.
6596 It is always safe for this macro to do nothing. It exists to recognize
6597 opportunities to optimize the output.
6599 For the 80386, we handle X+REG by loading X into a register R and
6600 using R+REG. R will go in a general reg and indexing will be used.
6601 However, if REG is a broken-out memory address or multiplication,
6602 nothing needs to be done because REG can certainly go in a general reg.
6604 When -fpic is used, special handling is needed for symbolic references.
6605 See comments by legitimize_pic_address in i386.c for details. */
6608 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6613 if (TARGET_DEBUG_ADDR)
6615 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6616 GET_MODE_NAME (mode));
6620 log = tls_symbolic_operand (x, mode);
6622 return legitimize_tls_address (x, log, false);
6623 if (GET_CODE (x) == CONST
6624 && GET_CODE (XEXP (x, 0)) == PLUS
6625 && (log = tls_symbolic_operand (XEXP (XEXP (x, 0), 0), Pmode)))
6627 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
6628 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
6631 if (flag_pic && SYMBOLIC_CONST (x))
6632 return legitimize_pic_address (x, 0);
6634 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6635 if (GET_CODE (x) == ASHIFT
6636 && GET_CODE (XEXP (x, 1)) == CONST_INT
6637 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6640 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6641 GEN_INT (1 << log));
6644 if (GET_CODE (x) == PLUS)
6646 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6648 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6649 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6650 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6653 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6654 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6655 GEN_INT (1 << log));
6658 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6659 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6660 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6663 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6664 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6665 GEN_INT (1 << log));
6668 /* Put multiply first if it isn't already. */
6669 if (GET_CODE (XEXP (x, 1)) == MULT)
6671 rtx tmp = XEXP (x, 0);
6672 XEXP (x, 0) = XEXP (x, 1);
6677 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6678 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6679 created by virtual register instantiation, register elimination, and
6680 similar optimizations. */
6681 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6684 x = gen_rtx_PLUS (Pmode,
6685 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6686 XEXP (XEXP (x, 1), 0)),
6687 XEXP (XEXP (x, 1), 1));
6691 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6692 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6693 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6694 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6695 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6696 && CONSTANT_P (XEXP (x, 1)))
6699 rtx other = NULL_RTX;
6701 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6703 constant = XEXP (x, 1);
6704 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6706 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6708 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6709 other = XEXP (x, 1);
6717 x = gen_rtx_PLUS (Pmode,
6718 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6719 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6720 plus_constant (other, INTVAL (constant)));
6724 if (changed && legitimate_address_p (mode, x, FALSE))
6727 if (GET_CODE (XEXP (x, 0)) == MULT)
6730 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6733 if (GET_CODE (XEXP (x, 1)) == MULT)
6736 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6740 && GET_CODE (XEXP (x, 1)) == REG
6741 && GET_CODE (XEXP (x, 0)) == REG)
6744 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6747 x = legitimize_pic_address (x, 0);
6750 if (changed && legitimate_address_p (mode, x, FALSE))
6753 if (GET_CODE (XEXP (x, 0)) == REG)
6755 rtx temp = gen_reg_rtx (Pmode);
6756 rtx val = force_operand (XEXP (x, 1), temp);
6758 emit_move_insn (temp, val);
6764 else if (GET_CODE (XEXP (x, 1)) == REG)
6766 rtx temp = gen_reg_rtx (Pmode);
6767 rtx val = force_operand (XEXP (x, 0), temp);
6769 emit_move_insn (temp, val);
6779 /* Print an integer constant expression in assembler syntax. Addition
6780 and subtraction are the only arithmetic that may appear in these
6781 expressions. FILE is the stdio stream to write to, X is the rtx, and
6782 CODE is the operand print code from the output string. */
6785 output_pic_addr_const (FILE *file, rtx x, int code)
6789 switch (GET_CODE (x))
6799 /* Mark the decl as referenced so that cgraph will output the function. */
6800 if (SYMBOL_REF_DECL (x))
6801 mark_decl_referenced (SYMBOL_REF_DECL (x));
6803 assemble_name (file, XSTR (x, 0));
6804 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6805 fputs ("@PLT", file);
6812 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6813 assemble_name (asm_out_file, buf);
6817 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6821 /* This used to output parentheses around the expression,
6822 but that does not work on the 386 (either ATT or BSD assembler). */
6823 output_pic_addr_const (file, XEXP (x, 0), code);
6827 if (GET_MODE (x) == VOIDmode)
6829 /* We can use %d if the number is <32 bits and positive. */
6830 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6831 fprintf (file, "0x%lx%08lx",
6832 (unsigned long) CONST_DOUBLE_HIGH (x),
6833 (unsigned long) CONST_DOUBLE_LOW (x));
6835 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6838 /* We can't handle floating point constants;
6839 PRINT_OPERAND must handle them. */
6840 output_operand_lossage ("floating constant misused");
6844 /* Some assemblers need integer constants to appear first. */
6845 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6847 output_pic_addr_const (file, XEXP (x, 0), code);
6849 output_pic_addr_const (file, XEXP (x, 1), code);
6851 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6853 output_pic_addr_const (file, XEXP (x, 1), code);
6855 output_pic_addr_const (file, XEXP (x, 0), code);
6863 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6864 output_pic_addr_const (file, XEXP (x, 0), code);
6866 output_pic_addr_const (file, XEXP (x, 1), code);
6868 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6872 if (XVECLEN (x, 0) != 1)
6874 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6875 switch (XINT (x, 1))
6878 fputs ("@GOT", file);
6881 fputs ("@GOTOFF", file);
6883 case UNSPEC_GOTPCREL:
6884 fputs ("@GOTPCREL(%rip)", file);
6886 case UNSPEC_GOTTPOFF:
6887 /* FIXME: This might be @TPOFF in Sun ld too. */
6888 fputs ("@GOTTPOFF", file);
6891 fputs ("@TPOFF", file);
6895 fputs ("@TPOFF", file);
6897 fputs ("@NTPOFF", file);
6900 fputs ("@DTPOFF", file);
6902 case UNSPEC_GOTNTPOFF:
6904 fputs ("@GOTTPOFF(%rip)", file);
6906 fputs ("@GOTNTPOFF", file);
6908 case UNSPEC_INDNTPOFF:
6909 fputs ("@INDNTPOFF", file);
6912 output_operand_lossage ("invalid UNSPEC as operand");
6918 output_operand_lossage ("invalid expression as operand");
6922 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6923 We need to handle our special PIC relocations. */
6926 i386_dwarf_output_addr_const (FILE *file, rtx x)
6929 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6933 fprintf (file, "%s", ASM_LONG);
6936 output_pic_addr_const (file, x, '\0');
6938 output_addr_const (file, x);
6942 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6943 We need to emit DTP-relative relocations. */
6946 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6948 fputs (ASM_LONG, file);
6949 output_addr_const (file, x);
6950 fputs ("@DTPOFF", file);
6956 fputs (", 0", file);
6963 /* In the name of slightly smaller debug output, and to cater to
6964 general assembler losage, recognize PIC+GOTOFF and turn it back
6965 into a direct symbol reference. */
6968 ix86_delegitimize_address (rtx orig_x)
6972 if (GET_CODE (x) == MEM)
6977 if (GET_CODE (x) != CONST
6978 || GET_CODE (XEXP (x, 0)) != UNSPEC
6979 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6980 || GET_CODE (orig_x) != MEM)
6982 return XVECEXP (XEXP (x, 0), 0, 0);
6985 if (GET_CODE (x) != PLUS
6986 || GET_CODE (XEXP (x, 1)) != CONST)
6989 if (GET_CODE (XEXP (x, 0)) == REG
6990 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6991 /* %ebx + GOT/GOTOFF */
6993 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6995 /* %ebx + %reg * scale + GOT/GOTOFF */
6997 if (GET_CODE (XEXP (y, 0)) == REG
6998 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
7000 else if (GET_CODE (XEXP (y, 1)) == REG
7001 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
7005 if (GET_CODE (y) != REG
7006 && GET_CODE (y) != MULT
7007 && GET_CODE (y) != ASHIFT)
7013 x = XEXP (XEXP (x, 1), 0);
7014 if (GET_CODE (x) == UNSPEC
7015 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7016 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7019 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
7020 return XVECEXP (x, 0, 0);
7023 if (GET_CODE (x) == PLUS
7024 && GET_CODE (XEXP (x, 0)) == UNSPEC
7025 && GET_CODE (XEXP (x, 1)) == CONST_INT
7026 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7027 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
7028 && GET_CODE (orig_x) != MEM)))
7030 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
7032 return gen_rtx_PLUS (Pmode, y, x);
7040 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7045 if (mode == CCFPmode || mode == CCFPUmode)
7047 enum rtx_code second_code, bypass_code;
7048 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7049 if (bypass_code != NIL || second_code != NIL)
7051 code = ix86_fp_compare_code_to_integer (code);
7055 code = reverse_condition (code);
7066 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
7071 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
7072 Those same assemblers have the same but opposite losage on cmov. */
7075 suffix = fp ? "nbe" : "a";
7078 if (mode == CCNOmode || mode == CCGOCmode)
7080 else if (mode == CCmode || mode == CCGCmode)
7091 if (mode == CCNOmode || mode == CCGOCmode)
7093 else if (mode == CCmode || mode == CCGCmode)
7102 suffix = fp ? "nb" : "ae";
7105 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
7115 suffix = fp ? "u" : "p";
7118 suffix = fp ? "nu" : "np";
7123 fputs (suffix, file);
7126 /* Print the name of register X to FILE based on its machine mode and number.
7127 If CODE is 'w', pretend the mode is HImode.
7128 If CODE is 'b', pretend the mode is QImode.
7129 If CODE is 'k', pretend the mode is SImode.
7130 If CODE is 'q', pretend the mode is DImode.
7131 If CODE is 'h', pretend the reg is the `high' byte register.
7132 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7135 print_reg (rtx x, int code, FILE *file)
7137 if (REGNO (x) == ARG_POINTER_REGNUM
7138 || REGNO (x) == FRAME_POINTER_REGNUM
7139 || REGNO (x) == FLAGS_REG
7140 || REGNO (x) == FPSR_REG)
7143 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7146 if (code == 'w' || MMX_REG_P (x))
7148 else if (code == 'b')
7150 else if (code == 'k')
7152 else if (code == 'q')
7154 else if (code == 'y')
7156 else if (code == 'h')
7159 code = GET_MODE_SIZE (GET_MODE (x));
7161 /* Irritatingly, AMD extended registers use different naming convention
7162 from the normal registers. */
7163 if (REX_INT_REG_P (x))
7170 error ("extended registers have no high halves");
7173 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7176 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7179 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7182 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7185 error ("unsupported operand size for extended register");
7193 if (STACK_TOP_P (x))
7195 fputs ("st(0)", file);
7202 if (! ANY_FP_REG_P (x))
7203 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7208 fputs (hi_reg_name[REGNO (x)], file);
7211 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7213 fputs (qi_reg_name[REGNO (x)], file);
7216 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7218 fputs (qi_high_reg_name[REGNO (x)], file);
7225 /* Locate some local-dynamic symbol still in use by this function
7226 so that we can print its name in some tls_local_dynamic_base
7230 get_some_local_dynamic_name (void)
7234 if (cfun->machine->some_ld_name)
7235 return cfun->machine->some_ld_name;
7237 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7239 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7240 return cfun->machine->some_ld_name;
7246 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7250 if (GET_CODE (x) == SYMBOL_REF
7251 && local_dynamic_symbolic_operand (x, Pmode))
7253 cfun->machine->some_ld_name = XSTR (x, 0);
7261 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7262 C -- print opcode suffix for set/cmov insn.
7263 c -- like C, but print reversed condition
7264 F,f -- likewise, but for floating-point.
7265 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7267 R -- print the prefix for register names.
7268 z -- print the opcode suffix for the size of the current operand.
7269 * -- print a star (in certain assembler syntax)
7270 A -- print an absolute memory reference.
7271 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7272 s -- print a shift double count, followed by the assemblers argument
7274 b -- print the QImode name of the register for the indicated operand.
7275 %b0 would print %al if operands[0] is reg 0.
7276 w -- likewise, print the HImode name of the register.
7277 k -- likewise, print the SImode name of the register.
7278 q -- likewise, print the DImode name of the register.
7279 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7280 y -- print "st(0)" instead of "st" as a register.
7281 D -- print condition for SSE cmp instruction.
7282 P -- if PIC, print an @PLT suffix.
7283 X -- don't print any sort of PIC '@' suffix for a symbol.
7284 & -- print some in-use local-dynamic symbol name.
/* Master asm-operand printer for ix86: writes the textual form of rtx X
   to FILE, modified by the operand-code letter CODE documented in the
   comment block above.  NOTE(review): this view of the function is
   missing many intermediate source lines (gaps in line numbering);
   the comments below describe only the code that is visible.  */
7288 print_operand (FILE *file, rtx x, int code)
7295     if (ASSEMBLER_DIALECT == ASM_ATT)
7300 	  assemble_name (file, get_some_local_dynamic_name ());
7304 	if (ASSEMBLER_DIALECT == ASM_ATT)
7306 	else if (ASSEMBLER_DIALECT == ASM_INTEL)
7308 	    /* Intel syntax. For absolute addresses, registers should not
7309 	       be surrounded by braces.  */
7310 	    if (GET_CODE (x) != REG)
7313 		PRINT_OPERAND (file, x, 0);
7321 	PRINT_OPERAND (file, x, 0);
7326 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7331 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7336 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7341 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7346 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7351 	  if (ASSEMBLER_DIALECT == ASM_ATT)
/* 'z' code: emit the x87/integer size suffix derived from the mode of X.  */
7356 	  /* 387 opcodes don't get size suffixes if the operands are
7358 	  if (STACK_REG_P (x))
7361 	  /* Likewise if using Intel opcodes.  */
7362 	  if (ASSEMBLER_DIALECT == ASM_INTEL)
7365 	  /* This is the size of op from size of operand.  */
7366 	  switch (GET_MODE_SIZE (GET_MODE (x)))
7369 #ifdef HAVE_GAS_FILDS_FISTS
7375 	      if (GET_MODE (x) == SFmode)
7390 	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7392 #ifdef GAS_MNEMONICS
7418 	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7420 	      PRINT_OPERAND (file, x, 0);
/* 'D' code: SSE comparison predicate names differ from the x87 cmove names.  */
7426 	  /* Little bit of braindamage here.  The SSE compare instructions
7427 	     does use completely different names for the comparisons that the
7428 	     fp conditional moves.  */
7429 	  switch (GET_CODE (x))
7444 	      fputs ("unord", file);
7448 	      fputs ("neq", file);
7452 	      fputs ("nlt", file);
7456 	      fputs ("nle", file);
7459 	      fputs ("ord", file);
7467 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7468 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7470 	      switch (GET_MODE (x))
7472 		case HImode: putc ('w', file); break;
7474 		case SFmode: putc ('l', file); break;
7476 		case DFmode: putc ('q', file); break;
7484 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7487 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7488 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7491 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7494 	  /* Like above, but reverse condition */
7496 	  /* Check to see if argument to %c is really a constant
7497 	     and not a condition code which needs to be reversed.  */
7498 	  if (!COMPARISON_P (x))
7500 	     output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7503 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7506 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7507 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7510 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* Branch-prediction hints: only emitted when optimizing for speed and the
   recorded branch probability disagrees with the CPU's static prediction.  */
7516 	  if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7519 	  x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7522 	      int pred_val = INTVAL (XEXP (x, 0));
7524 	      if (pred_val < REG_BR_PROB_BASE * 45 / 100
7525 		  || pred_val > REG_BR_PROB_BASE * 55 / 100)
7527 		  int taken = pred_val > REG_BR_PROB_BASE / 2;
7528 		  int cputaken = final_forward_branch_p (current_output_insn) == 0;
7530 		  /* Emit hints only in the case default branch prediction
7531 		     heuristics would fail.  */
7532 		  if (taken != cputaken)
7534 		      /* We use 3e (DS) prefix for taken branches and
7535 			 2e (CS) prefix for not taken branches.  */
7537 			fputs ("ds ; ", file);
7539 			fputs ("cs ; ", file);
7546 	  output_operand_lossage ("invalid operand code `%c'", code);
/* No (or handled) operand code: print X itself according to its rtx class.  */
7550   if (GET_CODE (x) == REG)
7551     print_reg (x, code, file);
7553   else if (GET_CODE (x) == MEM)
7555       /* No `byte ptr' prefix for call instructions.  */
7556       if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7559 	  switch (GET_MODE_SIZE (GET_MODE (x)))
7561 	    case 1: size = "BYTE"; break;
7562 	    case 2: size = "WORD"; break;
7563 	    case 4: size = "DWORD"; break;
7564 	    case 8: size = "QWORD"; break;
7565 	    case 12: size = "XWORD"; break;
7566 	    case 16: size = "XMMWORD"; break;
7571 	  /* Check for explicit size override (codes 'b', 'w' and 'k') */
7574 	  else if (code == 'w')
7576 	  else if (code == 'k')
7580 	  fputs (" PTR ", file);
7584       /* Avoid (%rip) for call operands.  */
7585       if (CONSTANT_ADDRESS_P (x) && code == 'P'
7586 	  && GET_CODE (x) != CONST_INT)
7587 	output_addr_const (file, x);
7588       else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7589 	output_operand_lossage ("invalid constraints for operand");
/* SFmode constants are emitted as their 32-bit IEEE bit pattern in hex.  */
7594   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7599       REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7600       REAL_VALUE_TO_TARGET_SINGLE (r, l);
7602       if (ASSEMBLER_DIALECT == ASM_ATT)
7604       fprintf (file, "0x%08lx", l);
7607   /* These float cases don't actually occur as immediate operands.  */
7608   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7612       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7613       fprintf (file, "%s", dstr);
7616   else if (GET_CODE (x) == CONST_DOUBLE
7617 	   && GET_MODE (x) == XFmode)
7621       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7622       fprintf (file, "%s", dstr);
/* Immediates: AT&T wants a '$' prefix, Intel an "OFFSET FLAT:" for symbols.  */
7629       if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7631 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7634       else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7635 	       || GET_CODE (x) == LABEL_REF)
7637 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7640 	    fputs ("OFFSET FLAT:", file);
7643       if (GET_CODE (x) == CONST_INT)
7644 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7646 	output_pic_addr_const (file, x, code);
7648 	output_addr_const (file, x);
7652 /* Print a memory operand whose address is ADDR. */
/* Print the memory address ADDR to FILE in the current assembler dialect.
   The address is first decomposed into base/index/displacement/scale/segment
   via ix86_decompose_address.  NOTE(review): several intermediate lines are
   missing from this view; comments describe only the visible code.  */
7655 print_operand_address (FILE *file, rtx addr)
7657   struct ix86_address parts;
7658   rtx base, index, disp;
7661   if (! ix86_decompose_address (addr, &parts))
7665   index = parts.index;
7667   scale = parts.scale;
/* Non-default segments get an explicit fs:/gs: override prefix.  */
7675       if (USER_LABEL_PREFIX[0] == 0)
7677       fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7683   if (!base && !index)
7685       /* Displacement only requires special attention.  */
7687       if (GET_CODE (disp) == CONST_INT)
7689 	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7691 	      if (USER_LABEL_PREFIX[0] == 0)
7693 	      fputs ("ds:", file);
7695 	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7698 	output_pic_addr_const (file, disp, 0);
7700 	output_addr_const (file, disp);
7702       /* Use one byte shorter RIP relative addressing for 64bit mode.  */
7704 	  && ((GET_CODE (disp) == SYMBOL_REF
7705 	       && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7706 	      || GET_CODE (disp) == LABEL_REF
7707 	      || (GET_CODE (disp) == CONST
7708 		  && GET_CODE (XEXP (disp, 0)) == PLUS
7709 		  && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7710 		      || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7711 		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7712 	fputs ("(%rip)", file);
/* AT&T syntax: disp(base,index,scale).  */
7716       if (ASSEMBLER_DIALECT == ASM_ATT)
7721 	    output_pic_addr_const (file, disp, 0);
7722 	  else if (GET_CODE (disp) == LABEL_REF)
7723 	    output_asm_label (disp);
7725 	    output_addr_const (file, disp);
7730 	    print_reg (base, 0, file);
7734 	      print_reg (index, 0, file);
7736 		fprintf (file, ",%d", scale);
/* Intel syntax: symbol[base+index*scale+offset].  */
7742 	  rtx offset = NULL_RTX;
7746 	  /* Pull out the offset of a symbol; print any symbol itself.  */
7747 	  if (GET_CODE (disp) == CONST
7748 	      && GET_CODE (XEXP (disp, 0)) == PLUS
7749 	      && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7751 	      offset = XEXP (XEXP (disp, 0), 1);
7752 	      disp = gen_rtx_CONST (VOIDmode,
7753 				    XEXP (XEXP (disp, 0), 0));
7757 	    output_pic_addr_const (file, disp, 0);
7758 	  else if (GET_CODE (disp) == LABEL_REF)
7759 	    output_asm_label (disp);
7760 	  else if (GET_CODE (disp) == CONST_INT)
7763 	    output_addr_const (file, disp);
7769 	    print_reg (base, 0, file);
7772 	      if (INTVAL (offset) >= 0)
7774 	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7778 	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7785 	      print_reg (index, 0, file);
7787 		fprintf (file, "*%d", scale);
/* Handle the target-specific UNSPEC address constants (TLS relocations):
   print the wrapped operand followed by the matching @...OFF relocation
   suffix.  Returns false (fall through) for non-UNSPEC rtxes so the
   generic code handles them.  NOTE(review): some lines (return statements,
   a few case labels) are missing from this view.  */
7795 output_addr_const_extra (FILE *file, rtx x)
7799   if (GET_CODE (x) != UNSPEC)
7802   op = XVECEXP (x, 0, 0);
7803   switch (XINT (x, 1))
7805     case UNSPEC_GOTTPOFF:
7806       output_addr_const (file, op);
7807       /* FIXME: This might be @TPOFF in Sun ld.  */
7808       fputs ("@GOTTPOFF", file);
7811       output_addr_const (file, op);
7812       fputs ("@TPOFF", file);
7815       output_addr_const (file, op);
7817 	fputs ("@TPOFF", file);
7819 	fputs ("@NTPOFF", file);
7822       output_addr_const (file, op);
7823       fputs ("@DTPOFF", file);
7825     case UNSPEC_GOTNTPOFF:
7826       output_addr_const (file, op);
7828 	fputs ("@GOTTPOFF(%rip)", file);
7830 	fputs ("@GOTNTPOFF", file);
7832     case UNSPEC_INDNTPOFF:
7833       output_addr_const (file, op);
7834       fputs ("@INDNTPOFF", file);
7844 /* Split one or more DImode RTL references into pairs of SImode
7845 references. The RTL can be REG, offsettable MEM, integer constant, or
7846 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7847 split and "num" is its length. lo_half and hi_half are output arrays
7848 that parallel "operands". */
/* Split DImode operands into SImode lo/hi pairs (see comment above).
   MEMs are split with adjust_address (simplify_subreg refuses volatile
   MEMs); everything else goes through simplify_gen_subreg, treating a
   VOIDmode constant as DImode.  The surrounding loop header is not
   visible in this view.  */
7851 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7855       rtx op = operands[num];
7857       /* simplify_subreg refuse to split volatile memory addresses,
7858 	 but we still have to handle it.  */
7859       if (GET_CODE (op) == MEM)
7861 	  lo_half[num] = adjust_address (op, SImode, 0);
7862 	  hi_half[num] = adjust_address (op, SImode, 4);
7866 	  lo_half[num] = simplify_gen_subreg (SImode, op,
7867 					      GET_MODE (op) == VOIDmode
7868 					      ? DImode : GET_MODE (op), 0);
7869 	  hi_half[num] = simplify_gen_subreg (SImode, op,
7870 					      GET_MODE (op) == VOIDmode
7871 					      ? DImode : GET_MODE (op), 4);
7875 /* Split one or more TImode RTL references into pairs of SImode
7876 references. The RTL can be REG, offsettable MEM, integer constant, or
7877 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7878 split and "num" is its length. lo_half and hi_half are output arrays
7879 that parallel "operands". */
/* TImode analogue of split_di: split each operand into DImode lo/hi
   halves at byte offsets 0 and 8.  The surrounding loop header is not
   visible in this view.  */
7882 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7886       rtx op = operands[num];
7888       /* simplify_subreg refuse to split volatile memory addresses, but we
7889 	 still have to handle it.  */
7890       if (GET_CODE (op) == MEM)
7892 	  lo_half[num] = adjust_address (op, DImode, 0);
7893 	  hi_half[num] = adjust_address (op, DImode, 8);
7897 	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7898 	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7903 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7904 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7905 is the expression of the binary operation. The output may either be
7906 emitted here, or returned to the caller, like all output_* functions.
7908 There is no guarantee that the operands are the same mode, as they
7909 might be within FLOAT or FLOAT_EXTEND expressions. */
7911 #ifndef SYSV386_COMPAT
7912 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7913 wants to fix the assemblers because that causes incompatibility
7914 with gcc. No-one wants to fix gcc because that causes
7915 incompatibility with assemblers... You can use the option of
7916 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7917 #define SYSV386_COMPAT 1
/* Emit the asm template for a 387 (or SSE scalar) binary FP operation
   (see block comment above).  Builds the opcode in BUF, then appends the
   operand/pop-suffix template P chosen by the stack positions and
   liveness of the operands.  NOTE(review): many intermediate lines
   (case labels, strcpy of the mnemonic, returns) are missing from this
   sampled view.  */
7921 output_387_binary_op (rtx insn, rtx *operands)
7923   static char buf[30];
7926   int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7928 #ifdef ENABLE_CHECKING
7929   /* Even if we do not want to check the inputs, this documents input
7930      constraints.  Which helps in understanding the following code.  */
7931   if (STACK_REG_P (operands[0])
7932       && ((REG_P (operands[1])
7933 	   && REGNO (operands[0]) == REGNO (operands[1])
7934 	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7935 	  || (REG_P (operands[2])
7936 	      && REGNO (operands[0]) == REGNO (operands[2])
7937 	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7938       && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
/* Select the base mnemonic; integer-mode operands choose the fi* forms.  */
7944   switch (GET_CODE (operands[3]))
7947       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7948 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7956       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7957 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7965       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7966 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7974       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7975 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
/* SSE scalar path: ss/sd suffix chosen by the destination's mode.  */
7989       if (GET_MODE (operands[0]) == SFmode)
7990 	strcat (buf, "ss\t{%2, %0|%0, %2}");
7992 	strcat (buf, "sd\t{%2, %0|%0, %2}");
7997   switch (GET_CODE (operands[3]))
/* Commutative ops: canonicalize so operands[0] == operands[1].  */
8001       if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8003 	  rtx temp = operands[2];
8004 	  operands[2] = operands[1];
8008       /* know operands[0] == operands[1].  */
8010       if (GET_CODE (operands[2]) == MEM)
8016       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8018 	  if (STACK_TOP_P (operands[0]))
8019 	    /* How is it that we are storing to a dead operand[2]?
8020 	       Well, presumably operands[1] is dead too.  We can't
8021 	       store the result to st(0) as st(0) gets popped on this
8022 	       instruction.  Instead store to operands[2] (which I
8023 	       think has to be st(1)).  st(1) will be popped later.
8024 	       gcc <= 2.8.1 didn't have this check and generated
8025 	       assembly code that the Unixware assembler rejected.  */
8026 	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
8028 	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
8032       if (STACK_TOP_P (operands[0]))
8033 	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
8035 	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (MINUS/DIV): operand order and the r/p suffixes
   depend on which operand is st(0) and on SYSV386_COMPAT (see above).  */
8040       if (GET_CODE (operands[1]) == MEM)
8046       if (GET_CODE (operands[2]) == MEM)
8052       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8055 	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8056 	     derived assemblers, confusingly reverse the direction of
8057 	     the operation for fsub{r} and fdiv{r} when the
8058 	     destination register is not st(0).  The Intel assembler
8059 	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
8060 	     figure out what the hardware really does.  */
8061 	  if (STACK_TOP_P (operands[0]))
8062 	    p = "{p\t%0, %2|rp\t%2, %0}";
8064 	    p = "{rp\t%2, %0|p\t%0, %2}";
8066 	  if (STACK_TOP_P (operands[0]))
8067 	    /* As above for fmul/fadd, we can't store to st(0).  */
8068 	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
8070 	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
8075       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8078 	  if (STACK_TOP_P (operands[0]))
8079 	    p = "{rp\t%0, %1|p\t%1, %0}";
8081 	    p = "{p\t%1, %0|rp\t%0, %1}";
8083 	  if (STACK_TOP_P (operands[0]))
8084 	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
8086 	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
8091       if (STACK_TOP_P (operands[0]))
8093 	  if (STACK_TOP_P (operands[1]))
8094 	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
8096 	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
8099       else if (STACK_TOP_P (operands[1]))
8102 	    p = "{\t%1, %0|r\t%0, %1}";
8104 	    p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
8110 	    p = "{r\t%2, %0|\t%0, %2}";
8112 	    p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
8125 /* Output code to initialize control word copies used by
8126 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
8127 is set to control word rounding downwards. */
/* Emit code that stores the current x87 control word to NORMAL and a
   copy with rounding forced to truncation (RC bits = 11) to ROUND_DOWN,
   for use by the trunc?f?i patterns.  The fast path uses a partial-reg
   insv of 0xc into the RC field; otherwise the whole word is OR-ed with
   0xc00.  NOTE(review): the condition on line 8135 continues on a line
   not visible here.  */
8129 emit_i387_cw_initialization (rtx normal, rtx round_down)
8131   rtx reg = gen_reg_rtx (HImode);
8133   emit_insn (gen_x86_fnstcw_1 (normal));
8134   emit_move_insn (reg, normal);
8135   if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8137     emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8139     emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8140   emit_move_insn (round_down, reg);
8143 /* Output code for INSN to convert a float to a signed int. OPERANDS
8144 are the insn operands. The output may be [HSD]Imode and the input
8145 operand may be [SDX]Fmode. */
/* Output the asm for a float-to-signed-int truncation (see comment
   above).  Switches the control word to the truncating copy (%3),
   stores with fistp when the stack top dies or for DImode (which has
   no non-popping form, hence the extra fld), then restores the
   original control word (%2).  */
8148 output_fix_trunc (rtx insn, rtx *operands)
8150   int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8151   int dimode_p = GET_MODE (operands[0]) == DImode;
8153   /* Jump through a hoop or two for DImode, since the hardware has no
8154      non-popping instruction.  We used to do this a different way, but
8155      that was somewhat fragile and broke with post-reload splitters.  */
8156   if (dimode_p && !stack_top_dies)
8157     output_asm_insn ("fld\t%y1", operands);
8159   if (!STACK_TOP_P (operands[1]))
8162   if (GET_CODE (operands[0]) != MEM)
8165   output_asm_insn ("fldcw\t%3", operands);
8166   if (stack_top_dies || dimode_p)
8167     output_asm_insn ("fistp%z0\t%0", operands);
8169     output_asm_insn ("fist%z0\t%0", operands);
8170   output_asm_insn ("fldcw\t%2", operands);
8175 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8176 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8177 when fucom should be used. */
/* Output the asm for an FP compare (see comment above): SSE comis/ucomis
   when the operands are SSE registers, the fcompp/fucompp double-pop
   forms when both stack operands die, otherwise an entry from the ALT
   table indexed by (eflags_p, int-mode operand, unordered_p,
   stack_top_dies).  NOTE(review): many lines, including most returns
   and the table's first rows, are missing from this sampled view.  */
8180 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8183   rtx cmp_op0 = operands[0];
8184   rtx cmp_op1 = operands[1];
8185   int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8190       cmp_op1 = operands[2];
8194       if (GET_MODE (operands[0]) == SFmode)
8196 	  return "ucomiss\t{%1, %0|%0, %1}";
8198 	  return "comiss\t{%1, %0|%0, %1}";
8201 	  return "ucomisd\t{%1, %0|%0, %1}";
8203 	  return "comisd\t{%1, %0|%0, %1}";
8206   if (! STACK_TOP_P (cmp_op0))
8209   stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8211   if (STACK_REG_P (cmp_op1)
8213       && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8214       && REGNO (cmp_op1) != FIRST_STACK_REG)
8216       /* If both the top of the 387 stack dies, and the other operand
8217 	 is also a stack register that dies, then this must be a
8218 	 `fcompp' float compare */
8222 	  /* There is no double popping fcomi variant.  Fortunately,
8223 	     eflags is immune from the fstp's cc clobbering.  */
8225 	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8227 	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8235 	    return "fucompp\n\tfnstsw\t%0";
8237 	    return "fcompp\n\tfnstsw\t%0";
8250       /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
8252       static const char * const alt[24] =
8264 	"fcomi\t{%y1, %0|%0, %y1}",
8265 	"fcomip\t{%y1, %0|%0, %y1}",
8266 	"fucomi\t{%y1, %0|%0, %y1}",
8267 	"fucomip\t{%y1, %0|%0, %y1}",
8274 	"fcom%z2\t%y2\n\tfnstsw\t%0",
8275 	"fcomp%z2\t%y2\n\tfnstsw\t%0",
8276 	"fucom%z2\t%y2\n\tfnstsw\t%0",
8277 	"fucomp%z2\t%y2\n\tfnstsw\t%0",
8279 	"ficom%z2\t%y2\n\tfnstsw\t%0",
8280 	"ficomp%z2\t%y2\n\tfnstsw\t%0",
/* Build the 4-bit index into ALT from the flags computed above.  */
8288       mask  = eflags_p << 3;
8289       mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8290       mask |= unordered_p << 1;
8291       mask |= stack_top_dies;
/* Emit one element of a jump-table address vector: the local label
   LPREFIX<value> as a .long (or .quad in the conditional branch whose
   guard is not visible here, presumably 64-bit mode).  */
8304 ix86_output_addr_vec_elt (FILE *file, int value)
8306   const char *directive = ASM_LONG;
8311     directive = ASM_QUAD;
8317   fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one element of a PIC jump-table address-difference vector:
   either label-minus-label, label@GOTOFF where the assembler supports
   GOTOFF in data, a Darwin label-minus-function-base form, or a
   GOT-relative expression.  The guard conditions for the first and
   last branches are not visible in this view.  */
8321 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8324     fprintf (file, "%s%s%d-%s%d\n",
8325 	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
8326   else if (HAVE_AS_GOTOFF_IN_DATA)
8327     fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8329   else if (TARGET_MACHO)
8331       fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8332       machopic_output_function_base_name (file);
8333       fprintf(file, "\n");
8337     asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8338 		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8341 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* Emit either "mov $0, dest" or "xor dest, dest" to zero DEST (see
   comment above).  Widens sub-word registers to SImode to avoid prefix
   bytes, and when xor is profitable wraps the set in a PARALLEL with a
   flags clobber, since xor clobbers the condition codes.  */
8345 ix86_expand_clear (rtx dest)
8349   /* We play register width games, which are only valid after reload.  */
8350   if (!reload_completed)
8353   /* Avoid HImode and its attendant prefix byte.  */
8354   if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8355     dest = gen_rtx_REG (SImode, REGNO (dest));
8357   tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8359   /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
8360   if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8362       rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8363       tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8369 /* X is an unchanging MEM. If it is a constant pool reference, return
8370 the constant pool rtx, else NULL. */
/* X is an unchanging MEM; delegitimize its address and, if it refers
   to the constant pool, return the pool constant (else NULL, on a line
   not visible here).  */
8373 maybe_get_pool_constant (rtx x)
8375   x = ix86_delegitimize_address (XEXP (x, 0));
8377   if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8378     return get_pool_constant (x);
/* Expand a move of operands[1] into operands[0] in MODE, legitimizing
   TLS and PIC symbol sources, forbidding mem-to-mem moves, forcing
   large 64-bit constants and FP constants into registers/memory as
   needed, and finally emitting the SET.  NOTE(review): several guard
   lines are missing from this sampled view.  */
8384 ix86_expand_move (enum machine_mode mode, rtx operands[])
8386   int strict = (reload_in_progress || reload_completed);
8388   enum tls_model model;
/* TLS symbols must go through legitimize_tls_address first.  */
8393   model = tls_symbolic_operand (op1, Pmode);
8396       op1 = legitimize_tls_address (op1, model, true);
8397       op1 = force_operand (op1, op0);
8402   if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
/* Darwin (Mach-O) PIC handling; the TARGET_MACHO guard is not visible.  */
8407 	  rtx temp = ((reload_in_progress
8408 		       || ((op0 && GET_CODE (op0) == REG)
8410 		      ? op0 : gen_reg_rtx (Pmode));
8411 	  op1 = machopic_indirect_data_reference (op1, temp);
8412 	  op1 = machopic_legitimize_pic_address (op1, mode,
8413 						 temp == op1 ? 0 : temp);
8415       else if (MACHOPIC_INDIRECT)
8416 	op1 = machopic_indirect_data_reference (op1, 0);
8420       if (GET_CODE (op0) == MEM)
8421 	op1 = force_reg (Pmode, op1);
8423 	op1 = legitimize_address (op1, op1, Pmode);
8424 #endif /* TARGET_MACHO */
/* Reject mem-to-mem moves (except pushes, which the hardware can do).  */
8428       if (GET_CODE (op0) == MEM
8429 	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8430 	      || !push_operand (op0, mode))
8431 	  && GET_CODE (op1) == MEM)
8432 	op1 = force_reg (mode, op1);
8434       if (push_operand (op0, mode)
8435 	  && ! general_no_elim_operand (op1, mode))
8436 	op1 = copy_to_mode_reg (mode, op1);
8438       /* Force large constants in 64bit compilation into register
8439 	 to get them CSEed.  */
8440       if (TARGET_64BIT && mode == DImode
8441 	  && immediate_operand (op1, mode)
8442 	  && !x86_64_zero_extended_value (op1)
8443 	  && !register_operand (op0, mode)
8444 	  && optimize && !reload_completed && !reload_in_progress)
8445 	op1 = copy_to_mode_reg (mode, op1);
8447       if (FLOAT_MODE_P (mode))
8449 	  /* If we are loading a floating point constant to a register,
8450 	     force the value to memory now, since we'll get better code
8451 	     out the back end.  */
8455 	  else if (GET_CODE (op1) == CONST_DOUBLE)
8457 	      op1 = validize_mem (force_const_mem (mode, op1));
8458 	      if (!register_operand (op0, mode))
8460 		  rtx temp = gen_reg_rtx (mode);
8461 		  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8462 		  emit_move_insn (op0, temp);
8469   emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector-mode move: nonzero constants are spilled to the
   constant pool, and mem-to-mem moves are routed through a register,
   before emitting the SET.  The guard on line 8485 continues on a line
   not visible here (presumably the reload test).  */
8473 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8475   /* Force constants other than zero into memory.  We do not know how
8476      the instructions used to build constants modify the upper 64 bits
8477      of the register, once we have that information we may be able
8478      to handle some of them more efficiently.  */
8479   if ((reload_in_progress | reload_completed) == 0
8480       && register_operand (operands[0], mode)
8481       && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8482     operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8484   /* Make operand1 a register if it isn't already.  */
8486       && !register_operand (operands[0], mode)
8487       && !register_operand (operands[1], mode))
8489       rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8490       emit_move_insn (operands[0], temp);
8494   emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8497 /* Attempt to expand a binary operator. Make the expansion closer to the
8498 actual machine, then just general_operand, which will allow 3 separate
8499 memory references (one output, two input) in a single insn. */
/* Expand a two-operand binary operation CODE in MODE (see comment
   above): canonicalize commutative operands, keep at most one memory
   operand and make it match the destination, copy to registers when
   optimizing for CSE, then emit the SET — with a flags clobber except
   during reload (only PLUS is safe there).  NOTE(review): the operand
   swap body and parts of the reload branch are missing from this view.  */
8502 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8505   int matching_memory;
8506   rtx src1, src2, dst, op, clob;
8512   /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8513   if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8514       && (rtx_equal_p (dst, src2)
8515 	  || immediate_operand (src1, mode)))
8522   /* If the destination is memory, and we do not have matching source
8523      operands, do things in registers.  */
8524   matching_memory = 0;
8525   if (GET_CODE (dst) == MEM)
8527       if (rtx_equal_p (dst, src1))
8528 	matching_memory = 1;
8529       else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8530 	       && rtx_equal_p (dst, src2))
8531 	matching_memory = 2;
8533 	dst = gen_reg_rtx (mode);
8536   /* Both source operands cannot be in memory.  */
8537   if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8539       if (matching_memory != 2)
8540 	src2 = force_reg (mode, src2);
8542 	src1 = force_reg (mode, src1);
8545   /* If the operation is not commutable, source 1 cannot be a constant
8546      or non-matching memory.  */
8547   if ((CONSTANT_P (src1)
8548        || (!matching_memory && GET_CODE (src1) == MEM))
8549       && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8550     src1 = force_reg (mode, src1);
8552   /* If optimizing, copy to regs to improve CSE */
8553   if (optimize && ! no_new_pseudos)
8555       if (GET_CODE (dst) == MEM)
8556 	dst = gen_reg_rtx (mode);
8557       if (GET_CODE (src1) == MEM)
8558 	src1 = force_reg (mode, src1);
8559       if (GET_CODE (src2) == MEM)
8560 	src2 = force_reg (mode, src2);
8563   /* Emit the instruction.  */
8565   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8566   if (reload_in_progress)
8568       /* Reload doesn't know about the flags register, and doesn't know that
8569          it doesn't want to clobber it.  We can only do this with PLUS.  */
8576       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8577       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8580   /* Fix up the destination if needed.  */
8581   if (dst != operands[0])
8582     emit_move_insn (operands[0], dst);
8585 /* Return TRUE or FALSE depending on whether the binary operator meets the
8586 appropriate constraints. */
/* Predicate: do these operands satisfy the x86 two-address binary-op
   constraints?  Rejects mem/mem, constant first source for
   non-commutative ops, and memory destination/source without a
   matching operand.  The FALSE returns between the checks are not
   visible in this view.  */
8589 ix86_binary_operator_ok (enum rtx_code code,
8590 			 enum machine_mode mode ATTRIBUTE_UNUSED,
8593   /* Both source operands cannot be in memory.  */
8594   if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8596   /* If the operation is not commutable, source 1 cannot be a constant.  */
8597   if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8599   /* If the destination is memory, we must have a matching source operand.  */
8600   if (GET_CODE (operands[0]) == MEM
8601       && ! (rtx_equal_p (operands[0], operands[1])
8602 	    || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8603 		&& rtx_equal_p (operands[0], operands[2]))))
8605   /* If the operation is not commutable and the source 1 is memory, we must
8606      have a matching destination.  */
8607   if (GET_CODE (operands[1]) == MEM
8608       && GET_RTX_CLASS (code) != RTX_COMM_ARITH
8609       && ! rtx_equal_p (operands[0], operands[1]))
8614 /* Attempt to expand a unary operator. Make the expansion closer to the
8615 actual machine, then just general_operand, which will allow 2 separate
8616 memory references (one output, one input) in a single insn. */
/* Expand a unary operation CODE in MODE (see comment above): mirror of
   ix86_expand_binary_operator for one source operand.  NOT never
   clobbers the flags, so it (and reload) take the bare-SET path; other
   codes get a flags-clobber PARALLEL.  */
8619 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8622   int matching_memory;
8623   rtx src, dst, op, clob;
8628   /* If the destination is memory, and we do not have matching source
8629      operands, do things in registers.  */
8630   matching_memory = 0;
8631   if (GET_CODE (dst) == MEM)
8633       if (rtx_equal_p (dst, src))
8634 	matching_memory = 1;
8636 	dst = gen_reg_rtx (mode);
8639   /* When source operand is memory, destination must match.  */
8640   if (!matching_memory && GET_CODE (src) == MEM)
8641     src = force_reg (mode, src);
8643   /* If optimizing, copy to regs to improve CSE */
8644   if (optimize && ! no_new_pseudos)
8646       if (GET_CODE (dst) == MEM)
8647 	dst = gen_reg_rtx (mode);
8648       if (GET_CODE (src) == MEM)
8649 	src = force_reg (mode, src);
8652   /* Emit the instruction.  */
8654   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8655   if (reload_in_progress || code == NOT)
8657       /* Reload doesn't know about the flags register, and doesn't know that
8658          it doesn't want to clobber it.  */
8665       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8666       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8669   /* Fix up the destination if needed.  */
8670   if (dst != operands[0])
8671     emit_move_insn (operands[0], dst);
8674 /* Return TRUE or FALSE depending on whether the unary operator meets the
8675 appropriate constraints. */
/* Predicate: unary-op operands are OK unless one is memory and the
   source and destination do not match (two-address constraint).  The
   return statements are not visible in this view.  */
8678 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8679 			enum machine_mode mode ATTRIBUTE_UNUSED,
8680 			rtx operands[2] ATTRIBUTE_UNUSED)
8682   /* If one of operands is memory, source and destination must match.  */
8683   if ((GET_CODE (operands[0]) == MEM
8684        || GET_CODE (operands[1]) == MEM)
8685       && ! rtx_equal_p (operands[0], operands[1]))
8690 /* Return TRUE or FALSE depending on whether the first SET in INSN
8691 has source and destination with matching CC modes, and that the
8692 CC mode is at least as constrained as REQ_MODE. */
/* Return true iff the first SET in INSN is a COMPARE whose destination
   CC mode is at least as constrained as REQ_MODE (see comment above).
   NOTE(review): the inner switch on SET_MODE and its case labels are
   missing from this view; the visible lines are its arms.  */
8695 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8698   enum machine_mode set_mode;
8700   set = PATTERN (insn);
8701   if (GET_CODE (set) == PARALLEL)
8702     set = XVECEXP (set, 0, 0);
8703   if (GET_CODE (set) != SET)
8705   if (GET_CODE (SET_SRC (set)) != COMPARE)
8708   set_mode = GET_MODE (SET_DEST (set));
8712       if (req_mode != CCNOmode
8713 	  && (req_mode != CCmode
8714 	      || XEXP (SET_SRC (set), 1) != const0_rtx))
8718       if (req_mode == CCGCmode)
8722       if (req_mode == CCGOCmode || req_mode == CCNOmode)
8726       if (req_mode == CCZmode)
8736   return (GET_MODE (SET_SRC (set)) == set_mode);
8739 /* Generate insn patterns to do an integer compare of OPERANDS. */
/* Emit "flags = COMPARE (op0, op1)" in the CC mode selected for CODE
   and return the comparison rtx (CODE flags 0) that the bcc/scc/cmov
   consumer should test.  */
8742 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8744   enum machine_mode cmpmode;
8747   cmpmode = SELECT_CC_MODE (code, op0, op1);
8748   flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8750   /* This is very simple, but making the interface the same as in the
8751      FP case makes the rest of the code easier.  */
8752   tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8753   emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8755   /* Return the test that should be put into the flags user, i.e.
8756      the bcc, scc, or cmov instruction.  */
8757   return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8760 /* Figure out whether to use ordered or unordered fp comparisons.
8761 Return the appropriate mode to use. */
/* Return the CC mode for FP comparisons: unordered (non-trapping,
   CCFPUmode) under TARGET_IEEE_FP, else CCFPmode.  CODE is currently
   ignored — see the rationale below.  */
8764 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8766   /* ??? In order to make all comparisons reversible, we do all comparisons
8767      non-trapping when compiling for IEEE.  Once gcc is able to distinguish
8768      all forms trapping and nontrapping comparisons, we can make inequality
8769      comparisons trapping again, since it results in better code when using
8770      FCOM based compares.  */
8771   return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the CC mode produced by comparing OP0 and OP1 with CODE:
   FP modes delegate to ix86_fp_compare_mode; integer codes pick the
   least-constrained mode whose flag bits suffice.  NOTE(review): the
   return statements for the individual cases are missing from this
   sampled view.  */
8775 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8777   if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8778     return ix86_fp_compare_mode (code);
8781       /* Only zero flag is needed.  */
8783     case NE:			/* ZF!=0 */
8785       /* Codes needing carry flag.  */
8786     case GEU:			/* CF=0 */
8787     case GTU:			/* CF=0 & ZF=0 */
8788     case LTU:			/* CF=1 */
8789     case LEU:			/* CF=1 | ZF=1 */
8791       /* Codes possibly doable only with sign flag when
8792          comparing against zero.  */
8793     case GE:			/* SF=OF   or   SF=0 */
8794     case LT:			/* SF<>OF  or   SF=1 */
8795       if (op1 == const0_rtx)
8798 	/* For other cases Carry flag is not required.  */
8800       /* Codes doable only with sign flag when comparing
8801          against zero, but we miss jump instruction for it
8802          so we need to use relational tests against overflow
8803          that thus needs to be zero.  */
8804     case GT:			/* ZF=0 & SF=OF */
8805     case LE:			/* ZF=1 | SF<>OF */
8806       if (op1 == const0_rtx)
8810       /* strcmp pattern do (use flags) and combine may ask us for proper
8819 /* Return the fixed registers used for condition codes. */
/* Return (via *P1/*P2) the fixed condition-code registers (see comment
   above).  NOTE(review): the body of this function is not visible in
   this view.  */
8822 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8829 /* If two condition code modes are compatible, return a condition code
8830 mode which is compatible with both. Otherwise, return
/* Return a CC mode compatible with both M1 and M2 (see comment above):
   non-CC modes are rejected, and CCGCmode/CCGOCmode pair up (the merged
   result and the remaining cases are on lines not visible here).  */
8833 static enum machine_mode
8834 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8839   if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8842   if ((m1 == CCGCmode && m2 == CCGOCmode)
8843       || (m1 == CCGOCmode && m2 == CCGCmode))
8871       /* These are only compatible with themselves, which we already
8877 /* Return true if we should use an FCOMI instruction for this fp comparison. */
/* Return true if fcomi is the cheapest way to do this fp comparison,
   either directly or with the operands swapped.  */
8880 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8882   enum rtx_code swapped_code = swap_condition (code);
8883   return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8884 	  || (ix86_fp_comparison_cost (swapped_code)
8885 	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
8888 /* Swap, force into registers, or otherwise massage the two operands
8889 to a fp comparison. The operands are updated in place; the new
8890 comparison code is returned. */
/* Massage the operands of an fp comparison (see comment above): force
   both into registers for unordered/XFmode/fcomi compares, otherwise
   arrange for op1 to be the memory/constant operand (swapping and
   adjusting CODE as needed), and swap again if that makes the compare
   cheaper.  Returns the possibly-swapped comparison code.
   NOTE(review): the is_sse branch close and final return are on lines
   missing from this view.  */
8892 static enum rtx_code
8893 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8895   enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8896   rtx op0 = *pop0, op1 = *pop1;
8897   enum machine_mode op_mode = GET_MODE (op0);
8898   int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8900   /* All of the unordered compare instructions only work on registers.
8901      The same is true of the XFmode compare instructions.  The same is
8902      true of the fcomi compare instructions.  */
8905       && (fpcmp_mode == CCFPUmode
8906 	  || op_mode == XFmode
8907 	  || ix86_use_fcomi_compare (code)))
8909       op0 = force_reg (op_mode, op0);
8910       op1 = force_reg (op_mode, op1);
8914       /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
8915 	 things around if they appear profitable, otherwise force op0
8918       if (standard_80387_constant_p (op0) == 0
8919 	  || (GET_CODE (op0) == MEM
8920 	      && ! (standard_80387_constant_p (op1) == 0
8921 		    || GET_CODE (op1) == MEM)))
8924 	  tmp = op0, op0 = op1, op1 = tmp;
8925 	  code = swap_condition (code);
8928       if (GET_CODE (op0) != REG)
8929 	op0 = force_reg (op_mode, op0);
8931       if (CONSTANT_P (op1))
8933 	  if (standard_80387_constant_p (op1))
8934 	    op1 = force_reg (op_mode, op1);
8936 	    op1 = validize_mem (force_const_mem (op_mode, op1));
8940   /* Try to rearrange the comparison to make it cheaper.  */
8941   if (ix86_fp_comparison_cost (code)
8942       > ix86_fp_comparison_cost (swap_condition (code))
8943       && (GET_CODE (op1) == REG || !no_new_pseudos))
8946       tmp = op0, op0 = op1, op1 = tmp;
8947       code = swap_condition (code);
8948       if (GET_CODE (op0) != REG)
8949 	op0 = force_reg (op_mode, op0);
8957 /* Convert comparison codes we use to represent FP comparison to integer
8958 code that will result in proper branch. Return UNKNOWN if no such code
/* Body (the code-mapping switch) is elided in this excerpt.  */
8960 static enum rtx_code
8961 ix86_fp_compare_code_to_integer (enum rtx_code code)
8990 /* Split comparison code CODE into comparisons we can do using branch
8991 instructions. BYPASS_CODE is comparison code for branch that will
8992 branch around FIRST_CODE and SECOND_CODE. If some of branches
8993 is not required, set value to NIL.
8994 We never require more than two branches. */
8996 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8997 enum rtx_code *first_code,
8998 enum rtx_code *second_code)
9004 /* The fcomi comparison sets flags as follows:
/* First group: codes expressible with a single branch; no bypass or
   second branch needed.  */
9014 case GT: /* GTU - CF=0 & ZF=0 */
9015 case GE: /* GEU - CF=0 */
9016 case ORDERED: /* PF=0 */
9017 case UNORDERED: /* PF=1 */
9018 case UNEQ: /* EQ - ZF=1 */
9019 case UNLT: /* LTU - CF=1 */
9020 case UNLE: /* LEU - CF=1 | ZF=1 */
9021 case LTGT: /* EQ - ZF=0 */
/* Codes below give the wrong answer when the operands are unordered
   (a NaN is involved), so they get an UNORDERED bypass branch...  */
9023 case LT: /* LTU - CF=1 - fails on unordered */
9025 *bypass_code = UNORDERED;
9027 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
9029 *bypass_code = UNORDERED;
9031 case EQ: /* EQ - ZF=1 - fails on unordered */
9033 *bypass_code = UNORDERED;
/* ...while these need a second branch taken when unordered.  */
9035 case NE: /* NE - ZF=0 - fails on unordered */
9037 *second_code = UNORDERED;
9039 case UNGE: /* GEU - CF=0 - fails on unordered */
9041 *second_code = UNORDERED;
9043 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
9045 *second_code = UNORDERED;
/* Without strict IEEE semantics the NaN-handling branches are dropped
   (handling elided in this excerpt).  */
9050 if (!TARGET_IEEE_FP)
9057 /* Return cost of comparison done fcom + arithmetics operations on AX.
9058 All following functions do use number of instructions as a cost metrics.
9059 In future this should be tweaked to compute bytes for optimize_size and
9060 take into account performance of various instructions on various CPUs. */
9062 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
/* Non-IEEE mode returns early (return value elided in this excerpt).  */
9064 if (!TARGET_IEEE_FP)
9066 /* The cost of code output by ix86_expand_fp_compare. */
9094 /* Return cost of comparison done using fcomi operation.
9095 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9097 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9099 enum rtx_code bypass_code, first_code, second_code;
9100 /* Return arbitrarily high cost when instruction is not supported - this
9101 prevents gcc from using it. */
9104 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Base cost 2 (fcomi + jump), plus 1 if an extra bypass/second branch
   is required for this code.  */
9105 return (bypass_code != NIL || second_code != NIL) + 2;
9108 /* Return cost of comparison done using sahf operation.
9109 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9111 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9113 enum rtx_code bypass_code, first_code, second_code;
9114 /* Return arbitrarily high cost when instruction is not preferred - this
9115 avoids gcc from using it. */
9116 if (!TARGET_USE_SAHF && !optimize_size)
9118 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Base cost 3 (fnstsw + sahf + jump), plus 1 for an extra branch.  */
9119 return (bypass_code != NIL || second_code != NIL) + 3;
9122 /* Compute cost of the comparison done using any method.
9123 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9125 ix86_fp_comparison_cost (enum rtx_code code)
9127 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9130 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9131 sahf_cost = ix86_fp_comparison_sahf_cost (code);
/* Take the minimum of the three strategies; arithmetics is the
   starting candidate.  */
9133 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9134 if (min > sahf_cost)
9136 if (min > fcomi_cost)
9141 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* Emits the compare sequence and returns the flags-user rtx (the
   comparison to put inside bcc/scc/cmov).  SECOND_TEST / BYPASS_TEST,
   when non-NULL, receive extra comparisons the caller must also emit.  */
9144 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9145 rtx *second_test, rtx *bypass_test)
9147 enum machine_mode fpcmp_mode, intcmp_mode;
9149 int cost = ix86_fp_comparison_cost (code);
9150 enum rtx_code bypass_code, first_code, second_code;
9152 fpcmp_mode = ix86_fp_compare_mode (code);
9153 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9156 *second_test = NULL_RTX;
9158 *bypass_test = NULL_RTX;
9160 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9162 /* Do fcomi/sahf based test when profitable. */
/* Only usable when the caller gave us somewhere to put the extra tests.  */
9163 if ((bypass_code == NIL || bypass_test)
9164 && (second_code == NIL || second_test)
9165 && ix86_fp_comparison_arithmetics_cost (code) > cost)
/* fcomi path: compare straight into the flags register.  */
9169 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9170 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* sahf path: fnstsw into a scratch, then sahf loads AH into flags.  */
9176 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9177 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9179 scratch = gen_reg_rtx (HImode);
9180 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9181 emit_insn (gen_x86_sahf_1 (scratch));
9184 /* The FP codes work out to act like unsigned. */
9185 intcmp_mode = fpcmp_mode;
9187 if (bypass_code != NIL)
9188 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9189 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9191 if (second_code != NIL)
9192 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9193 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Arithmetic fallback path: fnstsw + bit tests on AH.  */
9198 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9199 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9200 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9202 scratch = gen_reg_rtx (HImode);
9203 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9205 /* In the unordered case, we have to check C2 for NaN's, which
9206 doesn't happen to work out to anything nice combination-wise.
9207 So do some bit twiddling on the value we've got in AH to come
9208 up with an appropriate set of condition codes. */
9210 intcmp_mode = CCNOmode;
/* Per-code mask constants below test the 387 status bits C0/C2/C3
   (0x01, 0x04, 0x40) packed in AH; 0x45 = C0|C2|C3.  */
9215 if (code == GT || !TARGET_IEEE_FP)
9217 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9222 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9223 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9224 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9225 intcmp_mode = CCmode;
9231 if (code == LT && TARGET_IEEE_FP)
9233 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9234 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9235 intcmp_mode = CCmode;
9240 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9246 if (code == GE || !TARGET_IEEE_FP)
9248 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9253 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9254 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9261 if (code == LE && TARGET_IEEE_FP)
9263 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9264 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9265 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9266 intcmp_mode = CCmode;
9271 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9277 if (code == EQ && TARGET_IEEE_FP)
9279 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9280 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9281 intcmp_mode = CCmode;
9286 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9293 if (code == NE && TARGET_IEEE_FP)
9295 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9296 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9302 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
/* ORDERED/UNORDERED: only C2 (0x04, the NaN bit) matters.  */
9308 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9312 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9321 /* Return the test that should be put into the flags user, i.e.
9322 the bcc, scc, or cmov instruction. */
9323 return gen_rtx_fmt_ee (code, VOIDmode,
9324 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand the comparison held in ix86_compare_op0/op1 with code CODE,
   dispatching to the FP or integer expander, and return the flags-user
   comparison rtx.  */
9329 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9332 op0 = ix86_compare_op0;
9333 op1 = ix86_compare_op1;
9336 *second_test = NULL_RTX;
9338 *bypass_test = NULL_RTX;
9340 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9341 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9342 second_test, bypass_test)
9344 ret = ix86_expand_int_compare (code, op0, op1);
9349 /* Return true if the CODE will result in nontrivial jump sequence. */
9351 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9353 enum rtx_code bypass_code, first_code, second_code;
9356 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Nontrivial means more than one branch is needed (bypass or second).  */
9357 return bypass_code != NIL || second_code != NIL;
/* Expand a conditional branch on CODE (over ix86_compare_op0/op1) to
   LABEL, handling integer, FP, and split-DImode cases by operand mode.  */
9361 ix86_expand_branch (enum rtx_code code, rtx label)
9365 switch (GET_MODE (ix86_compare_op0))
/* Simple integer modes: one compare + one conditional jump.  */
9371 tmp = ix86_expand_compare (code, NULL, NULL);
9372 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9373 gen_rtx_LABEL_REF (VOIDmode, label),
9375 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
/* Floating-point modes.  */
9384 enum rtx_code bypass_code, first_code, second_code;
9386 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9389 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9391 /* Check whether we will use the natural sequence with one jump. If
9392 so, we can expand jump early. Otherwise delay expansion by
9393 creating compound insn to not confuse optimizers. */
9394 if (bypass_code == NIL && second_code == NIL
9397 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9398 gen_rtx_LABEL_REF (VOIDmode, label),
/* Multi-branch case: wrap everything in one PARALLEL with the needed
   clobbers so later passes see a single jump insn.  */
9403 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9404 ix86_compare_op0, ix86_compare_op1);
9405 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9406 gen_rtx_LABEL_REF (VOIDmode, label),
9408 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9410 use_fcomi = ix86_use_fcomi_compare (code);
/* Non-fcomi sequences need an extra HImode scratch (for fnstsw).  */
9411 vec = rtvec_alloc (3 + !use_fcomi);
9412 RTVEC_ELT (vec, 0) = tmp;
/* Hard regs 17/18 are the flags/fpsr registers being clobbered --
   TODO confirm numbering against the full register definitions.  */
9414 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9416 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9419 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9421 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9429 /* Expand DImode branch into multiple compare+branch. */
9431 rtx lo[2], hi[2], label2;
9432 enum rtx_code code1, code2, code3;
/* Canonicalize: keep any constant as the second operand.  */
9434 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9436 tmp = ix86_compare_op0;
9437 ix86_compare_op0 = ix86_compare_op1;
9438 ix86_compare_op1 = tmp;
9439 code = swap_condition (code);
9441 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9442 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9444 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9445 avoid two branches. This costs one extra insn, so disable when
9446 optimizing for size. */
9448 if ((code == EQ || code == NE)
9450 || hi[1] == const0_rtx || lo[1] == const0_rtx))
/* XOR against zero halves is skipped entirely.  */
9455 if (hi[1] != const0_rtx)
9456 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9457 NULL_RTX, 0, OPTAB_WIDEN);
9460 if (lo[1] != const0_rtx)
9461 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9462 NULL_RTX, 0, OPTAB_WIDEN);
9464 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9465 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse as a simple SImode compare against zero.  */
9467 ix86_compare_op0 = tmp;
9468 ix86_compare_op1 = const0_rtx;
9469 ix86_expand_branch (code, label);
9473 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9474 op1 is a constant and the low word is zero, then we can just
9475 examine the high word. */
9477 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9480 case LT: case LTU: case GE: case GEU:
9481 ix86_compare_op0 = hi[0];
9482 ix86_compare_op1 = hi[1];
9483 ix86_expand_branch (code, label);
9489 /* Otherwise, we need two or three jumps. */
9491 label2 = gen_label_rtx ();
9494 code2 = swap_condition (code);
9495 code3 = unsigned_condition (code);
9499 case LT: case GT: case LTU: case GTU:
9502 case LE: code1 = LT; code2 = GT; break;
9503 case GE: code1 = GT; code2 = LT; break;
9504 case LEU: code1 = LTU; code2 = GTU; break;
9505 case GEU: code1 = GTU; code2 = LTU; break;
/* NIL marks a branch as unnecessary for EQ/NE.  */
9507 case EQ: code1 = NIL; code2 = NE; break;
9508 case NE: code2 = NIL; break;
9516 * if (hi(a) < hi(b)) goto true;
9517 * if (hi(a) > hi(b)) goto false;
9518 * if (lo(a) < lo(b)) goto true;
9522 ix86_compare_op0 = hi[0];
9523 ix86_compare_op1 = hi[1];
9526 ix86_expand_branch (code1, label);
9528 ix86_expand_branch (code2, label2);
/* Low-word comparison is always unsigned (code3).  */
9530 ix86_compare_op0 = lo[0];
9531 ix86_compare_op1 = lo[1];
9532 ix86_expand_branch (code3, label);
9535 emit_label (label2);
9544 /* Split branch based on floating point condition. */
/* Emits up to three conditional jumps (bypass, main, second) choosing
   between TARGET1 and TARGET2, attaching REG_BR_PROB notes when the
   caller supplied a split_branch_probability.  */
9546 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9547 rtx target1, rtx target2, rtx tmp)
9550 rtx label = NULL_RTX;
9552 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so target1 is the taken target (pc_rtx fallthrough in
   target2), reversing the condition if needed.  */
9555 if (target2 != pc_rtx)
9558 code = reverse_condition_maybe_unordered (code);
9563 condition = ix86_expand_fp_compare (code, op1, op2,
9564 tmp, &second, &bypass);
9566 if (split_branch_probability >= 0)
9568 /* Distribute the probabilities across the jumps.
9569 Assume the BYPASS and SECOND to be always test
9571 probability = split_branch_probability;
9573 /* Value of 1 is low enough to make no need for probability
9574 to be updated. Later we may run some experiments and see
9575 if unordered values are more frequent in practice. */
9577 bypass_probability = 1;
9579 second_probability = 1;
/* Bypass branch: skip the main test entirely (NaN cases).  */
9581 if (bypass != NULL_RTX)
9583 label = gen_label_rtx ();
9584 i = emit_jump_insn (gen_rtx_SET
9586 gen_rtx_IF_THEN_ELSE (VOIDmode,
9588 gen_rtx_LABEL_REF (VOIDmode,
9591 if (bypass_probability >= 0)
9593 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9594 GEN_INT (bypass_probability),
/* Main conditional jump.  */
9597 i = emit_jump_insn (gen_rtx_SET
9599 gen_rtx_IF_THEN_ELSE (VOIDmode,
9600 condition, target1, target2)));
9601 if (probability >= 0)
9603 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9604 GEN_INT (probability),
/* Optional second jump for codes needing two tests.  */
9606 if (second != NULL_RTX)
9608 i = emit_jump_insn (gen_rtx_SET
9610 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9612 if (second_probability >= 0)
9614 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9615 GEN_INT (second_probability),
9618 if (label != NULL_RTX)
/* Expand a setcc of CODE into DEST (a QImode register).  Returns 1 on
   success ("DONE") or 0 when the expander must FAIL (DImode on 32-bit).  */
9623 ix86_expand_setcc (enum rtx_code code, rtx dest)
9625 rtx ret, tmp, tmpreg, equiv;
9626 rtx second_test, bypass_test;
9628 if (GET_MODE (ix86_compare_op0) == DImode
9630 return 0; /* FAIL */
9632 if (GET_MODE (dest) != QImode)
9635 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9636 PUT_MODE (ret, QImode);
9641 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
/* FP comparisons may need a second setcc combined with and/or.  */
9642 if (bypass_test || second_test)
9644 rtx test = second_test;
9646 rtx tmp2 = gen_reg_rtx (QImode);
/* Bypass tests are combined inverted (AND of the reversed condition);
   second tests are OR-ed in directly.  */
9653 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9655 PUT_MODE (test, QImode);
9656 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9659 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9661 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9664 /* Attach a REG_EQUAL note describing the comparison result. */
9665 equiv = simplify_gen_relational (code, QImode,
9666 GET_MODE (ix86_compare_op0),
9667 ix86_compare_op0, ix86_compare_op1);
9668 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9670 return 1; /* DONE */
9673 /* Expand comparison setting or clearing carry flag. Return true when
9674 successful and set pop for the operation. */
9676 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9678 enum machine_mode mode =
9679 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9681 /* Do not handle DImode compares that go trought special path. Also we can't
9682 deal with FP compares yet. This is possible to add. */
9683 if ((mode == DImode && !TARGET_64BIT))
9685 if (FLOAT_MODE_P (mode))
9687 rtx second_test = NULL, bypass_test = NULL;
9688 rtx compare_op, compare_seq;
9690 /* Shortcut: following common codes never translate into carry flag compares. */
9691 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9692 || code == ORDERED || code == UNORDERED)
9695 /* These comparisons require zero flag; swap operands so they won't. */
9696 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9702 code = swap_condition (code);
9705 /* Try to expand the comparison and verify that we end up with carry flag
9706 based comparison. This is fails to be true only when we decide to expand
9707 comparison using arithmetic that is not too common scenario. */
9709 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9710 &second_test, &bypass_test);
9711 compare_seq = get_insns ();
/* Extra tests mean the result is not a plain carry-flag compare.  */
9714 if (second_test || bypass_test)
9716 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9717 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9718 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9720 code = GET_CODE (compare_op);
/* Only LTU/GEU are carry-flag conditions.  */
9721 if (code != LTU && code != GEU)
9723 emit_insn (compare_seq);
9727 if (!INTEGRAL_MODE_P (mode))
9735 /* Convert a==0 into (unsigned)a<1. */
9738 if (op1 != const0_rtx)
9741 code = (code == EQ ? LTU : GEU);
9744 /* Convert a>b into b<a or a>=b-1. */
9747 if (GET_CODE (op1) == CONST_INT)
9749 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9750 /* Bail out on overflow. We still can swap operands but that
9751 would force loading of the constant into register. */
9752 if (op1 == const0_rtx
9753 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9755 code = (code == GTU ? GEU : LTU);
9762 code = (code == GTU ? LTU : GEU);
9766 /* Convert a>=0 into (unsigned)a<0x80000000. */
/* Sign-bit tricks don't apply to DImode or nonzero op1 here.  */
9769 if (mode == DImode || op1 != const0_rtx)
9771 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9772 code = (code == LT ? GEU : LTU);
9776 if (mode == DImode || op1 != constm1_rtx)
9778 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9779 code = (code == LE ? GEU : LTU);
9785 /* Swapping operands may cause constant to appear as first operand. */
9786 if (!nonimmediate_operand (op0, VOIDmode))
9790 op0 = force_reg (mode, op0);
9792 ix86_compare_op0 = op0;
9793 ix86_compare_op1 = op1;
9794 *pop = ix86_expand_compare (code, NULL, NULL);
/* Must have reduced to a carry-flag condition by now.  */
9795 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
/* Expand an integer conditional move (operands[0] = operands[1] ?
   operands[2] : operands[3], with the comparison in ix86_compare_op0/op1).
   Tries branch-free sequences (sbb, setcc+lea, setcc+and) before falling
   back to a real cmov.  Returns 1 for DONE, 0 for FAIL.  */
9801 ix86_expand_int_movcc (rtx operands[])
9803 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9804 rtx compare_seq, compare_op;
9805 rtx second_test, bypass_test;
9806 enum machine_mode mode = GET_MODE (operands[0]);
/* FIX: dropped a stray empty statement (was "= false;;").  */
9807 bool sign_bit_compare_p = false;
9810 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9811 compare_seq = get_insns ();
9814 compare_code = GET_CODE (compare_op);
9816 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9817 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9818 sign_bit_compare_p = true;
9820 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9821 HImode insns, we'd be swallowed in word prefix ops. */
9823 if ((mode != HImode || TARGET_FAST_PREFIX)
9824 && (mode != DImode || TARGET_64BIT)
9825 && GET_CODE (operands[2]) == CONST_INT
9826 && GET_CODE (operands[3]) == CONST_INT)
9828 rtx out = operands[0];
9829 HOST_WIDE_INT ct = INTVAL (operands[2]);
9830 HOST_WIDE_INT cf = INTVAL (operands[3]);
9834 /* Sign bit compares are better done using shifts than we do by using
9836 if (sign_bit_compare_p
9837 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9838 ix86_compare_op1, &compare_op))
9840 /* Detect overlap between destination and compare sources. */
9843 if (!sign_bit_compare_p)
9847 compare_code = GET_CODE (compare_op);
9849 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9850 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9853 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9856 /* To simplify rest of code, restrict to the GEU case. */
9857 if (compare_code == LTU)
9859 HOST_WIDE_INT tmp = ct;
9862 compare_code = reverse_condition (compare_code);
9863 code = reverse_condition (code);
9868 PUT_CODE (compare_op,
9869 reverse_condition_maybe_unordered
9870 (GET_CODE (compare_op)));
9872 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9876 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9877 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9878 tmp = gen_reg_rtx (mode);
/* sbb-based 0/-1 mask from the carry flag.  */
9881 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9883 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9887 if (code == GT || code == GE)
9888 code = reverse_condition (code);
9891 HOST_WIDE_INT tmp = ct;
9896 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9897 ix86_compare_op1, VOIDmode, 0, -1);
9910 tmp = expand_simple_binop (mode, PLUS,
9912 copy_rtx (tmp), 1, OPTAB_DIRECT);
9923 tmp = expand_simple_binop (mode, IOR,
9925 copy_rtx (tmp), 1, OPTAB_DIRECT);
9927 else if (diff == -1 && ct)
9937 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9939 tmp = expand_simple_binop (mode, PLUS,
9940 copy_rtx (tmp), GEN_INT (cf),
9941 copy_rtx (tmp), 1, OPTAB_DIRECT);
9949 * andl cf - ct, dest
9959 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9962 tmp = expand_simple_binop (mode, AND,
9964 gen_int_mode (cf - ct, mode),
9965 copy_rtx (tmp), 1, OPTAB_DIRECT);
9967 tmp = expand_simple_binop (mode, PLUS,
9968 copy_rtx (tmp), GEN_INT (ct),
9969 copy_rtx (tmp), 1, OPTAB_DIRECT);
9972 if (!rtx_equal_p (tmp, out))
9973 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9975 return 1; /* DONE */
9981 tmp = ct, ct = cf, cf = tmp;
9983 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9985 /* We may be reversing unordered compare to normal compare, that
9986 is not valid in general (we may convert non-trapping condition
9987 to trapping one), however on i386 we currently emit all
9988 comparisons unordered. */
9989 compare_code = reverse_condition_maybe_unordered (compare_code);
9990 code = reverse_condition_maybe_unordered (code);
9994 compare_code = reverse_condition (compare_code);
9995 code = reverse_condition (code);
10000 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
10001 && GET_CODE (ix86_compare_op1) == CONST_INT)
10003 if (ix86_compare_op1 == const0_rtx
10004 && (code == LT || code == GE))
10005 compare_code = code;
10006 else if (ix86_compare_op1 == constm1_rtx)
10010 else if (code == GT)
10015 /* Optimize dest = (op0 < 0) ? -1 : cf. */
10016 if (compare_code != NIL
10017 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
10018 && (cf == -1 || ct == -1))
10020 /* If lea code below could be used, only optimize
10021 if it results in a 2 insn sequence. */
10023 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
10024 || diff == 3 || diff == 5 || diff == 9)
10025 || (compare_code == LT && ct == -1)
10026 || (compare_code == GE && cf == -1))
10029 * notl op1 (if necessary)
10037 code = reverse_condition (code);
10040 out = emit_store_flag (out, code, ix86_compare_op0,
10041 ix86_compare_op1, VOIDmode, 0, -1);
10043 out = expand_simple_binop (mode, IOR,
10045 out, 1, OPTAB_DIRECT);
10046 if (out != operands[0])
10047 emit_move_insn (operands[0], out);
10049 return 1; /* DONE */
10054 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
10055 || diff == 3 || diff == 5 || diff == 9)
10056 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
10057 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
10063 * lea cf(dest*(ct-cf)),dest
10067 * This also catches the degenerate setcc-only case.
10073 out = emit_store_flag (out, code, ix86_compare_op0,
10074 ix86_compare_op1, VOIDmode, 0, 1);
10077 /* On x86_64 the lea instruction operates on Pmode, so we need
10078 to get arithmetics done in proper mode to match. */
10080 tmp = copy_rtx (out);
10084 out1 = copy_rtx (out);
10085 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10089 tmp = gen_rtx_PLUS (mode, tmp, out1);
10095 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10098 if (!rtx_equal_p (tmp, out))
10101 out = force_operand (tmp, copy_rtx (out));
10103 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10105 if (!rtx_equal_p (out, operands[0]))
10106 emit_move_insn (operands[0], copy_rtx (out));
10108 return 1; /* DONE */
10112 * General case: Jumpful:
10113 * xorl dest,dest cmpl op1, op2
10114 * cmpl op1, op2 movl ct, dest
10115 * setcc dest jcc 1f
10116 * decl dest movl cf, dest
10117 * andl (cf-ct),dest 1:
10120 * Size 20. Size 14.
10122 * This is reasonably steep, but branch mispredict costs are
10123 * high on modern cpus, so consider failing only if optimizing
10127 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10128 && BRANCH_COST >= 2)
10134 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10135 /* We may be reversing unordered compare to normal compare,
10136 that is not valid in general (we may convert non-trapping
10137 condition to trapping one), however on i386 we currently
10138 emit all comparisons unordered. */
10139 code = reverse_condition_maybe_unordered (code);
10142 code = reverse_condition (code);
10143 if (compare_code != NIL)
10144 compare_code = reverse_condition (compare_code);
10148 if (compare_code != NIL)
10150 /* notl op1 (if needed)
10155 For x < 0 (resp. x <= -1) there will be no notl,
10156 so if possible swap the constants to get rid of the
10158 True/false will be -1/0 while code below (store flag
10159 followed by decrement) is 0/-1, so the constants need
10160 to be exchanged once more. */
10162 if (compare_code == GE || !cf)
10164 code = reverse_condition (code);
10169 HOST_WIDE_INT tmp = cf;
10174 out = emit_store_flag (out, code, ix86_compare_op0,
10175 ix86_compare_op1, VOIDmode, 0, -1);
10179 out = emit_store_flag (out, code, ix86_compare_op0,
10180 ix86_compare_op1, VOIDmode, 0, 1);
10182 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10183 copy_rtx (out), 1, OPTAB_DIRECT);
10186 out = expand_simple_binop (mode, AND, copy_rtx (out),
10187 gen_int_mode (cf - ct, mode),
10188 copy_rtx (out), 1, OPTAB_DIRECT);
10190 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10191 copy_rtx (out), 1, OPTAB_DIRECT);
10192 if (!rtx_equal_p (out, operands[0]))
10193 emit_move_insn (operands[0], copy_rtx (out));
10195 return 1; /* DONE */
10199 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10201 /* Try a few things more with specific constants and a variable. */
10204 rtx var, orig_out, out, tmp;
10206 if (BRANCH_COST <= 2)
10207 return 0; /* FAIL */
10209 /* If one of the two operands is an interesting constant, load a
10210 constant with the above and mask it in with a logical operation. */
10212 if (GET_CODE (operands[2]) == CONST_INT)
10215 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10216 operands[3] = constm1_rtx, op = and_optab;
10217 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10218 operands[3] = const0_rtx, op = ior_optab;
10220 return 0; /* FAIL */
10222 else if (GET_CODE (operands[3]) == CONST_INT)
/* FIX: the recursion guard must test the *variable* operand
   (operands[2]), mirroring the operands[3] tests in the branch
   above; testing operands[3] here was vacuous since it is known
   to be -1 at this point.  */
10225 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10226 operands[2] = constm1_rtx, op = and_optab;
10227 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
10228 operands[2] = const0_rtx, op = ior_optab;
10230 return 0; /* FAIL */
10233 return 0; /* FAIL */
10235 orig_out = operands[0];
10236 tmp = gen_reg_rtx (mode);
10239 /* Recurse to get the constant loaded. */
10240 if (ix86_expand_int_movcc (operands) == 0)
10241 return 0; /* FAIL */
10243 /* Mask in the interesting variable. */
10244 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10246 if (!rtx_equal_p (out, orig_out))
10247 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10249 return 1; /* DONE */
10253 * For comparison with above,
/* Final cmov path: force operands into registers as the cmov
   patterns require.  */
10263 if (! nonimmediate_operand (operands[2], mode))
10264 operands[2] = force_reg (mode, operands[2]);
10265 if (! nonimmediate_operand (operands[3], mode))
10266 operands[3] = force_reg (mode, operands[3]);
10268 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10270 rtx tmp = gen_reg_rtx (mode);
10271 emit_move_insn (tmp, operands[3]);
10274 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10276 rtx tmp = gen_reg_rtx (mode);
10277 emit_move_insn (tmp, operands[2]);
10281 if (! register_operand (operands[2], VOIDmode)
10283 || ! register_operand (operands[3], VOIDmode)))
10284 operands[2] = force_reg (mode, operands[2]);
10287 && ! register_operand (operands[3], VOIDmode))
10288 operands[3] = force_reg (mode, operands[3]);
10290 emit_insn (compare_seq);
10291 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10292 gen_rtx_IF_THEN_ELSE (mode,
10293 compare_op, operands[2],
/* Extra cmovs fold in the bypass/second tests when present.  */
10296 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10297 gen_rtx_IF_THEN_ELSE (mode,
10299 copy_rtx (operands[3]),
10300 copy_rtx (operands[0]))));
10302 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10303 gen_rtx_IF_THEN_ELSE (mode,
10305 copy_rtx (operands[2]),
10306 copy_rtx (operands[0]))));
10308 return 1; /* DONE */
10312 ix86_expand_fp_movcc (rtx operands[])
10314 enum rtx_code code;
10316 rtx compare_op, second_test, bypass_test;
10318 /* For SF/DFmode conditional moves based on comparisons
10319 in same mode, we may want to use SSE min/max instructions. */
10320 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10321 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10322 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10323 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
10324 && (!TARGET_IEEE_FP
10325 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10326 /* We may be called from the post-reload splitter. */
10327 && (!REG_P (operands[0])
10328 || SSE_REG_P (operands[0])
10329 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10331 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10332 code = GET_CODE (operands[1]);
10334 /* See if we have (cross) match between comparison operands and
10335 conditional move operands. */
10336 if (rtx_equal_p (operands[2], op1))
10341 code = reverse_condition_maybe_unordered (code);
10343 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10345 /* Check for min operation. */
10346 if (code == LT || code == UNLE)
10354 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10355 if (memory_operand (op0, VOIDmode))
10356 op0 = force_reg (GET_MODE (operands[0]), op0);
10357 if (GET_MODE (operands[0]) == SFmode)
10358 emit_insn (gen_minsf3 (operands[0], op0, op1));
10360 emit_insn (gen_mindf3 (operands[0], op0, op1));
10363 /* Check for max operation. */
10364 if (code == GT || code == UNGE)
10372 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10373 if (memory_operand (op0, VOIDmode))
10374 op0 = force_reg (GET_MODE (operands[0]), op0);
10375 if (GET_MODE (operands[0]) == SFmode)
10376 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10378 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10382 /* Manage condition to be sse_comparison_operator. In case we are
10383 in non-ieee mode, try to canonicalize the destination operand
10384 to be first in the comparison - this helps reload to avoid extra
10386 if (!sse_comparison_operator (operands[1], VOIDmode)
10387 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10389 rtx tmp = ix86_compare_op0;
10390 ix86_compare_op0 = ix86_compare_op1;
10391 ix86_compare_op1 = tmp;
10392 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10393 VOIDmode, ix86_compare_op0,
10396 /* Similarly try to manage result to be first operand of conditional
10397 move. We also don't support the NE comparison on SSE, so try to
10399 if ((rtx_equal_p (operands[0], operands[3])
10400 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10401 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10403 rtx tmp = operands[2];
10404 operands[2] = operands[3];
10406 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10407 (GET_CODE (operands[1])),
10408 VOIDmode, ix86_compare_op0,
10411 if (GET_MODE (operands[0]) == SFmode)
10412 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10413 operands[2], operands[3],
10414 ix86_compare_op0, ix86_compare_op1));
10416 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10417 operands[2], operands[3],
10418 ix86_compare_op0, ix86_compare_op1));
10422 /* The floating point conditional move instructions don't directly
10423 support conditions resulting from a signed integer comparison. */
10425 code = GET_CODE (operands[1]);
10426 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10428 /* The floating point conditional move instructions don't directly
10429 support signed integer comparisons. */
10431 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10433 if (second_test != NULL || bypass_test != NULL)
10435 tmp = gen_reg_rtx (QImode);
10436 ix86_expand_setcc (code, tmp);
10438 ix86_compare_op0 = tmp;
10439 ix86_compare_op1 = const0_rtx;
10440 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10442 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10444 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10445 emit_move_insn (tmp, operands[3]);
10448 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10450 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10451 emit_move_insn (tmp, operands[2]);
10455 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10456 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10461 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10462 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10467 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10468 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10476 /* Expand conditional increment or decrement using adb/sbb instructions.
10477 The default case using setcc followed by the conditional move can be
10478 done by generic code. */
/* Contract (from visible code): operands[0] = destination, operands[1] =
   comparison rtx, operands[2] = input value, operands[3] = +1/-1 constant.
   Returns 1 ("DONE") after emitting an add/sub-with-carry sequence.
   NOTE(review): this extract is missing lines (the return type, early-return
   bodies, braces and case labels) -- comments describe only visible code.  */
10480 ix86_expand_int_addcc (rtx operands[])
10482 enum rtx_code code = GET_CODE (operands[1]);
10484 rtx val = const0_rtx;
10485 bool fpcmp = false;
10486 enum machine_mode mode = GET_MODE (operands[0]);
/* Only increments/decrements by exactly one are handled here.  */
10488 if (operands[3] != const1_rtx
10489 && operands[3] != constm1_rtx)
/* The comparison must be representable through the carry flag so that
   adc/sbb can consume it directly.  */
10491 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10492 ix86_compare_op1, &compare_op))
10494 code = GET_CODE (compare_op);
/* For FP condition-code modes, map the FP comparison code to its integer
   equivalent; the unordered-aware reversal below handles NaN cases.  */
10496 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10497 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10500 code = ix86_fp_compare_code_to_integer (code);
10507 PUT_CODE (compare_op,
10508 reverse_condition_maybe_unordered
10509 (GET_CODE (compare_op)));
10511 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10513 PUT_MODE (compare_op, mode);
10515 /* Construct either adc or sbb insn. */
/* LTU paired with -1 (or its complement pairing) selects the subtract-
   with-borrow forms; otherwise add-with-carry.  */
10516 if ((code == LTU) == (operands[3] == constm1_rtx))
10518 switch (GET_MODE (operands[0]))
10521 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10524 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10527 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10530 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10538 switch (GET_MODE (operands[0]))
10541 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10544 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10547 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10550 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10556 return 1; /* DONE */
10560 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10561 works for floating pointer parameters and nonoffsetable memories.
10562 For pushes, it returns just stack offsets; the values will be saved
10563 in the right order. Maximally three parts are generated. */
/* Fills PARTS[0..2] with the word-sized pieces of OPERAND (mode MODE) and,
   per the visible return statements elsewhere in this family, yields the
   part count.  NOTE(review): this extract is missing lines (return type,
   local declarations such as `size' and `l[]', braces, abort() calls);
   comments below only describe the code that is visible.  */
10566 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
/* 32-bit: XFmode occupies 3 SImode words; others GET_MODE_SIZE/4 words.  */
10571 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
/* 64-bit branch: round size up to DImode words.  */
10573 size = (GET_MODE_SIZE (mode) + 4) / 8;
/* MMX registers cannot be split into parts.  */
10575 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10577 if (size < 2 || size > 3)
10580 /* Optimize constant pool reference to immediates. This is used by fp
10581 moves, that force all constants to memory to allow combining. */
10582 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10584 rtx tmp = maybe_get_pool_constant (operand);
10589 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10591 /* The only non-offsetable memories we handle are pushes. */
10592 if (! push_operand (operand, VOIDmode))
/* For a push, all parts alias the same Pmode stack reference; the caller
   emits them in the right order.  */
10595 operand = copy_rtx (operand);
10596 PUT_MODE (operand, Pmode);
10597 parts[0] = parts[1] = parts[2] = operand;
10599 else if (!TARGET_64BIT)
10601 if (mode == DImode)
10602 split_di (&operand, 1, &parts[0], &parts[1]);
10605 if (REG_P (operand))
/* Hard-register splitting is only valid before reload completes.  */
10607 if (!reload_completed)
10609 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10610 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10612 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10614 else if (offsettable_memref_p (operand))
10616 operand = adjust_address (operand, SImode, 0);
10617 parts[0] = operand;
10618 parts[1] = adjust_address (operand, SImode, 4);
10620 parts[2] = adjust_address (operand, SImode, 8);
10622 else if (GET_CODE (operand) == CONST_DOUBLE)
10627 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10631 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10632 parts[2] = gen_int_mode (l[2], SImode);
10635 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10640 parts[1] = gen_int_mode (l[1], SImode);
10641 parts[0] = gen_int_mode (l[0], SImode);
/* 64-bit target: split into DImode (and possibly one upper) parts.  */
10649 if (mode == TImode)
10650 split_ti (&operand, 1, &parts[0], &parts[1]);
10651 if (mode == XFmode || mode == TFmode)
/* XFmode's upper part is only 32 bits wide; TFmode's is a full word.  */
10653 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10654 if (REG_P (operand))
10656 if (!reload_completed)
10658 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10659 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10661 else if (offsettable_memref_p (operand))
10663 operand = adjust_address (operand, DImode, 0);
10664 parts[0] = operand;
10665 parts[1] = adjust_address (operand, upper_mode, 8);
10667 else if (GET_CODE (operand) == CONST_DOUBLE)
10672 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10673 real_to_target (l, &r, mode);
10674 /* Do not use shift by 32 to avoid warning on 32bit systems. */
/* Reassemble two 32-bit target words into one HOST_WIDE_INT when the
   host word is wide enough; otherwise build an immediate CONST_DOUBLE.  */
10675 if (HOST_BITS_PER_WIDE_INT >= 64)
10678 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10679 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10682 parts[0] = immed_double_const (l[0], l[1], DImode);
10683 if (upper_mode == SImode)
10684 parts[1] = gen_int_mode (l[2], SImode);
10685 else if (HOST_BITS_PER_WIDE_INT >= 64)
10688 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10689 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10692 parts[1] = immed_double_const (l[2], l[3], DImode);
10702 /* Emit insns to perform a move or push of DI, DF, and XF values.
10703 Return false when normal moves are needed; true when all required
10704 insns have been emitted. Operands 2-4 contain the input values
10705 int the correct order; operands 5-7 contain the output values. */
/* NOTE(review): this extract is missing lines (return type, locals such as
   `part', `nparts', `push', braces, several returns); comments describe
   only the code visible here.  */
10708 ix86_split_long_move (rtx operands[])
10713 int collisions = 0;
10714 enum machine_mode mode = GET_MODE (operands[0]);
10716 /* The DFmode expanders may ask us to move double.
10717 For 64bit target this is single move. By hiding the fact
10718 here we simplify i386.md splitters. */
10719 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10721 /* Optimize constant pool reference to immediates. This is used by
10722 fp moves, that force all constants to memory to allow combining. */
10724 if (GET_CODE (operands[1]) == MEM
10725 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10726 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10727 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10728 if (push_operand (operands[0], VOIDmode))
/* Retype the push destination as a Pmode (DImode on 64-bit) push.  */
10730 operands[0] = copy_rtx (operands[0]);
10731 PUT_MODE (operands[0], Pmode);
10734 operands[0] = gen_lowpart (DImode, operands[0]);
10735 operands[1] = gen_lowpart (DImode, operands[1]);
10736 emit_move_insn (operands[0], operands[1]);
10740 /* The only non-offsettable memory we handle is push. */
10741 if (push_operand (operands[0], VOIDmode))
10743 else if (GET_CODE (operands[0]) == MEM
10744 && ! offsettable_memref_p (operands[0]))
/* Split both sides into word parts; part[1] is the source, part[0]
   the destination.  */
10747 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10748 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10750 /* When emitting push, take care for source operands on the stack. */
/* Each emitted push moves SP, so rebase the earlier source parts on the
   addresses of the later ones.  */
10751 if (push && GET_CODE (operands[1]) == MEM
10752 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10755 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10756 XEXP (part[1][2], 0));
10757 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10758 XEXP (part[1][1], 0));
10761 /* We need to do copy in the right order in case an address register
10762 of the source overlaps the destination. */
10763 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10765 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10767 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10770 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10773 /* Collision in the middle part can be handled by reordering. */
10774 if (collisions == 1 && nparts == 3
10775 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
/* Swap parts 1 and 2 on both sides so the colliding move goes last.  */
10778 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10779 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10782 /* If there are more collisions, we can't handle it by reordering.
10783 Do an lea to the last part and use only one colliding move. */
10784 else if (collisions > 1)
10790 base = part[0][nparts - 1];
10792 /* Handle the case when the last part isn't valid for lea.
10793 Happens in 64-bit mode storing the 12-byte XFmode. */
10794 if (GET_MODE (base) != Pmode)
10795 base = gen_rtx_REG (Pmode, REGNO (base));
/* Materialize the source address into BASE, then rebase all source
   parts at fixed offsets from it.  */
10797 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10798 part[1][0] = replace_equiv_address (part[1][0], base);
10799 part[1][1] = replace_equiv_address (part[1][1],
10800 plus_constant (base, UNITS_PER_WORD));
10802 part[1][2] = replace_equiv_address (part[1][2],
10803 plus_constant (base, 8));
/* Push path: XFmode on 32-bit needs 4 bytes of extra SP adjustment so
   the 12-byte value occupies a 16-byte slot.  */
10813 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10814 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10815 emit_move_insn (part[0][2], part[1][2]);
10820 /* In 64bit mode we don't have 32bit push available. In case this is
10821 register, it is OK - we will just use larger counterpart. We also
10822 retype memory - these comes from attempt to avoid REX prefix on
10823 moving of second half of TFmode value. */
10824 if (GET_MODE (part[1][1]) == SImode)
10826 if (GET_CODE (part[1][1]) == MEM)
10827 part[1][1] = adjust_address (part[1][1], DImode, 0);
10828 else if (REG_P (part[1][1]))
10829 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10832 if (GET_MODE (part[1][0]) == SImode)
10833 part[1][0] = part[1][1];
10836 emit_move_insn (part[0][1], part[1][1]);
10837 emit_move_insn (part[0][0], part[1][0]);
10841 /* Choose correct order to not overwrite the source before it is copied. */
10842 if ((REG_P (part[0][0])
10843 && REG_P (part[1][1])
10844 && (REGNO (part[0][0]) == REGNO (part[1][1])
10846 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10848 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Reversed order: copy high part first.  */
10852 operands[2] = part[0][2];
10853 operands[3] = part[0][1];
10854 operands[4] = part[0][0];
10855 operands[5] = part[1][2];
10856 operands[6] = part[1][1];
10857 operands[7] = part[1][0];
10861 operands[2] = part[0][1];
10862 operands[3] = part[0][0];
10863 operands[5] = part[1][1];
10864 operands[6] = part[1][0];
/* Natural order: copy low part first.  */
10871 operands[2] = part[0][0];
10872 operands[3] = part[0][1];
10873 operands[4] = part[0][2];
10874 operands[5] = part[1][0];
10875 operands[6] = part[1][1];
10876 operands[7] = part[1][2];
10880 operands[2] = part[0][0];
10881 operands[3] = part[0][1];
10882 operands[5] = part[1][0];
10883 operands[6] = part[1][1];
10886 emit_move_insn (operands[2], operands[5]);
10887 emit_move_insn (operands[3], operands[6]);
10889 emit_move_insn (operands[4], operands[7]);
/* Split a 64-bit arithmetic/logical left shift into SImode operations.
   operands[0] = dest, operands[1] = src, operands[2] = shift count;
   SCRATCH is an optional spare register used after reload.
   NOTE(review): this extract is missing lines (return type, the `count'
   declaration, braces, and else keywords); comments cover visible code.  */
10895 ix86_split_ashldi (rtx *operands, rtx scratch)
10897 rtx low[2], high[2];
/* Constant shift count: emit the exact move/shift sequence directly.  */
10900 if (GET_CODE (operands[2]) == CONST_INT)
10902 split_di (operands, 2, low, high);
10903 count = INTVAL (operands[2]) & 63;
/* Shift >= 32: high word becomes the (shifted) low word, low word zero.  */
10907 emit_move_insn (high[0], low[1]);
10908 emit_move_insn (low[0], const0_rtx);
10911 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
/* Shift < 32: use shld to carry low bits into the high word.  */
10915 if (!rtx_equal_p (operands[0], operands[1]))
10916 emit_move_insn (operands[0], operands[1]);
10917 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10918 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
/* Variable shift count: shift pair, then fix up for counts >= 32.  */
10923 if (!rtx_equal_p (operands[0], operands[1]))
10924 emit_move_insn (operands[0], operands[1]);
10926 split_di (operands, 1, low, high);
10928 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10929 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
/* With cmove, the >=32 adjustment is branch-free via a zero scratch.  */
10931 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10933 if (! no_new_pseudos)
10934 scratch = force_reg (SImode, const0_rtx);
10936 emit_move_insn (scratch, const0_rtx);
10938 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10942 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a 64-bit arithmetic right shift into SImode operations.
   operands[0] = dest, operands[1] = src, operands[2] = count; SCRATCH is
   an optional spare register for the post-reload case.
   NOTE(review): this extract is missing lines (return type, `count'
   declaration, braces, else keywords); comments cover visible code.  */
10947 ix86_split_ashrdi (rtx *operands, rtx scratch)
10949 rtx low[2], high[2];
10952 if (GET_CODE (operands[2]) == CONST_INT)
10954 split_di (operands, 2, low, high);
10955 count = INTVAL (operands[2]) & 63;
/* Count 63: both words become the sign bit broadcast.  */
10959 emit_move_insn (high[0], high[1]);
10960 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10961 emit_move_insn (low[0], high[0]);
10964 else if (count >= 32)
10966 emit_move_insn (low[0], high[1]);
/* Avoid the extra move when we may still create pseudos; after reload
   the sign extension must go through the destination's high word.  */
10968 if (! reload_completed)
10969 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10972 emit_move_insn (high[0], low[0]);
10973 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10977 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
/* Count < 32: shrd carries high bits into the low word.  */
10981 if (!rtx_equal_p (operands[0], operands[1]))
10982 emit_move_insn (operands[0], operands[1]);
10983 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10984 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count path.  */
10989 if (!rtx_equal_p (operands[0], operands[1]))
10990 emit_move_insn (operands[0], operands[1]);
10992 split_di (operands, 1, low, high);
10994 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10995 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
/* Branch-free >=32 fixup: scratch holds the sign-broadcast value.  */
10997 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10999 if (! no_new_pseudos)
11000 scratch = gen_reg_rtx (SImode);
11001 emit_move_insn (scratch, high[0]);
11002 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
11003 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11007 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a 64-bit logical right shift into SImode operations.
   operands[0] = dest, operands[1] = src, operands[2] = count; SCRATCH is
   an optional spare register for the post-reload case.
   NOTE(review): this extract is missing lines (return type, `count'
   declaration, braces, else keywords); comments cover visible code.  */
11012 ix86_split_lshrdi (rtx *operands, rtx scratch)
11014 rtx low[2], high[2];
11017 if (GET_CODE (operands[2]) == CONST_INT)
11019 split_di (operands, 2, low, high);
11020 count = INTVAL (operands[2]) & 63;
/* Count >= 32: low word takes the (shifted) high word, high word zero.  */
11024 emit_move_insn (low[0], high[1]);
11025 emit_move_insn (high[0], const0_rtx);
11028 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
/* Count < 32: shrd carries high bits into the low word.  */
11032 if (!rtx_equal_p (operands[0], operands[1]))
11033 emit_move_insn (operands[0], operands[1]);
11034 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11035 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count path.  */
11040 if (!rtx_equal_p (operands[0], operands[1]))
11041 emit_move_insn (operands[0], operands[1]);
11043 split_di (operands, 1, low, high);
11045 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11046 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
11048 /* Heh. By reversing the arguments, we can reuse this pattern. */
11049 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
11051 if (! no_new_pseudos)
11052 scratch = force_reg (SImode, const0_rtx);
11054 emit_move_insn (scratch, const0_rtx);
11056 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11060 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
11064 /* Helper function for the string operations below. Dest VARIABLE whether
11065 it is aligned to VALUE bytes. If true, jump to the label. */
/* Emits (VARIABLE & VALUE) == 0 test and a conditional jump; per the
   callers it returns the emitted label rtx.  NOTE(review): the return
   type line, braces, `else', trailing emit_cmp_and_jump_insns arguments
   and `return label;' are missing from this extract.  */
11067 ix86_expand_aligntest (rtx variable, int value)
11069 rtx label = gen_label_rtx ();
11070 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
/* Pick the AND pattern matching the counter's mode.  */
11071 if (GET_MODE (variable) == DImode)
11072 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
11074 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11075 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
11080 /* Adjust COUNTER by the VALUE. */
/* Subtracts VALUE from COUNTREG (emits an add of -VALUE) in the register's
   own mode.  NOTE(review): the return type line, braces and `else' are
   missing from this extract.  */
11082 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
11084 if (GET_MODE (countreg) == DImode)
11085 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11087 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11090 /* Zero extend possibly SImode EXP to Pmode register. */
/* Returns a Pmode register holding EXP: constants (VOIDmode) are forced
   into a register, Pmode values are copied, and SImode values are
   zero-extended (64-bit only, per gen_zero_extendsidi2).
   NOTE(review): the return type, the `rtx r;' declaration line, and the
   final `return r;' are missing from this extract.  */
11092 ix86_zero_extend_to_Pmode (rtx exp)
11095 if (GET_MODE (exp) == VOIDmode)
11096 return force_reg (Pmode, exp);
11097 if (GET_MODE (exp) == Pmode)
11098 return copy_to_mode_reg (Pmode, exp);
11099 r = gen_reg_rtx (Pmode);
11100 emit_insn (gen_zero_extendsidi2 (r, exp));
11104 /* Expand string move (memcpy) operation. Use i386 string operations when
11105 profitable. expand_clrstr contains similar code. */
/* DST/SRC are BLKmode MEMs, COUNT_EXP the byte count, ALIGN_EXP the known
   alignment.  Per the visible returns it reports success/failure so the
   caller can fall back to a library call.  NOTE(review): this extract is
   missing many lines (return type, several locals, braces, `else' arms,
   `return 0/1;' statements); comments cover only visible code.  */
11107 ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11109 rtx srcreg, destreg, countreg, srcexp, destexp;
11110 enum machine_mode counter_mode;
11111 HOST_WIDE_INT align = 0;
11112 unsigned HOST_WIDE_INT count = 0;
11114 if (GET_CODE (align_exp) == CONST_INT)
11115 align = INTVAL (align_exp);
11117 /* Can't use any of this if the user has appropriated esi or edi. */
11118 if (global_regs[4] || global_regs[5])
11121 /* This simple hack avoids all inlining code and simplifies code below. */
11122 if (!TARGET_ALIGN_STRINGOPS)
11125 if (GET_CODE (count_exp) == CONST_INT)
11127 count = INTVAL (count_exp);
11128 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11132 /* Figure out proper mode for counter. For 32bits it is always SImode,
11133 for 64bits use SImode when possible, otherwise DImode.
11134 Set count to number of bytes copied when known at compile time. */
11135 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11136 || x86_64_zero_extended_value (count_exp))
11137 counter_mode = SImode;
11139 counter_mode = DImode;
11141 if (counter_mode != SImode && counter_mode != DImode)
/* Force both addresses into registers (esi/edi for the string insns).  */
11144 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11145 if (destreg != XEXP (dst, 0))
11146 dst = replace_equiv_address_nv (dst, destreg);
11147 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11148 if (srcreg != XEXP (src, 0))
11149 src = replace_equiv_address_nv (src, srcreg);
11151 /* When optimizing for size emit simple rep ; movsb instruction for
11152 counts not divisible by 4. */
11154 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11156 emit_insn (gen_cld ());
11157 countreg = ix86_zero_extend_to_Pmode (count_exp);
11158 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11159 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11160 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11164 /* For constant aligned (or small unaligned) copies use rep movsl
11165 followed by code copying the rest. For PentiumPro ensure 8 byte
11166 alignment to allow rep movsl acceleration. */
11168 else if (count != 0
11170 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11171 || optimize_size || count < (unsigned int) 64))
11173 unsigned HOST_WIDE_INT offset = 0;
/* Word size of the rep move: 8 bytes on 64-bit unless optimizing for
   size, else 4.  */
11174 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11175 rtx srcmem, dstmem;
11177 emit_insn (gen_cld ());
11178 if (count & ~(size - 1))
11180 countreg = copy_to_mode_reg (counter_mode,
11181 GEN_INT ((count >> (size == 4 ? 2 : 3))
11182 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11183 countreg = ix86_zero_extend_to_Pmode (countreg);
/* Final address expressions: base + (words << log2(size)).  */
11185 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11186 GEN_INT (size == 4 ? 2 : 3));
11187 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11188 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11190 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11191 countreg, destexp, srcexp));
11192 offset = count & ~(size - 1);
/* Copy the remaining tail: 4, then 2, then 1 bytes as needed.  */
11194 if (size == 8 && (count & 0x04))
11196 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11198 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11200 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11205 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11207 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11209 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11214 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11216 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11218 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11221 /* The generic code based on the glibc implementation:
11222 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11223 allowing accelerated copying there)
11224 - copy the data using rep movsl
11225 - copy the rest. */
11230 rtx srcmem, dstmem;
/* PentiumPro benefits from 8-byte alignment for large copies.  */
11231 int desired_alignment = (TARGET_PENTIUMPRO
11232 && (count == 0 || count >= (unsigned int) 260)
11233 ? 8 : UNITS_PER_WORD);
11234 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11235 dst = change_address (dst, BLKmode, destreg);
11236 src = change_address (src, BLKmode, srcreg);
11238 /* In case we don't know anything about the alignment, default to
11239 library version, since it is usually equally fast and result in
11242 Also emit call when we know that the count is large and call overhead
11243 will not be important. */
11244 if (!TARGET_INLINE_ALL_STRINGOPS
11245 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11248 if (TARGET_SINGLE_STRINGOP)
11249 emit_insn (gen_cld ());
11251 countreg2 = gen_reg_rtx (Pmode);
11252 countreg = copy_to_mode_reg (counter_mode, count_exp);
11254 /* We don't use loops to align destination and to copy parts smaller
11255 than 4 bytes, because gcc is able to optimize such code better (in
11256 the case the destination or the count really is aligned, gcc is often
11257 able to predict the branches) and also it is friendlier to the
11258 hardware branch prediction.
11260 Using loops is beneficial for generic case, because we can
11261 handle small counts using the loops. Many CPUs (such as Athlon)
11262 have large REP prefix setup costs.
11264 This is quite costly. Maybe we can revisit this decision later or
11265 add some customizability to this code. */
/* Tiny counts skip the alignment prologue entirely.  */
11267 if (count == 0 && align < desired_alignment)
11269 label = gen_label_rtx ();
11270 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11271 LEU, 0, counter_mode, 1, label);
/* Alignment prologue: move 1, 2, then 4 bytes until DESTREG is aligned.  */
11275 rtx label = ix86_expand_aligntest (destreg, 1);
11276 srcmem = change_address (src, QImode, srcreg);
11277 dstmem = change_address (dst, QImode, destreg);
11278 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11279 ix86_adjust_counter (countreg, 1);
11280 emit_label (label);
11281 LABEL_NUSES (label) = 1;
11285 rtx label = ix86_expand_aligntest (destreg, 2);
11286 srcmem = change_address (src, HImode, srcreg);
11287 dstmem = change_address (dst, HImode, destreg);
11288 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11289 ix86_adjust_counter (countreg, 2);
11290 emit_label (label);
11291 LABEL_NUSES (label) = 1;
11293 if (align <= 4 && desired_alignment > 4)
11295 rtx label = ix86_expand_aligntest (destreg, 4);
11296 srcmem = change_address (src, SImode, srcreg);
11297 dstmem = change_address (dst, SImode, destreg);
11298 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11299 ix86_adjust_counter (countreg, 4);
11300 emit_label (label);
11301 LABEL_NUSES (label) = 1;
11304 if (label && desired_alignment > 4 && !TARGET_64BIT)
11306 emit_label (label);
11307 LABEL_NUSES (label) = 1;
11310 if (!TARGET_SINGLE_STRINGOP)
11311 emit_insn (gen_cld ());
/* Main body: rep mov in DImode (shift by 3) or SImode (shift by 2).  */
11314 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11316 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11320 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11321 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11323 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11324 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11325 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11326 countreg2, destexp, srcexp));
11330 emit_label (label);
11331 LABEL_NUSES (label) = 1;
/* Epilogue: copy the remaining 0-7 bytes, testing COUNTREG when the
   count is not a compile-time constant.  */
11333 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11335 srcmem = change_address (src, SImode, srcreg);
11336 dstmem = change_address (dst, SImode, destreg);
11337 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11339 if ((align <= 4 || count == 0) && TARGET_64BIT)
11341 rtx label = ix86_expand_aligntest (countreg, 4);
11342 srcmem = change_address (src, SImode, srcreg);
11343 dstmem = change_address (dst, SImode, destreg);
11344 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11345 emit_label (label);
11346 LABEL_NUSES (label) = 1;
11348 if (align > 2 && count != 0 && (count & 2))
11350 srcmem = change_address (src, HImode, srcreg);
11351 dstmem = change_address (dst, HImode, destreg);
11352 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11354 if (align <= 2 || count == 0)
11356 rtx label = ix86_expand_aligntest (countreg, 2);
11357 srcmem = change_address (src, HImode, srcreg);
11358 dstmem = change_address (dst, HImode, destreg);
11359 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11360 emit_label (label);
11361 LABEL_NUSES (label) = 1;
11363 if (align > 1 && count != 0 && (count & 1))
11365 srcmem = change_address (src, QImode, srcreg);
11366 dstmem = change_address (dst, QImode, destreg);
11367 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11369 if (align <= 1 || count == 0)
11371 rtx label = ix86_expand_aligntest (countreg, 1);
11372 srcmem = change_address (src, QImode, srcreg);
11373 dstmem = change_address (dst, QImode, destreg);
11374 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11375 emit_label (label);
11376 LABEL_NUSES (label) = 1;
11383 /* Expand string clear operation (bzero). Use i386 string operations when
11384 profitable. expand_movstr contains similar code. */
/* DST is a BLKmode MEM, COUNT_EXP the byte count, ALIGN_EXP the known
   alignment.  Structure parallels ix86_expand_movstr above but stores a
   zero register instead of copying.  NOTE(review): this extract is
   missing many lines (return type, some locals, braces, `else' arms,
   returns); comments cover only visible code.  */
11386 ix86_expand_clrstr (rtx dst, rtx count_exp, rtx align_exp)
11388 rtx destreg, zeroreg, countreg, destexp;
11389 enum machine_mode counter_mode;
11390 HOST_WIDE_INT align = 0;
11391 unsigned HOST_WIDE_INT count = 0;
11393 if (GET_CODE (align_exp) == CONST_INT)
11394 align = INTVAL (align_exp);
11396 /* Can't use any of this if the user has appropriated esi. */
11397 if (global_regs[4])
11400 /* This simple hack avoids all inlining code and simplifies code below. */
11401 if (!TARGET_ALIGN_STRINGOPS)
11404 if (GET_CODE (count_exp) == CONST_INT)
11406 count = INTVAL (count_exp);
11407 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11410 /* Figure out proper mode for counter. For 32bits it is always SImode,
11411 for 64bits use SImode when possible, otherwise DImode.
11412 Set count to number of bytes copied when known at compile time. */
11413 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11414 || x86_64_zero_extended_value (count_exp))
11415 counter_mode = SImode;
11417 counter_mode = DImode;
11419 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11420 if (destreg != XEXP (dst, 0))
11421 dst = replace_equiv_address_nv (dst, destreg);
11423 emit_insn (gen_cld ());
11425 /* When optimizing for size emit simple rep ; movsb instruction for
11426 counts not divisible by 4. */
11428 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11430 countreg = ix86_zero_extend_to_Pmode (count_exp);
11431 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11432 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11433 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
/* Constant-count path: rep stos in word units plus a hand-written tail.  */
11435 else if (count != 0
11437 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11438 || optimize_size || count < (unsigned int) 64))
11440 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11441 unsigned HOST_WIDE_INT offset = 0;
11443 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11444 if (count & ~(size - 1))
11446 countreg = copy_to_mode_reg (counter_mode,
11447 GEN_INT ((count >> (size == 4 ? 2 : 3))
11448 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11449 countreg = ix86_zero_extend_to_Pmode (countreg);
11450 destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
11451 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11452 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11453 offset = count & ~(size - 1);
/* Tail: store 4, 2, then 1 bytes using subregs of the zero register.  */
11455 if (size == 8 && (count & 0x04))
11457 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11459 emit_insn (gen_strset (destreg, mem,
11460 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11465 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11467 emit_insn (gen_strset (destreg, mem,
11468 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11473 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11475 emit_insn (gen_strset (destreg, mem,
11476 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11483 /* Compute desired alignment of the string operation. */
11484 int desired_alignment = (TARGET_PENTIUMPRO
11485 && (count == 0 || count >= (unsigned int) 260)
11486 ? 8 : UNITS_PER_WORD);
11488 /* In case we don't know anything about the alignment, default to
11489 library version, since it is usually equally fast and result in
11492 Also emit call when we know that the count is large and call overhead
11493 will not be important. */
11494 if (!TARGET_INLINE_ALL_STRINGOPS
11495 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11498 if (TARGET_SINGLE_STRINGOP)
11499 emit_insn (gen_cld ());
11501 countreg2 = gen_reg_rtx (Pmode);
11502 countreg = copy_to_mode_reg (counter_mode, count_exp);
11503 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11504 /* Get rid of MEM_OFFSET, it won't be accurate. */
11505 dst = change_address (dst, BLKmode, destreg);
/* Tiny counts skip the alignment prologue entirely.  */
11507 if (count == 0 && align < desired_alignment)
11509 label = gen_label_rtx ();
11510 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11511 LEU, 0, counter_mode, 1, label);
/* Alignment prologue: store 1, 2, then 4 bytes until DESTREG is aligned.  */
11515 rtx label = ix86_expand_aligntest (destreg, 1);
11516 emit_insn (gen_strset (destreg, dst,
11517 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11518 ix86_adjust_counter (countreg, 1);
11519 emit_label (label);
11520 LABEL_NUSES (label) = 1;
11524 rtx label = ix86_expand_aligntest (destreg, 2);
11525 emit_insn (gen_strset (destreg, dst,
11526 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11527 ix86_adjust_counter (countreg, 2);
11528 emit_label (label);
11529 LABEL_NUSES (label) = 1;
11531 if (align <= 4 && desired_alignment > 4)
11533 rtx label = ix86_expand_aligntest (destreg, 4);
11534 emit_insn (gen_strset (destreg, dst,
11536 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11538 ix86_adjust_counter (countreg, 4);
11539 emit_label (label);
11540 LABEL_NUSES (label) = 1;
11543 if (label && desired_alignment > 4 && !TARGET_64BIT)
11545 emit_label (label);
11546 LABEL_NUSES (label) = 1;
11550 if (!TARGET_SINGLE_STRINGOP)
11551 emit_insn (gen_cld ());
/* Main body: rep stos in DImode (shift by 3) or SImode (shift by 2).  */
11554 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11556 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11560 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11561 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11563 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11564 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11568 emit_label (label);
11569 LABEL_NUSES (label) = 1;
/* Epilogue: store the remaining 0-7 bytes, testing COUNTREG when the
   count is not a compile-time constant.  */
11572 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11573 emit_insn (gen_strset (destreg, dst,
11574 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11575 if (TARGET_64BIT && (align <= 4 || count == 0))
11577 rtx label = ix86_expand_aligntest (countreg, 4);
11578 emit_insn (gen_strset (destreg, dst,
11579 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11580 emit_label (label);
11581 LABEL_NUSES (label) = 1;
11583 if (align > 2 && count != 0 && (count & 2))
11584 emit_insn (gen_strset (destreg, dst,
11585 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11586 if (align <= 2 || count == 0)
11588 rtx label = ix86_expand_aligntest (countreg, 2);
11589 emit_insn (gen_strset (destreg, dst,
11590 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11591 emit_label (label);
11592 LABEL_NUSES (label) = 1;
11594 if (align > 1 && count != 0 && (count & 1))
11595 emit_insn (gen_strset (destreg, dst,
11596 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11597 if (align <= 1 || count == 0)
11599 rtx label = ix86_expand_aligntest (countreg, 1);
11600 emit_insn (gen_strset (destreg, dst,
11601 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11602 emit_label (label);
11603 LABEL_NUSES (label) = 1;
11609 /* Expand strlen.  */
/* Emit inline code computing strlen of SRC into OUT.  EOSCHAR is the
   terminator rtx (only const0_rtx qualifies for the unrolled path) and
   ALIGN the known alignment of SRC.  Two strategies are visible below:
   the unrolled word-at-a-time scan (ix86_expand_strlensi_unroll_1) and
   a repne-scasb sequence via gen_strlenqi_1.
   NOTE(review): this excerpt elides some original lines (braces, the
   return type line, returns); comments describe only the visible code.  */
11611 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11613   rtx addr, scratch1, scratch2, scratch3, scratch4;
11615   /* The generic case of strlen expander is long.  Avoid it's
11616      expanding unless TARGET_INLINE_ALL_STRINGOPS.  */
11618   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11619       && !TARGET_INLINE_ALL_STRINGOPS
11621       && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
  /* Start address of the string, forced into a register.  */
11624   addr = force_reg (Pmode, XEXP (src, 0));
11625   scratch1 = gen_reg_rtx (Pmode);
11627   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11630       /* Well it seems that some optimizer does not combine a call like
11631          foo(strlen(bar), strlen(bar));
11632          when the move and the subtraction is done here.  It does calculate
11633          the length just once when these instructions are done inside of
11634          output_strlen_unroll().  But I think since &bar[strlen(bar)] is
11635          often used and I use one fewer register for the lifetime of
11636          output_strlen_unroll() this is better.  */
11638       emit_move_insn (out, addr);
11640       ix86_expand_strlensi_unroll_1 (out, src, align);
11642       /* strlensi_unroll_1 returns the address of the zero at the end of
11643          the string, like memchr(), so compute the length by subtracting
11644          the start address.  */
      /* 64-bit vs. 32-bit pointer subtraction (TARGET_64BIT split elided).  */
11646       emit_insn (gen_subdi3 (out, out, addr));
11648       emit_insn (gen_subsi3 (out, out, addr));
  /* Fallback: scasb-based scan.  scratch4 = -1 is the ECX count
     (scan "forever"); scratch3 holds the running pointer.  */
11653       scratch2 = gen_reg_rtx (Pmode);
11654       scratch3 = gen_reg_rtx (Pmode);
11655       scratch4 = force_reg (Pmode, constm1_rtx);
11657       emit_move_insn (scratch3, addr);
11658       eoschar = force_reg (QImode, eoschar);
  /* Clear the direction flag so the string op scans forward.  */
11660       emit_insn (gen_cld ());
11661       src = replace_equiv_address_nv (src, scratch3);
11663       /* If .md starts supporting :P, this can be done in .md.  */
11664       unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11665                                                  scratch4), UNSPEC_SCAS);
11666       emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
  /* Length = ~remaining_count - 1 (mode chosen per TARGET_64BIT).  */
11669       emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11670       emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11674       emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11675       emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11681 /* Expand the appropriate insns for doing strlen if not just doing
11684    out = result, initialized with the start address
11685    align_rtx = alignment of the address.
11686    scratch = scratch register, initialized with the startaddress when
11687 	not aligned, otherwise undefined
11689    This is just the body. It needs the initializations mentioned above and
11690    some address computing at the end.  These things are done in i386.md.  */
/* NOTE(review): this excerpt elides some original lines (braces, returns,
   the function's return-type line); comments below describe visible code.  */
11693 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11697   rtx align_2_label = NULL_RTX;
11698   rtx align_3_label = NULL_RTX;
11699   rtx align_4_label = gen_label_rtx ();
11700   rtx end_0_label = gen_label_rtx ();
11702   rtx tmpreg = gen_reg_rtx (SImode);
11703   rtx scratch = gen_reg_rtx (SImode);
11707   if (GET_CODE (align_rtx) == CONST_INT)
11708     align = INTVAL (align_rtx);
11710   /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
11712   /* Is there a known alignment and is it less than 4?  */
11715       rtx scratch1 = gen_reg_rtx (Pmode);
11716       emit_move_insn (scratch1, out);
11717       /* Is there a known alignment and is it not 2? */
11720           align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11721           align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11723           /* Leave just the 3 lower bits.  */
11724           align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11725                                     NULL_RTX, 0, OPTAB_WIDEN);
  /* Dispatch on (addr & 3): 0 -> already aligned, 2 -> one HImode step,
     3 -> one byte then aligned, 1 -> fall through checking bytes.  */
11727           emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11728                                    Pmode, 1, align_4_label);
11729           emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
11730                                    Pmode, 1, align_2_label);
11731           emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
11732                                    Pmode, 1, align_3_label);
11736           /* Since the alignment is 2, we have to check 2 or 0 bytes;
11737              check if is aligned to 4 - byte.  */
11739           align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
11740                                     NULL_RTX, 0, OPTAB_WIDEN);
11742           emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11743                                    Pmode, 1, align_4_label);
11746       mem = change_address (src, QImode, out);
11748       /* Now compare the bytes.  */
11750       /* Compare the first n unaligned byte on a byte per byte basis.  */
11751       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11752                                QImode, 1, end_0_label);
11754       /* Increment the address.  */
11756       emit_insn (gen_adddi3 (out, out, const1_rtx));
11758       emit_insn (gen_addsi3 (out, out, const1_rtx));
11760       /* Not needed with an alignment of 2 */
11763           emit_label (align_2_label);
11765           emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11769           emit_insn (gen_adddi3 (out, out, const1_rtx));
11771           emit_insn (gen_addsi3 (out, out, const1_rtx));
11773           emit_label (align_3_label);
11776       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11780       emit_insn (gen_adddi3 (out, out, const1_rtx));
11782       emit_insn (gen_addsi3 (out, out, const1_rtx));
11785   /* Generate loop to check 4 bytes at a time.  It is not a good idea to
11786      align this loop.  It gives only huge programs, but does not help to
11788   emit_label (align_4_label);
11790   mem = change_address (src, SImode, out);
11791   emit_move_insn (scratch, mem);
11793   emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11795   emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11797   /* This formula yields a nonzero result iff one of the bytes is zero.
11798      This saves three branches inside loop and many cycles.  */
  /* Classic zero-in-word trick: (x - 0x01010101) & ~x & 0x80808080.  */
11800   emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11801   emit_insn (gen_one_cmplsi2 (scratch, scratch));
11802   emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11803   emit_insn (gen_andsi3 (tmpreg, tmpreg,
11804                          gen_int_mode (0x80808080, SImode)));
11805   emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
  /* Branch-free refinement: locate which of the four bytes was zero.  */
11810       rtx reg = gen_reg_rtx (SImode);
11811       rtx reg2 = gen_reg_rtx (Pmode);
11812       emit_move_insn (reg, tmpreg);
11813       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11815       /* If zero is not in the first two bytes, move two bytes forward.  */
11816       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11817       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11818       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11819       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11820                               gen_rtx_IF_THEN_ELSE (SImode, tmp,
11823       /* Emit lea manually to avoid clobbering of flags.  */
11824       emit_insn (gen_rtx_SET (SImode, reg2,
11825                               gen_rtx_PLUS (Pmode, out, const2_rtx)));
11827       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11828       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11829       emit_insn (gen_rtx_SET (VOIDmode, out,
11830                               gen_rtx_IF_THEN_ELSE (Pmode, tmp,
  /* Non-CMOV fallback: explicit conditional jump over the two-byte step.  */
11837       rtx end_2_label = gen_label_rtx ();
11838       /* Is zero in the first two bytes? */
11840       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11841       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11842       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11843       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11844                                   gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11846       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11847       JUMP_LABEL (tmp) = end_2_label;
11849       /* Not in the first two.  Move two bytes forward.  */
11850       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11852       emit_insn (gen_adddi3 (out, out, const2_rtx));
11854       emit_insn (gen_addsi3 (out, out, const2_rtx));
11856       emit_label (end_2_label);
11860   /* Avoid branch in fixing the byte.  */
11861   tmpreg = gen_lowpart (QImode, tmpreg);
11862   emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  /* NOTE(review): hard-coded register 17 here presumably is FLAGS_REG —
     confirm against the target's register numbering.  */
11863   cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11865   emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11867   emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11869   emit_label (end_0_label);
/* Emit a call (or sibcall) to FNADDR with argument-count rtx CALLARG1.
   RETVAL, if non-null, receives the call's value; POP is the number of
   bytes of arguments the callee pops (or const0_rtx/NULL for none);
   SIBCALL nonzero requests tail-call form.
   NOTE(review): this excerpt elides some original lines (braces, the
   TARGET_MACHO #if opener, returns); comments describe visible code only.  */
11873 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11874                   rtx callarg2 ATTRIBUTE_UNUSED,
11875                   rtx pop, int sibcall)
11877   rtx use = NULL, call;
11879   if (pop == const0_rtx)
  /* 64-bit ABI never uses callee-pop.  */
11881   if (TARGET_64BIT && pop)
11885   if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11886     fnaddr = machopic_indirect_call_target (fnaddr);
11888   /* Static functions and indirect calls don't need the pic register.  */
11889   if (! TARGET_64BIT && flag_pic
11890       && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11891       && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11892     use_reg (&use, pic_offset_table_rtx);
  /* x86-64 varargs: AL carries the number of SSE registers used.  */
11894   if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11896       rtx al = gen_rtx_REG (QImode, 0);
11897       emit_move_insn (al, callarg2);
11898       use_reg (&use, al);
11900 #endif /* TARGET_MACHO */
  /* Force an address the call patterns cannot take into a register.  */
11902   if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11904       fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11905       fnaddr = gen_rtx_MEM (QImode, fnaddr);
  /* 64-bit indirect sibcalls must go through R11: it is the only
     call-clobbered register not used for argument passing.  */
11907   if (sibcall && TARGET_64BIT
11908       && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11911       addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11912       fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11913       emit_move_insn (fnaddr, addr);
11914       fnaddr = gen_rtx_MEM (QImode, fnaddr);
11917   call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11919     call = gen_rtx_SET (VOIDmode, retval, call);
  /* Callee-pop: bundle the stack adjustment into the call PARALLEL.  */
11922       pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11923       pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11924       call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11927   call = emit_call_insn (call);
11929     CALL_INSN_FUNCTION_USAGE (call) = use;
11933 /* Clear stack slot assignments remembered from previous functions.
11934    This is called from INIT_EXPANDERS once before RTL is emitted for each
11937 static struct machine_function *
11938 ix86_init_machine_status (void)
11940   struct machine_function *f;
  /* GC-allocated and zero-filled; only the sentinel needs explicit init.  */
11942   f = ggc_alloc_cleared (sizeof (struct machine_function));
  /* -1 marks "not yet computed" for the fast prologue/epilogue decision.  */
11943   f->use_fast_prologue_epilogue_nregs = -1;
11948 /* Return a MEM corresponding to a stack slot with mode MODE.
11949    Allocate a new slot if necessary.
11951    The RTL for a function can have several slots available: N is
11952    which slot to use.  */
11955 assign_386_stack_local (enum machine_mode mode, int n)
11957   struct stack_local_entry *s;
  /* Guard against an out-of-range slot index (abort elided in excerpt).  */
11959   if (n < 0 || n >= MAX_386_STACK_LOCALS)
  /* Reuse an existing (mode, n) slot if one was already created.  */
11962   for (s = ix86_stack_locals; s; s = s->next)
11963     if (s->mode == mode && s->n == n)
  /* Otherwise allocate a fresh entry and prepend it to the list.  */
11966   s = (struct stack_local_entry *)
11967     ggc_alloc (sizeof (struct stack_local_entry));
11970   s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11972   s->next = ix86_stack_locals;
11973   ix86_stack_locals = s;
11977 /* Construct the SYMBOL_REF for the tls_get_addr function.  */
11979 static GTY(()) rtx ix86_tls_symbol;
11981 ix86_tls_get_addr (void)
  /* Lazily create the SYMBOL_REF and cache it across calls.  */
11984   if (!ix86_tls_symbol)
  /* 32-bit GNU TLS uses the triple-underscore variant (register-based
     calling convention); everything else uses __tls_get_addr.  */
11986       ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11987                                             (TARGET_GNU_TLS && !TARGET_64BIT)
11988                                             ? "___tls_get_addr"
11989                                             : "__tls_get_addr");
11992   return ix86_tls_symbol;
11995 /* Calculate the length of the memory address in the instruction
11996    encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
/* NOTE(review): excerpt elides some lines (len init, returns, aborts);
   comments describe only the visible logic.  */
11999 memory_address_length (rtx addr)
12001   struct ix86_address parts;
12002   rtx base, index, disp;
  /* Auto-inc/dec forms encode no extra address bytes here.  */
12005   if (GET_CODE (addr) == PRE_DEC
12006       || GET_CODE (addr) == POST_INC
12007       || GET_CODE (addr) == PRE_MODIFY
12008       || GET_CODE (addr) == POST_MODIFY)
12011   if (! ix86_decompose_address (addr, &parts))
12015   index = parts.index;
  /* x86 encoding quirks:
12020      - esp as the base always wants an index,
12021      - ebp as the base always wants a displacement.  */
12023   /* Register Indirect.  */
12024   if (base && !index && !disp)
12026       /* esp (for its index) and ebp (for its displacement) need
12027          the two-byte modrm form.  */
12028       if (addr == stack_pointer_rtx
12029           || addr == arg_pointer_rtx
12030           || addr == frame_pointer_rtx
12031           || addr == hard_frame_pointer_rtx)
12035   /* Direct Addressing.  */
12036   else if (disp && !base && !index)
12041       /* Find the length of the displacement constant.  */
  /* 'K' constraint = signed 8-bit; such displacements need one byte.  */
12044           if (GET_CODE (disp) == CONST_INT
12045               && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
12051       /* ebp always wants a displacement.  */
12052       else if (base == hard_frame_pointer_rtx)
12055       /* An index requires the two-byte modrm form....  */
12057           /* ...like esp, which always wants an index.  */
12058           || base == stack_pointer_rtx
12059           || base == arg_pointer_rtx
12060           || base == frame_pointer_rtx)
12067 /* Compute default value for "length_immediate" attribute.  When SHORTFORM
12068    is set, expect that insn have 8bit immediate alternative.  */
12070 ix86_attr_length_immediate_default (rtx insn, int shortform)
  /* Scan all operands for constants and size each one.  */
12074   extract_insn_cached (insn);
12075   for (i = recog_data.n_operands - 1; i >= 0; --i)
12076     if (CONSTANT_P (recog_data.operand[i]))
  /* 'K' = fits in a signed byte, so the short-form insn needs 1 byte.  */
12081             && GET_CODE (recog_data.operand[i]) == CONST_INT
12082             && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
  /* Otherwise the immediate size follows the insn's mode attribute.  */
12086           switch (get_attr_mode (insn))
12097             /* Immediates for DImode instructions are encoded as 32bit sign extended values.  */
12102               fatal_insn ("unknown insn mode", insn);
12108 /* Compute default value for "length_address" attribute.  */
12110 ix86_attr_length_address_default (rtx insn)
  /* LEA's "address" is its source expression, not a MEM operand.  */
12114   if (get_attr_type (insn) == TYPE_LEA)
12116       rtx set = PATTERN (insn);
12117       if (GET_CODE (set) == SET)
12119       else if (GET_CODE (set) == PARALLEL
12120                && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12121         set = XVECEXP (set, 0, 0);
12124 #ifdef ENABLE_CHECKING
12130       return memory_address_length (SET_SRC (set));
  /* Otherwise, measure the first MEM operand found (if any).  */
12133   extract_insn_cached (insn);
12134   for (i = recog_data.n_operands - 1; i >= 0; --i)
12135     if (GET_CODE (recog_data.operand[i]) == MEM)
12137         return memory_address_length (XEXP (recog_data.operand[i], 0));
12143 /* Return the maximum number of instructions a cpu can issue.  */
/* Used by the scheduler (TARGET_SCHED_ISSUE_RATE); values per case are
   elided in this excerpt.  */
12146 ix86_issue_rate (void)
12150     case PROCESSOR_PENTIUM:
12154     case PROCESSOR_PENTIUMPRO:
12155     case PROCESSOR_PENTIUM4:
12156     case PROCESSOR_ATHLON:
12158     case PROCESSOR_NOCONA:
12166 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12167 by DEP_INSN and nothing set by DEP_INSN. */
12170 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12174 /* Simplify the test for uninteresting insns. */
12175 if (insn_type != TYPE_SETCC
12176 && insn_type != TYPE_ICMOV
12177 && insn_type != TYPE_FCMOV
12178 && insn_type != TYPE_IBR)
12181 if ((set = single_set (dep_insn)) != 0)
12183 set = SET_DEST (set);
12186 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12187 && XVECLEN (PATTERN (dep_insn), 0) == 2
12188 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12189 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12191 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12192 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12197 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12200 /* This test is true if the dependent insn reads the flags but
12201 not any other potentially set register. */
12202 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12205 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12211 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12212    address with operands set by DEP_INSN.  */
12215 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
  /* For LEA the "address" is the SET source, not a MEM operand.  */
12219   if (insn_type == TYPE_LEA
12222       addr = PATTERN (insn);
12223       if (GET_CODE (addr) == SET)
12225       else if (GET_CODE (addr) == PARALLEL
12226                && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12227         addr = XVECEXP (addr, 0, 0);
12230       addr = SET_SRC (addr);
  /* Otherwise look for the first MEM operand's address.  */
12235       extract_insn_cached (insn);
12236       for (i = recog_data.n_operands - 1; i >= 0; --i)
12237         if (GET_CODE (recog_data.operand[i]) == MEM)
12239             addr = XEXP (recog_data.operand[i], 0);
  /* AGI stall exists iff DEP_INSN writes anything the address reads.  */
12246   return modified_in_p (addr, dep_insn);
/* TARGET_SCHED_ADJUST_COST hook: adjust the scheduling COST of the
   dependence LINK between INSN and DEP_INSN for the tuned processor.
   NOTE(review): this excerpt elides some lines (cost assignments,
   braces, the final return); comments describe visible code only.  */
12250 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12252   enum attr_type insn_type, dep_insn_type;
12253   enum attr_memory memory;
12255   int dep_insn_code_number;
12257   /* Anti and output dependencies have zero cost on all CPUs.  */
12258   if (REG_NOTE_KIND (link) != 0)
12261   dep_insn_code_number = recog_memoized (dep_insn);
12263   /* If we can't recognize the insns, we can't really do anything.  */
12264   if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12267   insn_type = get_attr_type (insn);
12268   dep_insn_type = get_attr_type (dep_insn);
12272     case PROCESSOR_PENTIUM:
12273       /* Address Generation Interlock adds a cycle of latency.  */
12274       if (ix86_agi_dependant (insn, dep_insn, insn_type))
12277       /* ??? Compares pair with jump/setcc.  */
12278       if (ix86_flags_dependant (insn, dep_insn, insn_type))
12281       /* Floating point stores require value to be ready one cycle earlier.  */
12282       if (insn_type == TYPE_FMOV
12283           && get_attr_memory (insn) == MEMORY_STORE
12284           && !ix86_agi_dependant (insn, dep_insn, insn_type))
12288     case PROCESSOR_PENTIUMPRO:
12289       memory = get_attr_memory (insn);
12291       /* INT->FP conversion is expensive.  */
12292       if (get_attr_fp_int_src (dep_insn))
12295       /* There is one cycle extra latency between an FP op and a store.  */
12296       if (insn_type == TYPE_FMOV
12297           && (set = single_set (dep_insn)) != NULL_RTX
12298           && (set2 = single_set (insn)) != NULL_RTX
12299           && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12300           && GET_CODE (SET_DEST (set2)) == MEM)
12303       /* Show ability of reorder buffer to hide latency of load by executing
12304          in parallel with previous instruction in case
12305          previous instruction is not needed to compute the address.  */
12306       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12307           && !ix86_agi_dependant (insn, dep_insn, insn_type))
12309           /* Claim moves to take one cycle, as core can issue one load
12310              at time and the next load can start cycle later.  */
12311           if (dep_insn_type == TYPE_IMOV
12312               || dep_insn_type == TYPE_FMOV)
  /* (K6 case, per the logic parallel to the PPro case above.)  */
12320       memory = get_attr_memory (insn);
12322       /* The esp dependency is resolved before the instruction is really
12324       if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12325           && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12328       /* INT->FP conversion is expensive.  */
12329       if (get_attr_fp_int_src (dep_insn))
12332       /* Show ability of reorder buffer to hide latency of load by executing
12333          in parallel with previous instruction in case
12334          previous instruction is not needed to compute the address.  */
12335       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12336           && !ix86_agi_dependant (insn, dep_insn, insn_type))
12338           /* Claim moves to take one cycle, as core can issue one load
12339              at time and the next load can start cycle later.  */
12340           if (dep_insn_type == TYPE_IMOV
12341               || dep_insn_type == TYPE_FMOV)
12350     case PROCESSOR_ATHLON:
12352       memory = get_attr_memory (insn);
12354       /* Show ability of reorder buffer to hide latency of load by executing
12355          in parallel with previous instruction in case
12356          previous instruction is not needed to compute the address.  */
12357       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12358           && !ix86_agi_dependant (insn, dep_insn, insn_type))
12360           enum attr_unit unit = get_attr_unit (insn);
12363           /* Because of the difference between the length of integer and
12364              floating unit pipeline preparation stages, the memory operands
12365              for floating point are cheaper.
12367              ??? For Athlon it the difference is most probably 2.  */
12368           if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12371             loadcost = TARGET_ATHLON ? 2 : 0;
12373           if (cost >= loadcost)
12386 /* How many alternative schedules to try.  This should be as wide as the
12387    scheduling freedom in the DFA, but no wider.  Making this value too
12388    large results extra work for the scheduler.  */
12391 ia32_multipass_dfa_lookahead (void)
  /* Per-CPU lookahead depth (returned constants elided in excerpt).  */
12393   if (ix86_tune == PROCESSOR_PENTIUM)
12396   if (ix86_tune == PROCESSOR_PENTIUMPRO
12397       || ix86_tune == PROCESSOR_K6)
12405 /* Compute the alignment given to a constant that is being placed in memory.
12406    EXP is the constant and ALIGN is the alignment that the object would
12408    The value of this function is used instead of that alignment to align
12412 ix86_constant_alignment (tree exp, int align)
  /* Bump doubles to 64 bits and 128-bit-mode reals to 128 bits.  */
12414   if (TREE_CODE (exp) == REAL_CST)
12416       if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12418       else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
  /* Word-align long string constants unless optimizing for size.  */
12421   else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12422            && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12423     return BITS_PER_WORD;
12428 /* Compute the alignment for a static variable.
12429    TYPE is the data type, and ALIGN is the alignment that
12430    the object would ordinarily have.  The value of this function is used
12431    instead of that alignment to align the object.  */
12434 ix86_data_alignment (tree type, int align)
  /* Large aggregates (>= 256 bits) get 256-bit alignment.  */
12436   if (AGGREGATE_TYPE_P (type)
12437       && TYPE_SIZE (type)
12438       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12439       && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12440           || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12443   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12444      to 16byte boundary.  */
12447       if (AGGREGATE_TYPE_P (type)
12448           && TYPE_SIZE (type)
12449           && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12450           && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12451               || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
  /* Element-driven alignment: arrays follow the element mode...  */
12455   if (TREE_CODE (type) == ARRAY_TYPE)
12457       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12459       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
  /* ...complex types follow the complex mode...  */
12462   else if (TREE_CODE (type) == COMPLEX_TYPE)
12465       if (TYPE_MODE (type) == DCmode && align < 64)
12467       if (TYPE_MODE (type) == XCmode && align < 128)
  /* ...records/unions follow their first field's mode...  */
12470   else if ((TREE_CODE (type) == RECORD_TYPE
12471             || TREE_CODE (type) == UNION_TYPE
12472             || TREE_CODE (type) == QUAL_UNION_TYPE)
12473            && TYPE_FIELDS (type))
12475       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12477       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
  /* ...and scalar/vector types follow their own mode.  */
12480   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12481            || TREE_CODE (type) == INTEGER_TYPE)
12483       if (TYPE_MODE (type) == DFmode && align < 64)
12485       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12492 /* Compute the alignment for a local variable.
12493    TYPE is the data type, and ALIGN is the alignment that
12494    the object would ordinarily have.  The value of this macro is used
12495    instead of that alignment to align the object.  */
/* Mirrors ix86_data_alignment, but with the 16-byte x86-64 array rule
   keyed on size >= 16 bits here (see the constant below) and without
   the 256-bit large-aggregate bump.  */
12498 ix86_local_alignment (tree type, int align)
12500   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12501      to 16byte boundary.  */
12504       if (AGGREGATE_TYPE_P (type)
12505           && TYPE_SIZE (type)
12506           && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
  /* NOTE(review): threshold is 16 (bits) here vs. 128 in
     ix86_data_alignment — presumably intentional for locals; confirm.  */
12507           && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12508               || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12511   if (TREE_CODE (type) == ARRAY_TYPE)
12513       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12515       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12518   else if (TREE_CODE (type) == COMPLEX_TYPE)
12520       if (TYPE_MODE (type) == DCmode && align < 64)
12522       if (TYPE_MODE (type) == XCmode && align < 128)
12525   else if ((TREE_CODE (type) == RECORD_TYPE
12526             || TREE_CODE (type) == UNION_TYPE
12527             || TREE_CODE (type) == QUAL_UNION_TYPE)
12528            && TYPE_FIELDS (type))
12530       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12532       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12535   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12536            || TREE_CODE (type) == INTEGER_TYPE)
12539       if (TYPE_MODE (type) == DFmode && align < 64)
12541       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12547 /* Emit RTL insns to initialize the variable parts of a trampoline.
12548    FNADDR is an RTX for the address of the function's pure code.
12549    CXT is an RTX for the static chain value for the function.  */
/* 32-bit layout: mov $cxt,%ecx (0xb9 imm32); jmp rel32 (0xe9 disp32).
   64-bit layout: mov imm,%r11; mov imm,%r10; jmp *%r11 — built below.  */
12551 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12555       /* Compute offset from the end of the jmp to the target function.  */
12556       rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12557                                plus_constant (tramp, 10),
12558                                NULL_RTX, 1, OPTAB_DIRECT);
  /* 0xb9 = mov imm32, %ecx opcode byte.  */
12559       emit_move_insn (gen_rtx_MEM (QImode, tramp),
12560                       gen_int_mode (0xb9, QImode));
12561       emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
  /* 0xe9 = jmp rel32 opcode byte.  */
12562       emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12563                       gen_int_mode (0xe9, QImode));
12564       emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12569       /* Try to load address using shorter movl instead of movabs.
12570          We may want to support movq for kernel mode, but kernel does not use
12571          trampolines at the moment.  */
12572       if (x86_64_zero_extended_value (fnaddr))
12574           fnaddr = copy_to_mode_reg (DImode, fnaddr);
  /* 0x41 0xbb = movl imm32, %r11d (zero-extends into %r11).  */
12575           emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12576                           gen_int_mode (0xbb41, HImode));
12577           emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12578                           gen_lowpart (SImode, fnaddr));
  /* 0x49 0xbb = movabs imm64, %r11.  */
12583           emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12584                           gen_int_mode (0xbb49, HImode));
12585           emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12589       /* Load static chain using movabs to r10.  */
12590       emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12591                       gen_int_mode (0xba49, HImode));
12592       emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12595       /* Jump to the r11 */
  /* 0x49 0xff 0xe3 = jmp *%r11.  */
12596       emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12597                       gen_int_mode (0xff49, HImode));
12598       emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12599                       gen_int_mode (0xe3, QImode));
  /* Sanity check: generated code must fit the declared trampoline size.  */
12601       if (offset > TRAMPOLINE_SIZE)
12605 #ifdef TRANSFER_FROM_TRAMPOLINE
  /* Some targets must flush caches / mark the stack executable.  */
12606   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
12607                      LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12611 #define def_builtin(MASK, NAME, TYPE, CODE) \
12613 if ((MASK) & target_flags \
12614 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12615 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12616 NULL, NULL_TREE); \
/* One row of the builtin tables below: ISA mask, insn pattern, user-visible
   builtin name, builtin enum code, the RTL comparison it implements (where
   applicable), and a per-table flag (e.g. "swap operands").  */
12619 struct builtin_description
12621   const unsigned int mask;
12622   const enum insn_code icode;
12623   const char *const name;
12624   const enum ix86_builtins code;
12625   const enum rtx_code comparison;
12626   const unsigned int flag;
/* SSE/SSE2 comi/ucomi scalar comparison builtins.  The unordered rtx
   codes (UNEQ, UNLT, UNLE, LTGT) mirror comiss/ucomiss flag semantics
   for the EQ/LT/LE/NEQ variants.  */
12629 static const struct builtin_description bdesc_comi[] =
12631   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12632   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12633   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12634   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12635   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12636   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12637   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12638   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12639   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12640   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12641   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12642   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12643   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12644   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12645   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12646   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12647   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12648   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12649   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12650   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12651   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12652   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12653   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12654   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12657 /* Two-operand (and swappable-comparison) MMX/SSE/SSE2/SSE3 builtins.
   Fields: enabling target mask, insn code, builtin name (0 = registered
   separately), builtin enum, comparison code (for *cmp* patterns), and a
   flag requesting operand swap (used to synthesize GT/GE from LT/LE).  */
static const struct builtin_description bdesc_2arg[] =
12660 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12661 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12662 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12663 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12664 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12665 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12666 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12667 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12669 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12670 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12671 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12672 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12673 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12674 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12675 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12676 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12677 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12678 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12679 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12680 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12681 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12682 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12683 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12684 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12685 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12686 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12687 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12688 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12690 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12691 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12692 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12693 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12695 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12696 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12697 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12698 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12700 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12701 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12702 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12703 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12704 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12707 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12708 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12709 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12710 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12711 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12712 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12713 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12714 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12716 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12717 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12718 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12719 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12720 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12721 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12722 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12723 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12725 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12726 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12727 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12729 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12730 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12731 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12732 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12734 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12735 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12737 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12738 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12739 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12740 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12741 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12742 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12744 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12745 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12746 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12747 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12749 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12750 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12751 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12752 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12753 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12754 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12757 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12758 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12759 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12761 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12762 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12763 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12765 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12766 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12767 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12768 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12769 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12770 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12772 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12773 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12774 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12775 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12776 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12777 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12779 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12780 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12781 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12782 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12784 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12785 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12788 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12789 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12790 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12791 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12792 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12793 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12794 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12795 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12797 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12798 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12799 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12800 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12801 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12802 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12803 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12804 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12805 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12806 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12807 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12808 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12809 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12810 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12811 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12812 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12813 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12814 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12815 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12816 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12818 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12819 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12820 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12821 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12823 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12824 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12825 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12826 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12828 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12829 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12830 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12833 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12834 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12835 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12836 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12837 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12838 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12839 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12840 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
/* FIX: the 128-bit saturating add/subtract builtins below are SSE2
   instructions (PADDSB/PADDSW/PADDUSB/... on XMM registers), so they
   must be gated on MASK_SSE2, not MASK_MMX.  Previously they were
   wrongly enabled by -mmmx and unavailable under plain -msse2.  */
12842 { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12843 { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12844 { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12845 { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12846 { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12847 { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12848 { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12849 { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12851 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12852 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12853 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12854 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12856 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12857 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12858 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12859 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12861 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12862 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12864 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12865 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12866 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12867 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12868 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12869 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12871 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12872 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12873 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12874 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12876 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12877 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12878 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12879 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12880 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12881 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12882 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12883 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12885 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12886 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12887 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12889 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12890 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12892 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12893 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12894 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12895 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12896 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12897 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12899 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12900 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12901 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12902 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12903 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12904 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12906 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12907 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12908 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12909 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12911 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12913 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12914 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12915 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12916 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12919 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12920 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12921 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12922 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12923 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12924 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
12927 /* One-operand MMX/SSE/SSE2/SSE3 builtins: move-mask extractions,
   square roots / reciprocal approximations, format conversions, and
   the SSE3 duplication moves.  A zero name field means the builtin is
   registered separately (not via the generic table walk).  */
static const struct builtin_description bdesc_1arg[] =
/* SSE / MMX-extension move-mask and movmsk.  */
12929 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12930 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
/* SSE square root and reciprocal approximations.  */
12932 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12933 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12934 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
/* SSE float <-> integer conversions (64-bit variants gated on MASK_64BIT).  */
12936 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12937 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12938 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12939 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12940 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12941 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
/* SSE2 move-mask and XMM <-> MMX register moves.  */
12943 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12944 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12945 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12946 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12948 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
/* SSE2 packed conversions (cvtt* variants truncate toward zero).  */
12950 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12951 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12953 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12954 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12955 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12956 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12957 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12959 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
/* SSE2 scalar double <-> integer conversions.  */
12961 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12962 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12963 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12964 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
12966 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12967 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12968 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12970 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
/* SSE3 duplication moves.  */
12973 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
12974 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
12975 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
12979 /* Target hook: register all ia32 machine-specific builtins.  Currently
   this just delegates to the MMX/SSE registration routine below.  */
ix86_init_builtins (void)
12982 ix86_init_mmx_sse_builtins ();
12985 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12986 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12989 ix86_init_mmx_sse_builtins (void)
12991 const struct builtin_description * d;
12994 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
12995 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
12996 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
12997 tree V2DI_type_node = build_vector_type_for_mode (intDI_type_node, V2DImode);
12998 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
12999 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
13000 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
13001 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
13002 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
13003 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
13005 tree pchar_type_node = build_pointer_type (char_type_node);
13006 tree pcchar_type_node = build_pointer_type (
13007 build_type_variant (char_type_node, 1, 0));
13008 tree pfloat_type_node = build_pointer_type (float_type_node);
13009 tree pcfloat_type_node = build_pointer_type (
13010 build_type_variant (float_type_node, 1, 0));
13011 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13012 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13013 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13016 tree int_ftype_v4sf_v4sf
13017 = build_function_type_list (integer_type_node,
13018 V4SF_type_node, V4SF_type_node, NULL_TREE);
13019 tree v4si_ftype_v4sf_v4sf
13020 = build_function_type_list (V4SI_type_node,
13021 V4SF_type_node, V4SF_type_node, NULL_TREE);
13022 /* MMX/SSE/integer conversions. */
13023 tree int_ftype_v4sf
13024 = build_function_type_list (integer_type_node,
13025 V4SF_type_node, NULL_TREE);
13026 tree int64_ftype_v4sf
13027 = build_function_type_list (long_long_integer_type_node,
13028 V4SF_type_node, NULL_TREE);
13029 tree int_ftype_v8qi
13030 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13031 tree v4sf_ftype_v4sf_int
13032 = build_function_type_list (V4SF_type_node,
13033 V4SF_type_node, integer_type_node, NULL_TREE);
13034 tree v4sf_ftype_v4sf_int64
13035 = build_function_type_list (V4SF_type_node,
13036 V4SF_type_node, long_long_integer_type_node,
13038 tree v4sf_ftype_v4sf_v2si
13039 = build_function_type_list (V4SF_type_node,
13040 V4SF_type_node, V2SI_type_node, NULL_TREE);
13041 tree int_ftype_v4hi_int
13042 = build_function_type_list (integer_type_node,
13043 V4HI_type_node, integer_type_node, NULL_TREE);
13044 tree v4hi_ftype_v4hi_int_int
13045 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13046 integer_type_node, integer_type_node,
13048 /* Miscellaneous. */
13049 tree v8qi_ftype_v4hi_v4hi
13050 = build_function_type_list (V8QI_type_node,
13051 V4HI_type_node, V4HI_type_node, NULL_TREE);
13052 tree v4hi_ftype_v2si_v2si
13053 = build_function_type_list (V4HI_type_node,
13054 V2SI_type_node, V2SI_type_node, NULL_TREE);
13055 tree v4sf_ftype_v4sf_v4sf_int
13056 = build_function_type_list (V4SF_type_node,
13057 V4SF_type_node, V4SF_type_node,
13058 integer_type_node, NULL_TREE);
13059 tree v2si_ftype_v4hi_v4hi
13060 = build_function_type_list (V2SI_type_node,
13061 V4HI_type_node, V4HI_type_node, NULL_TREE);
13062 tree v4hi_ftype_v4hi_int
13063 = build_function_type_list (V4HI_type_node,
13064 V4HI_type_node, integer_type_node, NULL_TREE);
13065 tree v4hi_ftype_v4hi_di
13066 = build_function_type_list (V4HI_type_node,
13067 V4HI_type_node, long_long_unsigned_type_node,
13069 tree v2si_ftype_v2si_di
13070 = build_function_type_list (V2SI_type_node,
13071 V2SI_type_node, long_long_unsigned_type_node,
13073 tree void_ftype_void
13074 = build_function_type (void_type_node, void_list_node);
13075 tree void_ftype_unsigned
13076 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13077 tree void_ftype_unsigned_unsigned
13078 = build_function_type_list (void_type_node, unsigned_type_node,
13079 unsigned_type_node, NULL_TREE);
13080 tree void_ftype_pcvoid_unsigned_unsigned
13081 = build_function_type_list (void_type_node, const_ptr_type_node,
13082 unsigned_type_node, unsigned_type_node,
13084 tree unsigned_ftype_void
13085 = build_function_type (unsigned_type_node, void_list_node);
13087 = build_function_type (long_long_unsigned_type_node, void_list_node);
13088 tree v4sf_ftype_void
13089 = build_function_type (V4SF_type_node, void_list_node);
13090 tree v2si_ftype_v4sf
13091 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13092 /* Loads/stores. */
13093 tree void_ftype_v8qi_v8qi_pchar
13094 = build_function_type_list (void_type_node,
13095 V8QI_type_node, V8QI_type_node,
13096 pchar_type_node, NULL_TREE);
13097 tree v4sf_ftype_pcfloat
13098 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13099 /* @@@ the type is bogus */
13100 tree v4sf_ftype_v4sf_pv2si
13101 = build_function_type_list (V4SF_type_node,
13102 V4SF_type_node, pv2si_type_node, NULL_TREE);
13103 tree void_ftype_pv2si_v4sf
13104 = build_function_type_list (void_type_node,
13105 pv2si_type_node, V4SF_type_node, NULL_TREE);
13106 tree void_ftype_pfloat_v4sf
13107 = build_function_type_list (void_type_node,
13108 pfloat_type_node, V4SF_type_node, NULL_TREE);
13109 tree void_ftype_pdi_di
13110 = build_function_type_list (void_type_node,
13111 pdi_type_node, long_long_unsigned_type_node,
13113 tree void_ftype_pv2di_v2di
13114 = build_function_type_list (void_type_node,
13115 pv2di_type_node, V2DI_type_node, NULL_TREE);
13116 /* Normal vector unops. */
13117 tree v4sf_ftype_v4sf
13118 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13120 /* Normal vector binops. */
13121 tree v4sf_ftype_v4sf_v4sf
13122 = build_function_type_list (V4SF_type_node,
13123 V4SF_type_node, V4SF_type_node, NULL_TREE);
13124 tree v8qi_ftype_v8qi_v8qi
13125 = build_function_type_list (V8QI_type_node,
13126 V8QI_type_node, V8QI_type_node, NULL_TREE);
13127 tree v4hi_ftype_v4hi_v4hi
13128 = build_function_type_list (V4HI_type_node,
13129 V4HI_type_node, V4HI_type_node, NULL_TREE);
13130 tree v2si_ftype_v2si_v2si
13131 = build_function_type_list (V2SI_type_node,
13132 V2SI_type_node, V2SI_type_node, NULL_TREE);
13133 tree di_ftype_di_di
13134 = build_function_type_list (long_long_unsigned_type_node,
13135 long_long_unsigned_type_node,
13136 long_long_unsigned_type_node, NULL_TREE);
13138 tree v2si_ftype_v2sf
13139 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13140 tree v2sf_ftype_v2si
13141 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13142 tree v2si_ftype_v2si
13143 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13144 tree v2sf_ftype_v2sf
13145 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13146 tree v2sf_ftype_v2sf_v2sf
13147 = build_function_type_list (V2SF_type_node,
13148 V2SF_type_node, V2SF_type_node, NULL_TREE);
13149 tree v2si_ftype_v2sf_v2sf
13150 = build_function_type_list (V2SI_type_node,
13151 V2SF_type_node, V2SF_type_node, NULL_TREE);
13152 tree pint_type_node = build_pointer_type (integer_type_node);
13153 tree pcint_type_node = build_pointer_type (
13154 build_type_variant (integer_type_node, 1, 0));
13155 tree pdouble_type_node = build_pointer_type (double_type_node);
13156 tree pcdouble_type_node = build_pointer_type (
13157 build_type_variant (double_type_node, 1, 0));
13158 tree int_ftype_v2df_v2df
13159 = build_function_type_list (integer_type_node,
13160 V2DF_type_node, V2DF_type_node, NULL_TREE);
13163 = build_function_type (intTI_type_node, void_list_node);
13164 tree v2di_ftype_void
13165 = build_function_type (V2DI_type_node, void_list_node);
13166 tree ti_ftype_ti_ti
13167 = build_function_type_list (intTI_type_node,
13168 intTI_type_node, intTI_type_node, NULL_TREE);
13169 tree void_ftype_pcvoid
13170 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13172 = build_function_type_list (V2DI_type_node,
13173 long_long_unsigned_type_node, NULL_TREE);
13175 = build_function_type_list (long_long_unsigned_type_node,
13176 V2DI_type_node, NULL_TREE);
13177 tree v4sf_ftype_v4si
13178 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13179 tree v4si_ftype_v4sf
13180 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13181 tree v2df_ftype_v4si
13182 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13183 tree v4si_ftype_v2df
13184 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13185 tree v2si_ftype_v2df
13186 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13187 tree v4sf_ftype_v2df
13188 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13189 tree v2df_ftype_v2si
13190 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13191 tree v2df_ftype_v4sf
13192 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13193 tree int_ftype_v2df
13194 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13195 tree int64_ftype_v2df
13196 = build_function_type_list (long_long_integer_type_node,
13197 V2DF_type_node, NULL_TREE);
13198 tree v2df_ftype_v2df_int
13199 = build_function_type_list (V2DF_type_node,
13200 V2DF_type_node, integer_type_node, NULL_TREE);
13201 tree v2df_ftype_v2df_int64
13202 = build_function_type_list (V2DF_type_node,
13203 V2DF_type_node, long_long_integer_type_node,
13205 tree v4sf_ftype_v4sf_v2df
13206 = build_function_type_list (V4SF_type_node,
13207 V4SF_type_node, V2DF_type_node, NULL_TREE);
13208 tree v2df_ftype_v2df_v4sf
13209 = build_function_type_list (V2DF_type_node,
13210 V2DF_type_node, V4SF_type_node, NULL_TREE);
13211 tree v2df_ftype_v2df_v2df_int
13212 = build_function_type_list (V2DF_type_node,
13213 V2DF_type_node, V2DF_type_node,
13216 tree v2df_ftype_v2df_pv2si
13217 = build_function_type_list (V2DF_type_node,
13218 V2DF_type_node, pv2si_type_node, NULL_TREE);
13219 tree void_ftype_pv2si_v2df
13220 = build_function_type_list (void_type_node,
13221 pv2si_type_node, V2DF_type_node, NULL_TREE);
13222 tree void_ftype_pdouble_v2df
13223 = build_function_type_list (void_type_node,
13224 pdouble_type_node, V2DF_type_node, NULL_TREE);
13225 tree void_ftype_pint_int
13226 = build_function_type_list (void_type_node,
13227 pint_type_node, integer_type_node, NULL_TREE);
13228 tree void_ftype_v16qi_v16qi_pchar
13229 = build_function_type_list (void_type_node,
13230 V16QI_type_node, V16QI_type_node,
13231 pchar_type_node, NULL_TREE);
13232 tree v2df_ftype_pcdouble
13233 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13234 tree v2df_ftype_v2df_v2df
13235 = build_function_type_list (V2DF_type_node,
13236 V2DF_type_node, V2DF_type_node, NULL_TREE);
13237 tree v16qi_ftype_v16qi_v16qi
13238 = build_function_type_list (V16QI_type_node,
13239 V16QI_type_node, V16QI_type_node, NULL_TREE);
13240 tree v8hi_ftype_v8hi_v8hi
13241 = build_function_type_list (V8HI_type_node,
13242 V8HI_type_node, V8HI_type_node, NULL_TREE);
13243 tree v4si_ftype_v4si_v4si
13244 = build_function_type_list (V4SI_type_node,
13245 V4SI_type_node, V4SI_type_node, NULL_TREE);
13246 tree v2di_ftype_v2di_v2di
13247 = build_function_type_list (V2DI_type_node,
13248 V2DI_type_node, V2DI_type_node, NULL_TREE);
13249 tree v2di_ftype_v2df_v2df
13250 = build_function_type_list (V2DI_type_node,
13251 V2DF_type_node, V2DF_type_node, NULL_TREE);
13252 tree v2df_ftype_v2df
13253 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13254 tree v2df_ftype_double
13255 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13256 tree v2df_ftype_double_double
13257 = build_function_type_list (V2DF_type_node,
13258 double_type_node, double_type_node, NULL_TREE);
13259 tree int_ftype_v8hi_int
13260 = build_function_type_list (integer_type_node,
13261 V8HI_type_node, integer_type_node, NULL_TREE);
13262 tree v8hi_ftype_v8hi_int_int
13263 = build_function_type_list (V8HI_type_node,
13264 V8HI_type_node, integer_type_node,
13265 integer_type_node, NULL_TREE);
13266 tree v2di_ftype_v2di_int
13267 = build_function_type_list (V2DI_type_node,
13268 V2DI_type_node, integer_type_node, NULL_TREE);
13269 tree v4si_ftype_v4si_int
13270 = build_function_type_list (V4SI_type_node,
13271 V4SI_type_node, integer_type_node, NULL_TREE);
13272 tree v8hi_ftype_v8hi_int
13273 = build_function_type_list (V8HI_type_node,
13274 V8HI_type_node, integer_type_node, NULL_TREE);
13275 tree v8hi_ftype_v8hi_v2di
13276 = build_function_type_list (V8HI_type_node,
13277 V8HI_type_node, V2DI_type_node, NULL_TREE);
13278 tree v4si_ftype_v4si_v2di
13279 = build_function_type_list (V4SI_type_node,
13280 V4SI_type_node, V2DI_type_node, NULL_TREE);
13281 tree v4si_ftype_v8hi_v8hi
13282 = build_function_type_list (V4SI_type_node,
13283 V8HI_type_node, V8HI_type_node, NULL_TREE);
13284 tree di_ftype_v8qi_v8qi
13285 = build_function_type_list (long_long_unsigned_type_node,
13286 V8QI_type_node, V8QI_type_node, NULL_TREE);
13287 tree v2di_ftype_v16qi_v16qi
13288 = build_function_type_list (V2DI_type_node,
13289 V16QI_type_node, V16QI_type_node, NULL_TREE);
13290 tree int_ftype_v16qi
13291 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13292 tree v16qi_ftype_pcchar
13293 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13294 tree void_ftype_pchar_v16qi
13295 = build_function_type_list (void_type_node,
13296 pchar_type_node, V16QI_type_node, NULL_TREE);
13297 tree v4si_ftype_pcint
13298 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13299 tree void_ftype_pcint_v4si
13300 = build_function_type_list (void_type_node,
13301 pcint_type_node, V4SI_type_node, NULL_TREE);
13302 tree v2di_ftype_v2di
13303 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13306 tree float128_type;
13308 /* The __float80 type. */
13309 if (TYPE_MODE (long_double_type_node) == XFmode)
13310 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13314 /* The __float80 type. */
13315 float80_type = make_node (REAL_TYPE);
13316 TYPE_PRECISION (float80_type) = 96;
13317 layout_type (float80_type);
13318 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13321 float128_type = make_node (REAL_TYPE);
13322 TYPE_PRECISION (float128_type) = 128;
13323 layout_type (float128_type);
13324 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13326 /* Add all builtins that are more or less simple operations on two
13328 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13330 /* Use one of the operands; the target can have a different mode for
13331 mask-generating compares. */
13332 enum machine_mode mode;
13337 mode = insn_data[d->icode].operand[1].mode;
13342 type = v16qi_ftype_v16qi_v16qi;
13345 type = v8hi_ftype_v8hi_v8hi;
13348 type = v4si_ftype_v4si_v4si;
13351 type = v2di_ftype_v2di_v2di;
13354 type = v2df_ftype_v2df_v2df;
13357 type = ti_ftype_ti_ti;
13360 type = v4sf_ftype_v4sf_v4sf;
13363 type = v8qi_ftype_v8qi_v8qi;
13366 type = v4hi_ftype_v4hi_v4hi;
13369 type = v2si_ftype_v2si_v2si;
13372 type = di_ftype_di_di;
13379 /* Override for comparisons. */
13380 if (d->icode == CODE_FOR_maskcmpv4sf3
13381 || d->icode == CODE_FOR_maskncmpv4sf3
13382 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13383 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13384 type = v4si_ftype_v4sf_v4sf;
13386 if (d->icode == CODE_FOR_maskcmpv2df3
13387 || d->icode == CODE_FOR_maskncmpv2df3
13388 || d->icode == CODE_FOR_vmmaskcmpv2df3
13389 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13390 type = v2di_ftype_v2df_v2df;
13392 def_builtin (d->mask, d->name, type, d->code);
13395 /* Add the remaining MMX insns with somewhat more complicated types. */
13396 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13397 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13398 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13399 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13400 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13402 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13403 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13404 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13406 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13407 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13409 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13410 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13412 /* comi/ucomi insns. */
13413 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13414 if (d->mask == MASK_SSE2)
13415 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13417 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13419 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13420 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13421 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13423 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13424 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13425 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13426 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13427 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13428 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13429 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13430 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13431 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13432 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13433 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13435 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13436 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13438 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13440 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13441 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13442 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13443 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13444 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13445 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13447 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13448 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13449 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13450 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13452 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13453 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13454 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13455 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13457 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13459 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13461 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13462 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13463 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13464 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13465 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13466 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13468 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13470 /* Original 3DNow! */
13471 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13472 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13473 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13474 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13475 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13476 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13477 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13478 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13479 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13480 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13481 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13482 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13483 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13484 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13485 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13486 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13487 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13488 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13489 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13490 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13492 /* 3DNow! extension as used in the Athlon CPU. */
13493 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13494 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13495 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13496 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13497 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13498 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13500 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13503 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13504 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13506 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13507 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13508 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13510 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13511 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13512 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13513 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13514 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13515 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13517 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13518 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13519 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13520 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13522 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13523 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13524 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13525 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13526 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13528 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13529 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13530 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13531 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13533 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13534 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13536 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13538 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13539 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13541 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13542 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13543 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13544 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13545 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13547 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13549 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13550 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13551 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13552 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13554 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13555 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13556 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13558 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13559 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13560 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13561 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13563 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13564 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13565 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13566 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13567 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13568 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13569 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13571 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13572 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13573 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13575 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13576 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13577 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13578 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13579 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13580 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13581 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13583 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13585 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13586 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13587 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13589 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13590 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13591 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13593 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13594 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13596 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13597 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13598 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13599 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13601 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13602 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13603 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13604 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13606 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13607 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13609 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13611 /* Prescott New Instructions. */
13612 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
13613 void_ftype_pcvoid_unsigned_unsigned,
13614 IX86_BUILTIN_MONITOR);
13615 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
13616 void_ftype_unsigned_unsigned,
13617 IX86_BUILTIN_MWAIT);
13618 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
13620 IX86_BUILTIN_MOVSHDUP);
13621 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
13623 IX86_BUILTIN_MOVSLDUP);
13624 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
13625 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13626 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
13627 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13628 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
13629 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13632 /* Errors in the source file can cause expand_expr to return const0_rtx
13633 where we expect a vector. To avoid crashing, use one of the vector
13634 clear instructions. */
/* NOTE(review): the "13xxx" tokens are pasted listing line numbers and this
   chunk is a sampled listing -- the `static rtx` return-type line, the early
   `return x;` after the const0_rtx test, the `else`, and the closing
   `return x; }` are not visible here.  Verify against the complete i386.c.  */
13636 safe_vector_operand (rtx x, enum machine_mode mode)
/* X is usable as-is unless expand_expr degraded it to const0_rtx.  */
13638 if (x != const0_rtx)
/* Materialize a zeroed pseudo of the requested vector mode instead.  */
13640 x = gen_reg_rtx (mode);
/* MMX/3DNow! modes: clear via the DImode MMX clear pattern, taking a
   DImode subreg when MODE itself is not DImode.  */
13642 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13643 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13644 : gen_rtx_SUBREG (DImode, x, 0)));
/* Otherwise (SSE modes, presumably via an elided `else`): clear through
   the V4SFmode SSE clear pattern, again via subreg when needed.  */
13646 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13647 : gen_rtx_SUBREG (V4SFmode, x, 0),
13648 CONST0_RTX (V4SFmode)));
13652 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expands a two-operand builtin: pulls ARG0/ARG1 from ARGLIST, coerces both
   operands to the modes the insn pattern ICODE expects, and emits the insn
   writing TARGET.  NOTE(review): sampled listing -- the return-type line,
   `rtx pat;` declaration, several braces, the `if (target == 0` head of the
   condition at "13672", the abort path, and the final
   `emit_insn (pat); return target;` are not visible here.  */
13655 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13658 tree arg0 = TREE_VALUE (arglist);
13659 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13660 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13661 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
/* Operand modes come from the insn pattern's operand table, not the tree.  */
13662 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13663 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13664 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx standing in for a vector (see
   safe_vector_operand above).  */
13666 if (VECTOR_MODE_P (mode0))
13667 op0 = safe_vector_operand (op0, mode0);
13668 if (VECTOR_MODE_P (mode1))
13669 op1 = safe_vector_operand (op1, mode1);
/* Allocate a fresh pseudo when TARGET is absent or unsuitable for the
   pattern's destination predicate/mode.  */
13672 || GET_MODE (target) != tmode
13673 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13674 target = gen_reg_rtx (tmode);
/* Special case: a 32-bit scalar feeding a TImode operand (e.g. shift
   counts) is widened by loading it into a V4SI register first.  */
13676 if (GET_MODE (op1) == SImode && mode1 == TImode)
13678 rtx x = gen_reg_rtx (V4SImode);
13679 emit_insn (gen_sse2_loadd (x, op1));
13680 op1 = gen_lowpart (TImode, x);
13683 /* In case the insn wants input operands in modes different from
13684 the result, abort. */
13685 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13686 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
/* Force each operand into a register when it fails its predicate.  */
13689 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13690 op0 = copy_to_mode_reg (mode0, op0);
13691 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13692 op1 = copy_to_mode_reg (mode1, op1);
13694 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13695 yet one of the two must not be a memory. This is normally enforced
13696 by expanders, but we didn't bother to create one here. */
13697 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13698 op0 = copy_to_mode_reg (mode0, op0)
13700 pat = GEN_FCN (icode) (target, op0, op1);
13707 /* Subroutine of ix86_expand_builtin to take care of stores. */
/* Expands a store builtin: ARG0 is a pointer (becomes the destination MEM),
   ARG1 is the value stored.  NOTE(review): sampled listing -- return-type
   line, `rtx pat;`, braces, and the trailing `if (pat) emit_insn (pat);
   return 0;` are not visible here.  */
13710 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13713 tree arg0 = TREE_VALUE (arglist);
13714 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13715 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13716 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
/* Destination and source modes come from the insn pattern.  */
13717 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13718 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
/* Guard against const0_rtx standing in for a vector value.  */
13720 if (VECTOR_MODE_P (mode1))
13721 op1 = safe_vector_operand (op1, mode1);
/* Turn the pointer into a MEM of the store mode; force the value into a
   register so the pattern always matches.  */
13723 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13724 op1 = copy_to_mode_reg (mode1, op1);
13726 pat = GEN_FCN (icode) (op0, op1);
13732 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
/* Expands a one-operand builtin.  When DO_LOAD is nonzero ARG0 is a pointer
   and the operand is a MEM built from it; otherwise ARG0 is the value
   itself.  NOTE(review): sampled listing -- return type, `rtx pat;`, the
   `if (target == 0` condition head at "13745", the if/else around the
   DO_LOAD branch, and the final `emit_insn (pat); return target;` are not
   visible here.  */
13735 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13736 rtx target, int do_load)
13739 tree arg0 = TREE_VALUE (arglist);
13740 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13741 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13742 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Allocate a fresh pseudo when TARGET is absent or unsuitable.  */
13745 || GET_MODE (target) != tmode
13746 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13747 target = gen_reg_rtx (tmode);
/* DO_LOAD path (guard elided in this listing): dereference the pointer.  */
13749 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
/* Non-load path: guard against const0_rtx vectors, then satisfy the
   operand predicate.  */
13752 if (VECTOR_MODE_P (mode0))
13753 op0 = safe_vector_operand (op0, mode0);
13755 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13756 op0 = copy_to_mode_reg (mode0, op0);
13759 pat = GEN_FCN (icode) (target, op0);
13766 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13767 sqrtss, rsqrtss, rcpss. */
/* These scalar SSE patterns take the source twice (the untouched upper
   elements pass through), hence the second operand OP1.  NOTE(review):
   sampled listing -- return type, `rtx pat;`, the `if (target == 0`
   condition head at "13779", the assignment that seeds OP1 (presumably
   `op1 = op0;` -- confirm against full source), and the final
   `emit_insn (pat); return target;` are not visible here.  */
13770 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13773 tree arg0 = TREE_VALUE (arglist);
13774 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13775 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13776 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Allocate a fresh pseudo when TARGET is absent or unsuitable.  */
13779 || GET_MODE (target) != tmode
13780 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13781 target = gen_reg_rtx (tmode);
/* Guard against const0_rtx standing in for a vector.  */
13783 if (VECTOR_MODE_P (mode0))
13784 op0 = safe_vector_operand (op0, mode0);
13786 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13787 op0 = copy_to_mode_reg (mode0, op0);
/* Both input operands must satisfy their predicates; note operand 2 is
   checked against MODE0 as well.  */
13790 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13791 op1 = copy_to_mode_reg (mode0, op1);
13793 pat = GEN_FCN (icode) (target, op0, op1);
13800 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expands a (v)cmp-style SSE comparison described by D: the comparison
   code is passed to the pattern as an extra rtx operand OP2.  NOTE(review):
   sampled listing -- return type, declarations of `pat`/`op2`/`target`,
   the swap branch's guard and body (only the temporary's creation at
   "13826"/"13827" is visible; the actual op0/op1 exchange and reversed
   comparison code are elided), the `if (target == 0` head at "13833", and
   the final `emit_insn (pat); return target;` are not visible here.  */
13803 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13807 tree arg0 = TREE_VALUE (arglist);
13808 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13809 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13810 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
/* Modes and the comparison code come from the builtin description.  */
13812 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13813 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13814 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13815 enum rtx_code comparison = d->comparison;
/* Guard against const0_rtx standing in for a vector.  */
13817 if (VECTOR_MODE_P (mode0))
13818 op0 = safe_vector_operand (op0, mode0)
13819 if (VECTOR_MODE_P (mode1))
13820 op1 = safe_vector_operand (op1, mode1);
13822 /* Swap operands if we have a comparison that isn't available in
13826 rtx tmp = gen_reg_rtx (mode1);
13827 emit_move_insn (tmp, op1);
/* Allocate a fresh pseudo when TARGET is absent or unsuitable.  */
13833 || GET_MODE (target) != tmode
13834 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13835 target = gen_reg_rtx (tmode);
13837 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13838 op0 = copy_to_mode_reg (mode0, op0);
13839 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13840 op1 = copy_to_mode_reg (mode1, op1);
/* Build the comparison rtx and hand it to the pattern as operand 3.  */
13842 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13843 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13850 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expands a comiss/ucomiss-style builtin: emits the compare, then
   materializes the boolean result of COMPARISON on the flags into the low
   byte of an SImode pseudo and returns that SImode register.  NOTE(review):
   sampled listing -- return type, local declarations, the swap-operands
   branch body after "13871", the `if (! pat) return 0;`-style checks around
   "13890", and part of the gen_rtx_SET arguments (the flags-register
   operands after "13896") are not visible here.  */
13853 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13857 tree arg0 = TREE_VALUE (arglist);
13858 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13859 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13860 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13862 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13863 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13864 enum rtx_code comparison = d->comparison;
/* Guard against const0_rtx standing in for a vector.  */
13866 if (VECTOR_MODE_P (mode0))
13867 op0 = safe_vector_operand (op0, mode0);
13868 if (VECTOR_MODE_P (mode1))
13869 op1 = safe_vector_operand (op1, mode1);
13871 /* Swap operands if we have a comparison that isn't available in
/* Zero the full SImode result, then write only its low QImode part via
   STRICT_LOW_PART below, so the upper bits are well-defined.  */
13880 target = gen_reg_rtx (SImode);
13881 emit_move_insn (target, const0_rtx);
13882 target = gen_rtx_SUBREG (QImode, target, 0);
13884 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13885 op0 = copy_to_mode_reg (mode0, op0);
13886 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13887 op1 = copy_to_mode_reg (mode1, op1);
13889 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
/* The comi pattern itself only consumes the two operands; it sets the
   flags register as a side effect.  */
13890 pat = GEN_FCN (d->icode) (op0, op1);
/* Set the low byte of TARGET from the flags according to COMPARISON.  */
13894 emit_insn (gen_rtx_SET (VOIDmode,
13895 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13896 gen_rtx_fmt_ee (comparison, QImode,
/* Return the underlying SImode register, not the QImode subreg.  */
13900 return SUBREG_REG (target);
13903 /* Expand an expression EXP that calls a built-in function,
13904 with result going to TARGET if that's convenient
13905 (and in mode MODE if that's convenient).
13906 SUBTARGET may be used as the target for computing one of EXP's operands.
13907 IGNORE is nonzero if the value is to be ignored. */
/* NOTE(review): this is a partial numbered listing.  Many original lines
   (the `switch (fcode)` header, `break;`/`return` statements, closing
   braces, and some `if (target == 0 ...` conditions) are missing from the
   dump; comments below describe only what the visible lines establish.  */
13910 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13911 enum machine_mode mode ATTRIBUTE_UNUSED,
13912 int ignore ATTRIBUTE_UNUSED)
13914 const struct builtin_description *d;
13916 enum insn_code icode;
/* The builtin's FUNCTION_DECL and its argument TREE_LIST are dug out of
   the CALL_EXPR; FCODE selects the specific IX86_BUILTIN_* being expanded.  */
13917 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13918 tree arglist = TREE_OPERAND (exp, 1);
13919 tree arg0, arg1, arg2;
13920 rtx op0, op1, op2, pat;
13921 enum machine_mode tmode, mode0, mode1, mode2;
13922 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
/* Zero-operand builtins: just emit the corresponding insn.  */
13926 case IX86_BUILTIN_EMMS:
13927 emit_insn (gen_emms ());
13930 case IX86_BUILTIN_SFENCE:
13931 emit_insn (gen_sfence ());
/* pextrw: extract a 16-bit element; the selector (operand 2) must be an
   immediate accepted by the insn predicate, else we error and bail with a
   fresh (uninitialized) register.  */
13934 case IX86_BUILTIN_PEXTRW:
13935 case IX86_BUILTIN_PEXTRW128:
13936 icode = (fcode == IX86_BUILTIN_PEXTRW
13937 ? CODE_FOR_mmx_pextrw
13938 : CODE_FOR_sse2_pextrw);
13939 arg0 = TREE_VALUE (arglist);
13940 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13941 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13942 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
/* Modes come from the insn description, not from the trees.  */
13943 tmode = insn_data[icode].operand[0].mode;
13944 mode0 = insn_data[icode].operand[1].mode;
13945 mode1 = insn_data[icode].operand[2].mode;
13947 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13948 op0 = copy_to_mode_reg (mode0, op0);
13949 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13951 error ("selector must be an integer constant in the range 0..%i",
13952 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
/* Error recovery: return a dummy register of the right mode.  */
13953 return gen_reg_rtx (tmode);
13956 || GET_MODE (target) != tmode
13957 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13958 target = gen_reg_rtx (tmode);
13959 pat = GEN_FCN (icode) (target, op0, op1);
/* pinsrw: insert a 16-bit element at an immediate position (operand 3).  */
13965 case IX86_BUILTIN_PINSRW:
13966 case IX86_BUILTIN_PINSRW128:
13967 icode = (fcode == IX86_BUILTIN_PINSRW
13968 ? CODE_FOR_mmx_pinsrw
13969 : CODE_FOR_sse2_pinsrw);
13970 arg0 = TREE_VALUE (arglist);
13971 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13972 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13973 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13974 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13975 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13976 tmode = insn_data[icode].operand[0].mode;
13977 mode0 = insn_data[icode].operand[1].mode;
13978 mode1 = insn_data[icode].operand[2].mode;
13979 mode2 = insn_data[icode].operand[3].mode;
13981 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13982 op0 = copy_to_mode_reg (mode0, op0);
13983 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13984 op1 = copy_to_mode_reg (mode1, op1);
13985 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13987 error ("selector must be an integer constant in the range 0..%i",
13988 fcode == IX86_BUILTIN_PINSRW ? 15:255);
13992 || GET_MODE (target) != tmode
13993 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13994 target = gen_reg_rtx (tmode);
13995 pat = GEN_FCN (icode) (target, op0, op1, op2);
/* maskmovq/maskmovdqu: byte-masked store through (implicit) EDI/RDI.  */
14001 case IX86_BUILTIN_MASKMOVQ:
14002 case IX86_BUILTIN_MASKMOVDQU:
14003 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14004 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14005 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14006 : CODE_FOR_sse2_maskmovdqu));
14007 /* Note the arg order is different from the operand order. */
14008 arg1 = TREE_VALUE (arglist);
14009 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14010 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14011 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14012 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14013 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14014 mode0 = insn_data[icode].operand[0].mode;
14015 mode1 = insn_data[icode].operand[1].mode;
14016 mode2 = insn_data[icode].operand[2].mode;
14018 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14019 op0 = copy_to_mode_reg (mode0, op0);
14020 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14021 op1 = copy_to_mode_reg (mode1, op1);
14022 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14023 op2 = copy_to_mode_reg (mode2, op2);
14024 pat = GEN_FCN (icode) (op0, op1, op2);
/* Simple scalar-SSE unops and SSE load/store builtins: delegate to the
   generic expander helpers (defined elsewhere in this file).  */
14030 case IX86_BUILTIN_SQRTSS:
14031 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14032 case IX86_BUILTIN_RSQRTSS:
14033 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14034 case IX86_BUILTIN_RCPSS:
14035 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14037 case IX86_BUILTIN_LOADAPS:
14038 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14040 case IX86_BUILTIN_LOADUPS:
14041 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14043 case IX86_BUILTIN_STOREAPS:
14044 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14046 case IX86_BUILTIN_STOREUPS:
14047 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14049 case IX86_BUILTIN_LOADSS:
14050 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14052 case IX86_BUILTIN_STORESS:
14053 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
/* movhps/movlps/movhpd/movsd loads: arg1 is a pointer; wrap it in a MEM
   of the insn's expected mode.  */
14055 case IX86_BUILTIN_LOADHPS:
14056 case IX86_BUILTIN_LOADLPS:
14057 case IX86_BUILTIN_LOADHPD:
14058 case IX86_BUILTIN_LOADLPD:
14059 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14060 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14061 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14062 : CODE_FOR_sse2_movsd);
14063 arg0 = TREE_VALUE (arglist);
14064 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14065 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14066 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14067 tmode = insn_data[icode].operand[0].mode;
14068 mode0 = insn_data[icode].operand[1].mode;
14069 mode1 = insn_data[icode].operand[2].mode;
14071 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14072 op0 = copy_to_mode_reg (mode0, op0);
14073 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14075 || GET_MODE (target) != tmode
14076 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14077 target = gen_reg_rtx (tmode);
14078 pat = GEN_FCN (icode) (target, op0, op1);
/* The matching stores: arg0 is the destination pointer.  The generated
   insn uses op0 as both destination and first source operand.  */
14084 case IX86_BUILTIN_STOREHPS:
14085 case IX86_BUILTIN_STORELPS:
14086 case IX86_BUILTIN_STOREHPD:
14087 case IX86_BUILTIN_STORELPD:
14088 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14089 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14090 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14091 : CODE_FOR_sse2_movsd);
14092 arg0 = TREE_VALUE (arglist);
14093 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14094 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14095 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14096 mode0 = insn_data[icode].operand[1].mode;
14097 mode1 = insn_data[icode].operand[2].mode;
14099 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14100 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14101 op1 = copy_to_mode_reg (mode1, op1);
14103 pat = GEN_FCN (icode) (op0, op0, op1);
14109 case IX86_BUILTIN_MOVNTPS:
14110 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14111 case IX86_BUILTIN_MOVNTQ:
14112 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
/* ldmxcsr/stmxcsr go through a dedicated SImode stack slot because the
   insns only take memory operands.  */
14114 case IX86_BUILTIN_LDMXCSR:
14115 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14116 target = assign_386_stack_local (SImode, 0);
14117 emit_move_insn (target, op0);
14118 emit_insn (gen_ldmxcsr (target));
14121 case IX86_BUILTIN_STMXCSR:
14122 target = assign_386_stack_local (SImode, 0);
14123 emit_insn (gen_stmxcsr (target));
14124 return copy_to_mode_reg (SImode, target);
/* shufps/shufpd: two vector sources plus an immediate mask (operand 3).  */
14126 case IX86_BUILTIN_SHUFPS:
14127 case IX86_BUILTIN_SHUFPD:
14128 icode = (fcode == IX86_BUILTIN_SHUFPS
14129 ? CODE_FOR_sse_shufps
14130 : CODE_FOR_sse2_shufpd);
14131 arg0 = TREE_VALUE (arglist);
14132 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14133 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14134 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14135 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14136 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14137 tmode = insn_data[icode].operand[0].mode;
14138 mode0 = insn_data[icode].operand[1].mode;
14139 mode1 = insn_data[icode].operand[2].mode;
14140 mode2 = insn_data[icode].operand[3].mode;
14142 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14143 op0 = copy_to_mode_reg (mode0, op0);
14144 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14145 op1 = copy_to_mode_reg (mode1, op1);
14146 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14148 /* @@@ better error message */
14149 error ("mask must be an immediate");
14150 return gen_reg_rtx (tmode);
14153 || GET_MODE (target) != tmode
14154 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14155 target = gen_reg_rtx (tmode);
14156 pat = GEN_FCN (icode) (target, op0, op1, op2);
/* pshufw/pshufd/pshufhw/pshuflw: one source plus an immediate mask.  */
14162 case IX86_BUILTIN_PSHUFW:
14163 case IX86_BUILTIN_PSHUFD:
14164 case IX86_BUILTIN_PSHUFHW:
14165 case IX86_BUILTIN_PSHUFLW:
14166 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14167 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14168 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14169 : CODE_FOR_mmx_pshufw);
14170 arg0 = TREE_VALUE (arglist);
14171 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14172 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14173 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14174 tmode = insn_data[icode].operand[0].mode;
14175 mode1 = insn_data[icode].operand[1].mode;
14176 mode2 = insn_data[icode].operand[2].mode;
14178 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14179 op0 = copy_to_mode_reg (mode1, op0);
14180 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14182 /* @@@ better error message */
14183 error ("mask must be an immediate");
14187 || GET_MODE (target) != tmode
14188 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14189 target = gen_reg_rtx (tmode);
14190 pat = GEN_FCN (icode) (target, op0, op1);
/* pslldq/psrldq: whole-register byte shifts implemented as TImode shifts;
   the V2DI value is viewed as TImode via subregs.  */
14196 case IX86_BUILTIN_PSLLDQI128:
14197 case IX86_BUILTIN_PSRLDQI128:
14198 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14199 : CODE_FOR_sse2_lshrti3);
14200 arg0 = TREE_VALUE (arglist);
14201 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14202 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14203 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14204 tmode = insn_data[icode].operand[0].mode;
14205 mode1 = insn_data[icode].operand[1].mode;
14206 mode2 = insn_data[icode].operand[2].mode;
14208 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14210 op0 = copy_to_reg (op0);
14211 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14213 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14215 error ("shift must be an immediate");
14218 target = gen_reg_rtx (V2DImode);
14219 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
/* 3DNow! builtins: all delegate to the generic unop/binop expanders.  */
14225 case IX86_BUILTIN_FEMMS:
14226 emit_insn (gen_femms ());
14229 case IX86_BUILTIN_PAVGUSB:
14230 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14232 case IX86_BUILTIN_PF2ID:
14233 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14235 case IX86_BUILTIN_PFACC:
14236 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14238 case IX86_BUILTIN_PFADD:
14239 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14241 case IX86_BUILTIN_PFCMPEQ:
14242 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14244 case IX86_BUILTIN_PFCMPGE:
14245 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14247 case IX86_BUILTIN_PFCMPGT:
14248 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14250 case IX86_BUILTIN_PFMAX:
14251 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14253 case IX86_BUILTIN_PFMIN:
14254 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14256 case IX86_BUILTIN_PFMUL:
14257 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14259 case IX86_BUILTIN_PFRCP:
14260 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14262 case IX86_BUILTIN_PFRCPIT1:
14263 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14265 case IX86_BUILTIN_PFRCPIT2:
14266 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14268 case IX86_BUILTIN_PFRSQIT1:
14269 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14271 case IX86_BUILTIN_PFRSQRT:
14272 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14274 case IX86_BUILTIN_PFSUB:
14275 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14277 case IX86_BUILTIN_PFSUBR:
14278 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14280 case IX86_BUILTIN_PI2FD:
14281 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14283 case IX86_BUILTIN_PMULHRW:
14284 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
/* 3DNow! extensions (Athlon).  */
14286 case IX86_BUILTIN_PF2IW:
14287 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14289 case IX86_BUILTIN_PFNACC:
14290 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14292 case IX86_BUILTIN_PFPNACC:
14293 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14295 case IX86_BUILTIN_PI2FW:
14296 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14298 case IX86_BUILTIN_PSWAPDSI:
14299 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14301 case IX86_BUILTIN_PSWAPDSF:
14302 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
/* Register-clearing builtins.  */
14304 case IX86_BUILTIN_SSE_ZERO:
14305 target = gen_reg_rtx (V4SFmode);
14306 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14309 case IX86_BUILTIN_MMX_ZERO:
14310 target = gen_reg_rtx (DImode);
14311 emit_insn (gen_mmx_clrdi (target));
14314 case IX86_BUILTIN_CLRTI:
14315 target = gen_reg_rtx (V2DImode);
14316 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
/* SSE2 double-precision loads/stores.  */
14320 case IX86_BUILTIN_SQRTSD:
14321 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14322 case IX86_BUILTIN_LOADAPD:
14323 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14324 case IX86_BUILTIN_LOADUPD:
14325 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14327 case IX86_BUILTIN_STOREAPD:
14328 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14329 case IX86_BUILTIN_STOREUPD:
14330 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14332 case IX86_BUILTIN_LOADSD:
14333 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14335 case IX86_BUILTIN_STORESD:
14336 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
/* _mm_set1_pd: spill the scalar to a stack slot, load it into the low
   lane, then shufpd with mask 0 to broadcast it to both lanes.  */
14338 case IX86_BUILTIN_SETPD1:
14339 target = assign_386_stack_local (DFmode, 0);
14340 arg0 = TREE_VALUE (arglist);
14341 emit_move_insn (adjust_address (target, DFmode, 0),
14342 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14343 op0 = gen_reg_rtx (V2DFmode);
14344 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14345 emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
/* _mm_set_pd: build the vector in a V2DF stack slot, one DF at a time.  */
14348 case IX86_BUILTIN_SETPD:
14349 target = assign_386_stack_local (V2DFmode, 0);
14350 arg0 = TREE_VALUE (arglist);
14351 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14352 emit_move_insn (adjust_address (target, DFmode, 0),
14353 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14354 emit_move_insn (adjust_address (target, DFmode, 8),
14355 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14356 op0 = gen_reg_rtx (V2DFmode);
14357 emit_insn (gen_sse2_movapd (op0, target));
/* _mm_loadr_pd: load, then swap the lanes with shufpd mask 1.  */
14360 case IX86_BUILTIN_LOADRPD:
14361 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14362 gen_reg_rtx (V2DFmode), 1);
14363 emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
/* _mm_load1_pd: load the scalar, then broadcast with shufpd mask 0.  */
14366 case IX86_BUILTIN_LOADPD1:
14367 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14368 gen_reg_rtx (V2DFmode), 1);
14369 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14372 case IX86_BUILTIN_STOREPD1:
14373 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14374 case IX86_BUILTIN_STORERPD:
14375 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14377 case IX86_BUILTIN_CLRPD:
14378 target = gen_reg_rtx (V2DFmode);
14379 emit_insn (gen_sse_clrv2df (target));
/* SSE2 memory-ordering fences.  */
14382 case IX86_BUILTIN_MFENCE:
14383 emit_insn (gen_sse2_mfence ());
14385 case IX86_BUILTIN_LFENCE:
14386 emit_insn (gen_sse2_lfence ());
/* clflush takes an address; force it into a Pmode register if the insn
   predicate rejects the expanded form.  */
14389 case IX86_BUILTIN_CLFLUSH:
14390 arg0 = TREE_VALUE (arglist);
14391 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14392 icode = CODE_FOR_sse2_clflush;
14393 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14394 op0 = copy_to_mode_reg (Pmode, op0);
14396 emit_insn (gen_sse2_clflush (op0));
14399 case IX86_BUILTIN_MOVNTPD:
14400 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14401 case IX86_BUILTIN_MOVNTDQ:
14402 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14403 case IX86_BUILTIN_MOVNTI:
14404 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14406 case IX86_BUILTIN_LOADDQA:
14407 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14408 case IX86_BUILTIN_LOADDQU:
14409 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14410 case IX86_BUILTIN_LOADD:
14411 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14413 case IX86_BUILTIN_STOREDQA:
14414 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14415 case IX86_BUILTIN_STOREDQU:
14416 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14417 case IX86_BUILTIN_STORED:
14418 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
/* SSE3 monitor/mwait: all operands are forced into SImode registers
   (the guarding mode checks sit in the listing gaps).  */
14420 case IX86_BUILTIN_MONITOR:
14421 arg0 = TREE_VALUE (arglist);
14422 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14423 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14424 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14425 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14426 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14428 op0 = copy_to_mode_reg (SImode, op0);
14430 op1 = copy_to_mode_reg (SImode, op1);
14432 op2 = copy_to_mode_reg (SImode, op2);
14433 emit_insn (gen_monitor (op0, op1, op2));
14436 case IX86_BUILTIN_MWAIT:
14437 arg0 = TREE_VALUE (arglist);
14438 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14439 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14440 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14442 op0 = copy_to_mode_reg (SImode, op0);
14444 op1 = copy_to_mode_reg (SImode, op1);
14445 emit_insn (gen_mwait (op0, op1));
14448 case IX86_BUILTIN_LOADDDUP:
14449 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14451 case IX86_BUILTIN_LDDQU:
14452 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
/* Fallback: look the builtin up in the generic descriptor tables
   (two-operand, one-operand, comi) and dispatch to the right expander.  */
14459 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14460 if (d->code == fcode)
14462 /* Compares are treated specially. */
14463 if (d->icode == CODE_FOR_maskcmpv4sf3
14464 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14465 || d->icode == CODE_FOR_maskncmpv4sf3
14466 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14467 || d->icode == CODE_FOR_maskcmpv2df3
14468 || d->icode == CODE_FOR_vmmaskcmpv2df3
14469 || d->icode == CODE_FOR_maskncmpv2df3
14470 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14471 return ix86_expand_sse_compare (d, arglist, target);
14473 return ix86_expand_binop_builtin (d->icode, arglist, target);
14476 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14477 if (d->code == fcode)
14478 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14480 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14481 if (d->code == fcode)
14482 return ix86_expand_sse_comi (d, arglist, target);
14484 /* @@@ Should really do something sensible here. */
14488 /* Store OPERAND to the memory after reload is completed. This means
14489 that we can't easily use assign_stack_local. */
/* Returns a MEM rtx through which OPERAND can be re-read.  NOTE(review):
   partial listing -- the declarations of `result`/`operands[]`, the
   emit_insn wrappers around the SETs, and the final return are in gaps.  */
14491 ix86_force_to_memory (enum machine_mode mode, rtx operand)
/* Only legitimate after reload (presumably aborts otherwise -- the
   handler line is missing from the listing).  */
14494 if (!reload_completed)
/* With a red zone (x86-64 ABI) we may store below the stack pointer
   without adjusting it.  */
14496 if (TARGET_RED_ZONE)
14498 result = gen_rtx_MEM (mode,
14499 gen_rtx_PLUS (Pmode,
14501 GEN_INT (-RED_ZONE_SIZE)));
14502 emit_move_insn (result, operand);
/* 64-bit without a red zone: push the value, widened to DImode.  */
14504 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14510 operand = gen_lowpart (DImode, operand);
14514 gen_rtx_SET (VOIDmode,
14515 gen_rtx_MEM (DImode,
14516 gen_rtx_PRE_DEC (DImode,
14517 stack_pointer_rtx)),
/* The pushed slot is addressed directly through the stack pointer.  */
14523 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit DImode: split into two SImode pushes.  */
14532 split_di (&operand, 1, operands, operands + 1);
14534 gen_rtx_SET (VOIDmode,
14535 gen_rtx_MEM (SImode,
14536 gen_rtx_PRE_DEC (Pmode,
14537 stack_pointer_rtx)),
14540 gen_rtx_SET (VOIDmode,
14541 gen_rtx_MEM (SImode,
14542 gen_rtx_PRE_DEC (Pmode,
14543 stack_pointer_rtx)),
14548 /* It is better to store HImodes as SImodes. */
14549 if (!TARGET_PARTIAL_REG_STALL)
14550 operand = gen_lowpart (SImode, operand);
14554 gen_rtx_SET (VOIDmode,
14555 gen_rtx_MEM (GET_MODE (operand),
14556 gen_rtx_PRE_DEC (SImode,
14557 stack_pointer_rtx)),
14563 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14568 /* Free operand from the memory. */
/* Undoes ix86_force_to_memory: pops the scratch slot off the stack.
   NOTE(review): partial listing -- the `size` variable declaration and
   its assignments are in gaps.  */
14570 ix86_free_from_memory (enum machine_mode mode)
/* With a red zone nothing was pushed, so there is nothing to release.  */
14572 if (!TARGET_RED_ZONE)
/* Slot size mirrors what ix86_force_to_memory pushed for MODE.  */
14576 if (mode == DImode || TARGET_64BIT)
14578 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14582 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14583 to pop or add instruction if registers are available. */
14584 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14585 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14590 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14591 QImode must go into class Q_REGS.
14592 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14593 movdf to do mem-to-mem moves through integer regs. */
/* NOTE(review): partial listing -- the various `return NO_REGS;` /
   `return class;` result lines live in the gaps between the visible
   conditions.  */
14595 ix86_preferred_reload_class (rtx x, enum reg_class class)
/* Non-zero vector constants cannot be loaded directly into any class.  */
14597 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
/* Floating-point CONST_DOUBLEs (VOIDmode would mean an integer pair).  */
14599 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14601 /* SSE can't load any constant directly yet. */
14602 if (SSE_CLASS_P (class))
14604 /* Floats can load 0 and 1. */
14605 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14607 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14608 if (MAYBE_SSE_CLASS_P (class))
14609 return (reg_class_subset_p (class, GENERAL_REGS)
14610 ? GENERAL_REGS : FLOAT_REGS);
14614 /* General regs can load everything. */
14615 if (reg_class_subset_p (class, GENERAL_REGS))
14616 return GENERAL_REGS;
14617 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14618 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
/* MMX registers cannot hold constants either.  */
14621 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
/* QImode values must end up in a byte-addressable register.  */
14623 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14628 /* If we are copying between general and FP registers, we need a memory
14629 location. The same is true for SSE and MMX registers.
14631 The macro can't work reliably when one of the CLASSES is class containing
14632 registers from multiple units (SSE, MMX, integer). We avoid this by never
14633 combining those units in single alternative in the machine description.
14634 Ensure that this constraint holds to avoid unexpected surprises.
14636 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14637 enforce these sanity checks. */
14639 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14640 enum machine_mode mode, int strict)
/* Sanity check: a "maybe" class that is not the pure class mixes units.
   (The abort/return under this condition sits in a listing gap.)  */
14642 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14643 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14644 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14645 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14646 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14647 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
/* Memory is needed between x87 and anything else, and between SSE/MMX
   and the integer unit unless direct inter-unit moves are enabled and
   the mode is a plain word (SImode, or DImode on 64-bit).  */
14654 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14655 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14656 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14657 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14658 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14660 /* Return the cost of moving data from a register in class CLASS1 to
14661 one in class CLASS2.
14663 It is not required that the cost always equal 2 when FROM is the same as TO;
14664 on some machines it is expensive to move between registers if they are not
14665 general registers. */
14667 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14668 enum reg_class class2)
14670 /* In case we require secondary memory, compute cost of the store followed
14671 by load. In order to avoid bad register allocation choices, we need
14672 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
/* (The declaration/initialization of `cost` is in a listing gap.)  */
14674 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
/* Worst-case of load vs store cost for each side of the bounce.  */
14678 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14679 MEMORY_MOVE_COST (mode, class1, 1));
14680 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14681 MEMORY_MOVE_COST (mode, class2, 1));
14683 /* In case of copying from general_purpose_register we may emit multiple
14684 stores followed by single load causing memory size mismatch stall.
14685 Count this as arbitrarily high cost of 20. */
14686 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14689 /* In the case of FP/MMX moves, the registers actually overlap, and we
14690 have to switch modes in order to treat them differently. */
14691 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14692 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14698 /* Moves between SSE/MMX and integer unit are expensive. */
14699 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14700 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14701 return ix86_cost->mmxsse_to_integer;
/* Within-unit moves use the per-processor cost table.  */
14702 if (MAYBE_FLOAT_CLASS_P (class1))
14703 return ix86_cost->fp_move;
14704 if (MAYBE_SSE_CLASS_P (class1))
14705 return ix86_cost->sse_move;
14706 if (MAYBE_MMX_CLASS_P (class1))
14707 return ix86_cost->mmx_move;
14711 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14713 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14715 /* Flags and only flags can only hold CCmode values. */
14716 if (CC_REGNO_P (regno))
14717 return GET_MODE_CLASS (mode) == MODE_CC;
/* CC/random/partial-int modes are rejected for every other register
   (the `return 0;` line is in a listing gap).  */
14718 if (GET_MODE_CLASS (mode) == MODE_CC
14719 || GET_MODE_CLASS (mode) == MODE_RANDOM
14720 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
/* x87 stack registers accept only x87-valid FP modes.  */
14722 if (FP_REGNO_P (regno))
14723 return VALID_FP_MODE_P (mode);
14724 if (SSE_REGNO_P (regno))
14725 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14726 if (MMX_REGNO_P (regno))
14728 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14729 /* We handle both integer and floats in the general purpose registers.
14730 In future we should be able to handle vector modes as well. */
14731 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14733 /* Take care for QImode values - they can be in non-QI regs, but then
14734 they do cause partial register stalls. */
14735 if (regno < 4 || mode != QImode || TARGET_64BIT)
/* Non-QI reg holding QImode: allow it only when stalls are tolerable
   or register allocation has no other choice.  */
14737 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14740 /* Return the cost of moving data of mode M between a
14741 register and memory. A value of 2 is the default; this cost is
14742 relative to those in `REGISTER_MOVE_COST'.
14744 If moving between registers and memory is more expensive than
14745 between two registers, you should define this macro to express the
14748 Model also increased moving costs of QImode registers in non
/* NOTE(review): partial listing -- the `index` computations and several
   switch cases/defaults are in gaps; comments reflect visible lines.  */
14752 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
/* x87: pick the fp load/store cost indexed by operand size.  */
14754 if (FLOAT_CLASS_P (class))
14771 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE registers: indexed by GET_MODE_SIZE.  */
14773 if (SSE_CLASS_P (class))
14776 switch (GET_MODE_SIZE (mode))
14790 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
/* MMX registers: likewise.  */
14792 if (MMX_CLASS_P (class))
14795 switch (GET_MODE_SIZE (mode))
14806 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer registers, by operand size.  */
14808 switch (GET_MODE_SIZE (mode))
/* Byte loads outside Q_REGS go through movzbl; byte stores from a
   non-Q register carry an extra penalty of 4.  */
14812 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14813 : ix86_cost->movzbl_load);
14815 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14816 : ix86_cost->int_store[0] + 4);
14819 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14821 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14822 if (mode == TFmode)
14824 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14825 * (((int) GET_MODE_SIZE (mode)
14826 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14830 /* Compute a (partial) cost for rtx X. Return true if the complete
14831 cost has been computed, and false if subexpressions should be
14832 scanned. In either case, *TOTAL contains the cost result. */
/* NOTE(review): partial listing -- the `switch (code)` header, many
   `case`/`break;`/`return` lines and some cost assignments are in gaps;
   comments annotate only what is visible.  */
14835 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14837 enum machine_mode mode = GET_MODE (x);
/* Integer-constant costs: cheap if representable; PIC symbolics that
   are not provably local cost more.  */
14845 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14847 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14849 else if (flag_pic && SYMBOLIC_CONST (x)
/* NOTE(review): `!GET_CODE (x) != LABEL_REF` negates GET_CODE before the
   comparison -- almost certainly intended as `GET_CODE (x) != LABEL_REF`;
   confirm against upstream GCC before changing.  */
14851 || (!GET_CODE (x) != LABEL_REF
14852 && (GET_CODE (x) != SYMBOL_REF
14853 || !SYMBOL_REF_LOCAL_P (x)))))
/* FP constants: 0.0/1.0 (and friends) can be materialized by fld
   instructions; everything else comes from the constant pool.  */
14860 if (mode == VOIDmode)
14863 switch (standard_80387_constant_p (x))
14868 default: /* Other constants */
14873 /* Start with (MEM (SYMBOL_REF)), since that's where
14874 it'll probably end up. Add a penalty for size. */
14875 *total = (COSTS_N_INSNS (1)
14876 + (flag_pic != 0 && !TARGET_64BIT)
14877 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14883 /* The zero extensions is often completely free on x86_64, so make
14884 it as cheap as possible. */
14885 if (TARGET_64BIT && mode == DImode
14886 && GET_MODE (XEXP (x, 0)) == SImode)
14888 else if (TARGET_ZERO_EXTEND_WITH_AND)
14889 *total = COSTS_N_INSNS (ix86_cost->add);
14891 *total = COSTS_N_INSNS (ix86_cost->movzx);
14895 *total = COSTS_N_INSNS (ix86_cost->movsx);
/* ASHIFT by a constant: shifts by 1 are as cheap as an add, and small
   shifts may be done via lea when that is cheaper.  */
14899 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14900 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14902 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14905 *total = COSTS_N_INSNS (ix86_cost->add);
14908 if ((value == 2 || value == 3)
14909 && ix86_cost->lea <= ix86_cost->shift_const)
14911 *total = COSTS_N_INSNS (ix86_cost->lea);
/* DImode shifts on 32-bit need a shift pair (shld/shrd) or worse.  */
14921 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14923 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14925 if (INTVAL (XEXP (x, 1)) > 32)
14926 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14928 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14932 if (GET_CODE (XEXP (x, 1)) == AND)
14933 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14935 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14940 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14941 *total = COSTS_N_INSNS (ix86_cost->shift_const)
14943 *total = COSTS_N_INSNS (ix86_cost->shift_var);
/* MULT: FP multiply has a flat cost; integer multiply cost depends on
   the popcount of a constant multiplier on some processors.  */
14948 if (FLOAT_MODE_P (mode))
14950 *total = COSTS_N_INSNS (ix86_cost->fmul);
14955 rtx op0 = XEXP (x, 0);
14956 rtx op1 = XEXP (x, 1);
14958 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14960 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
/* Count set bits (Kernighan's trick) to model per-bit multiply cost.  */
14961 for (nbits = 0; value != 0; value &= value - 1)
14965 /* This is arbitrary. */
14968 /* Compute costs correctly for widening multiplication. */
14969 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
14970 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
14971 == GET_MODE_SIZE (mode))
14973 int is_mulwiden = 0;
14974 enum machine_mode inner_mode = GET_MODE (op0);
14976 if (GET_CODE (op0) == GET_CODE (op1))
14977 is_mulwiden = 1, op1 = XEXP (op1, 0);
14978 else if (GET_CODE (op1) == CONST_INT)
/* A constant counts as widening if it fits the narrow mode.  */
14980 if (GET_CODE (op0) == SIGN_EXTEND)
14981 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
14984 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
/* Cost the multiply at the narrow mode when it is a true widen-mul.  */
14988 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
14991 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14992 + nbits * ix86_cost->mult_bit)
14993 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
/* DIV/MOD.  */
15002 if (FLOAT_MODE_P (mode))
15003 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15005 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
/* PLUS: recognize lea-formable address shapes (base + index*scale +
   displacement) and cost them as a single lea.  */
15009 if (FLOAT_MODE_P (mode))
15010 *total = COSTS_N_INSNS (ix86_cost->fadd);
15011 else if (GET_MODE_CLASS (mode) == MODE_INT
15012 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15014 if (GET_CODE (XEXP (x, 0)) == PLUS
15015 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15016 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15017 && CONSTANT_P (XEXP (x, 1)))
15019 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15020 if (val == 2 || val == 4 || val == 8)
15022 *total = COSTS_N_INSNS (ix86_cost->lea);
15023 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15024 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15026 *total += rtx_cost (XEXP (x, 1), outer_code);
15030 else if (GET_CODE (XEXP (x, 0)) == MULT
15031 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15033 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15034 if (val == 2 || val == 4 || val == 8)
15036 *total = COSTS_N_INSNS (ix86_cost->lea);
15037 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15038 *total += rtx_cost (XEXP (x, 1), outer_code);
15042 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15044 *total = COSTS_N_INSNS (ix86_cost->lea);
15045 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15046 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15047 *total += rtx_cost (XEXP (x, 1), outer_code);
/* MINUS (falls through to PLUS-like handling in the full source).  */
15054 if (FLOAT_MODE_P (mode))
15056 *total = COSTS_N_INSNS (ix86_cost->fadd);
/* 32-bit DImode add/sub: two adds plus the (possibly widened) operands;
   the shift-by-boolean doubles the operand cost when it needs widening.  */
15064 if (!TARGET_64BIT && mode == DImode)
15066 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15067 + (rtx_cost (XEXP (x, 0), outer_code)
15068 << (GET_MODE (XEXP (x, 0)) != DImode))
15069 + (rtx_cost (XEXP (x, 1), outer_code)
15070 << (GET_MODE (XEXP (x, 1)) != DImode)));
/* NEG.  */
15076 if (FLOAT_MODE_P (mode))
15078 *total = COSTS_N_INSNS (ix86_cost->fchs);
/* NOT: two instructions for 32-bit DImode, one otherwise.  */
15084 if (!TARGET_64BIT && mode == DImode)
15085 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15087 *total = COSTS_N_INSNS (ix86_cost->add);
/* FLOAT_EXTEND is free only in SSE math with a valid SSE mode.  */
15091 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
/* ABS / SQRT on x87.  */
15096 if (FLOAT_MODE_P (mode))
15097 *total = COSTS_N_INSNS (ix86_cost->fabs);
15101 if (FLOAT_MODE_P (mode))
15102 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
/* UNSPEC: thread-pointer reference.  */
15106 if (XINT (x, 1) == UNSPEC_TP)
/* NOTE(review): truncated listing -- original source lines are missing
   between the numbered lines below; code kept byte-identical.  */
/* SVR3-style constructor handling: emit a "pushl $<symbol>" so the
   startup code can pop and call each constructor address.  Only built
   when both DO_GLOBAL_CTORS_BODY and HAS_INIT_SECTION are defined.  */
15115 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15117 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
15120 fputs ("\tpushl $", asm_out_file);
15121 assemble_name (asm_out_file, XSTR (symbol, 0));
15122 fputc ('\n', asm_out_file);
/* NOTE(review): truncated listing -- original source lines are missing
   between the numbered lines below; code kept byte-identical.  */
/* Counter used to generate unique local labels (L<n>$lz, LPC$<n>)
   for each Mach-O symbol stub emitted in this translation unit.  */
15128 static int current_machopic_label_num;
15130 /* Given a symbol name and its associated stub, write out the
15131 definition of the stub. */
/* Emits the Darwin/Mach-O lazy-binding stub: the stub itself, the
   binder that tail-calls dyld_stub_binding_helper, and the lazy
   pointer slot initialized to the binder's address.  The PIC variant
   (call LPC$n / popl %eax) computes the stub's own address to reach
   the lazy pointer PC-relatively.  */
15134 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15136 unsigned int length;
15137 char *binder_name, *symbol_name, lazy_ptr_name[32];
15138 int label = ++current_machopic_label_num;
15140 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15141 symb = (*targetm.strip_name_encoding) (symb);
15143 length = strlen (stub);
15144 binder_name = alloca (length + 32);
15145 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15147 length = strlen (symb);
15148 symbol_name = alloca (length + 32);
15149 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15151 sprintf (lazy_ptr_name, "L%d$lz", label);
/* Pick the stub section; presumably the PIC check sits in the missing
   lines (15152-15157) -- TODO confirm against full source.  */
15154 machopic_picsymbol_stub_section ();
15156 machopic_symbol_stub_section ();
15158 fprintf (file, "%s:\n", stub);
15159 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC form: materialize the PC in %eax, then load and jump through
   the lazy pointer.  */
15163 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15164 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15165 fprintf (file, "\tjmp %%edx\n");
/* Non-PIC form: jump indirect through the lazy pointer directly.  */
15168 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
/* The binder pushes the lazy pointer's address and enters dyld.  */
15170 fprintf (file, "%s:\n", binder_name);
15174 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15175 fprintf (file, "\tpushl %%eax\n");
15178 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15180 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
/* Lazy pointer slot: starts out pointing at the binder; dyld later
   overwrites it with the resolved symbol address.  */
15182 machopic_lazy_symbol_ptr_section ();
15183 fprintf (file, "%s:\n", lazy_ptr_name);
15184 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15185 fprintf (file, "\t.long %s\n", binder_name);
/* NOTE(review): truncated listing -- original source lines are missing
   between the numbered lines below; code kept byte-identical.  */
15189 /* Order the registers for register allocator. */
/* Fills reg_alloc_order[]: call-clobbered GPRs first, then
   call-saved GPRs, then FP registers -- x87 before SSE when x87 is
   the FP math unit, after SSE otherwise -- then MMX, with the
   remaining slots zero-filled.  */
15192 x86_order_regs_for_local_alloc (void)
15197 /* First allocate the local general purpose registers. */
15198 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15199 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15200 reg_alloc_order [pos++] = i;
15202 /* Global general purpose registers. */
15203 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15204 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15205 reg_alloc_order [pos++] = i;
15207 /* x87 registers come first in case we are doing FP math
15209 if (!TARGET_SSE_MATH)
15210 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15211 reg_alloc_order [pos++] = i;
15213 /* SSE registers. */
15214 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15215 reg_alloc_order [pos++] = i;
15216 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15217 reg_alloc_order [pos++] = i;
15219 /* x87 registers. */
15220 if (TARGET_SSE_MATH)
15221 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15222 reg_alloc_order [pos++] = i;
15224 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15225 reg_alloc_order [pos++] = i;
15227 /* Initialize the rest of array as we do not allocate some registers
15229 while (pos < FIRST_PSEUDO_REGISTER)
15230 reg_alloc_order [pos++] = 0;
/* NOTE(review): truncated listing -- original source lines are missing
   between the numbered lines below; code kept byte-identical.  */
15233 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15234 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15237 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15238 struct attribute_spec.handler. */
/* Rejects the attribute (setting *no_add_attrs and warning) when the
   target is not a RECORD_TYPE/UNION_TYPE, or when the opposite
   attribute is already present on the type.  */
15240 ix86_handle_struct_attribute (tree *node, tree name,
15241 tree args ATTRIBUTE_UNUSED,
15242 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15245 if (DECL_P (*node))
15247 if (TREE_CODE (*node) == TYPE_DECL)
15248 type = &TREE_TYPE (*node);
15253 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15254 || TREE_CODE (*type) == UNION_TYPE)))
15256 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15257 *no_add_attrs = true;
/* ms_struct and gcc_struct are mutually exclusive on one type.  */
15260 else if ((is_attribute_p ("ms_struct", name)
15261 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15262 || ((is_attribute_p ("gcc_struct", name)
15263 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15265 warning ("`%s' incompatible attribute ignored",
15266 IDENTIFIER_POINTER (name));
15267 *no_add_attrs = true;
/* NOTE(review): truncated listing -- code kept byte-identical.
   True when RECORD_TYPE should use MS bitfield layout: either the
   target default is on and "gcc_struct" does not override it, or
   "ms_struct" explicitly requests it.  */
15274 ix86_ms_bitfield_layout_p (tree record_type)
15276 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15277 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15278 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
/* NOTE(review): truncated listing -- original source lines are missing
   between the numbered lines below; code kept byte-identical.  */
15281 /* Returns an expression indicating where the this parameter is
15282 located on entry to the FUNCTION. */
/* 64-bit: `this' is in the first integer parameter register, shifted
   by one when a hidden aggregate-return pointer occupies slot 0.
   32-bit regparm/fastcall: in a register (regno logic partly in the
   missing lines 15296-15310 -- TODO confirm).  Otherwise: on the
   stack at 4(%esp), or 8(%esp) past a hidden return pointer.  */
15285 x86_this_parameter (tree function)
15287 tree type = TREE_TYPE (function);
15291 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15292 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15295 if (ix86_function_regparm (type, function) > 0)
15299 parm = TYPE_ARG_TYPES (type);
15300 /* Figure out whether or not the function has a variable number of
15302 for (; parm; parm = TREE_CHAIN (parm))
15303 if (TREE_VALUE (parm) == void_type_node)
15305 /* If not, the this parameter is in the first argument. */
15309 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15311 return gen_rtx_REG (SImode, regno);
15315 if (aggregate_value_p (TREE_TYPE (type), type))
15316 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8))
;
15318 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
/* NOTE(review): truncated listing -- original source lines are missing
   between the numbered lines below; code kept byte-identical.  */
15321 /* Determine whether x86_output_mi_thunk can succeed. */
/* 32-bit thunks need a scratch register for the vcall offset and for
   PIC references to non-local functions; with all three parameter
   registers taken by regparm, those cases must be rejected.  */
15324 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15325 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15326 HOST_WIDE_INT vcall_offset, tree function)
15328 /* 64-bit can handle anything. */
15332 /* For 32-bit, everything's fine if we have one free register. */
15333 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15336 /* Need a free register for vcall_offset. */
15340 /* Need a free register for GOT references. */
15341 if (flag_pic && !(*targetm.binds_local_p) (function))
15344 /* Otherwise ok. */
/* NOTE(review): truncated listing -- original source lines are missing
   between the numbered lines below; code kept byte-identical.  */
15348 /* Output the assembler code for a thunk function. THUNK_DECL is the
15349 declaration for the thunk function itself, FUNCTION is the decl for
15350 the target function. DELTA is an immediate constant offset to be
15351 added to THIS. If VCALL_OFFSET is nonzero, the word at
15352 *(*this + vcall_offset) should be added to THIS. */
15355 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15356 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15357 HOST_WIDE_INT vcall_offset, tree function)
15360 rtx this = x86_this_parameter (function);
15363 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15364 pull it in now and let DELTA benefit. */
15367 else if (vcall_offset)
15369 /* Put the this parameter into %eax. */
15371 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15372 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15375 this_reg = NULL_RTX;
15377 /* Adjust the this parameter by a fixed constant. */
15380 xops[0] = GEN_INT (delta);
15381 xops[1] = this_reg ? this_reg : this;
/* 64-bit: a delta too wide for an immediate must go through R10.  */
15384 if (!x86_64_general_operand (xops[0], DImode))
15386 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15388 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15392 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15395 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15398 /* Adjust the this parameter by a value stored in the vtable. */
/* Scratch register: R10 on 64-bit; ECX on 32-bit, or EAX when the
   target is fastcall (ECX carries an argument there).  */
15402 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15405 int tmp_regno = 2 /* ECX */;
15406 if (lookup_attribute ("fastcall",
15407 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15408 tmp_regno = 0 /* EAX */;
15409 tmp = gen_rtx_REG (SImode, tmp_regno);
/* Load the vtable pointer (*this) into the scratch register.  */
15412 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15415 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15417 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15419 /* Adjust the this parameter. */
15420 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
/* A vcall_offset too large for a displacement goes through R11.  */
15421 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15423 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15424 xops[0] = GEN_INT (vcall_offset);
15426 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15427 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15429 xops[1] = this_reg;
15431 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15433 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15436 /* If necessary, drop THIS back to its stack slot. */
15437 if (this_reg && this_reg != this)
15439 xops[0] = this_reg;
15441 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
/* Tail-jump to the real function: direct for local/non-PIC, via the
   GOT (64-bit), a Mach-O stub, or a GOT load through ECX (32-bit
   PIC).  */
15444 xops[0] = XEXP (DECL_RTL (function), 0);
15447 if (!flag_pic || (*targetm.binds_local_p) (function))
15448 output_asm_insn ("jmp\t%P0", xops);
15451 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15452 tmp = gen_rtx_CONST (Pmode, tmp);
15453 tmp = gen_rtx_MEM (QImode, tmp);
15455 output_asm_insn ("jmp\t%A0", xops);
15460 if (!flag_pic || (*targetm.binds_local_p) (function))
15461 output_asm_insn ("jmp\t%P0", xops);
15466 const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15467 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15468 tmp = gen_rtx_MEM (QImode, tmp);
15470 output_asm_insn ("jmp\t%0", xops);
15473 #endif /* TARGET_MACHO */
15475 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15476 output_set_got (tmp);
15479 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15480 output_asm_insn ("jmp\t{*}%1", xops);
/* NOTE(review): truncated listing -- code kept byte-identical.
   Per-file assembler prologue: default preamble, then an optional
   ".version" directive, an optional "__fltused" global, and
   ".intel_syntax" when -masm=intel is in effect.  */
15486 x86_file_start (void)
15488 default_file_start ();
15489 if (X86_FILE_START_VERSION_DIRECTIVE)
15490 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15491 if (X86_FILE_START_FLTUSED)
15492 fputs ("\t.global\t__fltused\n", asm_out_file);
15493 if (ix86_asm_dialect == ASM_INTEL)
15494 fputs ("\t.intel_syntax\n", asm_out_file);
/* NOTE(review): truncated listing -- original source lines are missing
   between the numbered lines below; code kept byte-identical.  */
/* ADJUST_FIELD_ALIGN worker: on 32-bit without -malign-double, cap
   the alignment of double/integer-class fields at 32 bits (for array
   fields, the decision uses the innermost element type).  */
15498 x86_field_alignment (tree field, int computed)
15500 enum machine_mode mode;
15501 tree type = TREE_TYPE (field);
15503 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15505 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15506 ? get_inner_array_type (type) : type);
15507 if (mode == DFmode || mode == DCmode
15508 || GET_MODE_CLASS (mode) == MODE_INT
15509 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15510 return MIN (32, computed);
/* NOTE(review): truncated listing -- original source lines are missing
   between the numbered lines below; the branch conditions selecting
   among the four variants (64-bit PIC / 64-bit / 32-bit PIC / 32-bit)
   are in the missing lines.  Code kept byte-identical.  */
15514 /* Output assembler code to FILE to increment profiler label # LABELNO
15515 for profiling a function entry. */
15517 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15522 #ifndef NO_PROFILE_COUNTERS
15523 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15525 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15529 #ifndef NO_PROFILE_COUNTERS
15530 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15532 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15536 #ifndef NO_PROFILE_COUNTERS
15537 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15538 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15540 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15544 #ifndef NO_PROFILE_COUNTERS
15545 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15546 PROFILE_COUNT_REGISTER);
15548 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
/* NOTE(review): truncated listing -- original source lines are missing
   between the numbered lines below; code kept byte-identical.  */
15552 /* We don't have exact information about the insn sizes, but we may assume
15553 quite safely that we are informed about all 1 byte insns and memory
15554 address sizes. This is enough to eliminate unnecessary padding in
/* Conservative lower bound on INSN's encoded length, used by the
   four-jumps-per-16-bytes padding pass below.  */
15558 min_insn_size (rtx insn)
15562 if (!INSN_P (insn) || !active_insn_p (insn))
15565 /* Discard alignments we've emit and jump instructions. */
15566 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15567 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15569 if (GET_CODE (insn) == JUMP_INSN
15570 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15571 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15574 /* Important case - calls are always 5 bytes.
15575 It is common to have many calls in the row. */
15576 if (GET_CODE (insn) == CALL_INSN
15577 && symbolic_reference_mentioned_p (PATTERN (insn))
15578 && !SIBLING_CALL_P (insn))
15580 if (get_attr_length (insn) <= 1)
15583 /* For normal instructions we may rely on the sizes of addresses
15584 and the presence of symbol to require 4 bytes of encoding.
15585 This is not the case for jumps where references are PC relative. */
15586 if (GET_CODE (insn) != JUMP_INSN)
15588 l = get_attr_length_address (insn);
15589 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
/* NOTE(review): truncated listing -- original source lines are missing
   between the numbered lines below; code kept byte-identical.  */
15598 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Sliding-window pass: tracks [START, INSN] intervals by byte size
   and jump count, and inserts an alignment insn before the 4th jump
   when all four could land in one 16-byte window.  */
15602 ix86_avoid_jump_misspredicts (void)
15604 rtx insn, start = get_insns ();
15605 int nbytes = 0, njumps = 0;
15608 /* Look for all minimal intervals of instructions containing 4 jumps.
15609 The intervals are bounded by START and INSN. NBYTES is the total
15610 size of instructions in the interval including INSN and not including
15611 START. When the NBYTES is smaller than 16 bytes, it is possible
15612 that the end of START and INSN ends up in the same 16byte page.
15614 The smallest offset in the page INSN can start is the case where START
15615 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15616 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15618 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15621 nbytes += min_insn_size (insn);
15623 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
15624 INSN_UID (insn), min_insn_size (insn));
/* Count branch/call insns; dispatch tables don't execute as jumps.  */
15625 if ((GET_CODE (insn) == JUMP_INSN
15626 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15627 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15628 || GET_CODE (insn) == CALL_INSN)
/* Shrink the window from the left until at most 3 jumps remain.  */
15635 start = NEXT_INSN (start);
15636 if ((GET_CODE (start) == JUMP_INSN
15637 && GET_CODE (PATTERN (start)) != ADDR_VEC
15638 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15639 || GET_CODE (start) == CALL_INSN)
15640 njumps--, isjump = 1;
15643 nbytes -= min_insn_size (start);
15648 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
15649 INSN_UID (start), INSN_UID (insn), nbytes);
15651 if (njumps == 3 && isjump && nbytes < 16)
15653 int padsize = 15 - nbytes + min_insn_size (insn);
15656 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
15657 INSN_UID (insn), padsize);
15658 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
/* NOTE(review): truncated listing -- original source lines are missing
   between the numbered lines below; code kept byte-identical.  */
15663 /* AMD Athlon works faster
15664 when RET is not destination of conditional jump or directly preceded
15665 by other jump instruction. We avoid the penalty by inserting NOP just
15666 before the RET instructions in such cases. */
15668 ix86_pad_returns (void)
/* Walk each predecessor edge of the exit block looking for a hot
   RETURN insn that is a branch target or follows a jump/call.  */
15672 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15674 basic_block bb = e->src;
15675 rtx ret = BB_END (bb);
15677 bool replace = false;
15679 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15680 || !maybe_hot_bb_p (bb))
/* Find what precedes the return: a label means the return can be
   reached by a non-fallthru edge, i.e. it is a jump target.  */
15682 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15683 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15685 if (prev && GET_CODE (prev) == CODE_LABEL)
15688 for (e = bb->pred; e; e = e->pred_next)
15689 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15690 && !(e->flags & EDGE_FALLTHRU))
15695 prev = prev_active_insn (ret);
15697 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15698 || GET_CODE (prev) == CALL_INSN))
15700 /* Empty functions get branch mispredict even when the jump destination
15701 is not visible to us. */
15702 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
/* Replace/pad with the long return form to avoid the penalty.  */
15707 emit_insn_before (gen_return_internal_long (), ret);
/* NOTE(review): truncated listing -- the function header (presumably
   `ix86_reorg (void)', the TARGET_MACHINE_DEPENDENT_REORG worker) is
   among the missing lines; code kept byte-identical.  */
15713 /* Implement machine specific optimizations. We implement padding of returns
15714 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
15718 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
15719 ix86_pad_returns ();
15720 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
15721 ix86_avoid_jump_misspredicts ();
/* NOTE(review): truncated listing -- code kept byte-identical.  */
15724 /* Return nonzero when QImode register that must be represented via REX prefix
/* Scans the cached extracted operands for any hard register numbered
   4 or above (i.e. not AX/BX/CX/DX, whose low bytes need no REX).  */
15727 x86_extended_QIreg_mentioned_p (rtx insn)
15730 extract_insn_cached (insn);
15731 for (i = 0; i < recog_data.n_operands; i++)
15732 if (REG_P (recog_data.operand[i])
15733 && REGNO (recog_data.operand[i]) >= 4)
/* NOTE(review): truncated listing -- code kept byte-identical.  */
15738 /* Return nonzero when P points to register encoded via REX prefix.
15739 Called via for_each_rtx. */
15741 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15743 unsigned int regno;
15746 regno = REGNO (*p);
/* R8-R15 and XMM8-XMM15 all require a REX prefix.  */
15747 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
/* NOTE(review): truncated listing -- code kept byte-identical.  */
15750 /* Return true when INSN mentions register that must be encoded using REX
/* Walks the whole pattern with the per-rtx predicate above.  */
15753 x86_extended_reg_mentioned_p (rtx insn)
15755 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
/* NOTE(review): truncated listing -- original source lines are missing
   between the numbered lines below; code kept byte-identical.  */
15758 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15759 optabs would emit if we didn't have TFmode patterns. */
/* Non-negative inputs convert with a plain signed FLOAT.  Negative
   inputs (i.e. with the high bit set when viewed unsigned) are halved
   with the low bit folded in ((in >> 1) | (in & 1)), converted, then
   doubled (f0 + f0) to recover the full unsigned value with correct
   rounding.  */
15762 x86_emit_floatuns (rtx operands[2])
15764 rtx neglab, donelab, i0, i1, f0, in, out;
15765 enum machine_mode mode, inmode;
15767 inmode = GET_MODE (operands[1]);
15768 if (inmode != SImode
15769 && inmode != DImode)
15773 in = force_reg (inmode, operands[1]);
15774 mode = GET_MODE (out);
15775 neglab = gen_label_rtx ();
15776 donelab = gen_label_rtx ();
15777 i1 = gen_reg_rtx (Pmode);
15778 f0 = gen_reg_rtx (mode);
15780 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15782 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15783 emit_jump_insn (gen_jump (donelab));
15786 emit_label (neglab);
15788 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15789 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15790 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15791 expand_float (f0, i0, 0);
15792 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15794 emit_label (donelab);
/* NOTE(review): truncated listing -- code kept byte-identical.  */
15797 /* Return if we do not know how to pass TYPE solely in registers. */
/* Defers to the generic rule, then additionally forces 32-bit TImode
   aggregates onto the stack.  */
15799 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
15801 if (default_must_pass_in_stack (mode, type))
15803 return (!TARGET_64BIT && type && mode == TImode);
/* NOTE(review): truncated listing -- original source lines are missing
   between the numbered lines below; code kept byte-identical.  */
15806 /* Initialize vector TARGET via VALS. */
/* Three strategies: (1) all-constant vectors load from the constant
   pool as a CONST_VECTOR; (2) vectors constant except element 0 load
   the constant part, then overwrite element 0 with movsd/movss;
   (3) the general case builds the vector with unpcklpd/unpcklps
   shuffles of per-element subregs.  */
15808 ix86_expand_vector_init (rtx target, rtx vals)
15810 enum machine_mode mode = GET_MODE (target);
15811 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15812 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
15815 for (i = n_elts - 1; i >= 0; i--)
15816 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
15817 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
15820 /* Few special cases first...
15821 ... constants are best loaded from constant pool. */
15824 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15828 /* ... values where only first field is non-constant are best loaded
15829 from the pool and overwritten via move later. */
15832 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
15833 GET_MODE_INNER (mode), 0);
15835 op = force_reg (mode, op);
/* Zero out element 0 so the pool constant is well-defined, then
   merge the variable element back with a scalar move.  */
15836 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
15837 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15838 switch (GET_MODE (target))
15841 emit_insn (gen_sse2_movsd (target, target, op));
15844 emit_insn (gen_sse_movss (target, target, op));
15852 /* And the busy sequence doing rotations. */
15853 switch (GET_MODE (target))
/* V2DF: interleave the two doubles with unpcklpd.  */
15858 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
15860 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
15862 vecop0 = force_reg (V2DFmode, vecop0);
15863 vecop1 = force_reg (V2DFmode, vecop1);
15864 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
/* V4SF: two levels of unpcklps interleaving (0,2) and (1,3).  */
15870 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
15872 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
15874 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
15876 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
15877 rtx tmp1 = gen_reg_rtx (V4SFmode);
15878 rtx tmp2 = gen_reg_rtx (V4SFmode);
15880 vecop0 = force_reg (V4SFmode, vecop0);
15881 vecop1 = force_reg (V4SFmode, vecop1);
15882 vecop2 = force_reg (V4SFmode, vecop2);
15883 vecop3 = force_reg (V4SFmode, vecop3);
15884 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
15885 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
15886 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
/* NOTE(review): truncated listing -- code kept byte-identical.  */
15894 /* Worker function for TARGET_MD_ASM_CLOBBERS.
15896 We do this in the new i386 backend to maintain source compatibility
15897 with the old cc0-based compiler. */
/* Prepends implicit clobbers of "flags", "fpsr" and "dirflag" to
   every inline asm's clobber list.  */
15900 ix86_md_asm_clobbers (tree clobbers)
15902 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
15904 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
15906 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
/* NOTE(review): truncated listing -- code kept byte-identical.  */
15911 /* Worker function for REVERSE_CONDITION. */
/* FP compare modes must use the maybe-unordered reversal so NaN
   operands keep the correct branch sense.  */
15914 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
15916 return (mode != CCFPmode && mode != CCFPUmode
15917 ? reverse_condition (code)
15918 : reverse_condition_maybe_unordered (code));
/* NOTE(review): truncated listing -- original source lines are missing
   between the numbered lines below; code kept byte-identical.  */
15921 /* Output code to perform an x87 FP register move, from OPERANDS[1]
/* When the source register dies here, pop it: "ffreep" when the
   destination is %st(0) and the target supports it, else "fstp".
   Otherwise load/copy without popping.  */
15925 output_387_reg_move (rtx insn, rtx *operands)
15927 if (REG_P (operands[1])
15928 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15930 if (REGNO (operands[0]) == FIRST_STACK_REG
15931 && TARGET_USE_FFREEP)
15932 return "ffreep\t%y0";
15933 return "fstp\t%y0";
15935 if (STACK_TOP_P (operands[0]))
15936 return "fld%z1\t%y1";
/* NOTE(review): truncated listing -- original source lines are missing
   between the numbered lines below; code kept byte-identical.  */
15940 /* Output code to perform a conditional jump to LABEL, if C2 flag in
15941 FP status register is set. */
/* Reads the FP status word with fnstsw; with SAHF it is transferred
   to EFLAGS and tested as UNORDERED, otherwise bit 0x04 (C2) is
   tested directly with testb and a NE branch.  */
15944 ix86_emit_fp_unordered_jump (rtx label)
15946 rtx reg = gen_reg_rtx (HImode);
15949 emit_insn (gen_x86_fnstsw_1 (reg));
15951 if (TARGET_USE_SAHF)
15953 emit_insn (gen_x86_sahf_1 (reg));
15955 temp = gen_rtx_REG (CCmode, FLAGS_REG);
15956 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
15960 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
15962 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15963 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
15966 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
15967 gen_rtx_LABEL_REF (VOIDmode, label),
15969 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
15970 emit_jump_insn (temp);
/* NOTE(review): truncated listing -- original source lines are missing
   between the numbered lines below; code kept byte-identical.  */
15973 /* Output code to perform a log1p XFmode calculation. */
/* log1p(x) via x87: for |x| < 1 - sqrt(2)/2 (~0.2929...) use
   fyl2xp1 (accurate near zero), otherwise fall back to
   fyl2x(ln2, 1 + x).  Both paths scale by the fldln2 constant
   (standard_80387_constant_rtx (4)) to convert log2 to ln.  */
15975 void ix86_emit_i387_log1p (rtx op0, rtx op1)
15977 rtx label1 = gen_label_rtx ();
15978 rtx label2 = gen_label_rtx ();
15980 rtx tmp = gen_reg_rtx (XFmode);
15981 rtx tmp2 = gen_reg_rtx (XFmode);
15983 emit_insn (gen_absxf2 (tmp, op1));
15984 emit_insn (gen_cmpxf (tmp,
15985 CONST_DOUBLE_FROM_REAL_VALUE (
15986 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
15988 emit_jump_insn (gen_bge (label1));
15990 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15991 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
15992 emit_jump (label2);
15994 emit_label (label1);
15995 emit_move_insn (tmp, CONST1_RTX (XFmode));
15996 emit_insn (gen_addxf3 (tmp, op1, tmp));
15997 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15998 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
16000 emit_label (label2);
16003 #include "gt-i386.h"