1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
50 #ifndef CHECK_STACK_LIMIT
51 #define CHECK_STACK_LIMIT (-1)
54 /* Return index of given mode in mult and division cost tables. */
55 #define MODE_INDEX(mode) \
56 ((mode) == QImode ? 0 \
57 : (mode) == HImode ? 1 \
58 : (mode) == SImode ? 2 \
59 : (mode) == DImode ? 3 \
62 /* Processor costs (relative to an add) */
64 struct processor_costs size_cost = { /* costs for tunning for size */
65 2, /* cost of an add instruction */
66 3, /* cost of a lea instruction */
67 2, /* variable shift costs */
68 3, /* constant shift costs */
69 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
70 0, /* cost of multiply per each bit set */
71 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
72 3, /* cost of movsx */
73 3, /* cost of movzx */
76 2, /* cost for loading QImode using movzbl */
77 {2, 2, 2}, /* cost of loading integer registers
78 in QImode, HImode and SImode.
79 Relative to reg-reg move (2). */
80 {2, 2, 2}, /* cost of storing integer registers */
81 2, /* cost of reg,reg fld/fst */
82 {2, 2, 2}, /* cost of loading fp registers
83 in SFmode, DFmode and XFmode */
84 {2, 2, 2}, /* cost of loading integer registers */
85 3, /* cost of moving MMX register */
86 {3, 3}, /* cost of loading MMX registers
87 in SImode and DImode */
88 {3, 3}, /* cost of storing MMX registers
89 in SImode and DImode */
90 3, /* cost of moving SSE register */
91 {3, 3, 3}, /* cost of loading SSE registers
92 in SImode, DImode and TImode */
93 {3, 3, 3}, /* cost of storing SSE registers
94 in SImode, DImode and TImode */
95 3, /* MMX or SSE register to integer */
96 0, /* size of prefetch block */
97 0, /* number of parallel prefetches */
99 2, /* cost of FADD and FSUB insns. */
100 2, /* cost of FMUL instruction. */
101 2, /* cost of FDIV instruction. */
102 2, /* cost of FABS instruction. */
103 2, /* cost of FCHS instruction. */
104 2, /* cost of FSQRT instruction. */
107 /* Processor costs (relative to an add) */
109 struct processor_costs i386_cost = { /* 386 specific costs */
110 1, /* cost of an add instruction */
111 1, /* cost of a lea instruction */
112 3, /* variable shift costs */
113 2, /* constant shift costs */
114 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
115 1, /* cost of multiply per each bit set */
116 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
117 3, /* cost of movsx */
118 2, /* cost of movzx */
119 15, /* "large" insn */
121 4, /* cost for loading QImode using movzbl */
122 {2, 4, 2}, /* cost of loading integer registers
123 in QImode, HImode and SImode.
124 Relative to reg-reg move (2). */
125 {2, 4, 2}, /* cost of storing integer registers */
126 2, /* cost of reg,reg fld/fst */
127 {8, 8, 8}, /* cost of loading fp registers
128 in SFmode, DFmode and XFmode */
129 {8, 8, 8}, /* cost of loading integer registers */
130 2, /* cost of moving MMX register */
131 {4, 8}, /* cost of loading MMX registers
132 in SImode and DImode */
133 {4, 8}, /* cost of storing MMX registers
134 in SImode and DImode */
135 2, /* cost of moving SSE register */
136 {4, 8, 16}, /* cost of loading SSE registers
137 in SImode, DImode and TImode */
138 {4, 8, 16}, /* cost of storing SSE registers
139 in SImode, DImode and TImode */
140 3, /* MMX or SSE register to integer */
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
144 23, /* cost of FADD and FSUB insns. */
145 27, /* cost of FMUL instruction. */
146 88, /* cost of FDIV instruction. */
147 22, /* cost of FABS instruction. */
148 24, /* cost of FCHS instruction. */
149 122, /* cost of FSQRT instruction. */
153 struct processor_costs i486_cost = { /* 486 specific costs */
154 1, /* cost of an add instruction */
155 1, /* cost of a lea instruction */
156 3, /* variable shift costs */
157 2, /* constant shift costs */
158 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
159 1, /* cost of multiply per each bit set */
160 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
161 3, /* cost of movsx */
162 2, /* cost of movzx */
163 15, /* "large" insn */
165 4, /* cost for loading QImode using movzbl */
166 {2, 4, 2}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
168 Relative to reg-reg move (2). */
169 {2, 4, 2}, /* cost of storing integer registers */
170 2, /* cost of reg,reg fld/fst */
171 {8, 8, 8}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
173 {8, 8, 8}, /* cost of loading integer registers */
174 2, /* cost of moving MMX register */
175 {4, 8}, /* cost of loading MMX registers
176 in SImode and DImode */
177 {4, 8}, /* cost of storing MMX registers
178 in SImode and DImode */
179 2, /* cost of moving SSE register */
180 {4, 8, 16}, /* cost of loading SSE registers
181 in SImode, DImode and TImode */
182 {4, 8, 16}, /* cost of storing SSE registers
183 in SImode, DImode and TImode */
184 3, /* MMX or SSE register to integer */
185 0, /* size of prefetch block */
186 0, /* number of parallel prefetches */
188 8, /* cost of FADD and FSUB insns. */
189 16, /* cost of FMUL instruction. */
190 73, /* cost of FDIV instruction. */
191 3, /* cost of FABS instruction. */
192 3, /* cost of FCHS instruction. */
193 83, /* cost of FSQRT instruction. */
197 struct processor_costs pentium_cost = {
198 1, /* cost of an add instruction */
199 1, /* cost of a lea instruction */
200 4, /* variable shift costs */
201 1, /* constant shift costs */
202 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
203 0, /* cost of multiply per each bit set */
204 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
205 3, /* cost of movsx */
206 2, /* cost of movzx */
207 8, /* "large" insn */
209 6, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {2, 2, 6}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {4, 4, 6}, /* cost of loading integer registers */
218 8, /* cost of moving MMX register */
219 {8, 8}, /* cost of loading MMX registers
220 in SImode and DImode */
221 {8, 8}, /* cost of storing MMX registers
222 in SImode and DImode */
223 2, /* cost of moving SSE register */
224 {4, 8, 16}, /* cost of loading SSE registers
225 in SImode, DImode and TImode */
226 {4, 8, 16}, /* cost of storing SSE registers
227 in SImode, DImode and TImode */
228 3, /* MMX or SSE register to integer */
229 0, /* size of prefetch block */
230 0, /* number of parallel prefetches */
232 3, /* cost of FADD and FSUB insns. */
233 3, /* cost of FMUL instruction. */
234 39, /* cost of FDIV instruction. */
235 1, /* cost of FABS instruction. */
236 1, /* cost of FCHS instruction. */
237 70, /* cost of FSQRT instruction. */
241 struct processor_costs pentiumpro_cost = {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
244 1, /* variable shift costs */
245 1, /* constant shift costs */
246 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
247 0, /* cost of multiply per each bit set */
248 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
249 1, /* cost of movsx */
250 1, /* cost of movzx */
251 8, /* "large" insn */
253 2, /* cost for loading QImode using movzbl */
254 {4, 4, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 2, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of loading integer registers */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 32, /* size of prefetch block */
274 6, /* number of parallel prefetches */
276 3, /* cost of FADD and FSUB insns. */
277 5, /* cost of FMUL instruction. */
278 56, /* cost of FDIV instruction. */
279 2, /* cost of FABS instruction. */
280 2, /* cost of FCHS instruction. */
281 56, /* cost of FSQRT instruction. */
285 struct processor_costs k6_cost = {
286 1, /* cost of an add instruction */
287 2, /* cost of a lea instruction */
288 1, /* variable shift costs */
289 1, /* constant shift costs */
290 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
291 0, /* cost of multiply per each bit set */
292 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
293 2, /* cost of movsx */
294 2, /* cost of movzx */
295 8, /* "large" insn */
297 3, /* cost for loading QImode using movzbl */
298 {4, 5, 4}, /* cost of loading integer registers
299 in QImode, HImode and SImode.
300 Relative to reg-reg move (2). */
301 {2, 3, 2}, /* cost of storing integer registers */
302 4, /* cost of reg,reg fld/fst */
303 {6, 6, 6}, /* cost of loading fp registers
304 in SFmode, DFmode and XFmode */
305 {4, 4, 4}, /* cost of loading integer registers */
306 2, /* cost of moving MMX register */
307 {2, 2}, /* cost of loading MMX registers
308 in SImode and DImode */
309 {2, 2}, /* cost of storing MMX registers
310 in SImode and DImode */
311 2, /* cost of moving SSE register */
312 {2, 2, 8}, /* cost of loading SSE registers
313 in SImode, DImode and TImode */
314 {2, 2, 8}, /* cost of storing SSE registers
315 in SImode, DImode and TImode */
316 6, /* MMX or SSE register to integer */
317 32, /* size of prefetch block */
318 1, /* number of parallel prefetches */
320 2, /* cost of FADD and FSUB insns. */
321 2, /* cost of FMUL instruction. */
322 56, /* cost of FDIV instruction. */
323 2, /* cost of FABS instruction. */
324 2, /* cost of FCHS instruction. */
325 56, /* cost of FSQRT instruction. */
329 struct processor_costs athlon_cost = {
330 1, /* cost of an add instruction */
331 2, /* cost of a lea instruction */
332 1, /* variable shift costs */
333 1, /* constant shift costs */
334 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
335 0, /* cost of multiply per each bit set */
336 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
337 1, /* cost of movsx */
338 1, /* cost of movzx */
339 8, /* "large" insn */
341 4, /* cost for loading QImode using movzbl */
342 {3, 4, 3}, /* cost of loading integer registers
343 in QImode, HImode and SImode.
344 Relative to reg-reg move (2). */
345 {3, 4, 3}, /* cost of storing integer registers */
346 4, /* cost of reg,reg fld/fst */
347 {4, 4, 12}, /* cost of loading fp registers
348 in SFmode, DFmode and XFmode */
349 {6, 6, 8}, /* cost of loading integer registers */
350 2, /* cost of moving MMX register */
351 {4, 4}, /* cost of loading MMX registers
352 in SImode and DImode */
353 {4, 4}, /* cost of storing MMX registers
354 in SImode and DImode */
355 2, /* cost of moving SSE register */
356 {4, 4, 6}, /* cost of loading SSE registers
357 in SImode, DImode and TImode */
358 {4, 4, 5}, /* cost of storing SSE registers
359 in SImode, DImode and TImode */
360 5, /* MMX or SSE register to integer */
361 64, /* size of prefetch block */
362 6, /* number of parallel prefetches */
364 4, /* cost of FADD and FSUB insns. */
365 4, /* cost of FMUL instruction. */
366 24, /* cost of FDIV instruction. */
367 2, /* cost of FABS instruction. */
368 2, /* cost of FCHS instruction. */
369 35, /* cost of FSQRT instruction. */
373 struct processor_costs k8_cost = {
374 1, /* cost of an add instruction */
375 2, /* cost of a lea instruction */
376 1, /* variable shift costs */
377 1, /* constant shift costs */
378 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
379 0, /* cost of multiply per each bit set */
380 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
381 1, /* cost of movsx */
382 1, /* cost of movzx */
383 8, /* "large" insn */
385 4, /* cost for loading QImode using movzbl */
386 {3, 4, 3}, /* cost of loading integer registers
387 in QImode, HImode and SImode.
388 Relative to reg-reg move (2). */
389 {3, 4, 3}, /* cost of storing integer registers */
390 4, /* cost of reg,reg fld/fst */
391 {4, 4, 12}, /* cost of loading fp registers
392 in SFmode, DFmode and XFmode */
393 {6, 6, 8}, /* cost of loading integer registers */
394 2, /* cost of moving MMX register */
395 {3, 3}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {4, 4}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 3, 6}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 4, 5}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 5, /* MMX or SSE register to integer */
405 64, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 4, /* cost of FADD and FSUB insns. */
409 4, /* cost of FMUL instruction. */
410 19, /* cost of FDIV instruction. */
411 2, /* cost of FABS instruction. */
412 2, /* cost of FCHS instruction. */
413 35, /* cost of FSQRT instruction. */
417 struct processor_costs pentium4_cost = {
418 1, /* cost of an add instruction */
419 1, /* cost of a lea instruction */
420 4, /* variable shift costs */
421 4, /* constant shift costs */
422 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
423 0, /* cost of multiply per each bit set */
424 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
425 1, /* cost of movsx */
426 1, /* cost of movzx */
427 16, /* "large" insn */
429 2, /* cost for loading QImode using movzbl */
430 {4, 5, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 3, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of loading integer registers */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 12, /* cost of moving SSE register */
444 {12, 12, 12}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 10, /* MMX or SSE register to integer */
449 64, /* size of prefetch block */
450 6, /* number of parallel prefetches */
452 5, /* cost of FADD and FSUB insns. */
453 7, /* cost of FMUL instruction. */
454 43, /* cost of FDIV instruction. */
455 2, /* cost of FABS instruction. */
456 2, /* cost of FCHS instruction. */
457 43, /* cost of FSQRT instruction. */
461 struct processor_costs nocona_cost = {
462 1, /* cost of an add instruction */
463 1, /* cost of a lea instruction */
464 1, /* variable shift costs */
465 1, /* constant shift costs */
466 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
467 0, /* cost of multiply per each bit set */
468 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
469 1, /* cost of movsx */
470 1, /* cost of movzx */
471 16, /* "large" insn */
473 4, /* cost for loading QImode using movzbl */
474 {4, 4, 4}, /* cost of loading integer registers
475 in QImode, HImode and SImode.
476 Relative to reg-reg move (2). */
477 {4, 4, 4}, /* cost of storing integer registers */
478 3, /* cost of reg,reg fld/fst */
479 {12, 12, 12}, /* cost of loading fp registers
480 in SFmode, DFmode and XFmode */
481 {4, 4, 4}, /* cost of loading integer registers */
482 6, /* cost of moving MMX register */
483 {12, 12}, /* cost of loading MMX registers
484 in SImode and DImode */
485 {12, 12}, /* cost of storing MMX registers
486 in SImode and DImode */
487 6, /* cost of moving SSE register */
488 {12, 12, 12}, /* cost of loading SSE registers
489 in SImode, DImode and TImode */
490 {12, 12, 12}, /* cost of storing SSE registers
491 in SImode, DImode and TImode */
492 8, /* MMX or SSE register to integer */
493 128, /* size of prefetch block */
494 8, /* number of parallel prefetches */
496 6, /* cost of FADD and FSUB insns. */
497 8, /* cost of FMUL instruction. */
498 40, /* cost of FDIV instruction. */
499 3, /* cost of FABS instruction. */
500 3, /* cost of FCHS instruction. */
501 44, /* cost of FSQRT instruction. */
504 const struct processor_costs *ix86_cost = &pentium_cost;
506 /* Processor feature/optimization bitmasks. */
507 #define m_386 (1<<PROCESSOR_I386)
508 #define m_486 (1<<PROCESSOR_I486)
509 #define m_PENT (1<<PROCESSOR_PENTIUM)
510 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
511 #define m_K6 (1<<PROCESSOR_K6)
512 #define m_ATHLON (1<<PROCESSOR_ATHLON)
513 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
514 #define m_K8 (1<<PROCESSOR_K8)
515 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
516 #define m_NOCONA (1<<PROCESSOR_NOCONA)
518 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
519 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
520 const int x86_zero_extend_with_and = m_486 | m_PENT;
521 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
522 const int x86_double_with_add = ~m_386;
523 const int x86_use_bit_test = m_386;
524 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
525 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
526 const int x86_3dnow_a = m_ATHLON_K8;
527 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
528 const int x86_branch_hints = m_PENT4 | m_NOCONA;
529 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
530 const int x86_partial_reg_stall = m_PPRO;
531 const int x86_use_loop = m_K6;
532 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
533 const int x86_use_mov0 = m_K6;
534 const int x86_use_cltd = ~(m_PENT | m_K6);
535 const int x86_read_modify_write = ~m_PENT;
536 const int x86_read_modify = ~(m_PENT | m_PPRO);
537 const int x86_split_long_moves = m_PPRO;
538 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
539 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
540 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
541 const int x86_qimode_math = ~(0);
542 const int x86_promote_qi_regs = 0;
543 const int x86_himode_math = ~(m_PPRO);
544 const int x86_promote_hi_regs = m_PPRO;
545 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
546 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
547 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
548 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
549 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
550 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
551 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
552 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
553 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
554 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
555 const int x86_decompose_lea = m_PENT4 | m_NOCONA;
556 const int x86_shift1 = ~m_486;
557 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
558 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
559 /* Set for machines where the type and dependencies are resolved on SSE register
560 parts instead of whole registers, so we may maintain just lower part of
561 scalar values in proper format leaving the upper part undefined. */
562 const int x86_sse_partial_regs = m_ATHLON_K8;
563 /* Athlon optimizes partial-register FPS special case, thus avoiding the
564 need for extra instructions beforehand */
565 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
566 const int x86_sse_typeless_stores = m_ATHLON_K8;
567 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
568 const int x86_use_ffreep = m_ATHLON_K8;
569 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
570 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
571 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
572 /* Some CPU cores are not able to predict more than 4 branch instructions in
573 the 16 byte window. */
574 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
576 /* In case the average insn count for single function invocation is
577 lower than this constant, emit fast (but longer) prologue and
579 #define FAST_PROLOGUE_INSN_COUNT 20
581 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
582 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
583 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
584 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
586 /* Array of the smallest class containing reg number REGNO, indexed by
587 REGNO. Used by REGNO_REG_CLASS in i386.h. */
589 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
592 AREG, DREG, CREG, BREG,
594 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
596 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
597 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
600 /* flags, fpsr, dirflag, frame */
601 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
602 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
604 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
606 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
607 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
608 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
612 /* The "default" register map used in 32bit mode. */
614 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
616 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
617 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
618 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
619 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
620 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
621 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
622 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
625 static int const x86_64_int_parameter_registers[6] =
627 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
628 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
631 static int const x86_64_int_return_registers[4] =
633 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
636 /* The "default" register map used in 64bit mode. */
637 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
639 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
640 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
641 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
642 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
643 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
644 8,9,10,11,12,13,14,15, /* extended integer registers */
645 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
648 /* Define the register numbers to be used in Dwarf debugging information.
649 The SVR4 reference port C compiler uses the following register numbers
650 in its Dwarf output code:
651 0 for %eax (gcc regno = 0)
652 1 for %ecx (gcc regno = 2)
653 2 for %edx (gcc regno = 1)
654 3 for %ebx (gcc regno = 3)
655 4 for %esp (gcc regno = 7)
656 5 for %ebp (gcc regno = 6)
657 6 for %esi (gcc regno = 4)
658 7 for %edi (gcc regno = 5)
659 The following three DWARF register numbers are never generated by
660 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
661 believes these numbers have these meanings.
662 8 for %eip (no gcc equivalent)
663 9 for %eflags (gcc regno = 17)
664 10 for %trapno (no gcc equivalent)
665 It is not at all clear how we should number the FP stack registers
666 for the x86 architecture. If the version of SDB on x86/svr4 were
667 a bit less brain dead with respect to floating-point then we would
668 have a precedent to follow with respect to DWARF register numbers
669 for x86 FP registers, but the SDB on x86/svr4 is so completely
670 broken with respect to FP registers that it is hardly worth thinking
671 of it as something to strive for compatibility with.
672 The version of x86/svr4 SDB I have at the moment does (partially)
673 seem to believe that DWARF register number 11 is associated with
674 the x86 register %st(0), but that's about all. Higher DWARF
675 register numbers don't seem to be associated with anything in
676 particular, and even for DWARF regno 11, SDB only seems to under-
677 stand that it should say that a variable lives in %st(0) (when
678 asked via an `=' command) if we said it was in DWARF regno 11,
679 but SDB still prints garbage when asked for the value of the
680 variable in question (via a `/' command).
681 (Also note that the labels SDB prints for various FP stack regs
682 when doing an `x' command are all wrong.)
683 Note that these problems generally don't affect the native SVR4
684 C compiler because it doesn't allow the use of -O with -g and
685 because when it is *not* optimizing, it allocates a memory
686 location for each floating-point variable, and the memory
687 location is what gets described in the DWARF AT_location
688 attribute for the variable in question.
689 Regardless of the severe mental illness of the x86/svr4 SDB, we
690 do something sensible here and we use the following DWARF
691 register numbers. Note that these are all stack-top-relative
693 11 for %st(0) (gcc regno = 8)
694 12 for %st(1) (gcc regno = 9)
695 13 for %st(2) (gcc regno = 10)
696 14 for %st(3) (gcc regno = 11)
697 15 for %st(4) (gcc regno = 12)
698 16 for %st(5) (gcc regno = 13)
699 17 for %st(6) (gcc regno = 14)
700 18 for %st(7) (gcc regno = 15)
702 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
704 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
705 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
706 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
707 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
708 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
709 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
710 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
713 /* Test and compare insns in i386.md store the information needed to
714 generate branch and scc insns here. */
716 rtx ix86_compare_op0 = NULL_RTX;
717 rtx ix86_compare_op1 = NULL_RTX;
719 #define MAX_386_STACK_LOCALS 3
720 /* Size of the register save area. */
721 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
723 /* Define the structure for the machine field in struct function. */
725 struct stack_local_entry GTY(())
730 struct stack_local_entry *next;
733 /* Structure describing stack frame layout.
734 Stack grows downward:
740 saved frame pointer if frame_pointer_needed
741 <- HARD_FRAME_POINTER
747 > to_allocate <- FRAME_POINTER
759 int outgoing_arguments_size;
762 HOST_WIDE_INT to_allocate;
763 /* The offsets relative to ARG_POINTER. */
764 HOST_WIDE_INT frame_pointer_offset;
765 HOST_WIDE_INT hard_frame_pointer_offset;
766 HOST_WIDE_INT stack_pointer_offset;
768 /* When save_regs_using_mov is set, emit prologue using
769 move instead of push instructions. */
770 bool save_regs_using_mov;
773 /* Used to enable/disable debugging features. */
774 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
775 /* Code model option as passed by user. */
776 const char *ix86_cmodel_string;
778 enum cmodel ix86_cmodel;
780 const char *ix86_asm_string;
781 enum asm_dialect ix86_asm_dialect = ASM_ATT;
783 const char *ix86_tls_dialect_string;
784 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
786 /* Which unit we are generating floating point math for. */
787 enum fpmath_unit ix86_fpmath;
789 /* Which cpu are we scheduling for. */
790 enum processor_type ix86_tune;
791 /* Which instruction set architecture to use. */
792 enum processor_type ix86_arch;
794 /* Strings to hold which cpu and instruction set architecture to use. */
795 const char *ix86_tune_string; /* for -mtune=<xxx> */
796 const char *ix86_arch_string; /* for -march=<xxx> */
797 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
799 /* # of registers to use to pass arguments. */
800 const char *ix86_regparm_string;
802 /* true if sse prefetch instruction is not NOOP. */
803 int x86_prefetch_sse;
805 /* ix86_regparm_string as a number */
808 /* Alignment to use for loops and jumps: */
810 /* Power of two alignment for loops. */
811 const char *ix86_align_loops_string;
813 /* Power of two alignment for non-loop jumps. */
814 const char *ix86_align_jumps_string;
816 /* Power of two alignment for stack boundary in bytes. */
817 const char *ix86_preferred_stack_boundary_string;
819 /* Preferred alignment for stack boundary in bits. */
820 int ix86_preferred_stack_boundary;
822 /* Values 1-5: see jump.c */
823 int ix86_branch_cost;
824 const char *ix86_branch_cost_string;
826 /* Power of two alignment for functions. */
827 const char *ix86_align_funcs_string;
829 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
830 static char internal_label_prefix[16];
831 static int internal_label_prefix_len;
833 static int local_symbolic_operand (rtx, enum machine_mode);
834 static int tls_symbolic_operand_1 (rtx, enum tls_model);
835 static void output_pic_addr_const (FILE *, rtx, int);
836 static void put_condition_code (enum rtx_code, enum machine_mode,
838 static const char *get_some_local_dynamic_name (void);
839 static int get_some_local_dynamic_name_1 (rtx *, void *);
840 static rtx maybe_get_pool_constant (rtx);
841 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
842 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
844 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
845 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
847 static rtx get_thread_pointer (int);
848 static rtx legitimize_tls_address (rtx, enum tls_model, int);
849 static void get_pc_thunk_name (char [32], unsigned int);
850 static rtx gen_push (rtx);
851 static int memory_address_length (rtx addr);
852 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
853 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
854 static struct machine_function * ix86_init_machine_status (void);
855 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
856 static int ix86_nsaved_regs (void);
857 static void ix86_emit_save_regs (void);
858 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
859 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
860 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
861 static HOST_WIDE_INT ix86_GOT_alias_set (void);
862 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
863 static rtx ix86_expand_aligntest (rtx, int);
864 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
865 static int ix86_issue_rate (void);
866 static int ix86_adjust_cost (rtx, rtx, rtx, int);
867 static int ia32_use_dfa_pipeline_interface (void);
868 static int ia32_multipass_dfa_lookahead (void);
869 static void ix86_init_mmx_sse_builtins (void);
870 static rtx x86_this_parameter (tree);
871 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
872 HOST_WIDE_INT, tree);
873 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
874 static void x86_file_start (void);
875 static void ix86_reorg (void);
876 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
877 static tree ix86_build_builtin_va_list (void);
878 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
883 rtx base, index, disp;
885 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
888 static int ix86_decompose_address (rtx, struct ix86_address *);
889 static int ix86_address_cost (rtx);
890 static bool ix86_cannot_force_const_mem (rtx);
891 static rtx ix86_delegitimize_address (rtx);
893 struct builtin_description;
894 static rtx ix86_expand_sse_comi (const struct builtin_description *,
896 static rtx ix86_expand_sse_compare (const struct builtin_description *,
898 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
899 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
900 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
901 static rtx ix86_expand_store_builtin (enum insn_code, tree);
902 static rtx safe_vector_operand (rtx, enum machine_mode);
903 static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
904 static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
905 enum rtx_code *, enum rtx_code *);
906 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
907 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
908 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
909 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
910 static int ix86_fp_comparison_cost (enum rtx_code code);
911 static unsigned int ix86_select_alt_pic_regnum (void);
912 static int ix86_save_reg (unsigned int, int);
913 static void ix86_compute_frame_layout (struct ix86_frame *);
914 static int ix86_comp_type_attributes (tree, tree);
915 static int ix86_function_regparm (tree, tree);
916 const struct attribute_spec ix86_attribute_table[];
917 static bool ix86_function_ok_for_sibcall (tree, tree);
918 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
919 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
920 static int ix86_value_regno (enum machine_mode);
921 static bool contains_128bit_aligned_vector_p (tree);
922 static bool ix86_ms_bitfield_layout_p (tree);
923 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
924 static int extended_reg_mentioned_1 (rtx *, void *);
925 static bool ix86_rtx_costs (rtx, int, int, int *);
926 static int min_insn_size (rtx);
927 static tree ix86_md_asm_clobbers (tree clobbers);
929 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
930 static void ix86_svr3_asm_out_constructor (rtx, int);
933 /* Register class used for passing given 64bit part of the argument.
934 These represent classes as documented by the PS ABI, with the exception
935 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
936 use SF or DFmode move instead of DImode to avoid reformatting penalties.
938 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
939 whenever possible (upper half does contain padding).
941 enum x86_64_reg_class
944 X86_64_INTEGER_CLASS,
945 X86_64_INTEGERSI_CLASS,
954 static const char * const x86_64_reg_class_name[] =
955 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
957 #define MAX_CLASSES 4
958 static int classify_argument (enum machine_mode, tree,
959 enum x86_64_reg_class [MAX_CLASSES], int);
960 static int examine_argument (enum machine_mode, tree, int, int *, int *);
961 static rtx construct_container (enum machine_mode, tree, int, int, int,
963 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
964 enum x86_64_reg_class);
966 /* Table of constants used by fldpi, fldln2, etc.... */
967 static REAL_VALUE_TYPE ext_80387_constants_table [5];
968 static bool ext_80387_constants_init = 0;
969 static void init_ext_80387_constants (void);
971 /* Initialize the GCC target structure. */
972 #undef TARGET_ATTRIBUTE_TABLE
973 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
974 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
975 # undef TARGET_MERGE_DECL_ATTRIBUTES
976 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
979 #undef TARGET_COMP_TYPE_ATTRIBUTES
980 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
982 #undef TARGET_INIT_BUILTINS
983 #define TARGET_INIT_BUILTINS ix86_init_builtins
985 #undef TARGET_EXPAND_BUILTIN
986 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
988 #undef TARGET_ASM_FUNCTION_EPILOGUE
989 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
991 #undef TARGET_ASM_OPEN_PAREN
992 #define TARGET_ASM_OPEN_PAREN ""
993 #undef TARGET_ASM_CLOSE_PAREN
994 #define TARGET_ASM_CLOSE_PAREN ""
996 #undef TARGET_ASM_ALIGNED_HI_OP
997 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
998 #undef TARGET_ASM_ALIGNED_SI_OP
999 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1001 #undef TARGET_ASM_ALIGNED_DI_OP
1002 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1005 #undef TARGET_ASM_UNALIGNED_HI_OP
1006 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1007 #undef TARGET_ASM_UNALIGNED_SI_OP
1008 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1009 #undef TARGET_ASM_UNALIGNED_DI_OP
1010 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1012 #undef TARGET_SCHED_ADJUST_COST
1013 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1014 #undef TARGET_SCHED_ISSUE_RATE
1015 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1016 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
1017 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
1018 ia32_use_dfa_pipeline_interface
1019 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1020 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1021 ia32_multipass_dfa_lookahead
1023 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1024 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1027 #undef TARGET_HAVE_TLS
1028 #define TARGET_HAVE_TLS true
1030 #undef TARGET_CANNOT_FORCE_CONST_MEM
1031 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1033 #undef TARGET_DELEGITIMIZE_ADDRESS
1034 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1036 #undef TARGET_MS_BITFIELD_LAYOUT_P
1037 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1039 #undef TARGET_ASM_OUTPUT_MI_THUNK
1040 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1041 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1042 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1044 #undef TARGET_ASM_FILE_START
1045 #define TARGET_ASM_FILE_START x86_file_start
1047 #undef TARGET_RTX_COSTS
1048 #define TARGET_RTX_COSTS ix86_rtx_costs
1049 #undef TARGET_ADDRESS_COST
1050 #define TARGET_ADDRESS_COST ix86_address_cost
1052 #undef TARGET_FIXED_CONDITION_CODE_REGS
1053 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1054 #undef TARGET_CC_MODES_COMPATIBLE
1055 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1057 #undef TARGET_MACHINE_DEPENDENT_REORG
1058 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1060 #undef TARGET_BUILD_BUILTIN_VA_LIST
1061 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1063 #undef TARGET_MD_ASM_CLOBBERS
1064 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1066 #undef TARGET_PROMOTE_PROTOTYPES
1067 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1069 #undef TARGET_SETUP_INCOMING_VARARGS
1070 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1072 struct gcc_target targetm = TARGET_INITIALIZER;
1075 /* The svr4 ABI for the i386 says that records and unions are returned
1077 #ifndef DEFAULT_PCC_STRUCT_RETURN
1078 #define DEFAULT_PCC_STRUCT_RETURN 1
1081 /* Sometimes certain combinations of command options do not make
1082 sense on a particular target machine. You can define a macro
1083 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1084 defined, is executed once just after all the command options have
1087 Don't use this macro to turn on various extra optimizations for
1088 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1091 override_options (void)
1094 /* Comes from final.c -- no real reason to change it. */
1095 #define MAX_CODE_ALIGN 16
1099 const struct processor_costs *cost; /* Processor costs */
1100 const int target_enable; /* Target flags to enable. */
1101 const int target_disable; /* Target flags to disable. */
1102 const int align_loop; /* Default alignments. */
1103 const int align_loop_max_skip;
1104 const int align_jump;
1105 const int align_jump_max_skip;
1106 const int align_func;
1108 const processor_target_table[PROCESSOR_max] =
1110 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1111 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1112 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1113 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1114 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1115 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1116 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1117 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1118 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
1121 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1124 const char *const name; /* processor name or nickname. */
1125 const enum processor_type processor;
1126 const enum pta_flags
1132 PTA_PREFETCH_SSE = 16,
1138 const processor_alias_table[] =
1140 {"i386", PROCESSOR_I386, 0},
1141 {"i486", PROCESSOR_I486, 0},
1142 {"i586", PROCESSOR_PENTIUM, 0},
1143 {"pentium", PROCESSOR_PENTIUM, 0},
1144 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1145 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1146 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1147 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1148 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1149 {"i686", PROCESSOR_PENTIUMPRO, 0},
1150 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1151 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1152 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1153 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1154 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1155 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1156 | PTA_MMX | PTA_PREFETCH_SSE},
1157 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1158 | PTA_MMX | PTA_PREFETCH_SSE},
1159 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1160 | PTA_MMX | PTA_PREFETCH_SSE},
1161 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1162 | PTA_MMX | PTA_PREFETCH_SSE},
1163 {"k6", PROCESSOR_K6, PTA_MMX},
1164 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1165 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1166 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1168 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1169 | PTA_3DNOW | PTA_3DNOW_A},
1170 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1171 | PTA_3DNOW_A | PTA_SSE},
1172 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1173 | PTA_3DNOW_A | PTA_SSE},
1174 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1175 | PTA_3DNOW_A | PTA_SSE},
1176 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1177 | PTA_SSE | PTA_SSE2 },
1178 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1179 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1180 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1181 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1182 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1183 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1184 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1185 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1188 int const pta_size = ARRAY_SIZE (processor_alias_table);
1190 /* Set the default values for switches whose default depends on TARGET_64BIT
1191 in case they weren't overwritten by command line options. */
1194 if (flag_omit_frame_pointer == 2)
1195 flag_omit_frame_pointer = 1;
1196 if (flag_asynchronous_unwind_tables == 2)
1197 flag_asynchronous_unwind_tables = 1;
1198 if (flag_pcc_struct_return == 2)
1199 flag_pcc_struct_return = 0;
1203 if (flag_omit_frame_pointer == 2)
1204 flag_omit_frame_pointer = 0;
1205 if (flag_asynchronous_unwind_tables == 2)
1206 flag_asynchronous_unwind_tables = 0;
1207 if (flag_pcc_struct_return == 2)
1208 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1211 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1212 SUBTARGET_OVERRIDE_OPTIONS;
1215 if (!ix86_tune_string && ix86_arch_string)
1216 ix86_tune_string = ix86_arch_string;
1217 if (!ix86_tune_string)
1218 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1219 if (!ix86_arch_string)
1220 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1222 if (ix86_cmodel_string != 0)
1224 if (!strcmp (ix86_cmodel_string, "small"))
1225 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1227 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1228 else if (!strcmp (ix86_cmodel_string, "32"))
1229 ix86_cmodel = CM_32;
1230 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1231 ix86_cmodel = CM_KERNEL;
1232 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1233 ix86_cmodel = CM_MEDIUM;
1234 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1235 ix86_cmodel = CM_LARGE;
1237 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1241 ix86_cmodel = CM_32;
1243 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1245 if (ix86_asm_string != 0)
1247 if (!strcmp (ix86_asm_string, "intel"))
1248 ix86_asm_dialect = ASM_INTEL;
1249 else if (!strcmp (ix86_asm_string, "att"))
1250 ix86_asm_dialect = ASM_ATT;
1252 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1254 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1255 error ("code model `%s' not supported in the %s bit mode",
1256 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1257 if (ix86_cmodel == CM_LARGE)
1258 sorry ("code model `large' not supported yet");
1259 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1260 sorry ("%i-bit mode not compiled in",
1261 (target_flags & MASK_64BIT) ? 64 : 32);
1263 for (i = 0; i < pta_size; i++)
1264 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1266 ix86_arch = processor_alias_table[i].processor;
1267 /* Default cpu tuning to the architecture. */
1268 ix86_tune = ix86_arch;
1269 if (processor_alias_table[i].flags & PTA_MMX
1270 && !(target_flags_explicit & MASK_MMX))
1271 target_flags |= MASK_MMX;
1272 if (processor_alias_table[i].flags & PTA_3DNOW
1273 && !(target_flags_explicit & MASK_3DNOW))
1274 target_flags |= MASK_3DNOW;
1275 if (processor_alias_table[i].flags & PTA_3DNOW_A
1276 && !(target_flags_explicit & MASK_3DNOW_A))
1277 target_flags |= MASK_3DNOW_A;
1278 if (processor_alias_table[i].flags & PTA_SSE
1279 && !(target_flags_explicit & MASK_SSE))
1280 target_flags |= MASK_SSE;
1281 if (processor_alias_table[i].flags & PTA_SSE2
1282 && !(target_flags_explicit & MASK_SSE2))
1283 target_flags |= MASK_SSE2;
1284 if (processor_alias_table[i].flags & PTA_SSE3
1285 && !(target_flags_explicit & MASK_SSE3))
1286 target_flags |= MASK_SSE3;
1287 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1288 x86_prefetch_sse = true;
1289 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1290 error ("CPU you selected does not support x86-64 instruction set");
1295 error ("bad value (%s) for -march= switch", ix86_arch_string);
1297 for (i = 0; i < pta_size; i++)
1298 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1300 ix86_tune = processor_alias_table[i].processor;
1301 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1302 error ("CPU you selected does not support x86-64 instruction set");
1305 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1306 x86_prefetch_sse = true;
1308 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1311 ix86_cost = &size_cost;
1313 ix86_cost = processor_target_table[ix86_tune].cost;
1314 target_flags |= processor_target_table[ix86_tune].target_enable;
1315 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1317 /* Arrange to set up i386_stack_locals for all functions. */
1318 init_machine_status = ix86_init_machine_status;
1320 /* Validate -mregparm= value. */
1321 if (ix86_regparm_string)
1323 i = atoi (ix86_regparm_string);
1324 if (i < 0 || i > REGPARM_MAX)
1325 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1331 ix86_regparm = REGPARM_MAX;
1333 /* If the user has provided any of the -malign-* options,
1334 warn and use that value only if -falign-* is not set.
1335 Remove this code in GCC 3.2 or later. */
1336 if (ix86_align_loops_string)
1338 warning ("-malign-loops is obsolete, use -falign-loops");
1339 if (align_loops == 0)
1341 i = atoi (ix86_align_loops_string);
1342 if (i < 0 || i > MAX_CODE_ALIGN)
1343 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1345 align_loops = 1 << i;
1349 if (ix86_align_jumps_string)
1351 warning ("-malign-jumps is obsolete, use -falign-jumps");
1352 if (align_jumps == 0)
1354 i = atoi (ix86_align_jumps_string);
1355 if (i < 0 || i > MAX_CODE_ALIGN)
1356 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1358 align_jumps = 1 << i;
1362 if (ix86_align_funcs_string)
1364 warning ("-malign-functions is obsolete, use -falign-functions");
1365 if (align_functions == 0)
1367 i = atoi (ix86_align_funcs_string);
1368 if (i < 0 || i > MAX_CODE_ALIGN)
1369 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1371 align_functions = 1 << i;
1375 /* Default align_* from the processor table. */
1376 if (align_loops == 0)
1378 align_loops = processor_target_table[ix86_tune].align_loop;
1379 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1381 if (align_jumps == 0)
1383 align_jumps = processor_target_table[ix86_tune].align_jump;
1384 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1386 if (align_functions == 0)
1388 align_functions = processor_target_table[ix86_tune].align_func;
1391 /* Validate -mpreferred-stack-boundary= value, or provide default.
1392 The default of 128 bits is for Pentium III's SSE __m128, but we
1393 don't want additional code to keep the stack aligned when
1394 optimizing for code size. */
1395 ix86_preferred_stack_boundary = (optimize_size
1396 ? TARGET_64BIT ? 128 : 32
1398 if (ix86_preferred_stack_boundary_string)
1400 i = atoi (ix86_preferred_stack_boundary_string);
1401 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1402 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1403 TARGET_64BIT ? 4 : 2);
1405 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1408 /* Validate -mbranch-cost= value, or provide default. */
1409 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1410 if (ix86_branch_cost_string)
1412 i = atoi (ix86_branch_cost_string);
1414 error ("-mbranch-cost=%d is not between 0 and 5", i);
1416 ix86_branch_cost = i;
1419 if (ix86_tls_dialect_string)
1421 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1422 ix86_tls_dialect = TLS_DIALECT_GNU;
1423 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1424 ix86_tls_dialect = TLS_DIALECT_SUN;
1426 error ("bad value (%s) for -mtls-dialect= switch",
1427 ix86_tls_dialect_string);
1430 /* Keep nonleaf frame pointers. */
1431 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1432 flag_omit_frame_pointer = 1;
1434 /* If we're doing fast math, we don't care about comparison order
1435 wrt NaNs. This lets us use a shorter comparison sequence. */
1436 if (flag_unsafe_math_optimizations)
1437 target_flags &= ~MASK_IEEE_FP;
1439 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1440 since the insns won't need emulation. */
1441 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1442 target_flags &= ~MASK_NO_FANCY_MATH_387;
1444 /* Turn on SSE2 builtins for -msse3. */
1446 target_flags |= MASK_SSE2;
1448 /* Turn on SSE builtins for -msse2. */
1450 target_flags |= MASK_SSE;
1454 if (TARGET_ALIGN_DOUBLE)
1455 error ("-malign-double makes no sense in the 64bit mode");
1457 error ("-mrtd calling convention not supported in the 64bit mode");
1458 /* Enable by default the SSE and MMX builtins. */
1459 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1460 ix86_fpmath = FPMATH_SSE;
1464 ix86_fpmath = FPMATH_387;
1465 /* i386 ABI does not specify red zone. It still makes sense to use it
1466 when programmer takes care to stack from being destroyed. */
1467 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1468 target_flags |= MASK_NO_RED_ZONE;
1471 if (ix86_fpmath_string != 0)
1473 if (! strcmp (ix86_fpmath_string, "387"))
1474 ix86_fpmath = FPMATH_387;
1475 else if (! strcmp (ix86_fpmath_string, "sse"))
1479 warning ("SSE instruction set disabled, using 387 arithmetics");
1480 ix86_fpmath = FPMATH_387;
1483 ix86_fpmath = FPMATH_SSE;
1485 else if (! strcmp (ix86_fpmath_string, "387,sse")
1486 || ! strcmp (ix86_fpmath_string, "sse,387"))
1490 warning ("SSE instruction set disabled, using 387 arithmetics");
1491 ix86_fpmath = FPMATH_387;
1493 else if (!TARGET_80387)
1495 warning ("387 instruction set disabled, using SSE arithmetics");
1496 ix86_fpmath = FPMATH_SSE;
1499 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1502 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1505 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1509 target_flags |= MASK_MMX;
1510 x86_prefetch_sse = true;
1513 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1516 target_flags |= MASK_MMX;
1517 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1518 extensions it adds. */
1519 if (x86_3dnow_a & (1 << ix86_arch))
1520 target_flags |= MASK_3DNOW_A;
1522 if ((x86_accumulate_outgoing_args & TUNEMASK)
1523 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1525 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1527 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1530 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1531 p = strchr (internal_label_prefix, 'X');
1532 internal_label_prefix_len = p - internal_label_prefix;
1538 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1540 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1541 make the problem with not enough registers even worse. */
1542 #ifdef INSN_SCHEDULING
1544 flag_schedule_insns = 0;
1547 /* The default values of these switches depend on the TARGET_64BIT
1548 that is not known at this moment. Mark these values with 2 and
1549 let user the to override these. In case there is no command line option
1550 specifying them, we will set the defaults in override_options. */
1552 flag_omit_frame_pointer = 2;
1553 flag_pcc_struct_return = 2;
1554 flag_asynchronous_unwind_tables = 2;
1557 /* Table of valid machine attributes. */
1558 const struct attribute_spec ix86_attribute_table[] =
1560 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1561 /* Stdcall attribute says callee is responsible for popping arguments
1562 if they are not variable. */
1563 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1564 /* Fastcall attribute says callee is responsible for popping arguments
1565 if they are not variable. */
1566 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1567 /* Cdecl attribute says the callee is a normal C declaration */
1568 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1569 /* Regparm attribute specifies how many integer arguments are to be
1570 passed in registers. */
1571 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1572 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1573 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1574 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1575 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1577 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1578 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1579 { NULL, 0, 0, false, false, false, NULL }
1582 /* Decide whether we can make a sibling call to a function. DECL is the
1583 declaration of the function being targeted by the call and EXP is the
1584 CALL_EXPR representing the call. */
1587 ix86_function_ok_for_sibcall (tree decl, tree exp)
1589 /* If we are generating position-independent code, we cannot sibcall
1590 optimize any indirect call, or a direct call to a global function,
1591 as the PLT requires %ebx be live. */
1592 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1595 /* If we are returning floats on the 80387 register stack, we cannot
1596 make a sibcall from a function that doesn't return a float to a
1597 function that does or, conversely, from a function that does return
1598 a float to a function that doesn't; the necessary stack adjustment
1599 would not be executed. */
1600 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1601 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1604 /* If this call is indirect, we'll need to be able to use a call-clobbered
1605 register for the address of the target function. Make sure that all
1606 such registers are not used for passing parameters. */
1607 if (!decl && !TARGET_64BIT)
1611 /* We're looking at the CALL_EXPR, we need the type of the function. */
1612 type = TREE_OPERAND (exp, 0); /* pointer expression */
1613 type = TREE_TYPE (type); /* pointer type */
1614 type = TREE_TYPE (type); /* function type */
1616 if (ix86_function_regparm (type, NULL) >= 3)
1618 /* ??? Need to count the actual number of registers to be used,
1619 not the possible number of registers. Fix later. */
1624 /* Otherwise okay. That also includes certain types of indirect calls. */
1628 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1629 arguments as in struct attribute_spec.handler. */
1631 ix86_handle_cdecl_attribute (tree *node, tree name,
1632 tree args ATTRIBUTE_UNUSED,
1633 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1635 if (TREE_CODE (*node) != FUNCTION_TYPE
1636 && TREE_CODE (*node) != METHOD_TYPE
1637 && TREE_CODE (*node) != FIELD_DECL
1638 && TREE_CODE (*node) != TYPE_DECL)
1640 warning ("`%s' attribute only applies to functions",
1641 IDENTIFIER_POINTER (name));
1642 *no_add_attrs = true;
1646 if (is_attribute_p ("fastcall", name))
1648 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1650 error ("fastcall and stdcall attributes are not compatible");
1652 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1654 error ("fastcall and regparm attributes are not compatible");
1657 else if (is_attribute_p ("stdcall", name))
1659 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1661 error ("fastcall and stdcall attributes are not compatible");
1668 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1669 *no_add_attrs = true;
1675 /* Handle a "regparm" attribute;
1676 arguments as in struct attribute_spec.handler. */
1678 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1679 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1681 if (TREE_CODE (*node) != FUNCTION_TYPE
1682 && TREE_CODE (*node) != METHOD_TYPE
1683 && TREE_CODE (*node) != FIELD_DECL
1684 && TREE_CODE (*node) != TYPE_DECL)
1686 warning ("`%s' attribute only applies to functions",
1687 IDENTIFIER_POINTER (name));
1688 *no_add_attrs = true;
1694 cst = TREE_VALUE (args);
1695 if (TREE_CODE (cst) != INTEGER_CST)
1697 warning ("`%s' attribute requires an integer constant argument",
1698 IDENTIFIER_POINTER (name));
1699 *no_add_attrs = true;
1701 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1703 warning ("argument to `%s' attribute larger than %d",
1704 IDENTIFIER_POINTER (name), REGPARM_MAX);
1705 *no_add_attrs = true;
1708 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1710 error ("fastcall and regparm attributes are not compatible");
1717 /* Return 0 if the attributes for two types are incompatible, 1 if they
1718 are compatible, and 2 if they are nearly compatible (which causes a
1719 warning to be generated). */
1722 ix86_comp_type_attributes (tree type1, tree type2)
1724 /* Check for mismatch of non-default calling convention. */
1725 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1727 if (TREE_CODE (type1) != FUNCTION_TYPE)
1730 /* Check for mismatched fastcall types */
1731 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1732 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1735 /* Check for mismatched return types (cdecl vs stdcall). */
1736 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1737 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1739 if (ix86_function_regparm (type1, NULL)
1740 != ix86_function_regparm (type2, NULL))
1745 /* Return the regparm value for a fuctio with the indicated TYPE and DECL.
1746 DECL may be NULL when calling function indirectly
1747 or considering a libcall. */
1750 ix86_function_regparm (tree type, tree decl)
1753 int regparm = ix86_regparm;
1754 bool user_convention = false;
1758 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1761 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1762 user_convention = true;
1765 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1768 user_convention = true;
1771 /* Use register calling convention for local functions when possible. */
1772 if (!TARGET_64BIT && !user_convention && decl
1773 && flag_unit_at_a_time && !profile_flag)
1775 struct cgraph_local_info *i = cgraph_local_info (decl);
1778 /* We can't use regparm(3) for nested functions as these use
1779 static chain pointer in third argument. */
1780 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1790 /* Return true if EAX is live at the start of the function. Used by
1791 ix86_expand_prologue to determine if we need special help before
1792 calling allocate_stack_worker. */
1795 ix86_eax_live_at_start_p (void)
1797 /* Cheat. Don't bother working forward from ix86_function_regparm
1798 to the function type to whether an actual argument is located in
1799 eax. Instead just look at cfg info, which is still close enough
1800 to correct at this point. This gives false positives for broken
1801 functions that might use uninitialized data that happens to be
1802 allocated in eax, but who cares? */
1803 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1806 /* Value is the number of bytes of arguments automatically
1807 popped when returning from a subroutine call.
1808 FUNDECL is the declaration node of the function (as a tree),
1809 FUNTYPE is the data type of the function (as a tree),
1810 or for a library call it is an identifier node for the subroutine name.
1811 SIZE is the number of bytes of arguments passed on the stack.
1813 On the 80386, the RTD insn may be used to pop them if the number
1814 of args is fixed, but if the number is variable then the caller
1815 must pop them all. RTD can't be used for library calls now
1816 because the library is compiled with the Unix compiler.
1817 Use of RTD is a selectable option, since it is incompatible with
1818 standard Unix calling sequences. If the option is not selected,
1819 the caller must always pop the args.
1821 The attribute stdcall is equivalent to RTD on a per module basis. */
1824 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1826 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1828 /* Cdecl functions override -mrtd, and never pop the stack. */
1829 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1831 /* Stdcall and fastcall functions will pop the stack if not
1833 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1834 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1838 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1839 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1840 == void_type_node)))
1844 /* Lose any fake structure return argument if it is passed on the stack. */
1845 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1848 int nregs = ix86_function_regparm (funtype, fundecl);
1851 return GET_MODE_SIZE (Pmode);
1857 /* Argument support functions. */
1859 /* Return true when register may be used to pass function parameters. */
1861 ix86_function_arg_regno_p (int regno)
1865 return (regno < REGPARM_MAX
1866 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1867 if (SSE_REGNO_P (regno) && TARGET_SSE)
1869 /* RAX is used as hidden argument to va_arg functions. */
1872 for (i = 0; i < REGPARM_MAX; i++)
1873 if (regno == x86_64_int_parameter_registers[i])
1878 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1879 for a call to a function whose data type is FNTYPE.
1880 For a library call, FNTYPE is 0. */
1883 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1884 tree fntype, /* tree ptr for function decl */
1885 rtx libname, /* SYMBOL_REF of library name or 0 */
1888 static CUMULATIVE_ARGS zero_cum;
1889 tree param, next_param;
1891 if (TARGET_DEBUG_ARG)
1893 fprintf (stderr, "\ninit_cumulative_args (");
1895 fprintf (stderr, "fntype code = %s, ret code = %s",
1896 tree_code_name[(int) TREE_CODE (fntype)],
1897 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1899 fprintf (stderr, "no fntype");
1902 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1907 /* Set up the number of registers to use for passing arguments. */
1909 cum->nregs = ix86_function_regparm (fntype, fndecl);
1911 cum->nregs = ix86_regparm;
1912 cum->sse_nregs = SSE_REGPARM_MAX;
1913 cum->mmx_nregs = MMX_REGPARM_MAX;
1914 cum->warn_sse = true;
1915 cum->warn_mmx = true;
1916 cum->maybe_vaarg = false;
1918 /* Use ecx and edx registers if function has fastcall attribute */
1919 if (fntype && !TARGET_64BIT)
1921 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1929 /* Determine if this function has variable arguments. This is
1930 indicated by the last argument being 'void_type_mode' if there
1931 are no variable arguments. If there are variable arguments, then
1932 we won't pass anything in registers */
1934 if (cum->nregs || !TARGET_MMX || !TARGET_SSE)
1936 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1937 param != 0; param = next_param)
1939 next_param = TREE_CHAIN (param);
1940 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1951 cum->maybe_vaarg = true;
1955 if ((!fntype && !libname)
1956 || (fntype && !TYPE_ARG_TYPES (fntype)))
1957 cum->maybe_vaarg = 1;
1959 if (TARGET_DEBUG_ARG)
1960 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1965 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1966 of this code is to classify each 8bytes of incoming argument by the register
1967 class and assign registers accordingly. */
1969 /* Return the union class of CLASS1 and CLASS2.
1970 See the x86-64 PS ABI for details. */
1972 static enum x86_64_reg_class
1973 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1975 /* Rule #1: If both classes are equal, this is the resulting class. */
1976 if (class1 == class2)
1979 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1981 if (class1 == X86_64_NO_CLASS)
1983 if (class2 == X86_64_NO_CLASS)
1986 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1987 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1988 return X86_64_MEMORY_CLASS;
1990 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1991 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1992 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1993 return X86_64_INTEGERSI_CLASS;
1994 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1995 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1996 return X86_64_INTEGER_CLASS;
1998 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1999 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
2000 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
2001 return X86_64_MEMORY_CLASS;
2003 /* Rule #6: Otherwise class SSE is used. */
2004 return X86_64_SSE_CLASS;
2007 /* Classify the argument of type TYPE and mode MODE.
2008 CLASSES will be filled by the register class used to pass each word
2009 of the operand. The number of words is returned. In case the parameter
2010 should be passed in memory, 0 is returned. As a special case for zero
2011 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2013 BIT_OFFSET is used internally for handling records and specifies offset
2014 of the offset in bits modulo 256 to avoid overflow cases.
2016 See the x86-64 PS ABI for details.
2020 classify_argument (enum machine_mode mode, tree type,
2021 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2023 HOST_WIDE_INT bytes =
2024 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2025 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2027 /* Variable sized entities are always passed/returned in memory. */
2031 if (mode != VOIDmode
2032 && MUST_PASS_IN_STACK (mode, type))
2035 if (type && AGGREGATE_TYPE_P (type))
2039 enum x86_64_reg_class subclasses[MAX_CLASSES];
2041 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2045 for (i = 0; i < words; i++)
2046 classes[i] = X86_64_NO_CLASS;
2048 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2049 signalize memory class, so handle it as special case. */
2052 classes[0] = X86_64_NO_CLASS;
2056 /* Classify each field of record and merge classes. */
2057 if (TREE_CODE (type) == RECORD_TYPE)
2059 /* For classes first merge in the field of the subclasses. */
2060 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2062 tree bases = TYPE_BINFO_BASETYPES (type);
2063 int n_bases = TREE_VEC_LENGTH (bases);
2066 for (i = 0; i < n_bases; ++i)
2068 tree binfo = TREE_VEC_ELT (bases, i);
2070 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2071 tree type = BINFO_TYPE (binfo);
2073 num = classify_argument (TYPE_MODE (type),
2075 (offset + bit_offset) % 256);
2078 for (i = 0; i < num; i++)
2080 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2082 merge_classes (subclasses[i], classes[i + pos]);
2086 /* And now merge the fields of structure. */
2087 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2089 if (TREE_CODE (field) == FIELD_DECL)
2093 /* Bitfields are always classified as integer. Handle them
2094 early, since later code would consider them to be
2095 misaligned integers. */
2096 if (DECL_BIT_FIELD (field))
2098 for (i = int_bit_position (field) / 8 / 8;
2099 i < (int_bit_position (field)
2100 + tree_low_cst (DECL_SIZE (field), 0)
2103 merge_classes (X86_64_INTEGER_CLASS,
2108 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2109 TREE_TYPE (field), subclasses,
2110 (int_bit_position (field)
2111 + bit_offset) % 256);
2114 for (i = 0; i < num; i++)
2117 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2119 merge_classes (subclasses[i], classes[i + pos]);
2125 /* Arrays are handled as small records. */
2126 else if (TREE_CODE (type) == ARRAY_TYPE)
2129 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2130 TREE_TYPE (type), subclasses, bit_offset);
2134 /* The partial classes are now full classes. */
2135 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2136 subclasses[0] = X86_64_SSE_CLASS;
2137 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2138 subclasses[0] = X86_64_INTEGER_CLASS;
2140 for (i = 0; i < words; i++)
2141 classes[i] = subclasses[i % num];
2143 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2144 else if (TREE_CODE (type) == UNION_TYPE
2145 || TREE_CODE (type) == QUAL_UNION_TYPE)
2147 /* For classes first merge in the field of the subclasses. */
2148 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2150 tree bases = TYPE_BINFO_BASETYPES (type);
2151 int n_bases = TREE_VEC_LENGTH (bases);
2154 for (i = 0; i < n_bases; ++i)
2156 tree binfo = TREE_VEC_ELT (bases, i);
2158 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2159 tree type = BINFO_TYPE (binfo);
2161 num = classify_argument (TYPE_MODE (type),
2163 (offset + (bit_offset % 64)) % 256);
2166 for (i = 0; i < num; i++)
2168 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2170 merge_classes (subclasses[i], classes[i + pos]);
2174 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2176 if (TREE_CODE (field) == FIELD_DECL)
2179 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2180 TREE_TYPE (field), subclasses,
2184 for (i = 0; i < num; i++)
2185 classes[i] = merge_classes (subclasses[i], classes[i]);
2189 else if (TREE_CODE (type) == SET_TYPE)
2193 classes[0] = X86_64_INTEGERSI_CLASS;
2196 else if (bytes <= 8)
2198 classes[0] = X86_64_INTEGER_CLASS;
2201 else if (bytes <= 12)
2203 classes[0] = X86_64_INTEGER_CLASS;
2204 classes[1] = X86_64_INTEGERSI_CLASS;
2209 classes[0] = X86_64_INTEGER_CLASS;
2210 classes[1] = X86_64_INTEGER_CLASS;
2217 /* Final merger cleanup. */
2218 for (i = 0; i < words; i++)
2220 /* If one class is MEMORY, everything should be passed in
2222 if (classes[i] == X86_64_MEMORY_CLASS)
2225 /* The X86_64_SSEUP_CLASS should be always preceded by
2226 X86_64_SSE_CLASS. */
2227 if (classes[i] == X86_64_SSEUP_CLASS
2228 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2229 classes[i] = X86_64_SSE_CLASS;
2231 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2232 if (classes[i] == X86_64_X87UP_CLASS
2233 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2234 classes[i] = X86_64_SSE_CLASS;
2239 /* Compute alignment needed. We align all types to natural boundaries with
2240 exception of XFmode that is aligned to 64bits. */
2241 if (mode != VOIDmode && mode != BLKmode)
2243 int mode_alignment = GET_MODE_BITSIZE (mode);
2246 mode_alignment = 128;
2247 else if (mode == XCmode)
2248 mode_alignment = 256;
2249 /* Misaligned fields are always returned in memory. */
2250 if (bit_offset % mode_alignment)
2254 /* Classification of atomic types. */
2264 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2265 classes[0] = X86_64_INTEGERSI_CLASS;
2267 classes[0] = X86_64_INTEGER_CLASS;
2271 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2274 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2275 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2278 if (!(bit_offset % 64))
2279 classes[0] = X86_64_SSESF_CLASS;
2281 classes[0] = X86_64_SSE_CLASS;
2284 classes[0] = X86_64_SSEDF_CLASS;
2287 classes[0] = X86_64_X87_CLASS;
2288 classes[1] = X86_64_X87UP_CLASS;
2294 classes[0] = X86_64_X87_CLASS;
2295 classes[1] = X86_64_X87UP_CLASS;
2296 classes[2] = X86_64_X87_CLASS;
2297 classes[3] = X86_64_X87UP_CLASS;
2300 classes[0] = X86_64_SSEDF_CLASS;
2301 classes[1] = X86_64_SSEDF_CLASS;
2304 classes[0] = X86_64_SSE_CLASS;
2312 classes[0] = X86_64_SSE_CLASS;
2313 classes[1] = X86_64_SSEUP_CLASS;
2328 /* Examine the argument and return set number of register required in each
2329 class. Return 0 iff parameter should be passed in memory. */
2331 examine_argument (enum machine_mode mode, tree type, int in_return,
2332 int *int_nregs, int *sse_nregs)
2334 enum x86_64_reg_class class[MAX_CLASSES];
2335 int n = classify_argument (mode, type, class, 0);
2341 for (n--; n >= 0; n--)
2344 case X86_64_INTEGER_CLASS:
2345 case X86_64_INTEGERSI_CLASS:
2348 case X86_64_SSE_CLASS:
2349 case X86_64_SSESF_CLASS:
2350 case X86_64_SSEDF_CLASS:
2353 case X86_64_NO_CLASS:
2354 case X86_64_SSEUP_CLASS:
2356 case X86_64_X87_CLASS:
2357 case X86_64_X87UP_CLASS:
2361 case X86_64_MEMORY_CLASS:
2366 /* Construct container for the argument used by GCC interface. See
2367 FUNCTION_ARG for the detailed description. */
2369 construct_container (enum machine_mode mode, tree type, int in_return,
2370 int nintregs, int nsseregs, const int * intreg,
2373 enum machine_mode tmpmode;
2375 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2376 enum x86_64_reg_class class[MAX_CLASSES];
2380 int needed_sseregs, needed_intregs;
2381 rtx exp[MAX_CLASSES];
2384 n = classify_argument (mode, type, class, 0);
2385 if (TARGET_DEBUG_ARG)
2388 fprintf (stderr, "Memory class\n");
2391 fprintf (stderr, "Classes:");
2392 for (i = 0; i < n; i++)
2394 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2396 fprintf (stderr, "\n");
2401 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2403 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2406 /* First construct simple cases. Avoid SCmode, since we want to use
2407 single register to pass this type. */
2408 if (n == 1 && mode != SCmode)
2411 case X86_64_INTEGER_CLASS:
2412 case X86_64_INTEGERSI_CLASS:
2413 return gen_rtx_REG (mode, intreg[0]);
2414 case X86_64_SSE_CLASS:
2415 case X86_64_SSESF_CLASS:
2416 case X86_64_SSEDF_CLASS:
2417 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2418 case X86_64_X87_CLASS:
2419 return gen_rtx_REG (mode, FIRST_STACK_REG);
2420 case X86_64_NO_CLASS:
2421 /* Zero sized array, struct or class. */
2426 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2427 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2429 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2430 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2431 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2432 && class[1] == X86_64_INTEGER_CLASS
2433 && (mode == CDImode || mode == TImode || mode == TFmode)
2434 && intreg[0] + 1 == intreg[1])
2435 return gen_rtx_REG (mode, intreg[0]);
2437 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2438 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2439 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2441 /* Otherwise figure out the entries of the PARALLEL. */
2442 for (i = 0; i < n; i++)
2446 case X86_64_NO_CLASS:
2448 case X86_64_INTEGER_CLASS:
2449 case X86_64_INTEGERSI_CLASS:
2450 /* Merge TImodes on aligned occasions here too. */
2451 if (i * 8 + 8 > bytes)
2452 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2453 else if (class[i] == X86_64_INTEGERSI_CLASS)
2457 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2458 if (tmpmode == BLKmode)
2460 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2461 gen_rtx_REG (tmpmode, *intreg),
2465 case X86_64_SSESF_CLASS:
2466 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2467 gen_rtx_REG (SFmode,
2468 SSE_REGNO (sse_regno)),
2472 case X86_64_SSEDF_CLASS:
2473 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2474 gen_rtx_REG (DFmode,
2475 SSE_REGNO (sse_regno)),
2479 case X86_64_SSE_CLASS:
2480 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2484 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2485 gen_rtx_REG (tmpmode,
2486 SSE_REGNO (sse_regno)),
2488 if (tmpmode == TImode)
2496 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2497 for (i = 0; i < nexps; i++)
2498 XVECEXP (ret, 0, i) = exp [i];
2502 /* Update the data in CUM to advance over an argument
2503 of mode MODE and data type TYPE.
2504 (TYPE is null for libcalls where that information may not be available.) */
2507 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2508 enum machine_mode mode, /* current arg mode */
2509 tree type, /* type of the argument or 0 if lib support */
2510 int named) /* whether or not the argument was named */
2513 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2514 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2516 if (TARGET_DEBUG_ARG)
2518 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2519 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2522 int int_nregs, sse_nregs;
2523 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2524 cum->words += words;
2525 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2527 cum->nregs -= int_nregs;
2528 cum->sse_nregs -= sse_nregs;
2529 cum->regno += int_nregs;
2530 cum->sse_regno += sse_nregs;
2533 cum->words += words;
2537 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2538 && (!type || !AGGREGATE_TYPE_P (type)))
2540 cum->sse_words += words;
2541 cum->sse_nregs -= 1;
2542 cum->sse_regno += 1;
2543 if (cum->sse_nregs <= 0)
2549 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2550 && (!type || !AGGREGATE_TYPE_P (type)))
2552 cum->mmx_words += words;
2553 cum->mmx_nregs -= 1;
2554 cum->mmx_regno += 1;
2555 if (cum->mmx_nregs <= 0)
2563 cum->words += words;
2564 cum->nregs -= words;
2565 cum->regno += words;
2567 if (cum->nregs <= 0)
2577 /* Define where to put the arguments to a function.
2578 Value is zero to push the argument on the stack,
2579 or a hard register in which to store the argument.
2581 MODE is the argument's machine mode.
2582 TYPE is the data type of the argument (as a tree).
2583 This is null for libcalls where that information may
2585 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2586 the preceding args and about the function being called.
2587 NAMED is nonzero if this argument is a named parameter
2588 (otherwise it is an extra parameter matching an ellipsis). */
2591 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2592 enum machine_mode mode, /* current arg mode */
2593 tree type, /* type of the argument or 0 if lib support */
2594 int named) /* != 0 for normal args, == 0 for ... args */
2598 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2599 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2600 static bool warnedsse, warnedmmx;
2602 /* Handle a hidden AL argument containing number of registers for varargs
2603 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2605 if (mode == VOIDmode)
2608 return GEN_INT (cum->maybe_vaarg
2609 ? (cum->sse_nregs < 0
2617 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2618 &x86_64_int_parameter_registers [cum->regno],
2623 /* For now, pass fp/complex values on the stack. */
2635 if (words <= cum->nregs)
2637 int regno = cum->regno;
2639 /* Fastcall allocates the first two DWORD (SImode) or
2640 smaller arguments to ECX and EDX. */
2643 if (mode == BLKmode || mode == DImode)
2646 /* ECX not EAX is the first allocated register. */
2650 ret = gen_rtx_REG (mode, regno);
2660 if (!type || !AGGREGATE_TYPE_P (type))
2662 if (!TARGET_SSE && !warnedmmx && cum->warn_sse)
2665 warning ("SSE vector argument without SSE enabled "
2669 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2676 if (!type || !AGGREGATE_TYPE_P (type))
2678 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2681 warning ("MMX vector argument without MMX enabled "
2685 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2690 if (TARGET_DEBUG_ARG)
2693 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2694 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2697 print_simple_rtl (stderr, ret);
2699 fprintf (stderr, ", stack");
2701 fprintf (stderr, " )\n");
2707 /* A C expression that indicates when an argument must be passed by
2708 reference. If nonzero for an argument, a copy of that argument is
2709 made in memory and a pointer to the argument is passed instead of
2710 the argument itself. The pointer is passed in whatever way is
2711 appropriate for passing a pointer to that type. */
2714 function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2715 enum machine_mode mode ATTRIBUTE_UNUSED,
2716 tree type, int named ATTRIBUTE_UNUSED)
2721 if (type && int_size_in_bytes (type) == -1)
2723 if (TARGET_DEBUG_ARG)
2724 fprintf (stderr, "function_arg_pass_by_reference\n");
2731 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2734 contains_128bit_aligned_vector_p (tree type)
2736 enum machine_mode mode = TYPE_MODE (type);
2737 if (SSE_REG_MODE_P (mode)
2738 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2740 if (TYPE_ALIGN (type) < 128)
2743 if (AGGREGATE_TYPE_P (type))
2745 /* Walk the aggregates recursively. */
2746 if (TREE_CODE (type) == RECORD_TYPE
2747 || TREE_CODE (type) == UNION_TYPE
2748 || TREE_CODE (type) == QUAL_UNION_TYPE)
2752 if (TYPE_BINFO (type) != NULL
2753 && TYPE_BINFO_BASETYPES (type) != NULL)
2755 tree bases = TYPE_BINFO_BASETYPES (type);
2756 int n_bases = TREE_VEC_LENGTH (bases);
2759 for (i = 0; i < n_bases; ++i)
2761 tree binfo = TREE_VEC_ELT (bases, i);
2762 tree type = BINFO_TYPE (binfo);
2764 if (contains_128bit_aligned_vector_p (type))
2768 /* And now merge the fields of structure. */
2769 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2771 if (TREE_CODE (field) == FIELD_DECL
2772 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2776 /* Just for use if some languages passes arrays by value. */
2777 else if (TREE_CODE (type) == ARRAY_TYPE)
2779 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2788 /* Gives the alignment boundary, in bits, of an argument with the
2789 specified mode and type. */
2792 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2796 align = TYPE_ALIGN (type);
2798 align = GET_MODE_ALIGNMENT (mode);
2799 if (align < PARM_BOUNDARY)
2800 align = PARM_BOUNDARY;
2803 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2804 make an exception for SSE modes since these require 128bit
2807 The handling here differs from field_alignment. ICC aligns MMX
2808 arguments to 4 byte boundaries, while structure fields are aligned
2809 to 8 byte boundaries. */
2812 if (!SSE_REG_MODE_P (mode))
2813 align = PARM_BOUNDARY;
2817 if (!contains_128bit_aligned_vector_p (type))
2818 align = PARM_BOUNDARY;
2826 /* Return true if N is a possible register number of function value. */
2828 ix86_function_value_regno_p (int regno)
2832 return ((regno) == 0
2833 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2834 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2836 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2837 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2838 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2841 /* Define how to find the value returned by a function.
2842 VALTYPE is the data type of the value (as a tree).
2843 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2844 otherwise, FUNC is 0. */
2846 ix86_function_value (tree valtype)
2850 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2851 REGPARM_MAX, SSE_REGPARM_MAX,
2852 x86_64_int_return_registers, 0);
2853 /* For zero sized structures, construct_container return NULL, but we need
2854 to keep rest of compiler happy by returning meaningful value. */
2856 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2860 return gen_rtx_REG (TYPE_MODE (valtype),
2861 ix86_value_regno (TYPE_MODE (valtype)));
2864 /* Return false iff type is returned in memory. */
2866 ix86_return_in_memory (tree type)
2868 int needed_intregs, needed_sseregs, size;
2869 enum machine_mode mode = TYPE_MODE (type);
2872 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2874 if (mode == BLKmode)
2877 size = int_size_in_bytes (type);
2879 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2882 if (VECTOR_MODE_P (mode) || mode == TImode)
2884 /* User-created vectors small enough to fit in EAX. */
2888 /* MMX/3dNow values are returned on the stack, since we've
2889 got to EMMS/FEMMS before returning. */
2893 /* SSE values are returned in XMM0. */
2894 /* ??? Except when it doesn't exist? We have a choice of
2895 either (1) being abi incompatible with a -march switch,
2896 or (2) generating an error here. Given no good solution,
2897 I think the safest thing is one warning. The user won't
2898 be able to use -Werror, but.... */
2909 warning ("SSE vector return without SSE enabled "
2924 /* Define how to find the value returned by a library function
2925 assuming the value has mode MODE. */
2927 ix86_libcall_value (enum machine_mode mode)
2937 return gen_rtx_REG (mode, FIRST_SSE_REG);
2940 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2945 return gen_rtx_REG (mode, 0);
2949 return gen_rtx_REG (mode, ix86_value_regno (mode));
2952 /* Given a mode, return the register to use for a return value. */
2955 ix86_value_regno (enum machine_mode mode)
2957 /* Floating point return values in %st(0). */
2958 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2959 return FIRST_FLOAT_REG;
2960 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2961 we prevent this case when sse is not available. */
2962 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2963 return FIRST_SSE_REG;
2964 /* Everything else in %eax. */
2968 /* Create the va_list data type. */
2971 ix86_build_builtin_va_list (void)
2973 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2975 /* For i386 we use plain pointer to argument area. */
2977 return build_pointer_type (char_type_node);
2979 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2980 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2982 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2983 unsigned_type_node);
2984 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2985 unsigned_type_node);
2986 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2988 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2991 DECL_FIELD_CONTEXT (f_gpr) = record;
2992 DECL_FIELD_CONTEXT (f_fpr) = record;
2993 DECL_FIELD_CONTEXT (f_ovf) = record;
2994 DECL_FIELD_CONTEXT (f_sav) = record;
2996 TREE_CHAIN (record) = type_decl;
2997 TYPE_NAME (record) = type_decl;
2998 TYPE_FIELDS (record) = f_gpr;
2999 TREE_CHAIN (f_gpr) = f_fpr;
3000 TREE_CHAIN (f_fpr) = f_ovf;
3001 TREE_CHAIN (f_ovf) = f_sav;
3003 layout_type (record);
3005 /* The correct type is an array type of one element. */
3006 return build_array_type (record, build_index_type (size_zero_node));
3009 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3012 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3013 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3016 CUMULATIVE_ARGS next_cum;
3017 rtx save_area = NULL_RTX, mem;
3030 /* Indicate to allocate space on the stack for varargs save area. */
3031 ix86_save_varrargs_registers = 1;
3033 cfun->stack_alignment_needed = 128;
3035 fntype = TREE_TYPE (current_function_decl);
3036 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3037 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3038 != void_type_node));
3040 /* For varargs, we do not want to skip the dummy va_dcl argument.
3041 For stdargs, we do want to skip the last named argument. */
3044 function_arg_advance (&next_cum, mode, type, 1);
3047 save_area = frame_pointer_rtx;
3049 set = get_varargs_alias_set ();
3051 for (i = next_cum.regno; i < ix86_regparm; i++)
3053 mem = gen_rtx_MEM (Pmode,
3054 plus_constant (save_area, i * UNITS_PER_WORD));
3055 set_mem_alias_set (mem, set);
3056 emit_move_insn (mem, gen_rtx_REG (Pmode,
3057 x86_64_int_parameter_registers[i]));
3060 if (next_cum.sse_nregs)
3062 /* Now emit code to save SSE registers. The AX parameter contains number
3063 of SSE parameter registers used to call this function. We use
3064 sse_prologue_save insn template that produces computed jump across
3065 SSE saves. We need some preparation work to get this working. */
3067 label = gen_label_rtx ();
3068 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3070 /* Compute address to jump to :
3071 label - 5*eax + nnamed_sse_arguments*5 */
3072 tmp_reg = gen_reg_rtx (Pmode);
3073 nsse_reg = gen_reg_rtx (Pmode);
3074 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3075 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3076 gen_rtx_MULT (Pmode, nsse_reg,
3078 if (next_cum.sse_regno)
3081 gen_rtx_CONST (DImode,
3082 gen_rtx_PLUS (DImode,
3084 GEN_INT (next_cum.sse_regno * 4))));
3086 emit_move_insn (nsse_reg, label_ref);
3087 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3089 /* Compute address of memory block we save into. We always use pointer
3090 pointing 127 bytes after first byte to store - this is needed to keep
3091 instruction size limited by 4 bytes. */
3092 tmp_reg = gen_reg_rtx (Pmode);
3093 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3094 plus_constant (save_area,
3095 8 * REGPARM_MAX + 127)));
3096 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3097 set_mem_alias_set (mem, set);
3098 set_mem_align (mem, BITS_PER_WORD);
3100 /* And finally do the dirty job! */
3101 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3102 GEN_INT (next_cum.sse_regno), label));
3107 /* Implement va_start. */
3110 ix86_va_start (tree valist, rtx nextarg)
3112 HOST_WIDE_INT words, n_gpr, n_fpr;
3113 tree f_gpr, f_fpr, f_ovf, f_sav;
3114 tree gpr, fpr, ovf, sav, t;
3116 /* Only 64bit target needs something special. */
3119 std_expand_builtin_va_start (valist, nextarg);
3123 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3124 f_fpr = TREE_CHAIN (f_gpr);
3125 f_ovf = TREE_CHAIN (f_fpr);
3126 f_sav = TREE_CHAIN (f_ovf);
3128 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3129 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3130 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3131 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3132 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3134 /* Count number of gp and fp argument registers used. */
3135 words = current_function_args_info.words;
3136 n_gpr = current_function_args_info.regno;
3137 n_fpr = current_function_args_info.sse_regno;
3139 if (TARGET_DEBUG_ARG)
3140 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3141 (int) words, (int) n_gpr, (int) n_fpr);
3143 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3144 build_int_2 (n_gpr * 8, 0));
3145 TREE_SIDE_EFFECTS (t) = 1;
3146 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3148 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3149 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3150 TREE_SIDE_EFFECTS (t) = 1;
3151 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3153 /* Find the overflow area. */
3154 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3156 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3157 build_int_2 (words * UNITS_PER_WORD, 0));
3158 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3159 TREE_SIDE_EFFECTS (t) = 1;
3160 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3162 /* Find the register save area.
3163 Prologue of the function save it right above stack frame. */
3164 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3165 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3166 TREE_SIDE_EFFECTS (t) = 1;
3167 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3170 /* Implement va_arg. */
3172 ix86_va_arg (tree valist, tree type)
3174 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3175 tree f_gpr, f_fpr, f_ovf, f_sav;
3176 tree gpr, fpr, ovf, sav, t;
3178 rtx lab_false, lab_over = NULL_RTX;
3183 /* Only 64bit target needs something special. */
3186 return std_expand_builtin_va_arg (valist, type);
3189 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3190 f_fpr = TREE_CHAIN (f_gpr);
3191 f_ovf = TREE_CHAIN (f_fpr);
3192 f_sav = TREE_CHAIN (f_ovf);
3194 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3195 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3196 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3197 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3198 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3200 size = int_size_in_bytes (type);
3203 /* Passed by reference. */
3205 type = build_pointer_type (type);
3206 size = int_size_in_bytes (type);
3208 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3210 container = construct_container (TYPE_MODE (type), type, 0,
3211 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3213 * Pull the value out of the saved registers ...
3216 addr_rtx = gen_reg_rtx (Pmode);
3220 rtx int_addr_rtx, sse_addr_rtx;
3221 int needed_intregs, needed_sseregs;
3224 lab_over = gen_label_rtx ();
3225 lab_false = gen_label_rtx ();
3227 examine_argument (TYPE_MODE (type), type, 0,
3228 &needed_intregs, &needed_sseregs);
3231 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3232 || TYPE_ALIGN (type) > 128);
3234 /* In case we are passing structure, verify that it is consecutive block
3235 on the register save area. If not we need to do moves. */
3236 if (!need_temp && !REG_P (container))
3238 /* Verify that all registers are strictly consecutive */
3239 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3243 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3245 rtx slot = XVECEXP (container, 0, i);
3246 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3247 || INTVAL (XEXP (slot, 1)) != i * 16)
3255 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3257 rtx slot = XVECEXP (container, 0, i);
3258 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3259 || INTVAL (XEXP (slot, 1)) != i * 8)
3266 int_addr_rtx = addr_rtx;
3267 sse_addr_rtx = addr_rtx;
3271 int_addr_rtx = gen_reg_rtx (Pmode);
3272 sse_addr_rtx = gen_reg_rtx (Pmode);
3274 /* First ensure that we fit completely in registers. */
3277 emit_cmp_and_jump_insns (expand_expr
3278 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3279 GEN_INT ((REGPARM_MAX - needed_intregs +
3280 1) * 8), GE, const1_rtx, SImode,
3285 emit_cmp_and_jump_insns (expand_expr
3286 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3287 GEN_INT ((SSE_REGPARM_MAX -
3288 needed_sseregs + 1) * 16 +
3289 REGPARM_MAX * 8), GE, const1_rtx,
3290 SImode, 1, lab_false);
3293 /* Compute index to start of area used for integer regs. */
3296 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3297 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3298 if (r != int_addr_rtx)
3299 emit_move_insn (int_addr_rtx, r);
3303 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3304 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3305 if (r != sse_addr_rtx)
3306 emit_move_insn (sse_addr_rtx, r);
3314 /* Never use the memory itself, as it has the alias set. */
3315 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3316 mem = gen_rtx_MEM (BLKmode, x);
3317 force_operand (x, addr_rtx);
3318 set_mem_alias_set (mem, get_varargs_alias_set ());
3319 set_mem_align (mem, BITS_PER_UNIT);
3321 for (i = 0; i < XVECLEN (container, 0); i++)
3323 rtx slot = XVECEXP (container, 0, i);
3324 rtx reg = XEXP (slot, 0);
3325 enum machine_mode mode = GET_MODE (reg);
3331 if (SSE_REGNO_P (REGNO (reg)))
3333 src_addr = sse_addr_rtx;
3334 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3338 src_addr = int_addr_rtx;
3339 src_offset = REGNO (reg) * 8;
3341 src_mem = gen_rtx_MEM (mode, src_addr);
3342 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3343 src_mem = adjust_address (src_mem, mode, src_offset);
3344 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3345 emit_move_insn (dest_mem, src_mem);
3352 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3353 build_int_2 (needed_intregs * 8, 0));
3354 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3355 TREE_SIDE_EFFECTS (t) = 1;
3356 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3361 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3362 build_int_2 (needed_sseregs * 16, 0));
3363 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3364 TREE_SIDE_EFFECTS (t) = 1;
3365 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3368 emit_jump_insn (gen_jump (lab_over));
3370 emit_label (lab_false);
3373 /* ... otherwise out of the overflow area. */
3375 /* Care for on-stack alignment if needed. */
3376 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3380 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3381 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3382 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3386 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3388 emit_move_insn (addr_rtx, r);
3391 build (PLUS_EXPR, TREE_TYPE (t), t,
3392 build_int_2 (rsize * UNITS_PER_WORD, 0));
3393 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3394 TREE_SIDE_EFFECTS (t) = 1;
3395 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3398 emit_label (lab_over);
3402 r = gen_rtx_MEM (Pmode, addr_rtx);
3403 set_mem_alias_set (r, get_varargs_alias_set ());
3404 emit_move_insn (addr_rtx, r);
3410 /* Return nonzero if OP is either a i387 or SSE fp register. */
3412 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3414 return ANY_FP_REG_P (op);
3417 /* Return nonzero if OP is an i387 fp register. */
3419 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3421 return FP_REG_P (op);
3424 /* Return nonzero if OP is a non-fp register_operand. */
3426 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3428 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3431 /* Return nonzero if OP is a register operand other than an
3432 i387 fp register. */
3434 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3436 return register_operand (op, mode) && !FP_REG_P (op);
3439 /* Return nonzero if OP is general operand representable on x86_64. */
3442 x86_64_general_operand (rtx op, enum machine_mode mode)
3445 return general_operand (op, mode);
3446 if (nonimmediate_operand (op, mode))
3448 return x86_64_sign_extended_value (op);
3451 /* Return nonzero if OP is general operand representable on x86_64
3452 as either sign extended or zero extended constant. */
3455 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3458 return general_operand (op, mode);
3459 if (nonimmediate_operand (op, mode))
3461 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3464 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3467 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3470 return nonmemory_operand (op, mode);
3471 if (register_operand (op, mode))
3473 return x86_64_sign_extended_value (op);
3476 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3479 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3481 if (!TARGET_64BIT || !flag_pic)
3482 return nonmemory_operand (op, mode);
3483 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3485 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3490 /* Return nonzero if OPNUM's MEM should be matched
3491 in movabs* patterns. */
3494 ix86_check_movabs (rtx insn, int opnum)
3498 set = PATTERN (insn);
3499 if (GET_CODE (set) == PARALLEL)
3500 set = XVECEXP (set, 0, 0);
3501 if (GET_CODE (set) != SET)
3503 mem = XEXP (set, opnum);
3504 while (GET_CODE (mem) == SUBREG)
3505 mem = SUBREG_REG (mem);
3506 if (GET_CODE (mem) != MEM)
3508 return (volatile_ok || !MEM_VOLATILE_P (mem));
3511 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3514 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3517 return nonmemory_operand (op, mode);
3518 if (register_operand (op, mode))
3520 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3523 /* Return nonzero if OP is immediate operand representable on x86_64. */
3526 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3529 return immediate_operand (op, mode);
3530 return x86_64_sign_extended_value (op);
3533 /* Return nonzero if OP is immediate operand representable on x86_64. */
3536 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3538 return x86_64_zero_extended_value (op);
3541 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3542 for shift & compare patterns, as shifting by 0 does not change flags),
3543 else return zero. */
3546 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3548 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3551 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3552 reference and a constant. */
3555 symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3557 switch (GET_CODE (op))
3565 if (GET_CODE (op) == SYMBOL_REF
3566 || GET_CODE (op) == LABEL_REF
3567 || (GET_CODE (op) == UNSPEC
3568 && (XINT (op, 1) == UNSPEC_GOT
3569 || XINT (op, 1) == UNSPEC_GOTOFF
3570 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3572 if (GET_CODE (op) != PLUS
3573 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3577 if (GET_CODE (op) == SYMBOL_REF
3578 || GET_CODE (op) == LABEL_REF)
3580 /* Only @GOTOFF gets offsets. */
3581 if (GET_CODE (op) != UNSPEC
3582 || XINT (op, 1) != UNSPEC_GOTOFF)
3585 op = XVECEXP (op, 0, 0);
3586 if (GET_CODE (op) == SYMBOL_REF
3587 || GET_CODE (op) == LABEL_REF)
3596 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3599 pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3601 if (GET_CODE (op) != CONST)
3606 if (GET_CODE (op) == UNSPEC
3607 && XINT (op, 1) == UNSPEC_GOTPCREL)
3609 if (GET_CODE (op) == PLUS
3610 && GET_CODE (XEXP (op, 0)) == UNSPEC
3611 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3616 if (GET_CODE (op) == UNSPEC)
3618 if (GET_CODE (op) != PLUS
3619 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3622 if (GET_CODE (op) == UNSPEC)
3628 /* Return true if OP is a symbolic operand that resolves locally. */
3631 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3633 if (GET_CODE (op) == CONST
3634 && GET_CODE (XEXP (op, 0)) == PLUS
3635 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3636 op = XEXP (XEXP (op, 0), 0);
3638 if (GET_CODE (op) == LABEL_REF)
3641 if (GET_CODE (op) != SYMBOL_REF)
3644 if (SYMBOL_REF_LOCAL_P (op))
3647 /* There is, however, a not insubstantial body of code in the rest of
3648 the compiler that assumes it can just stick the results of
3649 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3650 /* ??? This is a hack. Should update the body of the compiler to
3651 always create a DECL an invoke targetm.encode_section_info. */
3652 if (strncmp (XSTR (op, 0), internal_label_prefix,
3653 internal_label_prefix_len) == 0)
3659 /* Test for various thread-local symbols. */
3662 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3664 if (GET_CODE (op) != SYMBOL_REF)
3666 return SYMBOL_REF_TLS_MODEL (op);
3670 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3672 if (GET_CODE (op) != SYMBOL_REF)
3674 return SYMBOL_REF_TLS_MODEL (op) == kind;
3678 global_dynamic_symbolic_operand (rtx op,
3679 enum machine_mode mode ATTRIBUTE_UNUSED)
3681 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3685 local_dynamic_symbolic_operand (rtx op,
3686 enum machine_mode mode ATTRIBUTE_UNUSED)
3688 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3692 initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3694 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3698 local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3700 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3703 /* Test for a valid operand for a call instruction. Don't allow the
3704 arg pointer register or virtual regs since they may decay into
3705 reg + const, which the patterns can't handle. */
3708 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3710 /* Disallow indirect through a virtual register. This leads to
3711 compiler aborts when trying to eliminate them. */
3712 if (GET_CODE (op) == REG
3713 && (op == arg_pointer_rtx
3714 || op == frame_pointer_rtx
3715 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3716 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3719 /* Disallow `call 1234'. Due to varying assembler lameness this
3720 gets either rejected or translated to `call .+1234'. */
3721 if (GET_CODE (op) == CONST_INT)
3724 /* Explicitly allow SYMBOL_REF even if pic. */
3725 if (GET_CODE (op) == SYMBOL_REF)
3728 /* Otherwise we can allow any general_operand in the address. */
3729 return general_operand (op, Pmode);
3732 /* Test for a valid operand for a call instruction. Don't allow the
3733 arg pointer register or virtual regs since they may decay into
3734 reg + const, which the patterns can't handle. */
3737 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3739 /* Disallow indirect through a virtual register. This leads to
3740 compiler aborts when trying to eliminate them. */
3741 if (GET_CODE (op) == REG
3742 && (op == arg_pointer_rtx
3743 || op == frame_pointer_rtx
3744 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3745 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3748 /* Explicitly allow SYMBOL_REF even if pic. */
3749 if (GET_CODE (op) == SYMBOL_REF)
3752 /* Otherwise we can only allow register operands. */
3753 return register_operand (op, Pmode);
3757 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3759 if (GET_CODE (op) == CONST
3760 && GET_CODE (XEXP (op, 0)) == PLUS
3761 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3762 op = XEXP (XEXP (op, 0), 0);
3763 return GET_CODE (op) == SYMBOL_REF;
3766 /* Match exactly zero and one. */
3769 const0_operand (rtx op, enum machine_mode mode)
3771 return op == CONST0_RTX (mode);
3775 const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3777 return op == const1_rtx;
3780 /* Match 2, 4, or 8. Used for leal multiplicands. */
3783 const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3785 return (GET_CODE (op) == CONST_INT
3786 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3790 const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3792 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3796 const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3798 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3802 const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3804 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3808 const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3810 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3814 /* True if this is a constant appropriate for an increment or decrement. */
3817 incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3819 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3820 registers, since carry flag is not set. */
3821 if ((TARGET_PENTIUM4 || TARGET_NOCONA) && !optimize_size)
3823 return op == const1_rtx || op == constm1_rtx;
3826 /* Return nonzero if OP is acceptable as operand of DImode shift
3830 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3833 return nonimmediate_operand (op, mode);
3835 return register_operand (op, mode);
3838 /* Return false if this is the stack pointer, or any other fake
3839 register eliminable to the stack pointer. Otherwise, this is
3842 This is used to prevent esp from being used as an index reg.
3843 Which would only happen in pathological cases. */
3846 reg_no_sp_operand (rtx op, enum machine_mode mode)
3849 if (GET_CODE (t) == SUBREG)
3851 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3854 return register_operand (op, mode);
3858 mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3860 return MMX_REG_P (op);
3863 /* Return false if this is any eliminable register. Otherwise
3867 general_no_elim_operand (rtx op, enum machine_mode mode)
3870 if (GET_CODE (t) == SUBREG)
3872 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3873 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3874 || t == virtual_stack_dynamic_rtx)
3877 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3878 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3881 return general_operand (op, mode);
3884 /* Return false if this is any eliminable register. Otherwise
3885 register_operand or const_int. */
3888 nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
3891 if (GET_CODE (t) == SUBREG)
3893 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3894 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3895 || t == virtual_stack_dynamic_rtx)
3898 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3901 /* Return false if this is any eliminable register or stack register,
3902 otherwise work like register_operand. */
3905 index_register_operand (rtx op, enum machine_mode mode)
3908 if (GET_CODE (t) == SUBREG)
3912 if (t == arg_pointer_rtx
3913 || t == frame_pointer_rtx
3914 || t == virtual_incoming_args_rtx
3915 || t == virtual_stack_vars_rtx
3916 || t == virtual_stack_dynamic_rtx
3917 || REGNO (t) == STACK_POINTER_REGNUM)
3920 return general_operand (op, mode);
3923 /* Return true if op is a Q_REGS class register. */
3926 q_regs_operand (rtx op, enum machine_mode mode)
3928 if (mode != VOIDmode && GET_MODE (op) != mode)
3930 if (GET_CODE (op) == SUBREG)
3931 op = SUBREG_REG (op);
3932 return ANY_QI_REG_P (op);
3935 /* Return true if op is an flags register. */
3938 flags_reg_operand (rtx op, enum machine_mode mode)
3940 if (mode != VOIDmode && GET_MODE (op) != mode)
3942 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3945 /* Return true if op is a NON_Q_REGS class register. */
3948 non_q_regs_operand (rtx op, enum machine_mode mode)
3950 if (mode != VOIDmode && GET_MODE (op) != mode)
3952 if (GET_CODE (op) == SUBREG)
3953 op = SUBREG_REG (op);
3954 return NON_QI_REG_P (op);
3958 zero_extended_scalar_load_operand (rtx op,
3959 enum machine_mode mode ATTRIBUTE_UNUSED)
3962 if (GET_CODE (op) != MEM)
3964 op = maybe_get_pool_constant (op);
3967 if (GET_CODE (op) != CONST_VECTOR)
3970 (GET_MODE_SIZE (GET_MODE (op)) /
3971 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3972 for (n_elts--; n_elts > 0; n_elts--)
3974 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3975 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3981 /* Return 1 when OP is operand acceptable for standard SSE move. */
3983 vector_move_operand (rtx op, enum machine_mode mode)
3985 if (nonimmediate_operand (op, mode))
3987 if (GET_MODE (op) != mode && mode != VOIDmode)
3989 return (op == CONST0_RTX (GET_MODE (op)));
3992 /* Return true if op if a valid address, and does not contain
3993 a segment override. */
3996 no_seg_address_operand (rtx op, enum machine_mode mode)
3998 struct ix86_address parts;
4000 if (! address_operand (op, mode))
4003 if (! ix86_decompose_address (op, &parts))
4006 return parts.seg == SEG_DEFAULT;
4009 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
4012 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4014 enum rtx_code code = GET_CODE (op);
4017 /* Operations supported directly. */
4027 /* These are equivalent to ones above in non-IEEE comparisons. */
4034 return !TARGET_IEEE_FP;
4039 /* Return 1 if OP is a valid comparison operator in valid mode. */
4041 ix86_comparison_operator (rtx op, enum machine_mode mode)
4043 enum machine_mode inmode;
4044 enum rtx_code code = GET_CODE (op);
4045 if (mode != VOIDmode && GET_MODE (op) != mode)
4047 if (!COMPARISON_P (op))
4049 inmode = GET_MODE (XEXP (op, 0));
4051 if (inmode == CCFPmode || inmode == CCFPUmode)
4053 enum rtx_code second_code, bypass_code;
4054 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4055 return (bypass_code == NIL && second_code == NIL);
4062 if (inmode == CCmode || inmode == CCGCmode
4063 || inmode == CCGOCmode || inmode == CCNOmode)
4066 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
4067 if (inmode == CCmode)
4071 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
4079 /* Return 1 if OP is a valid comparison operator testing carry flag
4082 ix86_carry_flag_operator (rtx op, enum machine_mode mode)
4084 enum machine_mode inmode;
4085 enum rtx_code code = GET_CODE (op);
4087 if (mode != VOIDmode && GET_MODE (op) != mode)
4089 if (!COMPARISON_P (op))
4091 inmode = GET_MODE (XEXP (op, 0));
4092 if (GET_CODE (XEXP (op, 0)) != REG
4093 || REGNO (XEXP (op, 0)) != 17
4094 || XEXP (op, 1) != const0_rtx)
4097 if (inmode == CCFPmode || inmode == CCFPUmode)
4099 enum rtx_code second_code, bypass_code;
4101 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4102 if (bypass_code != NIL || second_code != NIL)
4104 code = ix86_fp_compare_code_to_integer (code);
4106 else if (inmode != CCmode)
4111 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4114 fcmov_comparison_operator (rtx op, enum machine_mode mode)
4116 enum machine_mode inmode;
4117 enum rtx_code code = GET_CODE (op);
4119 if (mode != VOIDmode && GET_MODE (op) != mode)
4121 if (!COMPARISON_P (op))
4123 inmode = GET_MODE (XEXP (op, 0));
4124 if (inmode == CCFPmode || inmode == CCFPUmode)
4126 enum rtx_code second_code, bypass_code;
4128 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4129 if (bypass_code != NIL || second_code != NIL)
4131 code = ix86_fp_compare_code_to_integer (code);
4133 /* i387 supports just limited amount of conditional codes. */
4136 case LTU: case GTU: case LEU: case GEU:
4137 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4140 case ORDERED: case UNORDERED:
4148 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4151 promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4153 switch (GET_CODE (op))
4156 /* Modern CPUs have same latency for HImode and SImode multiply,
4157 but 386 and 486 do HImode multiply faster. */
4158 return ix86_tune > PROCESSOR_I486;
4170 /* Nearly general operand, but accept any const_double, since we wish
4171 to be able to drop them into memory rather than have them get pulled
4175 cmp_fp_expander_operand (rtx op, enum machine_mode mode)
4177 if (mode != VOIDmode && mode != GET_MODE (op))
4179 if (GET_CODE (op) == CONST_DOUBLE)
4181 return general_operand (op, mode);
4184 /* Match an SI or HImode register for a zero_extract. */
4187 ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4190 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4191 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4194 if (!register_operand (op, VOIDmode))
4197 /* Be careful to accept only registers having upper parts. */
4198 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4199 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4202 /* Return 1 if this is a valid binary floating-point operation.
4203 OP is the expression matched, and MODE is its mode. */
4206 binary_fp_operator (rtx op, enum machine_mode mode)
4208 if (mode != VOIDmode && mode != GET_MODE (op))
4211 switch (GET_CODE (op))
4217 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4225 mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4227 return GET_CODE (op) == MULT;
4231 div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4233 return GET_CODE (op) == DIV;
4237 arith_or_logical_operator (rtx op, enum machine_mode mode)
4239 return ((mode == VOIDmode || GET_MODE (op) == mode)
4240 && ARITHMETIC_P (op));
4243 /* Returns 1 if OP is memory operand with a displacement. */
4246 memory_displacement_operand (rtx op, enum machine_mode mode)
4248 struct ix86_address parts;
4250 if (! memory_operand (op, mode))
4253 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4256 return parts.disp != NULL_RTX;
4259 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4260 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4262 ??? It seems likely that this will only work because cmpsi is an
4263 expander, and no actual insns use this. */
4266 cmpsi_operand (rtx op, enum machine_mode mode)
4268 if (nonimmediate_operand (op, mode))
4271 if (GET_CODE (op) == AND
4272 && GET_MODE (op) == SImode
4273 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4274 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4275 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4276 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4277 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4278 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4284 /* Returns 1 if OP is memory operand that can not be represented by the
4288 long_memory_operand (rtx op, enum machine_mode mode)
4290 if (! memory_operand (op, mode))
4293 return memory_address_length (op) != 0;
4296 /* Return nonzero if the rtx is known aligned. */
4299 aligned_operand (rtx op, enum machine_mode mode)
4301 struct ix86_address parts;
4303 if (!general_operand (op, mode))
4306 /* Registers and immediate operands are always "aligned". */
4307 if (GET_CODE (op) != MEM)
4310 /* Don't even try to do any aligned optimizations with volatiles. */
4311 if (MEM_VOLATILE_P (op))
4316 /* Pushes and pops are only valid on the stack pointer. */
4317 if (GET_CODE (op) == PRE_DEC
4318 || GET_CODE (op) == POST_INC)
4321 /* Decode the address. */
4322 if (! ix86_decompose_address (op, &parts))
4325 /* Look for some component that isn't known to be aligned. */
4329 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4334 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4339 if (GET_CODE (parts.disp) != CONST_INT
4340 || (INTVAL (parts.disp) & 3) != 0)
4344 /* Didn't find one -- this must be an aligned address. */
4348 /* Initialize the table of extra 80387 mathematical constants. */
4351 init_ext_80387_constants (void)
4353 static const char * cst[5] =
4355 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4356 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4357 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4358 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4359 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4363 for (i = 0; i < 5; i++)
4365 real_from_string (&ext_80387_constants_table[i], cst[i]);
4366 /* Ensure each constant is rounded to XFmode precision. */
4367 real_convert (&ext_80387_constants_table[i],
4368 XFmode, &ext_80387_constants_table[i]);
4371 ext_80387_constants_init = 1;
4374 /* Return true if the constant is something that can be loaded with
4375 a special instruction. */
4378 standard_80387_constant_p (rtx x)
4380 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4383 if (x == CONST0_RTX (GET_MODE (x)))
4385 if (x == CONST1_RTX (GET_MODE (x)))
4388 /* For XFmode constants, try to find a special 80387 instruction when
4389 optimizing for size or on those CPUs that benefit from them. */
4390 if (GET_MODE (x) == XFmode
4391 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4396 if (! ext_80387_constants_init)
4397 init_ext_80387_constants ();
4399 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4400 for (i = 0; i < 5; i++)
4401 if (real_identical (&r, &ext_80387_constants_table[i]))
4408 /* Return the opcode of the special instruction to be used to load
4412 standard_80387_constant_opcode (rtx x)
4414 switch (standard_80387_constant_p (x))
4434 /* Return the CONST_DOUBLE representing the 80387 constant that is
4435 loaded by the specified special instruction. The argument IDX
4436 matches the return value from standard_80387_constant_p. */
4439 standard_80387_constant_rtx (int idx)
4443 if (! ext_80387_constants_init)
4444 init_ext_80387_constants ();
4460 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4464 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4467 standard_sse_constant_p (rtx x)
4469 if (x == const0_rtx)
4471 return (x == CONST0_RTX (GET_MODE (x)));
4474 /* Returns 1 if OP contains a symbol reference */
4477 symbolic_reference_mentioned_p (rtx op)
4482 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4485 fmt = GET_RTX_FORMAT (GET_CODE (op));
4486 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4492 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4493 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4497 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4504 /* Return 1 if it is appropriate to emit `ret' instructions in the
4505 body of a function. Do this only if the epilogue is simple, needing a
4506 couple of insns. Prior to reloading, we can't tell how many registers
4507 must be saved, so return 0 then. Return 0 if there is no frame
4508 marker to de-allocate.
4510 If NON_SAVING_SETJMP is defined and true, then it is not possible
4511 for the epilogue to be simple, so return 0. This is a special case
4512 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4513 until final, but jump_optimize may need to know sooner if a
4517 ix86_can_use_return_insn_p (void)
4519 struct ix86_frame frame;
4521 #ifdef NON_SAVING_SETJMP
4522 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4526 if (! reload_completed || frame_pointer_needed)
4529 /* Don't allow more than 32 pop, since that's all we can do
4530 with one instruction. */
4531 if (current_function_pops_args
4532 && current_function_args_size >= 32768)
4535 ix86_compute_frame_layout (&frame);
4536 return frame.to_allocate == 0 && frame.nregs == 0;
4539 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4541 x86_64_sign_extended_value (rtx value)
4543 switch (GET_CODE (value))
4545 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4546 to be at least 32 and this all acceptable constants are
4547 represented as CONST_INT. */
4549 if (HOST_BITS_PER_WIDE_INT == 32)
4553 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4554 return trunc_int_for_mode (val, SImode) == val;
4558 /* For certain code models, the symbolic references are known to fit.
4559 in CM_SMALL_PIC model we know it fits if it is local to the shared
4560 library. Don't count TLS SYMBOL_REFs here, since they should fit
4561 only if inside of UNSPEC handled below. */
4563 /* TLS symbols are not constant. */
4564 if (tls_symbolic_operand (value, Pmode))
4566 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4568 /* For certain code models, the code is near as well. */
4570 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4571 || ix86_cmodel == CM_KERNEL);
4573 /* We also may accept the offsetted memory references in certain special
4576 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4577 switch (XINT (XEXP (value, 0), 1))
4579 case UNSPEC_GOTPCREL:
4581 case UNSPEC_GOTNTPOFF:
4587 if (GET_CODE (XEXP (value, 0)) == PLUS)
4589 rtx op1 = XEXP (XEXP (value, 0), 0);
4590 rtx op2 = XEXP (XEXP (value, 0), 1);
4591 HOST_WIDE_INT offset;
4593 if (ix86_cmodel == CM_LARGE)
4595 if (GET_CODE (op2) != CONST_INT)
4597 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4598 switch (GET_CODE (op1))
4601 /* For CM_SMALL assume that latest object is 16MB before
4602 end of 31bits boundary. We may also accept pretty
4603 large negative constants knowing that all objects are
4604 in the positive half of address space. */
4605 if (ix86_cmodel == CM_SMALL
4606 && offset < 16*1024*1024
4607 && trunc_int_for_mode (offset, SImode) == offset)
4609 /* For CM_KERNEL we know that all object resist in the
4610 negative half of 32bits address space. We may not
4611 accept negative offsets, since they may be just off
4612 and we may accept pretty large positive ones. */
4613 if (ix86_cmodel == CM_KERNEL
4615 && trunc_int_for_mode (offset, SImode) == offset)
4619 /* These conditions are similar to SYMBOL_REF ones, just the
4620 constraints for code models differ. */
4621 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4622 && offset < 16*1024*1024
4623 && trunc_int_for_mode (offset, SImode) == offset)
4625 if (ix86_cmodel == CM_KERNEL
4627 && trunc_int_for_mode (offset, SImode) == offset)
4631 switch (XINT (op1, 1))
4636 && trunc_int_for_mode (offset, SImode) == offset)
4650 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4652 x86_64_zero_extended_value (rtx value)
4654 switch (GET_CODE (value))
4657 if (HOST_BITS_PER_WIDE_INT == 32)
4658 return (GET_MODE (value) == VOIDmode
4659 && !CONST_DOUBLE_HIGH (value));
4663 if (HOST_BITS_PER_WIDE_INT == 32)
4664 return INTVAL (value) >= 0;
4666 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4669 /* For certain code models, the symbolic references are known to fit. */
4671 /* TLS symbols are not constant. */
4672 if (tls_symbolic_operand (value, Pmode))
4674 return ix86_cmodel == CM_SMALL;
4676 /* For certain code models, the code is near as well. */
4678 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4680 /* We also may accept the offsetted memory references in certain special
4683 if (GET_CODE (XEXP (value, 0)) == PLUS)
4685 rtx op1 = XEXP (XEXP (value, 0), 0);
4686 rtx op2 = XEXP (XEXP (value, 0), 1);
4688 if (ix86_cmodel == CM_LARGE)
4690 switch (GET_CODE (op1))
4694 /* For small code model we may accept pretty large positive
4695 offsets, since one bit is available for free. Negative
4696 offsets are limited by the size of NULL pointer area
4697 specified by the ABI. */
4698 if (ix86_cmodel == CM_SMALL
4699 && GET_CODE (op2) == CONST_INT
4700 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4701 && (trunc_int_for_mode (INTVAL (op2), SImode)
4704 /* ??? For the kernel, we may accept adjustment of
4705 -0x10000000, since we know that it will just convert
4706 negative address space to positive, but perhaps this
4707 is not worthwhile. */
4710 /* These conditions are similar to SYMBOL_REF ones, just the
4711 constraints for code models differ. */
4712 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4713 && GET_CODE (op2) == CONST_INT
4714 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4715 && (trunc_int_for_mode (INTVAL (op2), SImode)
4729 /* Value should be nonzero if functions must have frame pointers.
4730 Zero means the frame pointer need not be set up (and parms may
4731 be accessed via the stack pointer) in functions that seem suitable. */
4734 ix86_frame_pointer_required (void)
4736 /* If we accessed previous frames, then the generated code expects
4737 to be able to access the saved ebp value in our frame. */
4738 if (cfun->machine->accesses_prev_frame)
4741 /* Several x86 os'es need a frame pointer for other reasons,
4742 usually pertaining to setjmp. */
4743 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4746 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4747 the frame pointer by default. Turn it back on now if we've not
4748 got a leaf function. */
4749 if (TARGET_OMIT_LEAF_FRAME_POINTER
4750 && (!current_function_is_leaf))
4753 if (current_function_profile)
4759 /* Record that the current function accesses previous call frames. */
4762 ix86_setup_frame_addresses (void)
4764 cfun->machine->accesses_prev_frame = 1;
4767 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4768 # define USE_HIDDEN_LINKONCE 1
4770 # define USE_HIDDEN_LINKONCE 0
4773 static int pic_labels_used;
4775 /* Fills in the label name that should be used for a pc thunk for
4776 the given register. */
4779 get_pc_thunk_name (char name[32], unsigned int regno)
4781 if (USE_HIDDEN_LINKONCE)
4782 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4784 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4788 /* This function generates code for -fpic that loads %ebx with
4789 the return address of the caller and then returns. */
4792 ix86_file_end (void)
4797 for (regno = 0; regno < 8; ++regno)
4801 if (! ((pic_labels_used >> regno) & 1))
4804 get_pc_thunk_name (name, regno);
4806 if (USE_HIDDEN_LINKONCE)
4810 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4812 TREE_PUBLIC (decl) = 1;
4813 TREE_STATIC (decl) = 1;
4814 DECL_ONE_ONLY (decl) = 1;
4816 (*targetm.asm_out.unique_section) (decl, 0);
4817 named_section (decl, NULL, 0);
4819 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4820 fputs ("\t.hidden\t", asm_out_file);
4821 assemble_name (asm_out_file, name);
4822 fputc ('\n', asm_out_file);
4823 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4828 ASM_OUTPUT_LABEL (asm_out_file, name);
4831 xops[0] = gen_rtx_REG (SImode, regno);
4832 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4833 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4834 output_asm_insn ("ret", xops);
4837 if (NEED_INDICATE_EXEC_STACK)
4838 file_end_indicate_exec_stack ();
4841 /* Emit code for the SET_GOT patterns. */
4844 output_set_got (rtx dest)
4849 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4851 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4853 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4856 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4858 output_asm_insn ("call\t%a2", xops);
4861 /* Output the "canonical" label name ("Lxx$pb") here too. This
4862 is what will be referred to by the Mach-O PIC subsystem. */
4863 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4865 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4866 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4869 output_asm_insn ("pop{l}\t%0", xops);
4874 get_pc_thunk_name (name, REGNO (dest));
4875 pic_labels_used |= 1 << REGNO (dest);
4877 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4878 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4879 output_asm_insn ("call\t%X2", xops);
4882 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4883 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4884 else if (!TARGET_MACHO)
4885 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4890 /* Generate an "push" pattern for input ARG. */
4895 return gen_rtx_SET (VOIDmode,
4897 gen_rtx_PRE_DEC (Pmode,
4898 stack_pointer_rtx)),
4902 /* Return >= 0 if there is an unused call-clobbered register available
4903 for the entire function. */
4906 ix86_select_alt_pic_regnum (void)
4908 if (current_function_is_leaf && !current_function_profile)
4911 for (i = 2; i >= 0; --i)
4912 if (!regs_ever_live[i])
4916 return INVALID_REGNUM;
4919 /* Return 1 if we need to save REGNO. */
4921 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4923 if (pic_offset_table_rtx
4924 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4925 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4926 || current_function_profile
4927 || current_function_calls_eh_return
4928 || current_function_uses_const_pool))
4930 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4935 if (current_function_calls_eh_return && maybe_eh_return)
4940 unsigned test = EH_RETURN_DATA_REGNO (i);
4941 if (test == INVALID_REGNUM)
4948 return (regs_ever_live[regno]
4949 && !call_used_regs[regno]
4950 && !fixed_regs[regno]
4951 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4954 /* Return number of registers to be saved on the stack. */
4957 ix86_nsaved_regs (void)
4962 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4963 if (ix86_save_reg (regno, true))
4968 /* Return the offset between two registers, one to be eliminated, and the other
4969 its replacement, at the start of a routine. */
4972 ix86_initial_elimination_offset (int from, int to)
4974 struct ix86_frame frame;
4975 ix86_compute_frame_layout (&frame);
4977 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4978 return frame.hard_frame_pointer_offset;
4979 else if (from == FRAME_POINTER_REGNUM
4980 && to == HARD_FRAME_POINTER_REGNUM)
4981 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4984 if (to != STACK_POINTER_REGNUM)
4986 else if (from == ARG_POINTER_REGNUM)
4987 return frame.stack_pointer_offset;
4988 else if (from != FRAME_POINTER_REGNUM)
4991 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4995 /* Fill structure ix86_frame about frame of currently computed function. */
4998 ix86_compute_frame_layout (struct ix86_frame *frame)
5000 HOST_WIDE_INT total_size;
5001 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5002 HOST_WIDE_INT offset;
5003 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5004 HOST_WIDE_INT size = get_frame_size ();
5006 frame->nregs = ix86_nsaved_regs ();
5009 /* During reload iteration the amount of registers saved can change.
5010 Recompute the value as needed. Do not recompute when amount of registers
5011 didn't change as reload does mutiple calls to the function and does not
5012 expect the decision to change within single iteration. */
5014 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5016 int count = frame->nregs;
5018 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5019 /* The fast prologue uses move instead of push to save registers. This
5020 is significantly longer, but also executes faster as modern hardware
5021 can execute the moves in parallel, but can't do that for push/pop.
5023 Be careful about choosing what prologue to emit: When function takes
5024 many instructions to execute we may use slow version as well as in
5025 case function is known to be outside hot spot (this is known with
5026 feedback only). Weight the size of function by number of registers
5027 to save as it is cheap to use one or two push instructions but very
5028 slow to use many of them. */
5030 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5031 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5032 || (flag_branch_probabilities
5033 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5034 cfun->machine->use_fast_prologue_epilogue = false;
5036 cfun->machine->use_fast_prologue_epilogue
5037 = !expensive_function_p (count);
5039 if (TARGET_PROLOGUE_USING_MOVE
5040 && cfun->machine->use_fast_prologue_epilogue)
5041 frame->save_regs_using_mov = true;
5043 frame->save_regs_using_mov = false;
5046 /* Skip return address and saved base pointer. */
5047 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5049 frame->hard_frame_pointer_offset = offset;
5051 /* Do some sanity checking of stack_alignment_needed and
5052 preferred_alignment, since i386 port is the only using those features
5053 that may break easily. */
5055 if (size && !stack_alignment_needed)
5057 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
5059 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5061 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5064 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5065 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5067 /* Register save area */
5068 offset += frame->nregs * UNITS_PER_WORD;
5071 if (ix86_save_varrargs_registers)
5073 offset += X86_64_VARARGS_SIZE;
5074 frame->va_arg_size = X86_64_VARARGS_SIZE;
5077 frame->va_arg_size = 0;
5079 /* Align start of frame for local function. */
5080 frame->padding1 = ((offset + stack_alignment_needed - 1)
5081 & -stack_alignment_needed) - offset;
5083 offset += frame->padding1;
5085 /* Frame pointer points here. */
5086 frame->frame_pointer_offset = offset;
5090 /* Add outgoing arguments area. Can be skipped if we eliminated
5091 all the function calls as dead code.
5092 Skipping is however impossible when function calls alloca. Alloca
5093 expander assumes that last current_function_outgoing_args_size
5094 of stack frame are unused. */
5095 if (ACCUMULATE_OUTGOING_ARGS
5096 && (!current_function_is_leaf || current_function_calls_alloca))
5098 offset += current_function_outgoing_args_size;
5099 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5102 frame->outgoing_arguments_size = 0;
5104 /* Align stack boundary. Only needed if we're calling another function
5106 if (!current_function_is_leaf || current_function_calls_alloca)
5107 frame->padding2 = ((offset + preferred_alignment - 1)
5108 & -preferred_alignment) - offset;
5110 frame->padding2 = 0;
5112 offset += frame->padding2;
5114 /* We've reached end of stack frame. */
5115 frame->stack_pointer_offset = offset;
5117 /* Size prologue needs to allocate. */
5118 frame->to_allocate =
5119 (size + frame->padding1 + frame->padding2
5120 + frame->outgoing_arguments_size + frame->va_arg_size);
5122 if ((!frame->to_allocate && frame->nregs <= 1)
5123 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5124 frame->save_regs_using_mov = false;
5126 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5127 && current_function_is_leaf)
5129 frame->red_zone_size = frame->to_allocate;
5130 if (frame->save_regs_using_mov)
5131 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5132 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5133 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5136 frame->red_zone_size = 0;
5137 frame->to_allocate -= frame->red_zone_size;
5138 frame->stack_pointer_offset -= frame->red_zone_size;
5140 fprintf (stderr, "nregs: %i\n", frame->nregs);
5141 fprintf (stderr, "size: %i\n", size);
5142 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5143 fprintf (stderr, "padding1: %i\n", frame->padding1);
5144 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5145 fprintf (stderr, "padding2: %i\n", frame->padding2);
5146 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5147 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5148 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5149 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5150 frame->hard_frame_pointer_offset);
5151 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5155 /* Emit code to save registers in the prologue. */
5158 ix86_emit_save_regs (void)
5163 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5164 if (ix86_save_reg (regno, true))
5166 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5167 RTX_FRAME_RELATED_P (insn) = 1;
5171 /* Emit code to save registers using MOV insns. First register
5172 is restored from POINTER + OFFSET. */
5174 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5179 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5180 if (ix86_save_reg (regno, true))
5182 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5184 gen_rtx_REG (Pmode, regno));
5185 RTX_FRAME_RELATED_P (insn) = 1;
5186 offset += UNITS_PER_WORD;
5190 /* Expand prologue or epilogue stack adjustment.
5191 The pattern exist to put a dependency on all ebp-based memory accesses.
5192 STYLE should be negative if instructions should be marked as frame related,
5193 zero if %r11 register is live and cannot be freely used and positive
5197 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5202 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5203 else if (x86_64_immediate_operand (offset, DImode))
5204 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5208 /* r11 is used by indirect sibcall return as well, set before the
5209 epilogue and used after the epilogue. ATM indirect sibcall
5210 shouldn't be used together with huge frame sizes in one
5211 function because of the frame_size check in sibcall.c. */
5214 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5215 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5217 RTX_FRAME_RELATED_P (insn) = 1;
5218 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5222 RTX_FRAME_RELATED_P (insn) = 1;
5225 /* Expand the prologue into a bunch of separate insns. */
5228 ix86_expand_prologue (void)
5232 struct ix86_frame frame;
5233 HOST_WIDE_INT allocate;
5235 ix86_compute_frame_layout (&frame);
5237 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5238 slower on all targets. Also sdb doesn't like it. */
5240 if (frame_pointer_needed)
5242 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5243 RTX_FRAME_RELATED_P (insn) = 1;
5245 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5246 RTX_FRAME_RELATED_P (insn) = 1;
5249 allocate = frame.to_allocate;
5251 if (!frame.save_regs_using_mov)
5252 ix86_emit_save_regs ();
5254 allocate += frame.nregs * UNITS_PER_WORD;
5256 /* When using red zone we may start register saving before allocating
5257 the stack frame saving one cycle of the prologue. */
5258 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5259 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5260 : stack_pointer_rtx,
5261 -frame.nregs * UNITS_PER_WORD);
5265 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5266 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5267 GEN_INT (-allocate), -1);
5270 /* Only valid for Win32. */
5271 rtx eax = gen_rtx_REG (SImode, 0);
5272 bool eax_live = ix86_eax_live_at_start_p ();
5279 emit_insn (gen_push (eax));
5283 insn = emit_move_insn (eax, GEN_INT (allocate));
5284 RTX_FRAME_RELATED_P (insn) = 1;
5286 insn = emit_insn (gen_allocate_stack_worker (eax));
5287 RTX_FRAME_RELATED_P (insn) = 1;
5291 rtx t = plus_constant (stack_pointer_rtx, allocate);
5292 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5296 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5298 if (!frame_pointer_needed || !frame.to_allocate)
5299 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5301 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5302 -frame.nregs * UNITS_PER_WORD);
5305 pic_reg_used = false;
5306 if (pic_offset_table_rtx
5307 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5308 || current_function_profile))
5310 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5312 if (alt_pic_reg_used != INVALID_REGNUM)
5313 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5315 pic_reg_used = true;
5320 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5322 /* Even with accurate pre-reload life analysis, we can wind up
5323 deleting all references to the pic register after reload.
5324 Consider if cross-jumping unifies two sides of a branch
5325 controlled by a comparison vs the only read from a global.
5326 In which case, allow the set_got to be deleted, though we're
5327 too late to do anything about the ebx save in the prologue. */
5328 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5331 /* Prevent function calls from be scheduled before the call to mcount.
5332 In the pic_reg_used case, make sure that the got load isn't deleted. */
5333 if (current_function_profile)
5334 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5337 /* Emit code to restore saved registers using MOV insns. First register
5338 is restored from POINTER + OFFSET. */
5340 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5341 int maybe_eh_return)
5344 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5346 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5347 if (ix86_save_reg (regno, maybe_eh_return))
5349 /* Ensure that adjust_address won't be forced to produce pointer
5350 out of range allowed by x86-64 instruction set. */
5351 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5355 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5356 emit_move_insn (r11, GEN_INT (offset));
5357 emit_insn (gen_adddi3 (r11, r11, pointer));
5358 base_address = gen_rtx_MEM (Pmode, r11);
5361 emit_move_insn (gen_rtx_REG (Pmode, regno),
5362 adjust_address (base_address, Pmode, offset));
5363 offset += UNITS_PER_WORD;
5367 /* Restore function stack, frame, and registers. */
5370 ix86_expand_epilogue (int style)
5373 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5374 struct ix86_frame frame;
5375 HOST_WIDE_INT offset;
5377 ix86_compute_frame_layout (&frame);
5379 /* Calculate start of saved registers relative to ebp. Special care
5380 must be taken for the normal return case of a function using
5381 eh_return: the eax and edx registers are marked as saved, but not
5382 restored along this path. */
5383 offset = frame.nregs;
5384 if (current_function_calls_eh_return && style != 2)
5386 offset *= -UNITS_PER_WORD;
5388 /* If we're only restoring one register and sp is not valid then
5389 using a move instruction to restore the register since it's
5390 less work than reloading sp and popping the register.
5392 The default code result in stack adjustment using add/lea instruction,
5393 while this code results in LEAVE instruction (or discrete equivalent),
5394 so it is profitable in some other cases as well. Especially when there
5395 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5396 and there is exactly one register to pop. This heuristic may need some
5397 tuning in future. */
5398 if ((!sp_valid && frame.nregs <= 1)
5399 || (TARGET_EPILOGUE_USING_MOVE
5400 && cfun->machine->use_fast_prologue_epilogue
5401 && (frame.nregs > 1 || frame.to_allocate))
5402 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5403 || (frame_pointer_needed && TARGET_USE_LEAVE
5404 && cfun->machine->use_fast_prologue_epilogue
5405 && frame.nregs == 1)
5406 || current_function_calls_eh_return)
5408 /* Restore registers. We can use ebp or esp to address the memory
5409 locations. If both are available, default to ebp, since offsets
5410 are known to be small. Only exception is esp pointing directly to the
5411 end of block of saved registers, where we may simplify addressing
5414 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5415 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5416 frame.to_allocate, style == 2);
5418 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5419 offset, style == 2);
5421 /* eh_return epilogues need %ecx added to the stack pointer. */
5424 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5426 if (frame_pointer_needed)
5428 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5429 tmp = plus_constant (tmp, UNITS_PER_WORD);
5430 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5432 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5433 emit_move_insn (hard_frame_pointer_rtx, tmp);
5435 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5440 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5441 tmp = plus_constant (tmp, (frame.to_allocate
5442 + frame.nregs * UNITS_PER_WORD));
5443 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5446 else if (!frame_pointer_needed)
5447 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5448 GEN_INT (frame.to_allocate
5449 + frame.nregs * UNITS_PER_WORD),
5451 /* If not an i386, mov & pop is faster than "leave". */
5452 else if (TARGET_USE_LEAVE || optimize_size
5453 || !cfun->machine->use_fast_prologue_epilogue)
5454 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5457 pro_epilogue_adjust_stack (stack_pointer_rtx,
5458 hard_frame_pointer_rtx,
5461 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5463 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5468 /* First step is to deallocate the stack frame so that we can
5469 pop the registers. */
5472 if (!frame_pointer_needed)
5474 pro_epilogue_adjust_stack (stack_pointer_rtx,
5475 hard_frame_pointer_rtx,
5476 GEN_INT (offset), style);
5478 else if (frame.to_allocate)
5479 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5480 GEN_INT (frame.to_allocate), style);
5482 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5483 if (ix86_save_reg (regno, false))
5486 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5488 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5490 if (frame_pointer_needed)
5492 /* Leave results in shorter dependency chains on CPUs that are
5493 able to grok it fast. */
5494 if (TARGET_USE_LEAVE)
5495 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5496 else if (TARGET_64BIT)
5497 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5499 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5503 /* Sibcall epilogues don't want a return instruction. */
5507 if (current_function_pops_args && current_function_args_size)
5509 rtx popc = GEN_INT (current_function_pops_args);
5511 /* i386 can only pop 64K bytes. If asked to pop more, pop
5512 return address, do explicit add, and jump indirectly to the
5515 if (current_function_pops_args >= 65536)
5517 rtx ecx = gen_rtx_REG (SImode, 2);
5519 /* There is no "pascal" calling convention in 64bit ABI. */
5523 emit_insn (gen_popsi1 (ecx));
5524 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5525 emit_jump_insn (gen_return_indirect_internal (ecx));
5528 emit_jump_insn (gen_return_pop_internal (popc));
5531 emit_jump_insn (gen_return_internal ());
5534 /* Reset from the function's potential modifications. */
5537 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5538 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5540 if (pic_offset_table_rtx)
5541 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5544 /* Extract the parts of an RTL expression that is a valid memory address
5545 for an instruction. Return 0 if the structure of the address is
5546 grossly off. Return -1 if the address contains ASHIFT, so it is not
5547 strictly valid, but still used for computing length of lea instruction. */
5550 ix86_decompose_address (rtx addr, struct ix86_address *out)
5552 rtx base = NULL_RTX;
5553 rtx index = NULL_RTX;
5554 rtx disp = NULL_RTX;
5555 HOST_WIDE_INT scale = 1;
5556 rtx scale_rtx = NULL_RTX;
5558 enum ix86_address_seg seg = SEG_DEFAULT;
5560 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5562 else if (GET_CODE (addr) == PLUS)
5572 addends[n++] = XEXP (op, 1);
5575 while (GET_CODE (op) == PLUS);
5580 for (i = n; i >= 0; --i)
5583 switch (GET_CODE (op))
5588 index = XEXP (op, 0);
5589 scale_rtx = XEXP (op, 1);
5593 if (XINT (op, 1) == UNSPEC_TP
5594 && TARGET_TLS_DIRECT_SEG_REFS
5595 && seg == SEG_DEFAULT)
5596 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5625 else if (GET_CODE (addr) == MULT)
5627 index = XEXP (addr, 0); /* index*scale */
5628 scale_rtx = XEXP (addr, 1);
5630 else if (GET_CODE (addr) == ASHIFT)
5634 /* We're called for lea too, which implements ashift on occasion. */
5635 index = XEXP (addr, 0);
5636 tmp = XEXP (addr, 1);
5637 if (GET_CODE (tmp) != CONST_INT)
5639 scale = INTVAL (tmp);
5640 if ((unsigned HOST_WIDE_INT) scale > 3)
5646 disp = addr; /* displacement */
5648 /* Extract the integral value of scale. */
5651 if (GET_CODE (scale_rtx) != CONST_INT)
5653 scale = INTVAL (scale_rtx);
5656 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5657 if (base && index && scale == 1
5658 && (index == arg_pointer_rtx
5659 || index == frame_pointer_rtx
5660 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5667 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5668 if ((base == hard_frame_pointer_rtx
5669 || base == frame_pointer_rtx
5670 || base == arg_pointer_rtx) && !disp)
5673 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5674 Avoid this by transforming to [%esi+0]. */
5675 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5676 && base && !index && !disp
5678 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5681 /* Special case: encode reg+reg instead of reg*2. */
5682 if (!base && index && scale && scale == 2)
5683 base = index, scale = 1;
5685 /* Special case: scaling cannot be encoded without base or displacement. */
5686 if (!base && !disp && index && scale != 1)
5698 /* Return cost of the memory address x.
5699 For i386, it is better to use a complex address than let gcc copy
5700 the address into a reg and make a new pseudo. But not if the address
5701 requires to two regs - that would mean more pseudos with longer
5704 ix86_address_cost (rtx x)
5706 struct ix86_address parts;
5709 if (!ix86_decompose_address (x, &parts))
5712 /* More complex memory references are better. */
5713 if (parts.disp && parts.disp != const0_rtx)
5715 if (parts.seg != SEG_DEFAULT)
5718 /* Attempt to minimize number of registers in the address. */
5720 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5722 && (!REG_P (parts.index)
5723 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5727 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5729 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5730 && parts.base != parts.index)
5733 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5734 since it's predecode logic can't detect the length of instructions
5735 and it degenerates to vector decoded. Increase cost of such
5736 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5737 to split such addresses or even refuse such addresses at all.
5739 Following addressing modes are affected:
5744 The first and last case may be avoidable by explicitly coding the zero in
5745 memory address, but I don't have AMD-K6 machine handy to check this
5749 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5750 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5751 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5757 /* If X is a machine specific address (i.e. a symbol or label being
5758 referenced as a displacement from the GOT implemented using an
5759 UNSPEC), then return the base term. Otherwise return X. */
5762 ix86_find_base_term (rtx x)
5768 if (GET_CODE (x) != CONST)
5771 if (GET_CODE (term) == PLUS
5772 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5773 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5774 term = XEXP (term, 0);
5775 if (GET_CODE (term) != UNSPEC
5776 || XINT (term, 1) != UNSPEC_GOTPCREL)
5779 term = XVECEXP (term, 0, 0);
5781 if (GET_CODE (term) != SYMBOL_REF
5782 && GET_CODE (term) != LABEL_REF)
5788 term = ix86_delegitimize_address (x);
5790 if (GET_CODE (term) != SYMBOL_REF
5791 && GET_CODE (term) != LABEL_REF)
5797 /* Determine if a given RTX is a valid constant. We already know this
5798 satisfies CONSTANT_P. */
5801 legitimate_constant_p (rtx x)
5805 switch (GET_CODE (x))
5808 /* TLS symbols are not constant. */
5809 if (tls_symbolic_operand (x, Pmode))
5814 inner = XEXP (x, 0);
5816 /* Offsets of TLS symbols are never valid.
5817 Discourage CSE from creating them. */
5818 if (GET_CODE (inner) == PLUS
5819 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5822 if (GET_CODE (inner) == PLUS)
5824 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5826 inner = XEXP (inner, 0);
5829 /* Only some unspecs are valid as "constants". */
5830 if (GET_CODE (inner) == UNSPEC)
5831 switch (XINT (inner, 1))
5835 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5837 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5847 /* Otherwise we handle everything else in the move patterns. */
5851 /* Determine if it's legal to put X into the constant pool. This
5852 is not possible for the address of thread-local symbols, which
5853 is checked above. */
5856 ix86_cannot_force_const_mem (rtx x)
5858 return !legitimate_constant_p (x);
5861 /* Determine if a given RTX is a valid constant address. */
5864 constant_address_p (rtx x)
5866 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5869 /* Nonzero if the constant value X is a legitimate general operand
5870 when generating PIC code. It is given that flag_pic is on and
5871 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5874 legitimate_pic_operand_p (rtx x)
5878 switch (GET_CODE (x))
5881 inner = XEXP (x, 0);
5883 /* Only some unspecs are valid as "constants". */
5884 if (GET_CODE (inner) == UNSPEC)
5885 switch (XINT (inner, 1))
5888 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5896 return legitimate_pic_address_disp_p (x);
5903 /* Determine if a given CONST RTX is a valid memory displacement
5907 legitimate_pic_address_disp_p (rtx disp)
5911 /* In 64bit mode we can allow direct addresses of symbols and labels
5912 when they are not dynamic symbols. */
5915 /* TLS references should always be enclosed in UNSPEC. */
5916 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5918 if (GET_CODE (disp) == SYMBOL_REF
5919 && ix86_cmodel == CM_SMALL_PIC
5920 && SYMBOL_REF_LOCAL_P (disp))
5922 if (GET_CODE (disp) == LABEL_REF)
5924 if (GET_CODE (disp) == CONST
5925 && GET_CODE (XEXP (disp, 0)) == PLUS)
5927 rtx op0 = XEXP (XEXP (disp, 0), 0);
5928 rtx op1 = XEXP (XEXP (disp, 0), 1);
5930 /* TLS references should always be enclosed in UNSPEC. */
5931 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5933 if (((GET_CODE (op0) == SYMBOL_REF
5934 && ix86_cmodel == CM_SMALL_PIC
5935 && SYMBOL_REF_LOCAL_P (op0))
5936 || GET_CODE (op0) == LABEL_REF)
5937 && GET_CODE (op1) == CONST_INT
5938 && INTVAL (op1) < 16*1024*1024
5939 && INTVAL (op1) >= -16*1024*1024)
5943 if (GET_CODE (disp) != CONST)
5945 disp = XEXP (disp, 0);
5949 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5950 of GOT tables. We should not need these anyway. */
5951 if (GET_CODE (disp) != UNSPEC
5952 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5955 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5956 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5962 if (GET_CODE (disp) == PLUS)
5964 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5966 disp = XEXP (disp, 0);
5970 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5971 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5973 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5974 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5975 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5977 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5978 if (! strcmp (sym_name, "<pic base>"))
5983 if (GET_CODE (disp) != UNSPEC)
5986 switch (XINT (disp, 1))
5991 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5993 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5994 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5995 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5997 case UNSPEC_GOTTPOFF:
5998 case UNSPEC_GOTNTPOFF:
5999 case UNSPEC_INDNTPOFF:
6002 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6004 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6006 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6012 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6013 memory address for an instruction. The MODE argument is the machine mode
6014 for the MEM expression that wants to use this address.
6016 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6017 convert common non-canonical forms to canonical form so that they will
6021 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6023 struct ix86_address parts;
6024 rtx base, index, disp;
6025 HOST_WIDE_INT scale;
6026 const char *reason = NULL;
6027 rtx reason_rtx = NULL_RTX;
6029 if (TARGET_DEBUG_ADDR)
6032 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6033 GET_MODE_NAME (mode), strict);
6037 if (ix86_decompose_address (addr, &parts) <= 0)
6039 reason = "decomposition failed";
6044 index = parts.index;
6046 scale = parts.scale;
6048 /* Validate base register.
6050 Don't allow SUBREG's here, it can lead to spill failures when the base
6051 is one word out of a two word structure, which is represented internally
6058 if (GET_CODE (base) != REG)
6060 reason = "base is not a register";
6064 if (GET_MODE (base) != Pmode)
6066 reason = "base is not in Pmode";
6070 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
6071 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
6073 reason = "base is not valid";
6078 /* Validate index register.
6080 Don't allow SUBREG's here, it can lead to spill failures when the index
6081 is one word out of a two word structure, which is represented internally
6088 if (GET_CODE (index) != REG)
6090 reason = "index is not a register";
6094 if (GET_MODE (index) != Pmode)
6096 reason = "index is not in Pmode";
6100 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
6101 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
6103 reason = "index is not valid";
6108 /* Validate scale factor. */
6111 reason_rtx = GEN_INT (scale);
6114 reason = "scale without index";
6118 if (scale != 2 && scale != 4 && scale != 8)
6120 reason = "scale is not a valid multiplier";
6125 /* Validate displacement. */
6130 if (GET_CODE (disp) == CONST
6131 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6132 switch (XINT (XEXP (disp, 0), 1))
6136 case UNSPEC_GOTPCREL:
6139 goto is_legitimate_pic;
6141 case UNSPEC_GOTTPOFF:
6142 case UNSPEC_GOTNTPOFF:
6143 case UNSPEC_INDNTPOFF:
6149 reason = "invalid address unspec";
6153 else if (flag_pic && (SYMBOLIC_CONST (disp)
6155 && !machopic_operand_p (disp)
6160 if (TARGET_64BIT && (index || base))
6162 /* foo@dtpoff(%rX) is ok. */
6163 if (GET_CODE (disp) != CONST
6164 || GET_CODE (XEXP (disp, 0)) != PLUS
6165 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6166 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6167 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6168 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6170 reason = "non-constant pic memory reference";
6174 else if (! legitimate_pic_address_disp_p (disp))
6176 reason = "displacement is an invalid pic construct";
6180 /* This code used to verify that a symbolic pic displacement
6181 includes the pic_offset_table_rtx register.
6183 While this is good idea, unfortunately these constructs may
6184 be created by "adds using lea" optimization for incorrect
6193 This code is nonsensical, but results in addressing
6194 GOT table with pic_offset_table_rtx base. We can't
6195 just refuse it easily, since it gets matched by
6196 "addsi3" pattern, that later gets split to lea in the
6197 case output register differs from input. While this
6198 can be handled by separate addsi pattern for this case
6199 that never results in lea, this seems to be easier and
6200 correct fix for crash to disable this test. */
6202 else if (GET_CODE (disp) != LABEL_REF
6203 && GET_CODE (disp) != CONST_INT
6204 && (GET_CODE (disp) != CONST
6205 || !legitimate_constant_p (disp))
6206 && (GET_CODE (disp) != SYMBOL_REF
6207 || !legitimate_constant_p (disp)))
6209 reason = "displacement is not constant";
6212 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6214 reason = "displacement is out of range";
6219 /* Everything looks valid. */
6220 if (TARGET_DEBUG_ADDR)
6221 fprintf (stderr, "Success.\n");
6225 if (TARGET_DEBUG_ADDR)
6227 fprintf (stderr, "Error: %s\n", reason);
6228 debug_rtx (reason_rtx);
6233 /* Return an unique alias set for the GOT. */
6235 static HOST_WIDE_INT
6236 ix86_GOT_alias_set (void)
6238 static HOST_WIDE_INT set = -1;
6240 set = new_alias_set ();
6244 /* Return a legitimate reference for ORIG (an address) using the
6245 register REG. If REG is 0, a new pseudo is generated.
6247 There are two types of references that must be handled:
6249 1. Global data references must load the address from the GOT, via
6250 the PIC reg. An insn is emitted to do this load, and the reg is
6253 2. Static data references, constant pool addresses, and code labels
6254 compute the address as an offset from the GOT, whose base is in
6255 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6256 differentiate them from global data objects. The returned
6257 address is the PIC reg + an unspec constant.
6259 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6260 reg also appears in the address. */
6263 legitimize_pic_address (rtx orig, rtx reg)
6271 reg = gen_reg_rtx (Pmode);
6272 /* Use the generic Mach-O PIC machinery. */
6273 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6276 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6278 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6280 /* This symbol may be referenced via a displacement from the PIC
6281 base address (@GOTOFF). */
6283 if (reload_in_progress)
6284 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6285 if (GET_CODE (addr) == CONST)
6286 addr = XEXP (addr, 0);
6287 if (GET_CODE (addr) == PLUS)
6289 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6290 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6293 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6294 new = gen_rtx_CONST (Pmode, new);
6295 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6299 emit_move_insn (reg, new);
6303 else if (GET_CODE (addr) == SYMBOL_REF)
6307 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6308 new = gen_rtx_CONST (Pmode, new);
6309 new = gen_rtx_MEM (Pmode, new);
6310 RTX_UNCHANGING_P (new) = 1;
6311 set_mem_alias_set (new, ix86_GOT_alias_set ());
6314 reg = gen_reg_rtx (Pmode);
6315 /* Use directly gen_movsi, otherwise the address is loaded
6316 into register for CSE. We don't want to CSE this addresses,
6317 instead we CSE addresses from the GOT table, so skip this. */
6318 emit_insn (gen_movsi (reg, new));
6323 /* This symbol must be referenced via a load from the
6324 Global Offset Table (@GOT). */
6326 if (reload_in_progress)
6327 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6328 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6329 new = gen_rtx_CONST (Pmode, new);
6330 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6331 new = gen_rtx_MEM (Pmode, new);
6332 RTX_UNCHANGING_P (new) = 1;
6333 set_mem_alias_set (new, ix86_GOT_alias_set ());
6336 reg = gen_reg_rtx (Pmode);
6337 emit_move_insn (reg, new);
6343 if (GET_CODE (addr) == CONST)
6345 addr = XEXP (addr, 0);
6347 /* We must match stuff we generate before. Assume the only
6348 unspecs that can get here are ours. Not that we could do
6349 anything with them anyway.... */
6350 if (GET_CODE (addr) == UNSPEC
6351 || (GET_CODE (addr) == PLUS
6352 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6354 if (GET_CODE (addr) != PLUS)
6357 if (GET_CODE (addr) == PLUS)
6359 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6361 /* Check first to see if this is a constant offset from a @GOTOFF
6362 symbol reference. */
6363 if (local_symbolic_operand (op0, Pmode)
6364 && GET_CODE (op1) == CONST_INT)
6368 if (reload_in_progress)
6369 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6370 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6372 new = gen_rtx_PLUS (Pmode, new, op1);
6373 new = gen_rtx_CONST (Pmode, new);
6374 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6378 emit_move_insn (reg, new);
6384 if (INTVAL (op1) < -16*1024*1024
6385 || INTVAL (op1) >= 16*1024*1024)
6386 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6391 base = legitimize_pic_address (XEXP (addr, 0), reg);
6392 new = legitimize_pic_address (XEXP (addr, 1),
6393 base == reg ? NULL_RTX : reg);
6395 if (GET_CODE (new) == CONST_INT)
6396 new = plus_constant (base, INTVAL (new));
6399 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6401 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6402 new = XEXP (new, 1);
6404 new = gen_rtx_PLUS (Pmode, base, new);
6412 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6415 get_thread_pointer (int to_reg)
6419 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6423 reg = gen_reg_rtx (Pmode);
6424 insn = gen_rtx_SET (VOIDmode, reg, tp);
6425 insn = emit_insn (insn);
6430 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6431 false if we expect this to be used for a memory address and true if
6432 we expect to load the address into a register. */
6435 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6437 rtx dest, base, off, pic;
6442 case TLS_MODEL_GLOBAL_DYNAMIC:
6443 dest = gen_reg_rtx (Pmode);
6446 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6449 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6450 insns = get_insns ();
6453 emit_libcall_block (insns, dest, rax, x);
6456 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6459 case TLS_MODEL_LOCAL_DYNAMIC:
6460 base = gen_reg_rtx (Pmode);
6463 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6466 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6467 insns = get_insns ();
6470 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6471 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6472 emit_libcall_block (insns, base, rax, note);
6475 emit_insn (gen_tls_local_dynamic_base_32 (base));
6477 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6478 off = gen_rtx_CONST (Pmode, off);
6480 return gen_rtx_PLUS (Pmode, base, off);
6482 case TLS_MODEL_INITIAL_EXEC:
6486 type = UNSPEC_GOTNTPOFF;
6490 if (reload_in_progress)
6491 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6492 pic = pic_offset_table_rtx;
6493 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6495 else if (!TARGET_GNU_TLS)
6497 pic = gen_reg_rtx (Pmode);
6498 emit_insn (gen_set_got (pic));
6499 type = UNSPEC_GOTTPOFF;
6504 type = UNSPEC_INDNTPOFF;
6507 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6508 off = gen_rtx_CONST (Pmode, off);
6510 off = gen_rtx_PLUS (Pmode, pic, off);
6511 off = gen_rtx_MEM (Pmode, off);
6512 RTX_UNCHANGING_P (off) = 1;
6513 set_mem_alias_set (off, ix86_GOT_alias_set ());
6515 if (TARGET_64BIT || TARGET_GNU_TLS)
6517 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6518 off = force_reg (Pmode, off);
6519 return gen_rtx_PLUS (Pmode, base, off);
6523 base = get_thread_pointer (true);
6524 dest = gen_reg_rtx (Pmode);
6525 emit_insn (gen_subsi3 (dest, base, off));
6529 case TLS_MODEL_LOCAL_EXEC:
6530 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6531 (TARGET_64BIT || TARGET_GNU_TLS)
6532 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6533 off = gen_rtx_CONST (Pmode, off);
6535 if (TARGET_64BIT || TARGET_GNU_TLS)
6537 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6538 return gen_rtx_PLUS (Pmode, base, off);
6542 base = get_thread_pointer (true);
6543 dest = gen_reg_rtx (Pmode);
6544 emit_insn (gen_subsi3 (dest, base, off));
6555 /* Try machine-dependent ways of modifying an illegitimate address
6556 to be legitimate. If we find one, return the new, valid address.
6557 This macro is used in only one place: `memory_address' in explow.c.
6559 OLDX is the address as it was before break_out_memory_refs was called.
6560 In some cases it is useful to look at this to decide what needs to be done.
6562 MODE and WIN are passed so that this macro can use
6563 GO_IF_LEGITIMATE_ADDRESS.
6565 It is always safe for this macro to do nothing. It exists to recognize
6566 opportunities to optimize the output.
6568 For the 80386, we handle X+REG by loading X into a register R and
6569 using R+REG. R will go in a general reg and indexing will be used.
6570 However, if REG is a broken-out memory address or multiplication,
6571 nothing needs to be done because REG can certainly go in a general reg.
6573 When -fpic is used, special handling is needed for symbolic references.
6574 See comments by legitimize_pic_address in i386.c for details. */
6577 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6582 if (TARGET_DEBUG_ADDR)
6584 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6585 GET_MODE_NAME (mode));
6589 log = tls_symbolic_operand (x, mode);
6591 return legitimize_tls_address (x, log, false);
6593 if (flag_pic && SYMBOLIC_CONST (x))
6594 return legitimize_pic_address (x, 0);
6596 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6597 if (GET_CODE (x) == ASHIFT
6598 && GET_CODE (XEXP (x, 1)) == CONST_INT
6599 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6602 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6603 GEN_INT (1 << log));
6606 if (GET_CODE (x) == PLUS)
6608 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6610 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6611 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6612 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6615 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6616 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6617 GEN_INT (1 << log));
6620 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6621 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6622 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6625 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6626 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6627 GEN_INT (1 << log));
6630 /* Put multiply first if it isn't already. */
6631 if (GET_CODE (XEXP (x, 1)) == MULT)
6633 rtx tmp = XEXP (x, 0);
6634 XEXP (x, 0) = XEXP (x, 1);
6639 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6640 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6641 created by virtual register instantiation, register elimination, and
6642 similar optimizations. */
6643 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6646 x = gen_rtx_PLUS (Pmode,
6647 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6648 XEXP (XEXP (x, 1), 0)),
6649 XEXP (XEXP (x, 1), 1));
6653 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6654 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6655 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6656 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6657 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6658 && CONSTANT_P (XEXP (x, 1)))
6661 rtx other = NULL_RTX;
6663 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6665 constant = XEXP (x, 1);
6666 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6668 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6670 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6671 other = XEXP (x, 1);
6679 x = gen_rtx_PLUS (Pmode,
6680 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6681 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6682 plus_constant (other, INTVAL (constant)));
6686 if (changed && legitimate_address_p (mode, x, FALSE))
6689 if (GET_CODE (XEXP (x, 0)) == MULT)
6692 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6695 if (GET_CODE (XEXP (x, 1)) == MULT)
6698 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6702 && GET_CODE (XEXP (x, 1)) == REG
6703 && GET_CODE (XEXP (x, 0)) == REG)
6706 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6709 x = legitimize_pic_address (x, 0);
6712 if (changed && legitimate_address_p (mode, x, FALSE))
6715 if (GET_CODE (XEXP (x, 0)) == REG)
6717 rtx temp = gen_reg_rtx (Pmode);
6718 rtx val = force_operand (XEXP (x, 1), temp);
6720 emit_move_insn (temp, val);
6726 else if (GET_CODE (XEXP (x, 1)) == REG)
6728 rtx temp = gen_reg_rtx (Pmode);
6729 rtx val = force_operand (XEXP (x, 0), temp);
6731 emit_move_insn (temp, val);
6741 /* Print an integer constant expression in assembler syntax. Addition
6742 and subtraction are the only arithmetic that may appear in these
6743 expressions. FILE is the stdio stream to write to, X is the rtx, and
6744 CODE is the operand print code from the output string. */
6747 output_pic_addr_const (FILE *file, rtx x, int code)
6751 switch (GET_CODE (x))
6761 assemble_name (file, XSTR (x, 0));
6762 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6763 fputs ("@PLT", file);
6770 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6771 assemble_name (asm_out_file, buf);
6775 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6779 /* This used to output parentheses around the expression,
6780 but that does not work on the 386 (either ATT or BSD assembler). */
6781 output_pic_addr_const (file, XEXP (x, 0), code);
6785 if (GET_MODE (x) == VOIDmode)
6787 /* We can use %d if the number is <32 bits and positive. */
6788 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6789 fprintf (file, "0x%lx%08lx",
6790 (unsigned long) CONST_DOUBLE_HIGH (x),
6791 (unsigned long) CONST_DOUBLE_LOW (x));
6793 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6796 /* We can't handle floating point constants;
6797 PRINT_OPERAND must handle them. */
6798 output_operand_lossage ("floating constant misused");
6802 /* Some assemblers need integer constants to appear first. */
6803 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6805 output_pic_addr_const (file, XEXP (x, 0), code);
6807 output_pic_addr_const (file, XEXP (x, 1), code);
6809 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6811 output_pic_addr_const (file, XEXP (x, 1), code);
6813 output_pic_addr_const (file, XEXP (x, 0), code);
6821 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6822 output_pic_addr_const (file, XEXP (x, 0), code);
6824 output_pic_addr_const (file, XEXP (x, 1), code);
6826 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6830 if (XVECLEN (x, 0) != 1)
6832 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6833 switch (XINT (x, 1))
6836 fputs ("@GOT", file);
6839 fputs ("@GOTOFF", file);
6841 case UNSPEC_GOTPCREL:
6842 fputs ("@GOTPCREL(%rip)", file);
6844 case UNSPEC_GOTTPOFF:
6845 /* FIXME: This might be @TPOFF in Sun ld too. */
6846 fputs ("@GOTTPOFF", file);
6849 fputs ("@TPOFF", file);
6853 fputs ("@TPOFF", file);
6855 fputs ("@NTPOFF", file);
6858 fputs ("@DTPOFF", file);
6860 case UNSPEC_GOTNTPOFF:
6862 fputs ("@GOTTPOFF(%rip)", file);
6864 fputs ("@GOTNTPOFF", file);
6866 case UNSPEC_INDNTPOFF:
6867 fputs ("@INDNTPOFF", file);
6870 output_operand_lossage ("invalid UNSPEC as operand");
6876 output_operand_lossage ("invalid expression as operand");
6880 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6881 We need to handle our special PIC relocations. */
6884 i386_dwarf_output_addr_const (FILE *file, rtx x)
6887 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6891 fprintf (file, "%s", ASM_LONG);
6894 output_pic_addr_const (file, x, '\0');
6896 output_addr_const (file, x);
6900 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6901 We need to emit DTP-relative relocations. */
6904 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6906 fputs (ASM_LONG, file);
6907 output_addr_const (file, x);
6908 fputs ("@DTPOFF", file);
6914 fputs (", 0", file);
6921 /* In the name of slightly smaller debug output, and to cater to
6922 general assembler losage, recognize PIC+GOTOFF and turn it back
6923 into a direct symbol reference. */
6926 ix86_delegitimize_address (rtx orig_x)
6930 if (GET_CODE (x) == MEM)
6935 if (GET_CODE (x) != CONST
6936 || GET_CODE (XEXP (x, 0)) != UNSPEC
6937 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6938 || GET_CODE (orig_x) != MEM)
6940 return XVECEXP (XEXP (x, 0), 0, 0);
6943 if (GET_CODE (x) != PLUS
6944 || GET_CODE (XEXP (x, 1)) != CONST)
6947 if (GET_CODE (XEXP (x, 0)) == REG
6948 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6949 /* %ebx + GOT/GOTOFF */
6951 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6953 /* %ebx + %reg * scale + GOT/GOTOFF */
6955 if (GET_CODE (XEXP (y, 0)) == REG
6956 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6958 else if (GET_CODE (XEXP (y, 1)) == REG
6959 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6963 if (GET_CODE (y) != REG
6964 && GET_CODE (y) != MULT
6965 && GET_CODE (y) != ASHIFT)
6971 x = XEXP (XEXP (x, 1), 0);
6972 if (GET_CODE (x) == UNSPEC
6973 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6974 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6977 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6978 return XVECEXP (x, 0, 0);
6981 if (GET_CODE (x) == PLUS
6982 && GET_CODE (XEXP (x, 0)) == UNSPEC
6983 && GET_CODE (XEXP (x, 1)) == CONST_INT
6984 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6985 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6986 && GET_CODE (orig_x) != MEM)))
6988 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6990 return gen_rtx_PLUS (Pmode, y, x);
6998 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7003 if (mode == CCFPmode || mode == CCFPUmode)
7005 enum rtx_code second_code, bypass_code;
7006 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7007 if (bypass_code != NIL || second_code != NIL)
7009 code = ix86_fp_compare_code_to_integer (code);
7013 code = reverse_condition (code);
7024 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
7029 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
7030 Those same assemblers have the same but opposite losage on cmov. */
7033 suffix = fp ? "nbe" : "a";
7036 if (mode == CCNOmode || mode == CCGOCmode)
7038 else if (mode == CCmode || mode == CCGCmode)
7049 if (mode == CCNOmode || mode == CCGOCmode)
7051 else if (mode == CCmode || mode == CCGCmode)
7060 suffix = fp ? "nb" : "ae";
7063 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
7073 suffix = fp ? "u" : "p";
7076 suffix = fp ? "nu" : "np";
7081 fputs (suffix, file);
7084 /* Print the name of register X to FILE based on its machine mode and number.
7085 If CODE is 'w', pretend the mode is HImode.
7086 If CODE is 'b', pretend the mode is QImode.
7087 If CODE is 'k', pretend the mode is SImode.
7088 If CODE is 'q', pretend the mode is DImode.
7089 If CODE is 'h', pretend the reg is the `high' byte register.
7090 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7093 print_reg (rtx x, int code, FILE *file)
7095 if (REGNO (x) == ARG_POINTER_REGNUM
7096 || REGNO (x) == FRAME_POINTER_REGNUM
7097 || REGNO (x) == FLAGS_REG
7098 || REGNO (x) == FPSR_REG)
7101 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7104 if (code == 'w' || MMX_REG_P (x))
7106 else if (code == 'b')
7108 else if (code == 'k')
7110 else if (code == 'q')
7112 else if (code == 'y')
7114 else if (code == 'h')
7117 code = GET_MODE_SIZE (GET_MODE (x));
7119 /* Irritatingly, AMD extended registers use different naming convention
7120 from the normal registers. */
7121 if (REX_INT_REG_P (x))
7128 error ("extended registers have no high halves");
7131 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7134 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7137 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7140 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7143 error ("unsupported operand size for extended register");
7151 if (STACK_TOP_P (x))
7153 fputs ("st(0)", file);
7160 if (! ANY_FP_REG_P (x))
7161 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7166 fputs (hi_reg_name[REGNO (x)], file);
7169 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7171 fputs (qi_reg_name[REGNO (x)], file);
7174 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7176 fputs (qi_high_reg_name[REGNO (x)], file);
7183 /* Locate some local-dynamic symbol still in use by this function
7184 so that we can print its name in some tls_local_dynamic_base
7188 get_some_local_dynamic_name (void)
7192 if (cfun->machine->some_ld_name)
7193 return cfun->machine->some_ld_name;
7195 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7197 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7198 return cfun->machine->some_ld_name;
7204 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7208 if (GET_CODE (x) == SYMBOL_REF
7209 && local_dynamic_symbolic_operand (x, Pmode))
7211 cfun->machine->some_ld_name = XSTR (x, 0);
7219 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7220 C -- print opcode suffix for set/cmov insn.
7221 c -- like C, but print reversed condition
7222 F,f -- likewise, but for floating-point.
7223 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7225 R -- print the prefix for register names.
7226 z -- print the opcode suffix for the size of the current operand.
7227 * -- print a star (in certain assembler syntax)
7228 A -- print an absolute memory reference.
7229 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7230 s -- print a shift double count, followed by the assemblers argument
7232 b -- print the QImode name of the register for the indicated operand.
7233 %b0 would print %al if operands[0] is reg 0.
7234 w -- likewise, print the HImode name of the register.
7235 k -- likewise, print the SImode name of the register.
7236 q -- likewise, print the DImode name of the register.
7237 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7238 y -- print "st(0)" instead of "st" as a register.
7239 D -- print condition for SSE cmp instruction.
7240 P -- if PIC, print an @PLT suffix.
7241 X -- don't print any sort of PIC '@' suffix for a symbol.
7242 & -- print some in-use local-dynamic symbol name.
7246 print_operand (FILE *file, rtx x, int code)
7253 if (ASSEMBLER_DIALECT == ASM_ATT)
7258 assemble_name (file, get_some_local_dynamic_name ());
7262 if (ASSEMBLER_DIALECT == ASM_ATT)
7264 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7266 /* Intel syntax. For absolute addresses, registers should not
7267 be surrounded by braces. */
7268 if (GET_CODE (x) != REG)
7271 PRINT_OPERAND (file, x, 0);
7279 PRINT_OPERAND (file, x, 0);
7284 if (ASSEMBLER_DIALECT == ASM_ATT)
7289 if (ASSEMBLER_DIALECT == ASM_ATT)
7294 if (ASSEMBLER_DIALECT == ASM_ATT)
7299 if (ASSEMBLER_DIALECT == ASM_ATT)
7304 if (ASSEMBLER_DIALECT == ASM_ATT)
7309 if (ASSEMBLER_DIALECT == ASM_ATT)
7314 /* 387 opcodes don't get size suffixes if the operands are
7316 if (STACK_REG_P (x))
7319 /* Likewise if using Intel opcodes. */
7320 if (ASSEMBLER_DIALECT == ASM_INTEL)
7323 /* This is the size of op from size of operand. */
7324 switch (GET_MODE_SIZE (GET_MODE (x)))
7327 #ifdef HAVE_GAS_FILDS_FISTS
7333 if (GET_MODE (x) == SFmode)
7348 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7350 #ifdef GAS_MNEMONICS
7376 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7378 PRINT_OPERAND (file, x, 0);
7384 /* Little bit of braindamage here. The SSE compare instructions
7385 does use completely different names for the comparisons that the
7386 fp conditional moves. */
7387 switch (GET_CODE (x))
7402 fputs ("unord", file);
7406 fputs ("neq", file);
7410 fputs ("nlt", file);
7414 fputs ("nle", file);
7417 fputs ("ord", file);
7425 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7426 if (ASSEMBLER_DIALECT == ASM_ATT)
7428 switch (GET_MODE (x))
7430 case HImode: putc ('w', file); break;
7432 case SFmode: putc ('l', file); break;
7434 case DFmode: putc ('q', file); break;
7442 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7445 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7446 if (ASSEMBLER_DIALECT == ASM_ATT)
7449 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7452 /* Like above, but reverse condition */
7454 /* Check to see if argument to %c is really a constant
7455 and not a condition code which needs to be reversed. */
7456 if (!COMPARISON_P (x))
7458 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7461 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7464 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7465 if (ASSEMBLER_DIALECT == ASM_ATT)
7468 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7474 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7477 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7480 int pred_val = INTVAL (XEXP (x, 0));
7482 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7483 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7485 int taken = pred_val > REG_BR_PROB_BASE / 2;
7486 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7488 /* Emit hints only in the case default branch prediction
7489 heuristics would fail. */
7490 if (taken != cputaken)
7492 /* We use 3e (DS) prefix for taken branches and
7493 2e (CS) prefix for not taken branches. */
7495 fputs ("ds ; ", file);
7497 fputs ("cs ; ", file);
7504 output_operand_lossage ("invalid operand code `%c'", code);
7508 if (GET_CODE (x) == REG)
7509 print_reg (x, code, file);
7511 else if (GET_CODE (x) == MEM)
7513 /* No `byte ptr' prefix for call instructions. */
7514 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7517 switch (GET_MODE_SIZE (GET_MODE (x)))
7519 case 1: size = "BYTE"; break;
7520 case 2: size = "WORD"; break;
7521 case 4: size = "DWORD"; break;
7522 case 8: size = "QWORD"; break;
7523 case 12: size = "XWORD"; break;
7524 case 16: size = "XMMWORD"; break;
7529 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7532 else if (code == 'w')
7534 else if (code == 'k')
7538 fputs (" PTR ", file);
7542 /* Avoid (%rip) for call operands. */
7543 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7544 && GET_CODE (x) != CONST_INT)
7545 output_addr_const (file, x);
7546 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7547 output_operand_lossage ("invalid constraints for operand");
7552 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7557 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7558 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7560 if (ASSEMBLER_DIALECT == ASM_ATT)
7562 fprintf (file, "0x%08lx", l);
7565 /* These float cases don't actually occur as immediate operands. */
7566 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7570 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7571 fprintf (file, "%s", dstr);
7574 else if (GET_CODE (x) == CONST_DOUBLE
7575 && GET_MODE (x) == XFmode)
7579 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7580 fprintf (file, "%s", dstr);
7587 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7589 if (ASSEMBLER_DIALECT == ASM_ATT)
7592 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7593 || GET_CODE (x) == LABEL_REF)
7595 if (ASSEMBLER_DIALECT == ASM_ATT)
7598 fputs ("OFFSET FLAT:", file);
7601 if (GET_CODE (x) == CONST_INT)
7602 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7604 output_pic_addr_const (file, x, code);
7606 output_addr_const (file, x);
7610 /* Print a memory operand whose address is ADDR. */
7613 print_operand_address (FILE *file, rtx addr)
7615 struct ix86_address parts;
7616 rtx base, index, disp;
7619 if (! ix86_decompose_address (addr, &parts))
7623 index = parts.index;
7625 scale = parts.scale;
7633 if (USER_LABEL_PREFIX[0] == 0)
7635 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7641 if (!base && !index)
7643 /* Displacement only requires special attention. */
7645 if (GET_CODE (disp) == CONST_INT)
7647 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7649 if (USER_LABEL_PREFIX[0] == 0)
7651 fputs ("ds:", file);
7653 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7656 output_pic_addr_const (file, disp, 0);
7658 output_addr_const (file, disp);
7660 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7662 && ((GET_CODE (disp) == SYMBOL_REF
7663 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7664 || GET_CODE (disp) == LABEL_REF
7665 || (GET_CODE (disp) == CONST
7666 && GET_CODE (XEXP (disp, 0)) == PLUS
7667 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7668 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7669 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7670 fputs ("(%rip)", file);
7674 if (ASSEMBLER_DIALECT == ASM_ATT)
7679 output_pic_addr_const (file, disp, 0);
7680 else if (GET_CODE (disp) == LABEL_REF)
7681 output_asm_label (disp);
7683 output_addr_const (file, disp);
7688 print_reg (base, 0, file);
7692 print_reg (index, 0, file);
7694 fprintf (file, ",%d", scale);
7700 rtx offset = NULL_RTX;
7704 /* Pull out the offset of a symbol; print any symbol itself. */
7705 if (GET_CODE (disp) == CONST
7706 && GET_CODE (XEXP (disp, 0)) == PLUS
7707 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7709 offset = XEXP (XEXP (disp, 0), 1);
7710 disp = gen_rtx_CONST (VOIDmode,
7711 XEXP (XEXP (disp, 0), 0));
7715 output_pic_addr_const (file, disp, 0);
7716 else if (GET_CODE (disp) == LABEL_REF)
7717 output_asm_label (disp);
7718 else if (GET_CODE (disp) == CONST_INT)
7721 output_addr_const (file, disp);
7727 print_reg (base, 0, file);
7730 if (INTVAL (offset) >= 0)
7732 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7736 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7743 print_reg (index, 0, file);
7745 fprintf (file, "*%d", scale);
7753 output_addr_const_extra (FILE *file, rtx x)
7757 if (GET_CODE (x) != UNSPEC)
7760 op = XVECEXP (x, 0, 0);
7761 switch (XINT (x, 1))
7763 case UNSPEC_GOTTPOFF:
7764 output_addr_const (file, op);
7765 /* FIXME: This might be @TPOFF in Sun ld. */
7766 fputs ("@GOTTPOFF", file);
7769 output_addr_const (file, op);
7770 fputs ("@TPOFF", file);
7773 output_addr_const (file, op);
7775 fputs ("@TPOFF", file);
7777 fputs ("@NTPOFF", file);
7780 output_addr_const (file, op);
7781 fputs ("@DTPOFF", file);
7783 case UNSPEC_GOTNTPOFF:
7784 output_addr_const (file, op);
7786 fputs ("@GOTTPOFF(%rip)", file);
7788 fputs ("@GOTNTPOFF", file);
7790 case UNSPEC_INDNTPOFF:
7791 output_addr_const (file, op);
7792 fputs ("@INDNTPOFF", file);
7802 /* Split one or more DImode RTL references into pairs of SImode
7803 references. The RTL can be REG, offsettable MEM, integer constant, or
7804 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7805 split and "num" is its length. lo_half and hi_half are output arrays
7806 that parallel "operands". */
7809 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7813 rtx op = operands[num];
7815 /* simplify_subreg refuse to split volatile memory addresses,
7816 but we still have to handle it. */
7817 if (GET_CODE (op) == MEM)
7819 lo_half[num] = adjust_address (op, SImode, 0);
7820 hi_half[num] = adjust_address (op, SImode, 4);
7824 lo_half[num] = simplify_gen_subreg (SImode, op,
7825 GET_MODE (op) == VOIDmode
7826 ? DImode : GET_MODE (op), 0);
7827 hi_half[num] = simplify_gen_subreg (SImode, op,
7828 GET_MODE (op) == VOIDmode
7829 ? DImode : GET_MODE (op), 4);
7833 /* Split one or more TImode RTL references into pairs of SImode
7834 references. The RTL can be REG, offsettable MEM, integer constant, or
7835 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7836 split and "num" is its length. lo_half and hi_half are output arrays
7837 that parallel "operands". */
7840 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7844 rtx op = operands[num];
7846 /* simplify_subreg refuse to split volatile memory addresses, but we
7847 still have to handle it. */
7848 if (GET_CODE (op) == MEM)
7850 lo_half[num] = adjust_address (op, DImode, 0);
7851 hi_half[num] = adjust_address (op, DImode, 8);
7855 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7856 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7861 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7862 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7863 is the expression of the binary operation. The output may either be
7864 emitted here, or returned to the caller, like all output_* functions.
7866 There is no guarantee that the operands are the same mode, as they
7867 might be within FLOAT or FLOAT_EXTEND expressions. */
7869 #ifndef SYSV386_COMPAT
7870 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7871 wants to fix the assemblers because that causes incompatibility
7872 with gcc. No-one wants to fix gcc because that causes
7873 incompatibility with assemblers... You can use the option of
7874 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7875 #define SYSV386_COMPAT 1
7879 output_387_binary_op (rtx insn, rtx *operands)
7881 static char buf[30];
7884 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7886 #ifdef ENABLE_CHECKING
7887 /* Even if we do not want to check the inputs, this documents input
7888 constraints. Which helps in understanding the following code. */
7889 if (STACK_REG_P (operands[0])
7890 && ((REG_P (operands[1])
7891 && REGNO (operands[0]) == REGNO (operands[1])
7892 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7893 || (REG_P (operands[2])
7894 && REGNO (operands[0]) == REGNO (operands[2])
7895 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7896 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7902 switch (GET_CODE (operands[3]))
7905 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7906 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7914 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7915 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7923 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7924 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7932 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7933 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7947 if (GET_MODE (operands[0]) == SFmode)
7948 strcat (buf, "ss\t{%2, %0|%0, %2}");
7950 strcat (buf, "sd\t{%2, %0|%0, %2}");
7955 switch (GET_CODE (operands[3]))
7959 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7961 rtx temp = operands[2];
7962 operands[2] = operands[1];
7966 /* know operands[0] == operands[1]. */
7968 if (GET_CODE (operands[2]) == MEM)
7974 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7976 if (STACK_TOP_P (operands[0]))
7977 /* How is it that we are storing to a dead operand[2]?
7978 Well, presumably operands[1] is dead too. We can't
7979 store the result to st(0) as st(0) gets popped on this
7980 instruction. Instead store to operands[2] (which I
7981 think has to be st(1)). st(1) will be popped later.
7982 gcc <= 2.8.1 didn't have this check and generated
7983 assembly code that the Unixware assembler rejected. */
7984 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7986 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7990 if (STACK_TOP_P (operands[0]))
7991 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7993 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7998 if (GET_CODE (operands[1]) == MEM)
8004 if (GET_CODE (operands[2]) == MEM)
8010 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8013 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8014 derived assemblers, confusingly reverse the direction of
8015 the operation for fsub{r} and fdiv{r} when the
8016 destination register is not st(0). The Intel assembler
8017 doesn't have this brain damage. Read !SYSV386_COMPAT to
8018 figure out what the hardware really does. */
8019 if (STACK_TOP_P (operands[0]))
8020 p = "{p\t%0, %2|rp\t%2, %0}";
8022 p = "{rp\t%2, %0|p\t%0, %2}";
8024 if (STACK_TOP_P (operands[0]))
8025 /* As above for fmul/fadd, we can't store to st(0). */
8026 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8028 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8033 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8036 if (STACK_TOP_P (operands[0]))
8037 p = "{rp\t%0, %1|p\t%1, %0}";
8039 p = "{p\t%1, %0|rp\t%0, %1}";
8041 if (STACK_TOP_P (operands[0]))
8042 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8044 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8049 if (STACK_TOP_P (operands[0]))
8051 if (STACK_TOP_P (operands[1]))
8052 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8054 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8057 else if (STACK_TOP_P (operands[1]))
8060 p = "{\t%1, %0|r\t%0, %1}";
8062 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8068 p = "{r\t%2, %0|\t%0, %2}";
8070 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8083 /* Output code to initialize control word copies used by
8084 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
8085 is set to control word rounding downwards. */
8087 emit_i387_cw_initialization (rtx normal, rtx round_down)
8089 rtx reg = gen_reg_rtx (HImode);
8091 emit_insn (gen_x86_fnstcw_1 (normal));
8092 emit_move_insn (reg, normal);
8093 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8095 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8097 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8098 emit_move_insn (round_down, reg);
8101 /* Output code for INSN to convert a float to a signed int. OPERANDS
8102 are the insn operands. The output may be [HSD]Imode and the input
8103 operand may be [SDX]Fmode. */
8106 output_fix_trunc (rtx insn, rtx *operands)
8108 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8109 int dimode_p = GET_MODE (operands[0]) == DImode;
8111 /* Jump through a hoop or two for DImode, since the hardware has no
8112 non-popping instruction. We used to do this a different way, but
8113 that was somewhat fragile and broke with post-reload splitters. */
8114 if (dimode_p && !stack_top_dies)
8115 output_asm_insn ("fld\t%y1", operands);
8117 if (!STACK_TOP_P (operands[1]))
8120 if (GET_CODE (operands[0]) != MEM)
8123 output_asm_insn ("fldcw\t%3", operands);
8124 if (stack_top_dies || dimode_p)
8125 output_asm_insn ("fistp%z0\t%0", operands);
8127 output_asm_insn ("fist%z0\t%0", operands);
8128 output_asm_insn ("fldcw\t%2", operands);
8133 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8134 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8135 when fucom should be used. */
8138 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8141 rtx cmp_op0 = operands[0];
8142 rtx cmp_op1 = operands[1];
8143 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8148 cmp_op1 = operands[2];
8152 if (GET_MODE (operands[0]) == SFmode)
8154 return "ucomiss\t{%1, %0|%0, %1}";
8156 return "comiss\t{%1, %0|%0, %1}";
8159 return "ucomisd\t{%1, %0|%0, %1}";
8161 return "comisd\t{%1, %0|%0, %1}";
8164 if (! STACK_TOP_P (cmp_op0))
8167 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8169 if (STACK_REG_P (cmp_op1)
8171 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8172 && REGNO (cmp_op1) != FIRST_STACK_REG)
8174 /* If both the top of the 387 stack dies, and the other operand
8175 is also a stack register that dies, then this must be a
8176 `fcompp' float compare */
8180 /* There is no double popping fcomi variant. Fortunately,
8181 eflags is immune from the fstp's cc clobbering. */
8183 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8185 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8193 return "fucompp\n\tfnstsw\t%0";
8195 return "fcompp\n\tfnstsw\t%0";
8208 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8210 static const char * const alt[24] =
8222 "fcomi\t{%y1, %0|%0, %y1}",
8223 "fcomip\t{%y1, %0|%0, %y1}",
8224 "fucomi\t{%y1, %0|%0, %y1}",
8225 "fucomip\t{%y1, %0|%0, %y1}",
8232 "fcom%z2\t%y2\n\tfnstsw\t%0",
8233 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8234 "fucom%z2\t%y2\n\tfnstsw\t%0",
8235 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8237 "ficom%z2\t%y2\n\tfnstsw\t%0",
8238 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8246 mask = eflags_p << 3;
8247 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8248 mask |= unordered_p << 1;
8249 mask |= stack_top_dies;
8262 ix86_output_addr_vec_elt (FILE *file, int value)
8264 const char *directive = ASM_LONG;
8269 directive = ASM_QUAD;
8275 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8279 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8282 fprintf (file, "%s%s%d-%s%d\n",
8283 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8284 else if (HAVE_AS_GOTOFF_IN_DATA)
8285 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8287 else if (TARGET_MACHO)
8289 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8290 machopic_output_function_base_name (file);
8291 fprintf(file, "\n");
8295 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8296 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8299 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8303 ix86_expand_clear (rtx dest)
8307 /* We play register width games, which are only valid after reload. */
8308 if (!reload_completed)
8311 /* Avoid HImode and its attendant prefix byte. */
8312 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8313 dest = gen_rtx_REG (SImode, REGNO (dest));
8315 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8317 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8318 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8320 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8321 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8327 /* X is an unchanging MEM. If it is a constant pool reference, return
8328 the constant pool rtx, else NULL. */
8331 maybe_get_pool_constant (rtx x)
8333 x = ix86_delegitimize_address (XEXP (x, 0));
8335 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8336 return get_pool_constant (x);
8342 ix86_expand_move (enum machine_mode mode, rtx operands[])
8344 int strict = (reload_in_progress || reload_completed);
8346 enum tls_model model;
8351 model = tls_symbolic_operand (op1, Pmode);
8354 op1 = legitimize_tls_address (op1, model, true);
8355 op1 = force_operand (op1, op0);
8360 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8365 rtx temp = ((reload_in_progress
8366 || ((op0 && GET_CODE (op0) == REG)
8368 ? op0 : gen_reg_rtx (Pmode));
8369 op1 = machopic_indirect_data_reference (op1, temp);
8370 op1 = machopic_legitimize_pic_address (op1, mode,
8371 temp == op1 ? 0 : temp);
8373 else if (MACHOPIC_INDIRECT)
8374 op1 = machopic_indirect_data_reference (op1, 0);
8378 if (GET_CODE (op0) == MEM)
8379 op1 = force_reg (Pmode, op1);
8383 if (GET_CODE (temp) != REG)
8384 temp = gen_reg_rtx (Pmode);
8385 temp = legitimize_pic_address (op1, temp);
8390 #endif /* TARGET_MACHO */
8394 if (GET_CODE (op0) == MEM
8395 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8396 || !push_operand (op0, mode))
8397 && GET_CODE (op1) == MEM)
8398 op1 = force_reg (mode, op1);
8400 if (push_operand (op0, mode)
8401 && ! general_no_elim_operand (op1, mode))
8402 op1 = copy_to_mode_reg (mode, op1);
8404 /* Force large constants in 64bit compilation into register
8405 to get them CSEed. */
8406 if (TARGET_64BIT && mode == DImode
8407 && immediate_operand (op1, mode)
8408 && !x86_64_zero_extended_value (op1)
8409 && !register_operand (op0, mode)
8410 && optimize && !reload_completed && !reload_in_progress)
8411 op1 = copy_to_mode_reg (mode, op1);
8413 if (FLOAT_MODE_P (mode))
8415 /* If we are loading a floating point constant to a register,
8416 force the value to memory now, since we'll get better code
8417 out the back end. */
8421 else if (GET_CODE (op1) == CONST_DOUBLE)
8423 op1 = validize_mem (force_const_mem (mode, op1));
8424 if (!register_operand (op0, mode))
8426 rtx temp = gen_reg_rtx (mode);
8427 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8428 emit_move_insn (op0, temp);
8435 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8439 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8441 /* Force constants other than zero into memory. We do not know how
8442 the instructions used to build constants modify the upper 64 bits
8443 of the register, once we have that information we may be able
8444 to handle some of them more efficiently. */
8445 if ((reload_in_progress | reload_completed) == 0
8446 && register_operand (operands[0], mode)
8447 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8448 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8450 /* Make operand1 a register if it isn't already. */
8452 && !register_operand (operands[0], mode)
8453 && !register_operand (operands[1], mode))
8455 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8456 emit_move_insn (operands[0], temp);
8460 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8463 /* Attempt to expand a binary operator. Make the expansion closer to the
8464 actual machine, then just general_operand, which will allow 3 separate
8465 memory references (one output, two input) in a single insn. */
8468 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8471 int matching_memory;
8472 rtx src1, src2, dst, op, clob;
8478 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8479 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8480 && (rtx_equal_p (dst, src2)
8481 || immediate_operand (src1, mode)))
8488 /* If the destination is memory, and we do not have matching source
8489 operands, do things in registers. */
8490 matching_memory = 0;
8491 if (GET_CODE (dst) == MEM)
8493 if (rtx_equal_p (dst, src1))
8494 matching_memory = 1;
8495 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8496 && rtx_equal_p (dst, src2))
8497 matching_memory = 2;
8499 dst = gen_reg_rtx (mode);
8502 /* Both source operands cannot be in memory. */
8503 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8505 if (matching_memory != 2)
8506 src2 = force_reg (mode, src2);
8508 src1 = force_reg (mode, src1);
8511 /* If the operation is not commutable, source 1 cannot be a constant
8512 or non-matching memory. */
8513 if ((CONSTANT_P (src1)
8514 || (!matching_memory && GET_CODE (src1) == MEM))
8515 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8516 src1 = force_reg (mode, src1);
8518 /* If optimizing, copy to regs to improve CSE */
8519 if (optimize && ! no_new_pseudos)
8521 if (GET_CODE (dst) == MEM)
8522 dst = gen_reg_rtx (mode);
8523 if (GET_CODE (src1) == MEM)
8524 src1 = force_reg (mode, src1);
8525 if (GET_CODE (src2) == MEM)
8526 src2 = force_reg (mode, src2);
8529 /* Emit the instruction. */
8531 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8532 if (reload_in_progress)
8534 /* Reload doesn't know about the flags register, and doesn't know that
8535 it doesn't want to clobber it. We can only do this with PLUS. */
8542 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8543 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8546 /* Fix up the destination if needed. */
8547 if (dst != operands[0])
8548 emit_move_insn (operands[0], dst);
8551 /* Return TRUE or FALSE depending on whether the binary operator meets the
8552 appropriate constraints. */
8555 ix86_binary_operator_ok (enum rtx_code code,
8556 enum machine_mode mode ATTRIBUTE_UNUSED,
8559 /* Both source operands cannot be in memory. */
8560 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8562 /* If the operation is not commutable, source 1 cannot be a constant. */
8563 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8565 /* If the destination is memory, we must have a matching source operand. */
8566 if (GET_CODE (operands[0]) == MEM
8567 && ! (rtx_equal_p (operands[0], operands[1])
8568 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8569 && rtx_equal_p (operands[0], operands[2]))))
8571 /* If the operation is not commutable and the source 1 is memory, we must
8572 have a matching destination. */
8573 if (GET_CODE (operands[1]) == MEM
8574 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
8575 && ! rtx_equal_p (operands[0], operands[1]))
8580 /* Attempt to expand a unary operator. Make the expansion closer to the
8581 actual machine, then just general_operand, which will allow 2 separate
8582 memory references (one output, one input) in a single insn. */
8585 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8588 int matching_memory;
8589 rtx src, dst, op, clob;
8594 /* If the destination is memory, and we do not have matching source
8595 operands, do things in registers. */
8596 matching_memory = 0;
8597 if (GET_CODE (dst) == MEM)
8599 if (rtx_equal_p (dst, src))
8600 matching_memory = 1;
8602 dst = gen_reg_rtx (mode);
8605 /* When source operand is memory, destination must match. */
8606 if (!matching_memory && GET_CODE (src) == MEM)
8607 src = force_reg (mode, src);
8609 /* If optimizing, copy to regs to improve CSE */
8610 if (optimize && ! no_new_pseudos)
8612 if (GET_CODE (dst) == MEM)
8613 dst = gen_reg_rtx (mode);
8614 if (GET_CODE (src) == MEM)
8615 src = force_reg (mode, src);
8618 /* Emit the instruction. */
8620 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8621 if (reload_in_progress || code == NOT)
8623 /* Reload doesn't know about the flags register, and doesn't know that
8624 it doesn't want to clobber it. */
8631 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8632 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8635 /* Fix up the destination if needed. */
8636 if (dst != operands[0])
8637 emit_move_insn (operands[0], dst);
8640 /* Return TRUE or FALSE depending on whether the unary operator meets the
8641 appropriate constraints. */
8644 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8645 enum machine_mode mode ATTRIBUTE_UNUSED,
8646 rtx operands[2] ATTRIBUTE_UNUSED)
8648 /* If one of operands is memory, source and destination must match. */
8649 if ((GET_CODE (operands[0]) == MEM
8650 || GET_CODE (operands[1]) == MEM)
8651 && ! rtx_equal_p (operands[0], operands[1]))
8656 /* Return TRUE or FALSE depending on whether the first SET in INSN
8657 has source and destination with matching CC modes, and that the
8658 CC mode is at least as constrained as REQ_MODE. */
8661 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8664 enum machine_mode set_mode;
8666 set = PATTERN (insn);
8667 if (GET_CODE (set) == PARALLEL)
8668 set = XVECEXP (set, 0, 0);
8669 if (GET_CODE (set) != SET)
8671 if (GET_CODE (SET_SRC (set)) != COMPARE)
8674 set_mode = GET_MODE (SET_DEST (set));
8678 if (req_mode != CCNOmode
8679 && (req_mode != CCmode
8680 || XEXP (SET_SRC (set), 1) != const0_rtx))
8684 if (req_mode == CCGCmode)
8688 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8692 if (req_mode == CCZmode)
8702 return (GET_MODE (SET_SRC (set)) == set_mode);
8705 /* Generate insn patterns to do an integer compare of OPERANDS. */
8708 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8710 enum machine_mode cmpmode;
8713 cmpmode = SELECT_CC_MODE (code, op0, op1);
8714 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8716 /* This is very simple, but making the interface the same as in the
8717 FP case makes the rest of the code easier. */
8718 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8719 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8721 /* Return the test that should be put into the flags user, i.e.
8722 the bcc, scc, or cmov instruction. */
8723 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8726 /* Figure out whether to use ordered or unordered fp comparisons.
8727 Return the appropriate mode to use. */
8730 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8732 /* ??? In order to make all comparisons reversible, we do all comparisons
8733 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8734 all forms trapping and nontrapping comparisons, we can make inequality
8735 comparisons trapping again, since it results in better code when using
8736 FCOM based compares. */
8737 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8741 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8743 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8744 return ix86_fp_compare_mode (code);
8747 /* Only zero flag is needed. */
8749 case NE: /* ZF!=0 */
8751 /* Codes needing carry flag. */
8752 case GEU: /* CF=0 */
8753 case GTU: /* CF=0 & ZF=0 */
8754 case LTU: /* CF=1 */
8755 case LEU: /* CF=1 | ZF=1 */
8757 /* Codes possibly doable only with sign flag when
8758 comparing against zero. */
8759 case GE: /* SF=OF or SF=0 */
8760 case LT: /* SF<>OF or SF=1 */
8761 if (op1 == const0_rtx)
8764 /* For other cases Carry flag is not required. */
8766 /* Codes doable only with sign flag when comparing
8767 against zero, but we miss jump instruction for it
8768 so we need to use relational tests against overflow
8769 that thus needs to be zero. */
8770 case GT: /* ZF=0 & SF=OF */
8771 case LE: /* ZF=1 | SF<>OF */
8772 if (op1 == const0_rtx)
8776 /* strcmp pattern do (use flags) and combine may ask us for proper
8785 /* Return the fixed registers used for condition codes. */
8788 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8795 /* If two condition code modes are compatible, return a condition code
8796 mode which is compatible with both. Otherwise, return
8799 static enum machine_mode
8800 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8805 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8808 if ((m1 == CCGCmode && m2 == CCGOCmode)
8809 || (m1 == CCGOCmode && m2 == CCGCmode))
8837 /* These are only compatible with themselves, which we already
8843 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8846 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8848 enum rtx_code swapped_code = swap_condition (code);
8849 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8850 || (ix86_fp_comparison_cost (swapped_code)
8851 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8854 /* Swap, force into registers, or otherwise massage the two operands
8855 to a fp comparison. The operands are updated in place; the new
8856 comparison code is returned. */
8858 static enum rtx_code
8859 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8861 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8862 rtx op0 = *pop0, op1 = *pop1;
8863 enum machine_mode op_mode = GET_MODE (op0);
8864 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8866 /* All of the unordered compare instructions only work on registers.
8867 The same is true of the XFmode compare instructions. The same is
8868 true of the fcomi compare instructions. */
8871 && (fpcmp_mode == CCFPUmode
8872 || op_mode == XFmode
8873 || ix86_use_fcomi_compare (code)))
8875 op0 = force_reg (op_mode, op0);
8876 op1 = force_reg (op_mode, op1);
8880 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8881 things around if they appear profitable, otherwise force op0
8884 if (standard_80387_constant_p (op0) == 0
8885 || (GET_CODE (op0) == MEM
8886 && ! (standard_80387_constant_p (op1) == 0
8887 || GET_CODE (op1) == MEM)))
8890 tmp = op0, op0 = op1, op1 = tmp;
8891 code = swap_condition (code);
8894 if (GET_CODE (op0) != REG)
8895 op0 = force_reg (op_mode, op0);
8897 if (CONSTANT_P (op1))
8899 if (standard_80387_constant_p (op1))
8900 op1 = force_reg (op_mode, op1);
8902 op1 = validize_mem (force_const_mem (op_mode, op1));
8906 /* Try to rearrange the comparison to make it cheaper. */
8907 if (ix86_fp_comparison_cost (code)
8908 > ix86_fp_comparison_cost (swap_condition (code))
8909 && (GET_CODE (op1) == REG || !no_new_pseudos))
8912 tmp = op0, op0 = op1, op1 = tmp;
8913 code = swap_condition (code);
8914 if (GET_CODE (op0) != REG)
8915 op0 = force_reg (op_mode, op0);
8923 /* Convert comparison codes we use to represent FP comparison to integer
8924 code that will result in proper branch. Return UNKNOWN if no such code
8926 static enum rtx_code
8927 ix86_fp_compare_code_to_integer (enum rtx_code code)
8956 /* Split comparison code CODE into comparisons we can do using branch
8957 instructions. BYPASS_CODE is comparison code for branch that will
8958 branch around FIRST_CODE and SECOND_CODE. If some of branches
8959 is not required, set value to NIL.
8960 We never require more than two branches. */
8962 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8963 enum rtx_code *first_code,
8964 enum rtx_code *second_code)
8970 /* The fcomi comparison sets flags as follows:
8980 case GT: /* GTU - CF=0 & ZF=0 */
8981 case GE: /* GEU - CF=0 */
8982 case ORDERED: /* PF=0 */
8983 case UNORDERED: /* PF=1 */
8984 case UNEQ: /* EQ - ZF=1 */
8985 case UNLT: /* LTU - CF=1 */
8986 case UNLE: /* LEU - CF=1 | ZF=1 */
8987 case LTGT: /* EQ - ZF=0 */
8989 case LT: /* LTU - CF=1 - fails on unordered */
8991 *bypass_code = UNORDERED;
8993 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8995 *bypass_code = UNORDERED;
8997 case EQ: /* EQ - ZF=1 - fails on unordered */
8999 *bypass_code = UNORDERED;
9001 case NE: /* NE - ZF=0 - fails on unordered */
9003 *second_code = UNORDERED;
9005 case UNGE: /* GEU - CF=0 - fails on unordered */
9007 *second_code = UNORDERED;
9009 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
9011 *second_code = UNORDERED;
9016 if (!TARGET_IEEE_FP)
9023 /* Return cost of comparison done fcom + arithmetics operations on AX.
9024 All following functions do use number of instructions as a cost metrics.
9025 In future this should be tweaked to compute bytes for optimize_size and
9026 take into account performance of various instructions on various CPUs. */
9028 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9030 if (!TARGET_IEEE_FP)
9032 /* The cost of code output by ix86_expand_fp_compare. */
9060 /* Return cost of comparison done using fcomi operation.
9061 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9063 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9065 enum rtx_code bypass_code, first_code, second_code;
9066 /* Return arbitrarily high cost when instruction is not supported - this
9067 prevents gcc from using it. */
9070 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9071 return (bypass_code != NIL || second_code != NIL) + 2;
9074 /* Return cost of comparison done using sahf operation.
9075 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9077 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9079 enum rtx_code bypass_code, first_code, second_code;
9080 /* Return arbitrarily high cost when instruction is not preferred - this
9081 avoids gcc from using it. */
9082 if (!TARGET_USE_SAHF && !optimize_size)
9084 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9085 return (bypass_code != NIL || second_code != NIL) + 3;
9088 /* Compute cost of the comparison done using any method.
9089 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9091 ix86_fp_comparison_cost (enum rtx_code code)
9093 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9096 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9097 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9099 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9100 if (min > sahf_cost)
9102 if (min > fcomi_cost)
9107 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9110 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9111 rtx *second_test, rtx *bypass_test)
9113 enum machine_mode fpcmp_mode, intcmp_mode;
9115 int cost = ix86_fp_comparison_cost (code);
9116 enum rtx_code bypass_code, first_code, second_code;
9118 fpcmp_mode = ix86_fp_compare_mode (code);
9119 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9122 *second_test = NULL_RTX;
9124 *bypass_test = NULL_RTX;
9126 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9128 /* Do fcomi/sahf based test when profitable. */
9129 if ((bypass_code == NIL || bypass_test)
9130 && (second_code == NIL || second_test)
9131 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9135 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9136 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9142 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9143 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9145 scratch = gen_reg_rtx (HImode);
9146 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9147 emit_insn (gen_x86_sahf_1 (scratch));
9150 /* The FP codes work out to act like unsigned. */
9151 intcmp_mode = fpcmp_mode;
9153 if (bypass_code != NIL)
9154 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9155 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9157 if (second_code != NIL)
9158 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9159 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9164 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9165 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9166 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9168 scratch = gen_reg_rtx (HImode);
9169 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9171 /* In the unordered case, we have to check C2 for NaN's, which
9172 doesn't happen to work out to anything nice combination-wise.
9173 So do some bit twiddling on the value we've got in AH to come
9174 up with an appropriate set of condition codes. */
9176 intcmp_mode = CCNOmode;
9181 if (code == GT || !TARGET_IEEE_FP)
9183 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9188 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9189 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9190 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9191 intcmp_mode = CCmode;
9197 if (code == LT && TARGET_IEEE_FP)
9199 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9200 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9201 intcmp_mode = CCmode;
9206 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9212 if (code == GE || !TARGET_IEEE_FP)
9214 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9219 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9220 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9227 if (code == LE && TARGET_IEEE_FP)
9229 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9230 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9231 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9232 intcmp_mode = CCmode;
9237 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9243 if (code == EQ && TARGET_IEEE_FP)
9245 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9246 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9247 intcmp_mode = CCmode;
9252 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9259 if (code == NE && TARGET_IEEE_FP)
9261 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9262 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9268 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9274 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9278 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9287 /* Return the test that should be put into the flags user, i.e.
9288 the bcc, scc, or cmov instruction. */
9289 return gen_rtx_fmt_ee (code, VOIDmode,
9290 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9295 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9298 op0 = ix86_compare_op0;
9299 op1 = ix86_compare_op1;
9302 *second_test = NULL_RTX;
9304 *bypass_test = NULL_RTX;
9306 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9307 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9308 second_test, bypass_test);
9310 ret = ix86_expand_int_compare (code, op0, op1);
9315 /* Return true if the CODE will result in nontrivial jump sequence. */
9317 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9319 enum rtx_code bypass_code, first_code, second_code;
9322 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9323 return bypass_code != NIL || second_code != NIL;
9327 ix86_expand_branch (enum rtx_code code, rtx label)
9331 switch (GET_MODE (ix86_compare_op0))
9337 tmp = ix86_expand_compare (code, NULL, NULL);
9338 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9339 gen_rtx_LABEL_REF (VOIDmode, label),
9341 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9350 enum rtx_code bypass_code, first_code, second_code;
9352 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9355 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9357 /* Check whether we will use the natural sequence with one jump. If
9358 so, we can expand jump early. Otherwise delay expansion by
9359 creating compound insn to not confuse optimizers. */
9360 if (bypass_code == NIL && second_code == NIL
9363 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9364 gen_rtx_LABEL_REF (VOIDmode, label),
9369 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9370 ix86_compare_op0, ix86_compare_op1);
9371 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9372 gen_rtx_LABEL_REF (VOIDmode, label),
9374 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9376 use_fcomi = ix86_use_fcomi_compare (code);
9377 vec = rtvec_alloc (3 + !use_fcomi);
9378 RTVEC_ELT (vec, 0) = tmp;
9380 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9382 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9385 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9387 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9395 /* Expand DImode branch into multiple compare+branch. */
9397 rtx lo[2], hi[2], label2;
9398 enum rtx_code code1, code2, code3;
9400 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9402 tmp = ix86_compare_op0;
9403 ix86_compare_op0 = ix86_compare_op1;
9404 ix86_compare_op1 = tmp;
9405 code = swap_condition (code);
9407 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9408 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9410 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9411 avoid two branches. This costs one extra insn, so disable when
9412 optimizing for size. */
9414 if ((code == EQ || code == NE)
9416 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9421 if (hi[1] != const0_rtx)
9422 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9423 NULL_RTX, 0, OPTAB_WIDEN);
9426 if (lo[1] != const0_rtx)
9427 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9428 NULL_RTX, 0, OPTAB_WIDEN);
9430 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9431 NULL_RTX, 0, OPTAB_WIDEN);
9433 ix86_compare_op0 = tmp;
9434 ix86_compare_op1 = const0_rtx;
9435 ix86_expand_branch (code, label);
9439 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9440 op1 is a constant and the low word is zero, then we can just
9441 examine the high word. */
9443 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9446 case LT: case LTU: case GE: case GEU:
9447 ix86_compare_op0 = hi[0];
9448 ix86_compare_op1 = hi[1];
9449 ix86_expand_branch (code, label);
9455 /* Otherwise, we need two or three jumps. */
9457 label2 = gen_label_rtx ();
9460 code2 = swap_condition (code);
9461 code3 = unsigned_condition (code);
9465 case LT: case GT: case LTU: case GTU:
9468 case LE: code1 = LT; code2 = GT; break;
9469 case GE: code1 = GT; code2 = LT; break;
9470 case LEU: code1 = LTU; code2 = GTU; break;
9471 case GEU: code1 = GTU; code2 = LTU; break;
9473 case EQ: code1 = NIL; code2 = NE; break;
9474 case NE: code2 = NIL; break;
9482 * if (hi(a) < hi(b)) goto true;
9483 * if (hi(a) > hi(b)) goto false;
9484 * if (lo(a) < lo(b)) goto true;
9488 ix86_compare_op0 = hi[0];
9489 ix86_compare_op1 = hi[1];
9492 ix86_expand_branch (code1, label);
9494 ix86_expand_branch (code2, label2);
9496 ix86_compare_op0 = lo[0];
9497 ix86_compare_op1 = lo[1];
9498 ix86_expand_branch (code3, label);
9501 emit_label (label2);
9510 /* Split branch based on floating point condition. */
9512 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9513 rtx target1, rtx target2, rtx tmp)
9516 rtx label = NULL_RTX;
9518 int bypass_probability = -1, second_probability = -1, probability = -1;
9521 if (target2 != pc_rtx)
9524 code = reverse_condition_maybe_unordered (code);
9529 condition = ix86_expand_fp_compare (code, op1, op2,
9530 tmp, &second, &bypass);
9532 if (split_branch_probability >= 0)
9534 /* Distribute the probabilities across the jumps.
9535 Assume the BYPASS and SECOND to be always test
9537 probability = split_branch_probability;
9539 /* Value of 1 is low enough to make no need for probability
9540 to be updated. Later we may run some experiments and see
9541 if unordered values are more frequent in practice. */
9543 bypass_probability = 1;
9545 second_probability = 1;
9547 if (bypass != NULL_RTX)
9549 label = gen_label_rtx ();
9550 i = emit_jump_insn (gen_rtx_SET
9552 gen_rtx_IF_THEN_ELSE (VOIDmode,
9554 gen_rtx_LABEL_REF (VOIDmode,
9557 if (bypass_probability >= 0)
9559 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9560 GEN_INT (bypass_probability),
9563 i = emit_jump_insn (gen_rtx_SET
9565 gen_rtx_IF_THEN_ELSE (VOIDmode,
9566 condition, target1, target2)));
9567 if (probability >= 0)
9569 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9570 GEN_INT (probability),
9572 if (second != NULL_RTX)
9574 i = emit_jump_insn (gen_rtx_SET
9576 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9578 if (second_probability >= 0)
9580 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9581 GEN_INT (second_probability),
9584 if (label != NULL_RTX)
9589 ix86_expand_setcc (enum rtx_code code, rtx dest)
9591 rtx ret, tmp, tmpreg, equiv;
9592 rtx second_test, bypass_test;
9594 if (GET_MODE (ix86_compare_op0) == DImode
9596 return 0; /* FAIL */
9598 if (GET_MODE (dest) != QImode)
9601 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9602 PUT_MODE (ret, QImode);
9607 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9608 if (bypass_test || second_test)
9610 rtx test = second_test;
9612 rtx tmp2 = gen_reg_rtx (QImode);
9619 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9621 PUT_MODE (test, QImode);
9622 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9625 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9627 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9630 /* Attach a REG_EQUAL note describing the comparison result. */
9631 equiv = simplify_gen_relational (code, QImode,
9632 GET_MODE (ix86_compare_op0),
9633 ix86_compare_op0, ix86_compare_op1);
9634 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9636 return 1; /* DONE */
9639 /* Expand comparison setting or clearing carry flag. Return true when
9640 successful and set pop for the operation. */
9642 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9644 enum machine_mode mode =
9645 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9647 /* Do not handle DImode compares that go trought special path. Also we can't
9648 deal with FP compares yet. This is possible to add. */
9649 if ((mode == DImode && !TARGET_64BIT))
9651 if (FLOAT_MODE_P (mode))
9653 rtx second_test = NULL, bypass_test = NULL;
9654 rtx compare_op, compare_seq;
9656 /* Shortcut: following common codes never translate into carry flag compares. */
9657 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9658 || code == ORDERED || code == UNORDERED)
9661 /* These comparisons require zero flag; swap operands so they won't. */
9662 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9668 code = swap_condition (code);
9671 /* Try to expand the comparison and verify that we end up with carry flag
9672 based comparison. This is fails to be true only when we decide to expand
9673 comparison using arithmetic that is not too common scenario. */
9675 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9676 &second_test, &bypass_test);
9677 compare_seq = get_insns ();
9680 if (second_test || bypass_test)
9682 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9683 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9684 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9686 code = GET_CODE (compare_op);
9687 if (code != LTU && code != GEU)
9689 emit_insn (compare_seq);
9693 if (!INTEGRAL_MODE_P (mode))
9701 /* Convert a==0 into (unsigned)a<1. */
9704 if (op1 != const0_rtx)
9707 code = (code == EQ ? LTU : GEU);
9710 /* Convert a>b into b<a or a>=b-1. */
9713 if (GET_CODE (op1) == CONST_INT)
9715 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9716 /* Bail out on overflow. We still can swap operands but that
9717 would force loading of the constant into register. */
9718 if (op1 == const0_rtx
9719 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9721 code = (code == GTU ? GEU : LTU);
9728 code = (code == GTU ? LTU : GEU);
9732 /* Convert a>=0 into (unsigned)a<0x80000000. */
9735 if (mode == DImode || op1 != const0_rtx)
9737 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9738 code = (code == LT ? GEU : LTU);
9742 if (mode == DImode || op1 != constm1_rtx)
9744 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9745 code = (code == LE ? GEU : LTU);
9751 /* Swapping operands may cause constant to appear as first operand. */
9752 if (!nonimmediate_operand (op0, VOIDmode))
9756 op0 = force_reg (mode, op0);
9758 ix86_compare_op0 = op0;
9759 ix86_compare_op1 = op1;
9760 *pop = ix86_expand_compare (code, NULL, NULL);
9761 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9767 ix86_expand_int_movcc (rtx operands[])
9769 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9770 rtx compare_seq, compare_op;
9771 rtx second_test, bypass_test;
9772 enum machine_mode mode = GET_MODE (operands[0]);
9773 bool sign_bit_compare_p = false;;
9776 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9777 compare_seq = get_insns ();
9780 compare_code = GET_CODE (compare_op);
9782 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9783 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9784 sign_bit_compare_p = true;
9786 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9787 HImode insns, we'd be swallowed in word prefix ops. */
9789 if ((mode != HImode || TARGET_FAST_PREFIX)
9790 && (mode != DImode || TARGET_64BIT)
9791 && GET_CODE (operands[2]) == CONST_INT
9792 && GET_CODE (operands[3]) == CONST_INT)
9794 rtx out = operands[0];
9795 HOST_WIDE_INT ct = INTVAL (operands[2]);
9796 HOST_WIDE_INT cf = INTVAL (operands[3]);
9800 /* Sign bit compares are better done using shifts than we do by using
9802 if (sign_bit_compare_p
9803 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9804 ix86_compare_op1, &compare_op))
9806 /* Detect overlap between destination and compare sources. */
9809 if (!sign_bit_compare_p)
9813 compare_code = GET_CODE (compare_op);
9815 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9816 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9819 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9822 /* To simplify rest of code, restrict to the GEU case. */
9823 if (compare_code == LTU)
9825 HOST_WIDE_INT tmp = ct;
9828 compare_code = reverse_condition (compare_code);
9829 code = reverse_condition (code);
9834 PUT_CODE (compare_op,
9835 reverse_condition_maybe_unordered
9836 (GET_CODE (compare_op)));
9838 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9842 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9843 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9844 tmp = gen_reg_rtx (mode);
9847 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9849 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9853 if (code == GT || code == GE)
9854 code = reverse_condition (code);
9857 HOST_WIDE_INT tmp = ct;
9862 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9863 ix86_compare_op1, VOIDmode, 0, -1);
9876 tmp = expand_simple_binop (mode, PLUS,
9878 copy_rtx (tmp), 1, OPTAB_DIRECT);
9889 tmp = expand_simple_binop (mode, IOR,
9891 copy_rtx (tmp), 1, OPTAB_DIRECT);
9893 else if (diff == -1 && ct)
9903 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9905 tmp = expand_simple_binop (mode, PLUS,
9906 copy_rtx (tmp), GEN_INT (cf),
9907 copy_rtx (tmp), 1, OPTAB_DIRECT);
9915 * andl cf - ct, dest
9925 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9928 tmp = expand_simple_binop (mode, AND,
9930 gen_int_mode (cf - ct, mode),
9931 copy_rtx (tmp), 1, OPTAB_DIRECT);
9933 tmp = expand_simple_binop (mode, PLUS,
9934 copy_rtx (tmp), GEN_INT (ct),
9935 copy_rtx (tmp), 1, OPTAB_DIRECT);
9938 if (!rtx_equal_p (tmp, out))
9939 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9941 return 1; /* DONE */
9947 tmp = ct, ct = cf, cf = tmp;
9949 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9951 /* We may be reversing unordered compare to normal compare, that
9952 is not valid in general (we may convert non-trapping condition
9953 to trapping one), however on i386 we currently emit all
9954 comparisons unordered. */
9955 compare_code = reverse_condition_maybe_unordered (compare_code);
9956 code = reverse_condition_maybe_unordered (code);
9960 compare_code = reverse_condition (compare_code);
9961 code = reverse_condition (code);
9966 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9967 && GET_CODE (ix86_compare_op1) == CONST_INT)
9969 if (ix86_compare_op1 == const0_rtx
9970 && (code == LT || code == GE))
9971 compare_code = code;
9972 else if (ix86_compare_op1 == constm1_rtx)
9976 else if (code == GT)
9981 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9982 if (compare_code != NIL
9983 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9984 && (cf == -1 || ct == -1))
9986 /* If lea code below could be used, only optimize
9987 if it results in a 2 insn sequence. */
9989 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9990 || diff == 3 || diff == 5 || diff == 9)
9991 || (compare_code == LT && ct == -1)
9992 || (compare_code == GE && cf == -1))
9995 * notl op1 (if necessary)
10003 code = reverse_condition (code);
10006 out = emit_store_flag (out, code, ix86_compare_op0,
10007 ix86_compare_op1, VOIDmode, 0, -1);
10009 out = expand_simple_binop (mode, IOR,
10011 out, 1, OPTAB_DIRECT);
10012 if (out != operands[0])
10013 emit_move_insn (operands[0], out);
10015 return 1; /* DONE */
10020 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
10021 || diff == 3 || diff == 5 || diff == 9)
10022 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
10023 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
10029 * lea cf(dest*(ct-cf)),dest
10033 * This also catches the degenerate setcc-only case.
10039 out = emit_store_flag (out, code, ix86_compare_op0,
10040 ix86_compare_op1, VOIDmode, 0, 1);
10043 /* On x86_64 the lea instruction operates on Pmode, so we need
10044 to get arithmetics done in proper mode to match. */
10046 tmp = copy_rtx (out);
10050 out1 = copy_rtx (out);
10051 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10055 tmp = gen_rtx_PLUS (mode, tmp, out1);
10061 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10064 if (!rtx_equal_p (tmp, out))
10067 out = force_operand (tmp, copy_rtx (out));
10069 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10071 if (!rtx_equal_p (out, operands[0]))
10072 emit_move_insn (operands[0], copy_rtx (out));
10074 return 1; /* DONE */
10078 * General case: Jumpful:
10079 * xorl dest,dest cmpl op1, op2
10080 * cmpl op1, op2 movl ct, dest
10081 * setcc dest jcc 1f
10082 * decl dest movl cf, dest
10083 * andl (cf-ct),dest 1:
10086 * Size 20. Size 14.
10088 * This is reasonably steep, but branch mispredict costs are
10089 * high on modern cpus, so consider failing only if optimizing
10093 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10094 && BRANCH_COST >= 2)
10100 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10101 /* We may be reversing unordered compare to normal compare,
10102 that is not valid in general (we may convert non-trapping
10103 condition to trapping one), however on i386 we currently
10104 emit all comparisons unordered. */
10105 code = reverse_condition_maybe_unordered (code);
10108 code = reverse_condition (code);
10109 if (compare_code != NIL)
10110 compare_code = reverse_condition (compare_code);
10114 if (compare_code != NIL)
10116 /* notl op1 (if needed)
10121 For x < 0 (resp. x <= -1) there will be no notl,
10122 so if possible swap the constants to get rid of the
10124 True/false will be -1/0 while code below (store flag
10125 followed by decrement) is 0/-1, so the constants need
10126 to be exchanged once more. */
10128 if (compare_code == GE || !cf)
10130 code = reverse_condition (code);
10135 HOST_WIDE_INT tmp = cf;
10140 out = emit_store_flag (out, code, ix86_compare_op0,
10141 ix86_compare_op1, VOIDmode, 0, -1);
10145 out = emit_store_flag (out, code, ix86_compare_op0,
10146 ix86_compare_op1, VOIDmode, 0, 1);
10148 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10149 copy_rtx (out), 1, OPTAB_DIRECT);
10152 out = expand_simple_binop (mode, AND, copy_rtx (out),
10153 gen_int_mode (cf - ct, mode),
10154 copy_rtx (out), 1, OPTAB_DIRECT);
10156 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10157 copy_rtx (out), 1, OPTAB_DIRECT);
10158 if (!rtx_equal_p (out, operands[0]))
10159 emit_move_insn (operands[0], copy_rtx (out));
10161 return 1; /* DONE */
10165 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10167 /* Try a few things more with specific constants and a variable. */
10170 rtx var, orig_out, out, tmp;
10172 if (BRANCH_COST <= 2)
10173 return 0; /* FAIL */
10175 /* If one of the two operands is an interesting constant, load a
10176 constant with the above and mask it in with a logical operation. */
10178 if (GET_CODE (operands[2]) == CONST_INT)
10181 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10182 operands[3] = constm1_rtx, op = and_optab;
10183 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10184 operands[3] = const0_rtx, op = ior_optab;
10186 return 0; /* FAIL */
10188 else if (GET_CODE (operands[3]) == CONST_INT)
10191 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10192 operands[2] = constm1_rtx, op = and_optab;
10193 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
10194 operands[2] = const0_rtx, op = ior_optab;
10196 return 0; /* FAIL */
10199 return 0; /* FAIL */
10201 orig_out = operands[0];
10202 tmp = gen_reg_rtx (mode);
10205 /* Recurse to get the constant loaded. */
10206 if (ix86_expand_int_movcc (operands) == 0)
10207 return 0; /* FAIL */
10209 /* Mask in the interesting variable. */
10210 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10212 if (!rtx_equal_p (out, orig_out))
10213 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10215 return 1; /* DONE */
10219 * For comparison with above,
10229 if (! nonimmediate_operand (operands[2], mode))
10230 operands[2] = force_reg (mode, operands[2]);
10231 if (! nonimmediate_operand (operands[3], mode))
10232 operands[3] = force_reg (mode, operands[3]);
10234 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10236 rtx tmp = gen_reg_rtx (mode);
10237 emit_move_insn (tmp, operands[3]);
10240 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10242 rtx tmp = gen_reg_rtx (mode);
10243 emit_move_insn (tmp, operands[2]);
10247 if (! register_operand (operands[2], VOIDmode)
10249 || ! register_operand (operands[3], VOIDmode)))
10250 operands[2] = force_reg (mode, operands[2]);
10253 && ! register_operand (operands[3], VOIDmode))
10254 operands[3] = force_reg (mode, operands[3]);
10256 emit_insn (compare_seq);
10257 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10258 gen_rtx_IF_THEN_ELSE (mode,
10259 compare_op, operands[2],
10262 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10263 gen_rtx_IF_THEN_ELSE (mode,
10265 copy_rtx (operands[3]),
10266 copy_rtx (operands[0]))));
10268 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10269 gen_rtx_IF_THEN_ELSE (mode,
10271 copy_rtx (operands[2]),
10272 copy_rtx (operands[0]))));
10274 return 1; /* DONE */
10278 ix86_expand_fp_movcc (rtx operands[])
10280 enum rtx_code code;
10282 rtx compare_op, second_test, bypass_test;
10284 /* For SF/DFmode conditional moves based on comparisons
10285 in same mode, we may want to use SSE min/max instructions. */
10286 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10287 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10288 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10289 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
10290 && (!TARGET_IEEE_FP
10291 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10292 /* We may be called from the post-reload splitter. */
10293 && (!REG_P (operands[0])
10294 || SSE_REG_P (operands[0])
10295 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10297 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10298 code = GET_CODE (operands[1]);
10300 /* See if we have (cross) match between comparison operands and
10301 conditional move operands. */
10302 if (rtx_equal_p (operands[2], op1))
10307 code = reverse_condition_maybe_unordered (code);
10309 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10311 /* Check for min operation. */
10312 if (code == LT || code == UNLE)
10320 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10321 if (memory_operand (op0, VOIDmode))
10322 op0 = force_reg (GET_MODE (operands[0]), op0);
10323 if (GET_MODE (operands[0]) == SFmode)
10324 emit_insn (gen_minsf3 (operands[0], op0, op1));
10326 emit_insn (gen_mindf3 (operands[0], op0, op1));
10329 /* Check for max operation. */
10330 if (code == GT || code == UNGE)
10338 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10339 if (memory_operand (op0, VOIDmode))
10340 op0 = force_reg (GET_MODE (operands[0]), op0);
10341 if (GET_MODE (operands[0]) == SFmode)
10342 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10344 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10348 /* Manage condition to be sse_comparison_operator. In case we are
10349 in non-ieee mode, try to canonicalize the destination operand
10350 to be first in the comparison - this helps reload to avoid extra
10352 if (!sse_comparison_operator (operands[1], VOIDmode)
10353 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10355 rtx tmp = ix86_compare_op0;
10356 ix86_compare_op0 = ix86_compare_op1;
10357 ix86_compare_op1 = tmp;
10358 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10359 VOIDmode, ix86_compare_op0,
10362 /* Similarly try to manage result to be first operand of conditional
10363 move. We also don't support the NE comparison on SSE, so try to
10365 if ((rtx_equal_p (operands[0], operands[3])
10366 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10367 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10369 rtx tmp = operands[2];
10370 operands[2] = operands[3];
10372 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10373 (GET_CODE (operands[1])),
10374 VOIDmode, ix86_compare_op0,
10377 if (GET_MODE (operands[0]) == SFmode)
10378 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10379 operands[2], operands[3],
10380 ix86_compare_op0, ix86_compare_op1));
10382 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10383 operands[2], operands[3],
10384 ix86_compare_op0, ix86_compare_op1));
10388 /* The floating point conditional move instructions don't directly
10389 support conditions resulting from a signed integer comparison. */
10391 code = GET_CODE (operands[1]);
10392 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10394 /* The floating point conditional move instructions don't directly
10395 support signed integer comparisons. */
10397 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10399 if (second_test != NULL || bypass_test != NULL)
10401 tmp = gen_reg_rtx (QImode);
10402 ix86_expand_setcc (code, tmp);
10404 ix86_compare_op0 = tmp;
10405 ix86_compare_op1 = const0_rtx;
10406 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10408 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10410 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10411 emit_move_insn (tmp, operands[3]);
10414 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10416 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10417 emit_move_insn (tmp, operands[2]);
10421 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10422 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10427 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10428 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10433 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10434 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10442 /* Expand conditional increment or decrement using adb/sbb instructions.
10443 The default case using setcc followed by the conditional move can be
10444 done by generic code. */
10446 ix86_expand_int_addcc (rtx operands[])
10448 enum rtx_code code = GET_CODE (operands[1]);
10450 rtx val = const0_rtx;
10451 bool fpcmp = false;
10452 enum machine_mode mode = GET_MODE (operands[0]);
10454 if (operands[3] != const1_rtx
10455 && operands[3] != constm1_rtx)
10457 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10458 ix86_compare_op1, &compare_op))
10460 code = GET_CODE (compare_op);
10462 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10463 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10466 code = ix86_fp_compare_code_to_integer (code);
10473 PUT_CODE (compare_op,
10474 reverse_condition_maybe_unordered
10475 (GET_CODE (compare_op)));
10477 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10479 PUT_MODE (compare_op, mode);
10481 /* Construct either adc or sbb insn. */
10482 if ((code == LTU) == (operands[3] == constm1_rtx))
10484 switch (GET_MODE (operands[0]))
10487 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10490 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10493 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10496 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10504 switch (GET_MODE (operands[0]))
10507 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10510 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10513 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10516 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10522 return 1; /* DONE */
10526 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10527 works for floating pointer parameters and nonoffsetable memories.
10528 For pushes, it returns just stack offsets; the values will be saved
10529 in the right order. Maximally three parts are generated. */
10532 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10537 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10539 size = (GET_MODE_SIZE (mode) + 4) / 8;
10541 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10543 if (size < 2 || size > 3)
10546 /* Optimize constant pool reference to immediates. This is used by fp
10547 moves, that force all constants to memory to allow combining. */
10548 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10550 rtx tmp = maybe_get_pool_constant (operand);
10555 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10557 /* The only non-offsetable memories we handle are pushes. */
10558 if (! push_operand (operand, VOIDmode))
10561 operand = copy_rtx (operand);
10562 PUT_MODE (operand, Pmode);
10563 parts[0] = parts[1] = parts[2] = operand;
10565 else if (!TARGET_64BIT)
10567 if (mode == DImode)
10568 split_di (&operand, 1, &parts[0], &parts[1]);
10571 if (REG_P (operand))
10573 if (!reload_completed)
10575 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10576 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10578 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10580 else if (offsettable_memref_p (operand))
10582 operand = adjust_address (operand, SImode, 0);
10583 parts[0] = operand;
10584 parts[1] = adjust_address (operand, SImode, 4);
10586 parts[2] = adjust_address (operand, SImode, 8);
10588 else if (GET_CODE (operand) == CONST_DOUBLE)
10593 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10597 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10598 parts[2] = gen_int_mode (l[2], SImode);
10601 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10606 parts[1] = gen_int_mode (l[1], SImode);
10607 parts[0] = gen_int_mode (l[0], SImode);
10615 if (mode == TImode)
10616 split_ti (&operand, 1, &parts[0], &parts[1]);
10617 if (mode == XFmode || mode == TFmode)
10619 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10620 if (REG_P (operand))
10622 if (!reload_completed)
10624 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10625 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10627 else if (offsettable_memref_p (operand))
10629 operand = adjust_address (operand, DImode, 0);
10630 parts[0] = operand;
10631 parts[1] = adjust_address (operand, upper_mode, 8);
10633 else if (GET_CODE (operand) == CONST_DOUBLE)
10638 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10639 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10640 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10641 if (HOST_BITS_PER_WIDE_INT >= 64)
10644 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10645 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10648 parts[0] = immed_double_const (l[0], l[1], DImode);
10649 if (upper_mode == SImode)
10650 parts[1] = gen_int_mode (l[2], SImode);
10651 else if (HOST_BITS_PER_WIDE_INT >= 64)
10654 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10655 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10658 parts[1] = immed_double_const (l[2], l[3], DImode);
10668 /* Emit insns to perform a move or push of DI, DF, and XF values.
10669 Return false when normal moves are needed; true when all required
10670 insns have been emitted. Operands 2-4 contain the input values
10671 int the correct order; operands 5-7 contain the output values. */
10674 ix86_split_long_move (rtx operands[])
10679 int collisions = 0;
10680 enum machine_mode mode = GET_MODE (operands[0]);
10682 /* The DFmode expanders may ask us to move double.
10683 For 64bit target this is single move. By hiding the fact
10684 here we simplify i386.md splitters. */
10685 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10687 /* Optimize constant pool reference to immediates. This is used by
10688 fp moves, that force all constants to memory to allow combining. */
10690 if (GET_CODE (operands[1]) == MEM
10691 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10692 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10693 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10694 if (push_operand (operands[0], VOIDmode))
10696 operands[0] = copy_rtx (operands[0]);
10697 PUT_MODE (operands[0], Pmode);
10700 operands[0] = gen_lowpart (DImode, operands[0]);
10701 operands[1] = gen_lowpart (DImode, operands[1]);
10702 emit_move_insn (operands[0], operands[1]);
10706 /* The only non-offsettable memory we handle is push. */
10707 if (push_operand (operands[0], VOIDmode))
10709 else if (GET_CODE (operands[0]) == MEM
10710 && ! offsettable_memref_p (operands[0]))
10713 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10714 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10716 /* When emitting push, take care for source operands on the stack. */
10717 if (push && GET_CODE (operands[1]) == MEM
10718 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10721 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10722 XEXP (part[1][2], 0));
10723 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10724 XEXP (part[1][1], 0));
10727 /* We need to do copy in the right order in case an address register
10728 of the source overlaps the destination. */
10729 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10731 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10733 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10736 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10739 /* Collision in the middle part can be handled by reordering. */
10740 if (collisions == 1 && nparts == 3
10741 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10744 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10745 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10748 /* If there are more collisions, we can't handle it by reordering.
10749 Do an lea to the last part and use only one colliding move. */
10750 else if (collisions > 1)
10756 base = part[0][nparts - 1];
10758 /* Handle the case when the last part isn't valid for lea.
10759 Happens in 64-bit mode storing the 12-byte XFmode. */
10760 if (GET_MODE (base) != Pmode)
10761 base = gen_rtx_REG (Pmode, REGNO (base));
10763 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10764 part[1][0] = replace_equiv_address (part[1][0], base);
10765 part[1][1] = replace_equiv_address (part[1][1],
10766 plus_constant (base, UNITS_PER_WORD));
10768 part[1][2] = replace_equiv_address (part[1][2],
10769 plus_constant (base, 8));
10779 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10780 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10781 emit_move_insn (part[0][2], part[1][2]);
10786 /* In 64bit mode we don't have 32bit push available. In case this is
10787 register, it is OK - we will just use larger counterpart. We also
10788 retype memory - these comes from attempt to avoid REX prefix on
10789 moving of second half of TFmode value. */
10790 if (GET_MODE (part[1][1]) == SImode)
10792 if (GET_CODE (part[1][1]) == MEM)
10793 part[1][1] = adjust_address (part[1][1], DImode, 0);
10794 else if (REG_P (part[1][1]))
10795 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10798 if (GET_MODE (part[1][0]) == SImode)
10799 part[1][0] = part[1][1];
10802 emit_move_insn (part[0][1], part[1][1]);
10803 emit_move_insn (part[0][0], part[1][0]);
10807 /* Choose correct order to not overwrite the source before it is copied. */
10808 if ((REG_P (part[0][0])
10809 && REG_P (part[1][1])
10810 && (REGNO (part[0][0]) == REGNO (part[1][1])
10812 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10814 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10818 operands[2] = part[0][2];
10819 operands[3] = part[0][1];
10820 operands[4] = part[0][0];
10821 operands[5] = part[1][2];
10822 operands[6] = part[1][1];
10823 operands[7] = part[1][0];
10827 operands[2] = part[0][1];
10828 operands[3] = part[0][0];
10829 operands[5] = part[1][1];
10830 operands[6] = part[1][0];
10837 operands[2] = part[0][0];
10838 operands[3] = part[0][1];
10839 operands[4] = part[0][2];
10840 operands[5] = part[1][0];
10841 operands[6] = part[1][1];
10842 operands[7] = part[1][2];
10846 operands[2] = part[0][0];
10847 operands[3] = part[0][1];
10848 operands[5] = part[1][0];
10849 operands[6] = part[1][1];
10852 emit_move_insn (operands[2], operands[5]);
10853 emit_move_insn (operands[3], operands[6]);
10855 emit_move_insn (operands[4], operands[7]);
10861 ix86_split_ashldi (rtx *operands, rtx scratch)
10863 rtx low[2], high[2];
10866 if (GET_CODE (operands[2]) == CONST_INT)
10868 split_di (operands, 2, low, high);
10869 count = INTVAL (operands[2]) & 63;
10873 emit_move_insn (high[0], low[1]);
10874 emit_move_insn (low[0], const0_rtx);
10877 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10881 if (!rtx_equal_p (operands[0], operands[1]))
10882 emit_move_insn (operands[0], operands[1]);
10883 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10884 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10889 if (!rtx_equal_p (operands[0], operands[1]))
10890 emit_move_insn (operands[0], operands[1]);
10892 split_di (operands, 1, low, high);
10894 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10895 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10897 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10899 if (! no_new_pseudos)
10900 scratch = force_reg (SImode, const0_rtx);
10902 emit_move_insn (scratch, const0_rtx);
10904 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10908 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10913 ix86_split_ashrdi (rtx *operands, rtx scratch)
10915 rtx low[2], high[2];
10918 if (GET_CODE (operands[2]) == CONST_INT)
10920 split_di (operands, 2, low, high);
10921 count = INTVAL (operands[2]) & 63;
10925 emit_move_insn (high[0], high[1]);
10926 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10927 emit_move_insn (low[0], high[0]);
10930 else if (count >= 32)
10932 emit_move_insn (low[0], high[1]);
10934 if (! reload_completed)
10935 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10938 emit_move_insn (high[0], low[0]);
10939 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10943 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10947 if (!rtx_equal_p (operands[0], operands[1]))
10948 emit_move_insn (operands[0], operands[1]);
10949 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10950 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10955 if (!rtx_equal_p (operands[0], operands[1]))
10956 emit_move_insn (operands[0], operands[1]);
10958 split_di (operands, 1, low, high);
10960 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10961 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10963 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10965 if (! no_new_pseudos)
10966 scratch = gen_reg_rtx (SImode);
10967 emit_move_insn (scratch, high[0]);
10968 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10969 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10973 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10978 ix86_split_lshrdi (rtx *operands, rtx scratch)
10980 rtx low[2], high[2];
10983 if (GET_CODE (operands[2]) == CONST_INT)
10985 split_di (operands, 2, low, high);
10986 count = INTVAL (operands[2]) & 63;
10990 emit_move_insn (low[0], high[1]);
10991 emit_move_insn (high[0], const0_rtx);
10994 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10998 if (!rtx_equal_p (operands[0], operands[1]))
10999 emit_move_insn (operands[0], operands[1]);
11000 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11001 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
11006 if (!rtx_equal_p (operands[0], operands[1]))
11007 emit_move_insn (operands[0], operands[1]);
11009 split_di (operands, 1, low, high);
11011 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11012 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
11014 /* Heh. By reversing the arguments, we can reuse this pattern. */
11015 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
11017 if (! no_new_pseudos)
11018 scratch = force_reg (SImode, const0_rtx);
11020 emit_move_insn (scratch, const0_rtx);
11022 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11026 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
11030 /* Helper function for the string operations below. Dest VARIABLE whether
11031 it is aligned to VALUE bytes. If true, jump to the label. */
11033 ix86_expand_aligntest (rtx variable, int value)
11035 rtx label = gen_label_rtx ();
11036 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
11037 if (GET_MODE (variable) == DImode)
11038 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
11040 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11041 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
11046 /* Adjust COUNTER by the VALUE. */
11048 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
11050 if (GET_MODE (countreg) == DImode)
11051 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11053 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11056 /* Zero extend possibly SImode EXP to Pmode register. */
11058 ix86_zero_extend_to_Pmode (rtx exp)
11061 if (GET_MODE (exp) == VOIDmode)
11062 return force_reg (Pmode, exp);
11063 if (GET_MODE (exp) == Pmode)
11064 return copy_to_mode_reg (Pmode, exp);
11065 r = gen_reg_rtx (Pmode);
11066 emit_insn (gen_zero_extendsidi2 (r, exp));
11070 /* Expand string move (memcpy) operation. Use i386 string operations when
11071 profitable. expand_clrstr contains similar code. */
11073 ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11075 rtx srcreg, destreg, countreg, srcexp, destexp;
11076 enum machine_mode counter_mode;
11077 HOST_WIDE_INT align = 0;
11078 unsigned HOST_WIDE_INT count = 0;
11080 if (GET_CODE (align_exp) == CONST_INT)
11081 align = INTVAL (align_exp);
11083 /* Can't use any of this if the user has appropriated esi or edi. */
11084 if (global_regs[4] || global_regs[5])
11087 /* This simple hack avoids all inlining code and simplifies code below. */
11088 if (!TARGET_ALIGN_STRINGOPS)
11091 if (GET_CODE (count_exp) == CONST_INT)
11093 count = INTVAL (count_exp);
11094 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11098 /* Figure out proper mode for counter. For 32bits it is always SImode,
11099 for 64bits use SImode when possible, otherwise DImode.
11100 Set count to number of bytes copied when known at compile time. */
11101 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11102 || x86_64_zero_extended_value (count_exp))
11103 counter_mode = SImode;
11105 counter_mode = DImode;
11107 if (counter_mode != SImode && counter_mode != DImode)
11110 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11111 if (destreg != XEXP (dst, 0))
11112 dst = replace_equiv_address_nv (dst, destreg);
11113 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11114 if (srcreg != XEXP (src, 0))
11115 src = replace_equiv_address_nv (src, srcreg);
11117 /* When optimizing for size emit simple rep ; movsb instruction for
11118 counts not divisible by 4. */
11120 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11122 emit_insn (gen_cld ());
11123 countreg = ix86_zero_extend_to_Pmode (count_exp);
11124 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11125 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11126 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11130 /* For constant aligned (or small unaligned) copies use rep movsl
11131 followed by code copying the rest. For PentiumPro ensure 8 byte
11132 alignment to allow rep movsl acceleration. */
11134 else if (count != 0
11136 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11137 || optimize_size || count < (unsigned int) 64))
11139 unsigned HOST_WIDE_INT offset = 0;
11140 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11141 rtx srcmem, dstmem;
11143 emit_insn (gen_cld ());
11144 if (count & ~(size - 1))
11146 countreg = copy_to_mode_reg (counter_mode,
11147 GEN_INT ((count >> (size == 4 ? 2 : 3))
11148 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11149 countreg = ix86_zero_extend_to_Pmode (countreg);
11151 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11152 GEN_INT (size == 4 ? 2 : 3));
11153 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11154 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11156 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11157 countreg, destexp, srcexp));
11158 offset = count & ~(size - 1);
11160 if (size == 8 && (count & 0x04))
11162 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11164 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11166 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11171 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11173 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11175 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11180 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11182 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11184 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11187 /* The generic code based on the glibc implementation:
11188 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11189 allowing accelerated copying there)
11190 - copy the data using rep movsl
11191 - copy the rest. */
11196 rtx srcmem, dstmem;
11197 int desired_alignment = (TARGET_PENTIUMPRO
11198 && (count == 0 || count >= (unsigned int) 260)
11199 ? 8 : UNITS_PER_WORD);
11200 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11201 dst = change_address (dst, BLKmode, destreg);
11202 src = change_address (src, BLKmode, srcreg);
11204 /* In case we don't know anything about the alignment, default to
11205 library version, since it is usually equally fast and result in
11208 Also emit call when we know that the count is large and call overhead
11209 will not be important. */
11210 if (!TARGET_INLINE_ALL_STRINGOPS
11211 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11214 if (TARGET_SINGLE_STRINGOP)
11215 emit_insn (gen_cld ());
11217 countreg2 = gen_reg_rtx (Pmode);
11218 countreg = copy_to_mode_reg (counter_mode, count_exp);
11220 /* We don't use loops to align destination and to copy parts smaller
11221 than 4 bytes, because gcc is able to optimize such code better (in
11222 the case the destination or the count really is aligned, gcc is often
11223 able to predict the branches) and also it is friendlier to the
11224 hardware branch prediction.
11226 Using loops is beneficial for generic case, because we can
11227 handle small counts using the loops. Many CPUs (such as Athlon)
11228 have large REP prefix setup costs.
11230 This is quite costly. Maybe we can revisit this decision later or
11231 add some customizability to this code. */
11233 if (count == 0 && align < desired_alignment)
11235 label = gen_label_rtx ();
11236 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11237 LEU, 0, counter_mode, 1, label);
11241 rtx label = ix86_expand_aligntest (destreg, 1);
11242 srcmem = change_address (src, QImode, srcreg);
11243 dstmem = change_address (dst, QImode, destreg);
11244 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11245 ix86_adjust_counter (countreg, 1);
11246 emit_label (label);
11247 LABEL_NUSES (label) = 1;
11251 rtx label = ix86_expand_aligntest (destreg, 2);
11252 srcmem = change_address (src, HImode, srcreg);
11253 dstmem = change_address (dst, HImode, destreg);
11254 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11255 ix86_adjust_counter (countreg, 2);
11256 emit_label (label);
11257 LABEL_NUSES (label) = 1;
11259 if (align <= 4 && desired_alignment > 4)
11261 rtx label = ix86_expand_aligntest (destreg, 4);
11262 srcmem = change_address (src, SImode, srcreg);
11263 dstmem = change_address (dst, SImode, destreg);
11264 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11265 ix86_adjust_counter (countreg, 4);
11266 emit_label (label);
11267 LABEL_NUSES (label) = 1;
11270 if (label && desired_alignment > 4 && !TARGET_64BIT)
11272 emit_label (label);
11273 LABEL_NUSES (label) = 1;
11276 if (!TARGET_SINGLE_STRINGOP)
11277 emit_insn (gen_cld ());
11280 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11282 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11286 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11287 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11289 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11290 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11291 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11292 countreg2, destexp, srcexp));
11296 emit_label (label);
11297 LABEL_NUSES (label) = 1;
11299 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11301 srcmem = change_address (src, SImode, srcreg);
11302 dstmem = change_address (dst, SImode, destreg);
11303 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11305 if ((align <= 4 || count == 0) && TARGET_64BIT)
11307 rtx label = ix86_expand_aligntest (countreg, 4);
11308 srcmem = change_address (src, SImode, srcreg);
11309 dstmem = change_address (dst, SImode, destreg);
11310 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11311 emit_label (label);
11312 LABEL_NUSES (label) = 1;
11314 if (align > 2 && count != 0 && (count & 2))
11316 srcmem = change_address (src, HImode, srcreg);
11317 dstmem = change_address (dst, HImode, destreg);
11318 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11320 if (align <= 2 || count == 0)
11322 rtx label = ix86_expand_aligntest (countreg, 2);
11323 srcmem = change_address (src, HImode, srcreg);
11324 dstmem = change_address (dst, HImode, destreg);
11325 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11326 emit_label (label);
11327 LABEL_NUSES (label) = 1;
11329 if (align > 1 && count != 0 && (count & 1))
11331 srcmem = change_address (src, QImode, srcreg);
11332 dstmem = change_address (dst, QImode, destreg);
11333 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11335 if (align <= 1 || count == 0)
11337 rtx label = ix86_expand_aligntest (countreg, 1);
11338 srcmem = change_address (src, QImode, srcreg);
11339 dstmem = change_address (dst, QImode, destreg);
11340 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11341 emit_label (label);
11342 LABEL_NUSES (label) = 1;
11349 /* Expand string clear operation (bzero). Use i386 string operations when
11350 profitable. expand_movstr contains similar code. */
11352 ix86_expand_clrstr (rtx dst, rtx count_exp, rtx align_exp)
11354 rtx destreg, zeroreg, countreg, destexp;
11355 enum machine_mode counter_mode;
11356 HOST_WIDE_INT align = 0;
11357 unsigned HOST_WIDE_INT count = 0;
11359 if (GET_CODE (align_exp) == CONST_INT)
11360 align = INTVAL (align_exp);
11362 /* Can't use any of this if the user has appropriated esi. */
11363 if (global_regs[4])
11366 /* This simple hack avoids all inlining code and simplifies code below. */
11367 if (!TARGET_ALIGN_STRINGOPS)
11370 if (GET_CODE (count_exp) == CONST_INT)
11372 count = INTVAL (count_exp);
11373 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11376 /* Figure out proper mode for counter. For 32bits it is always SImode,
11377 for 64bits use SImode when possible, otherwise DImode.
11378 Set count to number of bytes copied when known at compile time. */
11379 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11380 || x86_64_zero_extended_value (count_exp))
11381 counter_mode = SImode;
11383 counter_mode = DImode;
11385 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11386 if (destreg != XEXP (dst, 0))
11387 dst = replace_equiv_address_nv (dst, destreg);
11389 emit_insn (gen_cld ());
11391 /* When optimizing for size emit simple rep ; movsb instruction for
11392 counts not divisible by 4. */
11394 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11396 countreg = ix86_zero_extend_to_Pmode (count_exp);
11397 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11398 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11399 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11401 else if (count != 0
11403 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11404 || optimize_size || count < (unsigned int) 64))
11406 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11407 unsigned HOST_WIDE_INT offset = 0;
11409 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11410 if (count & ~(size - 1))
11412 countreg = copy_to_mode_reg (counter_mode,
11413 GEN_INT ((count >> (size == 4 ? 2 : 3))
11414 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11415 countreg = ix86_zero_extend_to_Pmode (countreg);
11416 destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
11417 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11418 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11419 offset = count & ~(size - 1);
11421 if (size == 8 && (count & 0x04))
11423 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11425 emit_insn (gen_strset (destreg, mem,
11426 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11431 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11433 emit_insn (gen_strset (destreg, mem,
11434 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11439 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11441 emit_insn (gen_strset (destreg, mem,
11442 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11449 /* Compute desired alignment of the string operation. */
11450 int desired_alignment = (TARGET_PENTIUMPRO
11451 && (count == 0 || count >= (unsigned int) 260)
11452 ? 8 : UNITS_PER_WORD);
11454 /* In case we don't know anything about the alignment, default to
11455 library version, since it is usually equally fast and result in
11458 Also emit call when we know that the count is large and call overhead
11459 will not be important. */
11460 if (!TARGET_INLINE_ALL_STRINGOPS
11461 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11464 if (TARGET_SINGLE_STRINGOP)
11465 emit_insn (gen_cld ());
11467 countreg2 = gen_reg_rtx (Pmode);
11468 countreg = copy_to_mode_reg (counter_mode, count_exp);
11469 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11470 /* Get rid of MEM_OFFSET, it won't be accurate. */
11471 dst = change_address (dst, BLKmode, destreg);
11473 if (count == 0 && align < desired_alignment)
11475 label = gen_label_rtx ();
11476 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11477 LEU, 0, counter_mode, 1, label);
11481 rtx label = ix86_expand_aligntest (destreg, 1);
11482 emit_insn (gen_strset (destreg, dst,
11483 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11484 ix86_adjust_counter (countreg, 1);
11485 emit_label (label);
11486 LABEL_NUSES (label) = 1;
11490 rtx label = ix86_expand_aligntest (destreg, 2);
11491 emit_insn (gen_strset (destreg, dst,
11492 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11493 ix86_adjust_counter (countreg, 2);
11494 emit_label (label);
11495 LABEL_NUSES (label) = 1;
11497 if (align <= 4 && desired_alignment > 4)
11499 rtx label = ix86_expand_aligntest (destreg, 4);
11500 emit_insn (gen_strset (destreg, dst,
11502 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11504 ix86_adjust_counter (countreg, 4);
11505 emit_label (label);
11506 LABEL_NUSES (label) = 1;
11509 if (label && desired_alignment > 4 && !TARGET_64BIT)
11511 emit_label (label);
11512 LABEL_NUSES (label) = 1;
11516 if (!TARGET_SINGLE_STRINGOP)
11517 emit_insn (gen_cld ());
11520 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11522 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11526 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11527 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11529 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11530 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11534 emit_label (label);
11535 LABEL_NUSES (label) = 1;
11538 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11539 emit_insn (gen_strset (destreg, dst,
11540 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11541 if (TARGET_64BIT && (align <= 4 || count == 0))
11543 rtx label = ix86_expand_aligntest (countreg, 4);
11544 emit_insn (gen_strset (destreg, dst,
11545 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11546 emit_label (label);
11547 LABEL_NUSES (label) = 1;
11549 if (align > 2 && count != 0 && (count & 2))
11550 emit_insn (gen_strset (destreg, dst,
11551 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11552 if (align <= 2 || count == 0)
11554 rtx label = ix86_expand_aligntest (countreg, 2);
11555 emit_insn (gen_strset (destreg, dst,
11556 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11557 emit_label (label);
11558 LABEL_NUSES (label) = 1;
11560 if (align > 1 && count != 0 && (count & 1))
11561 emit_insn (gen_strset (destreg, dst,
11562 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11563 if (align <= 1 || count == 0)
11565 rtx label = ix86_expand_aligntest (countreg, 1);
11566 emit_insn (gen_strset (destreg, dst,
11567 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11568 emit_label (label);
11569 LABEL_NUSES (label) = 1;
11575 /* Expand strlen. */
11577 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11579 rtx addr, scratch1, scratch2, scratch3, scratch4;
11581 /* The generic case of strlen expander is long. Avoid it's
11582 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11584 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11585 && !TARGET_INLINE_ALL_STRINGOPS
11587 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11590 addr = force_reg (Pmode, XEXP (src, 0));
11591 scratch1 = gen_reg_rtx (Pmode);
11593 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11596 /* Well it seems that some optimizer does not combine a call like
11597 foo(strlen(bar), strlen(bar));
11598 when the move and the subtraction is done here. It does calculate
11599 the length just once when these instructions are done inside of
11600 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11601 often used and I use one fewer register for the lifetime of
11602 output_strlen_unroll() this is better. */
11604 emit_move_insn (out, addr);
11606 ix86_expand_strlensi_unroll_1 (out, src, align);
11608 /* strlensi_unroll_1 returns the address of the zero at the end of
11609 the string, like memchr(), so compute the length by subtracting
11610 the start address. */
11612 emit_insn (gen_subdi3 (out, out, addr));
11614 emit_insn (gen_subsi3 (out, out, addr));
11619 scratch2 = gen_reg_rtx (Pmode);
11620 scratch3 = gen_reg_rtx (Pmode);
11621 scratch4 = force_reg (Pmode, constm1_rtx);
11623 emit_move_insn (scratch3, addr);
11624 eoschar = force_reg (QImode, eoschar);
11626 emit_insn (gen_cld ());
11627 src = replace_equiv_address_nv (src, scratch3);
11629 /* If .md starts supporting :P, this can be done in .md. */
11630 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11631 scratch4), UNSPEC_SCAS);
11632 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
11635 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11636 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11640 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11641 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11647 /* Expand the appropriate insns for doing strlen if not just doing
11650 out = result, initialized with the start address
11651 align_rtx = alignment of the address.
11652 scratch = scratch register, initialized with the startaddress when
11653 not aligned, otherwise undefined
11655 This is just the body. It needs the initializations mentioned above and
11656 some address computing at the end. These things are done in i386.md. */
11659 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11663 rtx align_2_label = NULL_RTX;
11664 rtx align_3_label = NULL_RTX;
11665 rtx align_4_label = gen_label_rtx ();
11666 rtx end_0_label = gen_label_rtx ();
11668 rtx tmpreg = gen_reg_rtx (SImode);
11669 rtx scratch = gen_reg_rtx (SImode);
11673 if (GET_CODE (align_rtx) == CONST_INT)
11674 align = INTVAL (align_rtx);
11676 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11678 /* Is there a known alignment and is it less than 4? */
11681 rtx scratch1 = gen_reg_rtx (Pmode);
11682 emit_move_insn (scratch1, out);
11683 /* Is there a known alignment and is it not 2? */
11686 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11687 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11689 /* Leave just the 3 lower bits. */
11690 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11691 NULL_RTX, 0, OPTAB_WIDEN);
11693 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11694 Pmode, 1, align_4_label);
11695 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
11696 Pmode, 1, align_2_label);
11697 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
11698 Pmode, 1, align_3_label);
11702 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11703 check if is aligned to 4 - byte. */
11705 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
11706 NULL_RTX, 0, OPTAB_WIDEN);
11708 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11709 Pmode, 1, align_4_label);
11712 mem = change_address (src, QImode, out);
11714 /* Now compare the bytes. */
11716 /* Compare the first n unaligned byte on a byte per byte basis. */
11717 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11718 QImode, 1, end_0_label);
11720 /* Increment the address. */
11722 emit_insn (gen_adddi3 (out, out, const1_rtx));
11724 emit_insn (gen_addsi3 (out, out, const1_rtx));
11726 /* Not needed with an alignment of 2 */
11729 emit_label (align_2_label);
11731 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11735 emit_insn (gen_adddi3 (out, out, const1_rtx));
11737 emit_insn (gen_addsi3 (out, out, const1_rtx));
11739 emit_label (align_3_label);
11742 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11746 emit_insn (gen_adddi3 (out, out, const1_rtx));
11748 emit_insn (gen_addsi3 (out, out, const1_rtx));
11751 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11752 align this loop. It gives only huge programs, but does not help to
11754 emit_label (align_4_label);
11756 mem = change_address (src, SImode, out);
11757 emit_move_insn (scratch, mem);
11759 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11761 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11763 /* This formula yields a nonzero result iff one of the bytes is zero.
11764 This saves three branches inside loop and many cycles. */
11766 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11767 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11768 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11769 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11770 gen_int_mode (0x80808080, SImode)));
11771 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11776 rtx reg = gen_reg_rtx (SImode);
11777 rtx reg2 = gen_reg_rtx (Pmode);
11778 emit_move_insn (reg, tmpreg);
11779 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11781 /* If zero is not in the first two bytes, move two bytes forward. */
11782 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11783 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11784 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11785 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11786 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11789 /* Emit lea manually to avoid clobbering of flags. */
11790 emit_insn (gen_rtx_SET (SImode, reg2,
11791 gen_rtx_PLUS (Pmode, out, const2_rtx)));
11793 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11794 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11795 emit_insn (gen_rtx_SET (VOIDmode, out,
11796 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11803 rtx end_2_label = gen_label_rtx ();
11804 /* Is zero in the first two bytes? */
11806 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11807 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11808 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11809 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11810 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11812 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11813 JUMP_LABEL (tmp) = end_2_label;
11815 /* Not in the first two. Move two bytes forward. */
11816 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11818 emit_insn (gen_adddi3 (out, out, const2_rtx));
11820 emit_insn (gen_addsi3 (out, out, const2_rtx));
11822 emit_label (end_2_label);
11826 /* Avoid branch in fixing the byte. */
11827 tmpreg = gen_lowpart (QImode, tmpreg);
11828 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11829 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11831 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11833 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11835 emit_label (end_0_label);
11839 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11840 rtx callarg2 ATTRIBUTE_UNUSED,
11841 rtx pop, int sibcall)
11843 rtx use = NULL, call;
11845 if (pop == const0_rtx)
11847 if (TARGET_64BIT && pop)
11851 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11852 fnaddr = machopic_indirect_call_target (fnaddr);
11854 /* Static functions and indirect calls don't need the pic register. */
11855 if (! TARGET_64BIT && flag_pic
11856 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11857 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11858 use_reg (&use, pic_offset_table_rtx);
11860 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11862 rtx al = gen_rtx_REG (QImode, 0);
11863 emit_move_insn (al, callarg2);
11864 use_reg (&use, al);
11866 #endif /* TARGET_MACHO */
11868 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11870 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11871 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11873 if (sibcall && TARGET_64BIT
11874 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11877 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11878 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11879 emit_move_insn (fnaddr, addr);
11880 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11883 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11885 call = gen_rtx_SET (VOIDmode, retval, call);
11888 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11889 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11890 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11893 call = emit_call_insn (call);
11895 CALL_INSN_FUNCTION_USAGE (call) = use;
11899 /* Clear stack slot assignments remembered from previous functions.
11900 This is called from INIT_EXPANDERS once before RTL is emitted for each
11903 static struct machine_function *
11904 ix86_init_machine_status (void)
11906 struct machine_function *f;
11908 f = ggc_alloc_cleared (sizeof (struct machine_function));
11909 f->use_fast_prologue_epilogue_nregs = -1;
11914 /* Return a MEM corresponding to a stack slot with mode MODE.
11915 Allocate a new slot if necessary.
11917 The RTL for a function can have several slots available: N is
11918 which slot to use. */
11921 assign_386_stack_local (enum machine_mode mode, int n)
11923 struct stack_local_entry *s;
11925 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11928 for (s = ix86_stack_locals; s; s = s->next)
11929 if (s->mode == mode && s->n == n)
11932 s = (struct stack_local_entry *)
11933 ggc_alloc (sizeof (struct stack_local_entry));
11936 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11938 s->next = ix86_stack_locals;
11939 ix86_stack_locals = s;
11943 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11945 static GTY(()) rtx ix86_tls_symbol;
11947 ix86_tls_get_addr (void)
11950 if (!ix86_tls_symbol)
11952 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11953 (TARGET_GNU_TLS && !TARGET_64BIT)
11954 ? "___tls_get_addr"
11955 : "__tls_get_addr");
11958 return ix86_tls_symbol;
11961 /* Calculate the length of the memory address in the instruction
11962 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11965 memory_address_length (rtx addr)
11967 struct ix86_address parts;
11968 rtx base, index, disp;
11971 if (GET_CODE (addr) == PRE_DEC
11972 || GET_CODE (addr) == POST_INC
11973 || GET_CODE (addr) == PRE_MODIFY
11974 || GET_CODE (addr) == POST_MODIFY)
11977 if (! ix86_decompose_address (addr, &parts))
11981 index = parts.index;
11986 - esp as the base always wants an index,
11987 - ebp as the base always wants a displacement. */
11989 /* Register Indirect. */
11990 if (base && !index && !disp)
11992 /* esp (for its index) and ebp (for its displacement) need
11993 the two-byte modrm form. */
11994 if (addr == stack_pointer_rtx
11995 || addr == arg_pointer_rtx
11996 || addr == frame_pointer_rtx
11997 || addr == hard_frame_pointer_rtx)
12001 /* Direct Addressing. */
12002 else if (disp && !base && !index)
12007 /* Find the length of the displacement constant. */
12010 if (GET_CODE (disp) == CONST_INT
12011 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
12017 /* ebp always wants a displacement. */
12018 else if (base == hard_frame_pointer_rtx)
12021 /* An index requires the two-byte modrm form.... */
12023 /* ...like esp, which always wants an index. */
12024 || base == stack_pointer_rtx
12025 || base == arg_pointer_rtx
12026 || base == frame_pointer_rtx)
12033 /* Compute default value for "length_immediate" attribute. When SHORTFORM
12034 is set, expect that insn have 8bit immediate alternative. */
12036 ix86_attr_length_immediate_default (rtx insn, int shortform)
12040 extract_insn_cached (insn);
12041 for (i = recog_data.n_operands - 1; i >= 0; --i)
12042 if (CONSTANT_P (recog_data.operand[i]))
12047 && GET_CODE (recog_data.operand[i]) == CONST_INT
12048 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
12052 switch (get_attr_mode (insn))
12063 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
12068 fatal_insn ("unknown insn mode", insn);
12074 /* Compute default value for "length_address" attribute. */
12076 ix86_attr_length_address_default (rtx insn)
12080 if (get_attr_type (insn) == TYPE_LEA)
12082 rtx set = PATTERN (insn);
12083 if (GET_CODE (set) == SET)
12085 else if (GET_CODE (set) == PARALLEL
12086 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12087 set = XVECEXP (set, 0, 0);
12090 #ifdef ENABLE_CHECKING
12096 return memory_address_length (SET_SRC (set));
12099 extract_insn_cached (insn);
12100 for (i = recog_data.n_operands - 1; i >= 0; --i)
12101 if (GET_CODE (recog_data.operand[i]) == MEM)
12103 return memory_address_length (XEXP (recog_data.operand[i], 0));
12109 /* Return the maximum number of instructions a cpu can issue. */
12112 ix86_issue_rate (void)
12116 case PROCESSOR_PENTIUM:
12120 case PROCESSOR_PENTIUMPRO:
12121 case PROCESSOR_PENTIUM4:
12122 case PROCESSOR_ATHLON:
12124 case PROCESSOR_NOCONA:
12132 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12133 by DEP_INSN and nothing set by DEP_INSN. */
12136 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12140 /* Simplify the test for uninteresting insns. */
12141 if (insn_type != TYPE_SETCC
12142 && insn_type != TYPE_ICMOV
12143 && insn_type != TYPE_FCMOV
12144 && insn_type != TYPE_IBR)
12147 if ((set = single_set (dep_insn)) != 0)
12149 set = SET_DEST (set);
12152 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12153 && XVECLEN (PATTERN (dep_insn), 0) == 2
12154 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12155 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12157 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12158 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12163 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12166 /* This test is true if the dependent insn reads the flags but
12167 not any other potentially set register. */
12168 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12171 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12177 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12178 address with operands set by DEP_INSN. */
12181 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12185 if (insn_type == TYPE_LEA
12188 addr = PATTERN (insn);
12189 if (GET_CODE (addr) == SET)
12191 else if (GET_CODE (addr) == PARALLEL
12192 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12193 addr = XVECEXP (addr, 0, 0);
12196 addr = SET_SRC (addr);
12201 extract_insn_cached (insn);
12202 for (i = recog_data.n_operands - 1; i >= 0; --i)
12203 if (GET_CODE (recog_data.operand[i]) == MEM)
12205 addr = XEXP (recog_data.operand[i], 0);
12212 return modified_in_p (addr, dep_insn);
12216 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12218 enum attr_type insn_type, dep_insn_type;
12219 enum attr_memory memory, dep_memory;
12221 int dep_insn_code_number;
12223 /* Anti and output dependencies have zero cost on all CPUs. */
12224 if (REG_NOTE_KIND (link) != 0)
12227 dep_insn_code_number = recog_memoized (dep_insn);
12229 /* If we can't recognize the insns, we can't really do anything. */
12230 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12233 insn_type = get_attr_type (insn);
12234 dep_insn_type = get_attr_type (dep_insn);
12238 case PROCESSOR_PENTIUM:
12239 /* Address Generation Interlock adds a cycle of latency. */
12240 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12243 /* ??? Compares pair with jump/setcc. */
12244 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12247 /* Floating point stores require value to be ready one cycle earlier. */
12248 if (insn_type == TYPE_FMOV
12249 && get_attr_memory (insn) == MEMORY_STORE
12250 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12254 case PROCESSOR_PENTIUMPRO:
12255 memory = get_attr_memory (insn);
12256 dep_memory = get_attr_memory (dep_insn);
12258 /* Since we can't represent delayed latencies of load+operation,
12259 increase the cost here for non-imov insns. */
12260 if (dep_insn_type != TYPE_IMOV
12261 && dep_insn_type != TYPE_FMOV
12262 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
12265 /* INT->FP conversion is expensive. */
12266 if (get_attr_fp_int_src (dep_insn))
12269 /* There is one cycle extra latency between an FP op and a store. */
12270 if (insn_type == TYPE_FMOV
12271 && (set = single_set (dep_insn)) != NULL_RTX
12272 && (set2 = single_set (insn)) != NULL_RTX
12273 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12274 && GET_CODE (SET_DEST (set2)) == MEM)
12277 /* Show ability of reorder buffer to hide latency of load by executing
12278 in parallel with previous instruction in case
12279 previous instruction is not needed to compute the address. */
12280 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12281 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12283 /* Claim moves to take one cycle, as core can issue one load
12284 at time and the next load can start cycle later. */
12285 if (dep_insn_type == TYPE_IMOV
12286 || dep_insn_type == TYPE_FMOV)
12294 memory = get_attr_memory (insn);
12295 dep_memory = get_attr_memory (dep_insn);
12296 /* The esp dependency is resolved before the instruction is really
12298 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12299 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12302 /* Since we can't represent delayed latencies of load+operation,
12303 increase the cost here for non-imov insns. */
12304 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12305 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12307 /* INT->FP conversion is expensive. */
12308 if (get_attr_fp_int_src (dep_insn))
12311 /* Show ability of reorder buffer to hide latency of load by executing
12312 in parallel with previous instruction in case
12313 previous instruction is not needed to compute the address. */
12314 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12315 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12317 /* Claim moves to take one cycle, as core can issue one load
12318 at time and the next load can start cycle later. */
12319 if (dep_insn_type == TYPE_IMOV
12320 || dep_insn_type == TYPE_FMOV)
12329 case PROCESSOR_ATHLON:
12331 memory = get_attr_memory (insn);
12332 dep_memory = get_attr_memory (dep_insn);
12334 /* Show ability of reorder buffer to hide latency of load by executing
12335 in parallel with previous instruction in case
12336 previous instruction is not needed to compute the address. */
12337 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12338 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12340 enum attr_unit unit = get_attr_unit (insn);
12343 /* Because of the difference between the length of integer and
12344 floating unit pipeline preparation stages, the memory operands
12345 for floating point are cheaper.
12347 ??? For Athlon it the difference is most probably 2. */
12348 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12351 loadcost = TARGET_ATHLON ? 2 : 0;
12353 if (cost >= loadcost)
12367 ia32_use_dfa_pipeline_interface (void)
12370 || TARGET_PENTIUMPRO
12371 || TARGET_ATHLON_K8)
12376 /* How many alternative schedules to try. This should be as wide as the
12377 scheduling freedom in the DFA, but no wider. Making this value too
12378 large results extra work for the scheduler. */
12381 ia32_multipass_dfa_lookahead (void)
12383 if (ix86_tune == PROCESSOR_PENTIUM)
12386 if (ix86_tune == PROCESSOR_PENTIUMPRO)
12394 /* Compute the alignment given to a constant that is being placed in memory.
12395 EXP is the constant and ALIGN is the alignment that the object would
12397 The value of this function is used instead of that alignment to align
12401 ix86_constant_alignment (tree exp, int align)
12403 if (TREE_CODE (exp) == REAL_CST)
12405 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12407 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12410 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12411 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12412 return BITS_PER_WORD;
12417 /* Compute the alignment for a static variable.
12418 TYPE is the data type, and ALIGN is the alignment that
12419 the object would ordinarily have. The value of this function is used
12420 instead of that alignment to align the object. */
12423 ix86_data_alignment (tree type, int align)
12425 if (AGGREGATE_TYPE_P (type)
12426 && TYPE_SIZE (type)
12427 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12428 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12429 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12432 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12433 to 16byte boundary. */
12436 if (AGGREGATE_TYPE_P (type)
12437 && TYPE_SIZE (type)
12438 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12439 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12440 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12444 if (TREE_CODE (type) == ARRAY_TYPE)
12446 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12448 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12451 else if (TREE_CODE (type) == COMPLEX_TYPE)
12454 if (TYPE_MODE (type) == DCmode && align < 64)
12456 if (TYPE_MODE (type) == XCmode && align < 128)
12459 else if ((TREE_CODE (type) == RECORD_TYPE
12460 || TREE_CODE (type) == UNION_TYPE
12461 || TREE_CODE (type) == QUAL_UNION_TYPE)
12462 && TYPE_FIELDS (type))
12464 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12466 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12469 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12470 || TREE_CODE (type) == INTEGER_TYPE)
12472 if (TYPE_MODE (type) == DFmode && align < 64)
12474 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12481 /* Compute the alignment for a local variable.
12482 TYPE is the data type, and ALIGN is the alignment that
12483 the object would ordinarily have. The value of this macro is used
12484 instead of that alignment to align the object. */
12487 ix86_local_alignment (tree type, int align)
12489 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12490 to 16byte boundary. */
12493 if (AGGREGATE_TYPE_P (type)
12494 && TYPE_SIZE (type)
12495 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12496 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12497 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12500 if (TREE_CODE (type) == ARRAY_TYPE)
12502 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12504 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12507 else if (TREE_CODE (type) == COMPLEX_TYPE)
12509 if (TYPE_MODE (type) == DCmode && align < 64)
12511 if (TYPE_MODE (type) == XCmode && align < 128)
12514 else if ((TREE_CODE (type) == RECORD_TYPE
12515 || TREE_CODE (type) == UNION_TYPE
12516 || TREE_CODE (type) == QUAL_UNION_TYPE)
12517 && TYPE_FIELDS (type))
12519 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12521 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12524 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12525 || TREE_CODE (type) == INTEGER_TYPE)
12528 if (TYPE_MODE (type) == DFmode && align < 64)
12530 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12536 /* Emit RTL insns to initialize the variable parts of a trampoline.
12537 FNADDR is an RTX for the address of the function's pure code.
12538 CXT is an RTX for the static chain value for the function. */
12540 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12544 /* Compute offset from the end of the jmp to the target function. */
12545 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12546 plus_constant (tramp, 10),
12547 NULL_RTX, 1, OPTAB_DIRECT);
12548 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12549 gen_int_mode (0xb9, QImode));
12550 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12551 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12552 gen_int_mode (0xe9, QImode));
12553 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12558 /* Try to load address using shorter movl instead of movabs.
12559 We may want to support movq for kernel mode, but kernel does not use
12560 trampolines at the moment. */
12561 if (x86_64_zero_extended_value (fnaddr))
12563 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12564 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12565 gen_int_mode (0xbb41, HImode));
12566 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12567 gen_lowpart (SImode, fnaddr));
12572 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12573 gen_int_mode (0xbb49, HImode));
12574 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12578 /* Load static chain using movabs to r10. */
12579 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12580 gen_int_mode (0xba49, HImode));
12581 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12584 /* Jump to the r11 */
12585 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12586 gen_int_mode (0xff49, HImode));
12587 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12588 gen_int_mode (0xe3, QImode));
12590 if (offset > TRAMPOLINE_SIZE)
12594 #ifdef TRANSFER_FROM_TRAMPOLINE
12595 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
12596 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12600 #define def_builtin(MASK, NAME, TYPE, CODE) \
12602 if ((MASK) & target_flags \
12603 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12604 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12605 NULL, NULL_TREE); \
12608 struct builtin_description
12610 const unsigned int mask;
12611 const enum insn_code icode;
12612 const char *const name;
12613 const enum ix86_builtins code;
12614 const enum rtx_code comparison;
12615 const unsigned int flag;
12618 static const struct builtin_description bdesc_comi[] =
12620 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12621 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12622 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12623 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12624 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12625 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12626 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12627 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12628 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12629 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12630 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12631 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12632 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12633 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12634 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12635 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12636 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12637 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12638 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12639 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12640 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12641 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12642 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12643 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12646 static const struct builtin_description bdesc_2arg[] =
12649 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12650 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12651 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12652 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12653 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12654 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12655 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12656 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12658 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12659 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12660 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12661 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12662 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12663 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12664 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12665 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12666 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12667 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12668 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12669 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12670 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12671 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12672 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12673 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12674 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12675 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12676 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12677 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12679 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12680 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12681 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12682 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12684 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12685 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12686 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12687 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12689 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12690 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12691 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12692 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12693 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12696 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12697 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12698 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12699 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12700 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12701 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12702 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12703 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12705 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12706 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12707 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12708 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12709 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12710 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12711 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12712 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12714 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12715 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12716 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12718 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12719 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12720 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12721 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12723 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12724 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12726 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12727 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12728 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12729 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12730 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12731 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12733 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12734 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12735 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12736 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12738 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12739 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12740 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12741 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12742 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12743 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12746 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12747 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12748 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12750 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12751 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12752 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12754 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12755 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12756 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12757 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12758 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12759 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12761 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12762 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12763 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12764 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12765 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12766 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12768 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12769 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12770 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12771 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12773 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12774 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12777 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12778 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12779 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12780 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12781 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12782 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12783 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12784 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12786 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12787 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12788 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12789 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12790 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12791 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12792 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12793 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12794 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12795 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12796 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12797 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12798 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12799 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12800 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12801 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12802 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12803 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12804 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12805 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12807 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12808 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12809 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12810 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12812 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12813 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12814 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12815 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12817 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12818 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12819 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12822 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12823 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12824 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12825 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12826 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12827 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12828 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12829 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12831 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12832 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12833 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12834 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12835 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12836 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12837 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12838 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12840 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12841 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12842 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12843 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12845 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12846 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12847 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12848 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12850 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12851 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12853 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12854 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12855 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12856 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12857 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12858 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12860 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12861 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12862 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12863 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12865 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12866 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12867 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12868 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12869 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12870 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12871 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12872 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12874 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12875 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12876 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12878 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12879 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12881 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12882 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12883 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12884 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12885 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12886 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12888 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12889 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12890 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12891 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12892 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12893 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12895 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12896 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12897 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12898 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12900 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12902 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12903 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12904 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12905 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12908 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12909 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12910 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12911 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12912 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12913 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
12916 static const struct builtin_description bdesc_1arg[] =
12918 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12919 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12921 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12922 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12923 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12925 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12926 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12927 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12928 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12929 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12930 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
12932 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12933 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12934 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12935 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12937 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12939 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12940 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12942 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12943 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12944 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12945 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12946 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12948 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12950 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12951 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12952 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12953 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
12955 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12956 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12957 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12959 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
12962 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
12963 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
12964 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
12968 ix86_init_builtins (void)
12971 ix86_init_mmx_sse_builtins ();
12974 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12975 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12978 ix86_init_mmx_sse_builtins (void)
12980 const struct builtin_description * d;
12983 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
12984 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
12985 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
12986 tree V2DI_type_node = build_vector_type_for_mode (intDI_type_node, V2DImode);
12987 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
12988 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
12989 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
12990 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
12991 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
12992 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
12994 tree pchar_type_node = build_pointer_type (char_type_node);
12995 tree pcchar_type_node = build_pointer_type (
12996 build_type_variant (char_type_node, 1, 0));
12997 tree pfloat_type_node = build_pointer_type (float_type_node);
12998 tree pcfloat_type_node = build_pointer_type (
12999 build_type_variant (float_type_node, 1, 0));
13000 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13001 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13002 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13005 tree int_ftype_v4sf_v4sf
13006 = build_function_type_list (integer_type_node,
13007 V4SF_type_node, V4SF_type_node, NULL_TREE);
13008 tree v4si_ftype_v4sf_v4sf
13009 = build_function_type_list (V4SI_type_node,
13010 V4SF_type_node, V4SF_type_node, NULL_TREE);
13011 /* MMX/SSE/integer conversions. */
13012 tree int_ftype_v4sf
13013 = build_function_type_list (integer_type_node,
13014 V4SF_type_node, NULL_TREE);
13015 tree int64_ftype_v4sf
13016 = build_function_type_list (long_long_integer_type_node,
13017 V4SF_type_node, NULL_TREE);
13018 tree int_ftype_v8qi
13019 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13020 tree v4sf_ftype_v4sf_int
13021 = build_function_type_list (V4SF_type_node,
13022 V4SF_type_node, integer_type_node, NULL_TREE);
13023 tree v4sf_ftype_v4sf_int64
13024 = build_function_type_list (V4SF_type_node,
13025 V4SF_type_node, long_long_integer_type_node,
13027 tree v4sf_ftype_v4sf_v2si
13028 = build_function_type_list (V4SF_type_node,
13029 V4SF_type_node, V2SI_type_node, NULL_TREE);
13030 tree int_ftype_v4hi_int
13031 = build_function_type_list (integer_type_node,
13032 V4HI_type_node, integer_type_node, NULL_TREE);
13033 tree v4hi_ftype_v4hi_int_int
13034 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13035 integer_type_node, integer_type_node,
13037 /* Miscellaneous. */
13038 tree v8qi_ftype_v4hi_v4hi
13039 = build_function_type_list (V8QI_type_node,
13040 V4HI_type_node, V4HI_type_node, NULL_TREE);
13041 tree v4hi_ftype_v2si_v2si
13042 = build_function_type_list (V4HI_type_node,
13043 V2SI_type_node, V2SI_type_node, NULL_TREE);
13044 tree v4sf_ftype_v4sf_v4sf_int
13045 = build_function_type_list (V4SF_type_node,
13046 V4SF_type_node, V4SF_type_node,
13047 integer_type_node, NULL_TREE);
13048 tree v2si_ftype_v4hi_v4hi
13049 = build_function_type_list (V2SI_type_node,
13050 V4HI_type_node, V4HI_type_node, NULL_TREE);
13051 tree v4hi_ftype_v4hi_int
13052 = build_function_type_list (V4HI_type_node,
13053 V4HI_type_node, integer_type_node, NULL_TREE);
13054 tree v4hi_ftype_v4hi_di
13055 = build_function_type_list (V4HI_type_node,
13056 V4HI_type_node, long_long_unsigned_type_node,
13058 tree v2si_ftype_v2si_di
13059 = build_function_type_list (V2SI_type_node,
13060 V2SI_type_node, long_long_unsigned_type_node,
13062 tree void_ftype_void
13063 = build_function_type (void_type_node, void_list_node);
13064 tree void_ftype_unsigned
13065 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13066 tree void_ftype_unsigned_unsigned
13067 = build_function_type_list (void_type_node, unsigned_type_node,
13068 unsigned_type_node, NULL_TREE);
13069 tree void_ftype_pcvoid_unsigned_unsigned
13070 = build_function_type_list (void_type_node, const_ptr_type_node,
13071 unsigned_type_node, unsigned_type_node,
13073 tree unsigned_ftype_void
13074 = build_function_type (unsigned_type_node, void_list_node);
13076 = build_function_type (long_long_unsigned_type_node, void_list_node);
13077 tree v4sf_ftype_void
13078 = build_function_type (V4SF_type_node, void_list_node);
13079 tree v2si_ftype_v4sf
13080 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13081 /* Loads/stores. */
13082 tree void_ftype_v8qi_v8qi_pchar
13083 = build_function_type_list (void_type_node,
13084 V8QI_type_node, V8QI_type_node,
13085 pchar_type_node, NULL_TREE);
13086 tree v4sf_ftype_pcfloat
13087 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13088 /* @@@ the type is bogus */
13089 tree v4sf_ftype_v4sf_pv2si
13090 = build_function_type_list (V4SF_type_node,
13091 V4SF_type_node, pv2si_type_node, NULL_TREE);
13092 tree void_ftype_pv2si_v4sf
13093 = build_function_type_list (void_type_node,
13094 pv2si_type_node, V4SF_type_node, NULL_TREE);
13095 tree void_ftype_pfloat_v4sf
13096 = build_function_type_list (void_type_node,
13097 pfloat_type_node, V4SF_type_node, NULL_TREE);
13098 tree void_ftype_pdi_di
13099 = build_function_type_list (void_type_node,
13100 pdi_type_node, long_long_unsigned_type_node,
13102 tree void_ftype_pv2di_v2di
13103 = build_function_type_list (void_type_node,
13104 pv2di_type_node, V2DI_type_node, NULL_TREE);
13105 /* Normal vector unops. */
13106 tree v4sf_ftype_v4sf
13107 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13109 /* Normal vector binops. */
13110 tree v4sf_ftype_v4sf_v4sf
13111 = build_function_type_list (V4SF_type_node,
13112 V4SF_type_node, V4SF_type_node, NULL_TREE);
13113 tree v8qi_ftype_v8qi_v8qi
13114 = build_function_type_list (V8QI_type_node,
13115 V8QI_type_node, V8QI_type_node, NULL_TREE);
13116 tree v4hi_ftype_v4hi_v4hi
13117 = build_function_type_list (V4HI_type_node,
13118 V4HI_type_node, V4HI_type_node, NULL_TREE);
13119 tree v2si_ftype_v2si_v2si
13120 = build_function_type_list (V2SI_type_node,
13121 V2SI_type_node, V2SI_type_node, NULL_TREE);
13122 tree di_ftype_di_di
13123 = build_function_type_list (long_long_unsigned_type_node,
13124 long_long_unsigned_type_node,
13125 long_long_unsigned_type_node, NULL_TREE);
13127 tree v2si_ftype_v2sf
13128 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13129 tree v2sf_ftype_v2si
13130 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13131 tree v2si_ftype_v2si
13132 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13133 tree v2sf_ftype_v2sf
13134 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13135 tree v2sf_ftype_v2sf_v2sf
13136 = build_function_type_list (V2SF_type_node,
13137 V2SF_type_node, V2SF_type_node, NULL_TREE);
13138 tree v2si_ftype_v2sf_v2sf
13139 = build_function_type_list (V2SI_type_node,
13140 V2SF_type_node, V2SF_type_node, NULL_TREE);
13141 tree pint_type_node = build_pointer_type (integer_type_node);
13142 tree pcint_type_node = build_pointer_type (
13143 build_type_variant (integer_type_node, 1, 0));
13144 tree pdouble_type_node = build_pointer_type (double_type_node);
13145 tree pcdouble_type_node = build_pointer_type (
13146 build_type_variant (double_type_node, 1, 0));
13147 tree int_ftype_v2df_v2df
13148 = build_function_type_list (integer_type_node,
13149 V2DF_type_node, V2DF_type_node, NULL_TREE);
13152 = build_function_type (intTI_type_node, void_list_node);
13153 tree v2di_ftype_void
13154 = build_function_type (V2DI_type_node, void_list_node);
13155 tree ti_ftype_ti_ti
13156 = build_function_type_list (intTI_type_node,
13157 intTI_type_node, intTI_type_node, NULL_TREE);
13158 tree void_ftype_pcvoid
13159 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13161 = build_function_type_list (V2DI_type_node,
13162 long_long_unsigned_type_node, NULL_TREE);
13164 = build_function_type_list (long_long_unsigned_type_node,
13165 V2DI_type_node, NULL_TREE);
13166 tree v4sf_ftype_v4si
13167 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13168 tree v4si_ftype_v4sf
13169 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13170 tree v2df_ftype_v4si
13171 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13172 tree v4si_ftype_v2df
13173 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13174 tree v2si_ftype_v2df
13175 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13176 tree v4sf_ftype_v2df
13177 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13178 tree v2df_ftype_v2si
13179 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13180 tree v2df_ftype_v4sf
13181 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13182 tree int_ftype_v2df
13183 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13184 tree int64_ftype_v2df
13185 = build_function_type_list (long_long_integer_type_node,
13186 V2DF_type_node, NULL_TREE);
13187 tree v2df_ftype_v2df_int
13188 = build_function_type_list (V2DF_type_node,
13189 V2DF_type_node, integer_type_node, NULL_TREE);
13190 tree v2df_ftype_v2df_int64
13191 = build_function_type_list (V2DF_type_node,
13192 V2DF_type_node, long_long_integer_type_node,
13194 tree v4sf_ftype_v4sf_v2df
13195 = build_function_type_list (V4SF_type_node,
13196 V4SF_type_node, V2DF_type_node, NULL_TREE);
13197 tree v2df_ftype_v2df_v4sf
13198 = build_function_type_list (V2DF_type_node,
13199 V2DF_type_node, V4SF_type_node, NULL_TREE);
13200 tree v2df_ftype_v2df_v2df_int
13201 = build_function_type_list (V2DF_type_node,
13202 V2DF_type_node, V2DF_type_node,
13205 tree v2df_ftype_v2df_pv2si
13206 = build_function_type_list (V2DF_type_node,
13207 V2DF_type_node, pv2si_type_node, NULL_TREE);
13208 tree void_ftype_pv2si_v2df
13209 = build_function_type_list (void_type_node,
13210 pv2si_type_node, V2DF_type_node, NULL_TREE);
13211 tree void_ftype_pdouble_v2df
13212 = build_function_type_list (void_type_node,
13213 pdouble_type_node, V2DF_type_node, NULL_TREE);
13214 tree void_ftype_pint_int
13215 = build_function_type_list (void_type_node,
13216 pint_type_node, integer_type_node, NULL_TREE);
13217 tree void_ftype_v16qi_v16qi_pchar
13218 = build_function_type_list (void_type_node,
13219 V16QI_type_node, V16QI_type_node,
13220 pchar_type_node, NULL_TREE);
13221 tree v2df_ftype_pcdouble
13222 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13223 tree v2df_ftype_v2df_v2df
13224 = build_function_type_list (V2DF_type_node,
13225 V2DF_type_node, V2DF_type_node, NULL_TREE);
13226 tree v16qi_ftype_v16qi_v16qi
13227 = build_function_type_list (V16QI_type_node,
13228 V16QI_type_node, V16QI_type_node, NULL_TREE);
13229 tree v8hi_ftype_v8hi_v8hi
13230 = build_function_type_list (V8HI_type_node,
13231 V8HI_type_node, V8HI_type_node, NULL_TREE);
13232 tree v4si_ftype_v4si_v4si
13233 = build_function_type_list (V4SI_type_node,
13234 V4SI_type_node, V4SI_type_node, NULL_TREE);
13235 tree v2di_ftype_v2di_v2di
13236 = build_function_type_list (V2DI_type_node,
13237 V2DI_type_node, V2DI_type_node, NULL_TREE);
13238 tree v2di_ftype_v2df_v2df
13239 = build_function_type_list (V2DI_type_node,
13240 V2DF_type_node, V2DF_type_node, NULL_TREE);
13241 tree v2df_ftype_v2df
13242 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13243 tree v2df_ftype_double
13244 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13245 tree v2df_ftype_double_double
13246 = build_function_type_list (V2DF_type_node,
13247 double_type_node, double_type_node, NULL_TREE);
13248 tree int_ftype_v8hi_int
13249 = build_function_type_list (integer_type_node,
13250 V8HI_type_node, integer_type_node, NULL_TREE);
13251 tree v8hi_ftype_v8hi_int_int
13252 = build_function_type_list (V8HI_type_node,
13253 V8HI_type_node, integer_type_node,
13254 integer_type_node, NULL_TREE);
13255 tree v2di_ftype_v2di_int
13256 = build_function_type_list (V2DI_type_node,
13257 V2DI_type_node, integer_type_node, NULL_TREE);
13258 tree v4si_ftype_v4si_int
13259 = build_function_type_list (V4SI_type_node,
13260 V4SI_type_node, integer_type_node, NULL_TREE);
13261 tree v8hi_ftype_v8hi_int
13262 = build_function_type_list (V8HI_type_node,
13263 V8HI_type_node, integer_type_node, NULL_TREE);
13264 tree v8hi_ftype_v8hi_v2di
13265 = build_function_type_list (V8HI_type_node,
13266 V8HI_type_node, V2DI_type_node, NULL_TREE);
13267 tree v4si_ftype_v4si_v2di
13268 = build_function_type_list (V4SI_type_node,
13269 V4SI_type_node, V2DI_type_node, NULL_TREE);
13270 tree v4si_ftype_v8hi_v8hi
13271 = build_function_type_list (V4SI_type_node,
13272 V8HI_type_node, V8HI_type_node, NULL_TREE);
13273 tree di_ftype_v8qi_v8qi
13274 = build_function_type_list (long_long_unsigned_type_node,
13275 V8QI_type_node, V8QI_type_node, NULL_TREE);
13276 tree v2di_ftype_v16qi_v16qi
13277 = build_function_type_list (V2DI_type_node,
13278 V16QI_type_node, V16QI_type_node, NULL_TREE);
13279 tree int_ftype_v16qi
13280 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13281 tree v16qi_ftype_pcchar
13282 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13283 tree void_ftype_pchar_v16qi
13284 = build_function_type_list (void_type_node,
13285 pchar_type_node, V16QI_type_node, NULL_TREE);
13286 tree v4si_ftype_pcint
13287 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13288 tree void_ftype_pcint_v4si
13289 = build_function_type_list (void_type_node,
13290 pcint_type_node, V4SI_type_node, NULL_TREE);
13291 tree v2di_ftype_v2di
13292 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13295 tree float128_type;
13297 /* The __float80 type. */
13298 if (TYPE_MODE (long_double_type_node) == XFmode)
13299 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13303 /* The __float80 type. */
13304 float80_type = make_node (REAL_TYPE);
13305 TYPE_PRECISION (float80_type) = 96;
13306 layout_type (float80_type);
13307 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13310 float128_type = make_node (REAL_TYPE);
13311 TYPE_PRECISION (float128_type) = 128;
13312 layout_type (float128_type);
13313 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13315 /* Add all builtins that are more or less simple operations on two
13317 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13319 /* Use one of the operands; the target can have a different mode for
13320 mask-generating compares. */
13321 enum machine_mode mode;
13326 mode = insn_data[d->icode].operand[1].mode;
13331 type = v16qi_ftype_v16qi_v16qi;
13334 type = v8hi_ftype_v8hi_v8hi;
13337 type = v4si_ftype_v4si_v4si;
13340 type = v2di_ftype_v2di_v2di;
13343 type = v2df_ftype_v2df_v2df;
13346 type = ti_ftype_ti_ti;
13349 type = v4sf_ftype_v4sf_v4sf;
13352 type = v8qi_ftype_v8qi_v8qi;
13355 type = v4hi_ftype_v4hi_v4hi;
13358 type = v2si_ftype_v2si_v2si;
13361 type = di_ftype_di_di;
13368 /* Override for comparisons. */
13369 if (d->icode == CODE_FOR_maskcmpv4sf3
13370 || d->icode == CODE_FOR_maskncmpv4sf3
13371 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13372 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13373 type = v4si_ftype_v4sf_v4sf;
13375 if (d->icode == CODE_FOR_maskcmpv2df3
13376 || d->icode == CODE_FOR_maskncmpv2df3
13377 || d->icode == CODE_FOR_vmmaskcmpv2df3
13378 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13379 type = v2di_ftype_v2df_v2df;
13381 def_builtin (d->mask, d->name, type, d->code);
13384 /* Add the remaining MMX insns with somewhat more complicated types. */
13385 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13386 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13387 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13388 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13389 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13391 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13392 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13393 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13395 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13396 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13398 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13399 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13401 /* comi/ucomi insns. */
13402 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13403 if (d->mask == MASK_SSE2)
13404 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13406 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13408 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13409 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13410 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13412 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13413 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13414 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13415 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13416 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13417 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13418 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13419 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13420 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13421 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13422 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13424 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13425 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13427 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13429 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13430 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13431 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13432 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13433 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13434 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13436 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13437 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13438 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13439 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13441 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13442 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13443 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13444 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13446 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13448 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13450 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13451 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13452 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13453 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13454 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13455 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13457 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13459 /* Original 3DNow! */
13460 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13461 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13462 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13463 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13464 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13465 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13466 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13467 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13468 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13469 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13470 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13471 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13472 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13473 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13474 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13475 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13476 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13477 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13478 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13479 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13481 /* 3DNow! extension as used in the Athlon CPU. */
13482 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13483 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13484 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13485 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13486 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13487 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13489 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13492 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13493 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13495 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13496 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13497 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13499 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13500 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13501 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13502 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13503 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13504 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13506 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13507 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13508 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13509 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13511 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13512 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13513 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13514 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13515 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13517 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13518 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13519 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13520 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13522 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13523 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13525 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13527 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13528 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13530 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13531 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13532 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13533 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13534 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13536 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13538 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13539 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13540 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13541 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13543 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13544 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13545 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13547 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13548 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13549 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13550 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13552 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13553 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13554 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13555 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13556 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13557 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13558 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13560 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13561 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13562 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13564 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13565 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13566 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13567 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13568 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13569 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13570 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13572 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13574 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13575 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13576 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13578 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13579 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13580 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13582 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13583 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13585 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13586 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13587 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13588 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13590 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13591 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13592 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13593 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13595 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13596 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13598 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13600 /* Prescott New Instructions. */
13601 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
13602 void_ftype_pcvoid_unsigned_unsigned,
13603 IX86_BUILTIN_MONITOR);
13604 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
13605 void_ftype_unsigned_unsigned,
13606 IX86_BUILTIN_MWAIT);
13607 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
13609 IX86_BUILTIN_MOVSHDUP);
13610 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
13612 IX86_BUILTIN_MOVSLDUP);
13613 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
13614 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13615 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
13616 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13617 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
13618 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13621 /* Errors in the source file can cause expand_expr to return const0_rtx
13622 where we expect a vector. To avoid crashing, use one of the vector
13623 clear instructions. */
13625 safe_vector_operand (rtx x, enum machine_mode mode)
13627 if (x != const0_rtx)
13629 x = gen_reg_rtx (mode);
13631 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13632 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13633 : gen_rtx_SUBREG (DImode, x, 0)));
13635 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13636 : gen_rtx_SUBREG (V4SFmode, x, 0),
13637 CONST0_RTX (V4SFmode)));
13641 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13644 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13647 tree arg0 = TREE_VALUE (arglist);
13648 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13649 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13650 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13651 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13652 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13653 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13655 if (VECTOR_MODE_P (mode0))
13656 op0 = safe_vector_operand (op0, mode0);
13657 if (VECTOR_MODE_P (mode1))
13658 op1 = safe_vector_operand (op1, mode1);
13661 || GET_MODE (target) != tmode
13662 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13663 target = gen_reg_rtx (tmode);
13665 if (GET_MODE (op1) == SImode && mode1 == TImode)
13667 rtx x = gen_reg_rtx (V4SImode);
13668 emit_insn (gen_sse2_loadd (x, op1));
13669 op1 = gen_lowpart (TImode, x);
13672 /* In case the insn wants input operands in modes different from
13673 the result, abort. */
13674 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13675 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
13678 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13679 op0 = copy_to_mode_reg (mode0, op0);
13680 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13681 op1 = copy_to_mode_reg (mode1, op1);
13683 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13684 yet one of the two must not be a memory. This is normally enforced
13685 by expanders, but we didn't bother to create one here. */
13686 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13687 op0 = copy_to_mode_reg (mode0, op0);
13689 pat = GEN_FCN (icode) (target, op0, op1);
13696 /* Subroutine of ix86_expand_builtin to take care of stores. */
13699 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13702 tree arg0 = TREE_VALUE (arglist);
13703 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13704 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13705 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13706 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13707 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13709 if (VECTOR_MODE_P (mode1))
13710 op1 = safe_vector_operand (op1, mode1);
13712 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13713 op1 = copy_to_mode_reg (mode1, op1);
13715 pat = GEN_FCN (icode) (op0, op1);
13721 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13724 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13725 rtx target, int do_load)
13728 tree arg0 = TREE_VALUE (arglist);
13729 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13730 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13731 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13734 || GET_MODE (target) != tmode
13735 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13736 target = gen_reg_rtx (tmode);
13738 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13741 if (VECTOR_MODE_P (mode0))
13742 op0 = safe_vector_operand (op0, mode0);
13744 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13745 op0 = copy_to_mode_reg (mode0, op0);
13748 pat = GEN_FCN (icode) (target, op0);
13755 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13756 sqrtss, rsqrtss, rcpss. */
13759 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13762 tree arg0 = TREE_VALUE (arglist);
13763 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13764 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13765 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13768 || GET_MODE (target) != tmode
13769 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13770 target = gen_reg_rtx (tmode);
13772 if (VECTOR_MODE_P (mode0))
13773 op0 = safe_vector_operand (op0, mode0);
13775 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13776 op0 = copy_to_mode_reg (mode0, op0);
13779 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13780 op1 = copy_to_mode_reg (mode0, op1);
13782 pat = GEN_FCN (icode) (target, op0, op1);
13789 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13792 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13796 tree arg0 = TREE_VALUE (arglist);
13797 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13798 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13799 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13801 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13802 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13803 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13804 enum rtx_code comparison = d->comparison;
13806 if (VECTOR_MODE_P (mode0))
13807 op0 = safe_vector_operand (op0, mode0);
13808 if (VECTOR_MODE_P (mode1))
13809 op1 = safe_vector_operand (op1, mode1);
13811 /* Swap operands if we have a comparison that isn't available in
13815 rtx tmp = gen_reg_rtx (mode1);
13816 emit_move_insn (tmp, op1);
13822 || GET_MODE (target) != tmode
13823 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13824 target = gen_reg_rtx (tmode);
13826 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13827 op0 = copy_to_mode_reg (mode0, op0);
13828 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13829 op1 = copy_to_mode_reg (mode1, op1);
13831 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13832 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13839 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13842 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13846 tree arg0 = TREE_VALUE (arglist);
13847 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13848 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13849 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13851 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13852 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13853 enum rtx_code comparison = d->comparison;
13855 if (VECTOR_MODE_P (mode0))
13856 op0 = safe_vector_operand (op0, mode0);
13857 if (VECTOR_MODE_P (mode1))
13858 op1 = safe_vector_operand (op1, mode1);
13860 /* Swap operands if we have a comparison that isn't available in
13869 target = gen_reg_rtx (SImode);
13870 emit_move_insn (target, const0_rtx);
13871 target = gen_rtx_SUBREG (QImode, target, 0);
13873 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13874 op0 = copy_to_mode_reg (mode0, op0);
13875 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13876 op1 = copy_to_mode_reg (mode1, op1);
13878 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13879 pat = GEN_FCN (d->icode) (op0, op1);
13883 emit_insn (gen_rtx_SET (VOIDmode,
13884 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13885 gen_rtx_fmt_ee (comparison, QImode,
13889 return SUBREG_REG (target);
13892 /* Expand an expression EXP that calls a built-in function,
13893 with result going to TARGET if that's convenient
13894 (and in mode MODE if that's convenient).
13895 SUBTARGET may be used as the target for computing one of EXP's operands.
13896 IGNORE is nonzero if the value is to be ignored. */
13899 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13900 enum machine_mode mode ATTRIBUTE_UNUSED,
13901 int ignore ATTRIBUTE_UNUSED)
13903 const struct builtin_description *d;
13905 enum insn_code icode;
13906 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13907 tree arglist = TREE_OPERAND (exp, 1);
13908 tree arg0, arg1, arg2;
13909 rtx op0, op1, op2, pat;
13910 enum machine_mode tmode, mode0, mode1, mode2;
13911 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13915 case IX86_BUILTIN_EMMS:
13916 emit_insn (gen_emms ());
13919 case IX86_BUILTIN_SFENCE:
13920 emit_insn (gen_sfence ());
13923 case IX86_BUILTIN_PEXTRW:
13924 case IX86_BUILTIN_PEXTRW128:
13925 icode = (fcode == IX86_BUILTIN_PEXTRW
13926 ? CODE_FOR_mmx_pextrw
13927 : CODE_FOR_sse2_pextrw);
13928 arg0 = TREE_VALUE (arglist);
13929 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13930 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13931 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13932 tmode = insn_data[icode].operand[0].mode;
13933 mode0 = insn_data[icode].operand[1].mode;
13934 mode1 = insn_data[icode].operand[2].mode;
13936 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13937 op0 = copy_to_mode_reg (mode0, op0);
13938 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13940 error ("selector must be an integer constant in the range 0..%i",
13941 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
13942 return gen_reg_rtx (tmode);
13945 || GET_MODE (target) != tmode
13946 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13947 target = gen_reg_rtx (tmode);
13948 pat = GEN_FCN (icode) (target, op0, op1);
13954 case IX86_BUILTIN_PINSRW:
13955 case IX86_BUILTIN_PINSRW128:
13956 icode = (fcode == IX86_BUILTIN_PINSRW
13957 ? CODE_FOR_mmx_pinsrw
13958 : CODE_FOR_sse2_pinsrw);
13959 arg0 = TREE_VALUE (arglist);
13960 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13961 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13962 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13963 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13964 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13965 tmode = insn_data[icode].operand[0].mode;
13966 mode0 = insn_data[icode].operand[1].mode;
13967 mode1 = insn_data[icode].operand[2].mode;
13968 mode2 = insn_data[icode].operand[3].mode;
13970 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13971 op0 = copy_to_mode_reg (mode0, op0);
13972 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13973 op1 = copy_to_mode_reg (mode1, op1);
13974 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13976 error ("selector must be an integer constant in the range 0..%i",
13977 fcode == IX86_BUILTIN_PINSRW ? 15:255);
13981 || GET_MODE (target) != tmode
13982 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13983 target = gen_reg_rtx (tmode);
13984 pat = GEN_FCN (icode) (target, op0, op1, op2);
13990 case IX86_BUILTIN_MASKMOVQ:
13991 case IX86_BUILTIN_MASKMOVDQU:
13992 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13993 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13994 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13995 : CODE_FOR_sse2_maskmovdqu));
13996 /* Note the arg order is different from the operand order. */
13997 arg1 = TREE_VALUE (arglist);
13998 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13999 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14000 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14001 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14002 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14003 mode0 = insn_data[icode].operand[0].mode;
14004 mode1 = insn_data[icode].operand[1].mode;
14005 mode2 = insn_data[icode].operand[2].mode;
14007 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14008 op0 = copy_to_mode_reg (mode0, op0);
14009 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14010 op1 = copy_to_mode_reg (mode1, op1);
14011 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14012 op2 = copy_to_mode_reg (mode2, op2);
14013 pat = GEN_FCN (icode) (op0, op1, op2);
14019 case IX86_BUILTIN_SQRTSS:
14020 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14021 case IX86_BUILTIN_RSQRTSS:
14022 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14023 case IX86_BUILTIN_RCPSS:
14024 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14026 case IX86_BUILTIN_LOADAPS:
14027 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14029 case IX86_BUILTIN_LOADUPS:
14030 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14032 case IX86_BUILTIN_STOREAPS:
14033 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14035 case IX86_BUILTIN_STOREUPS:
14036 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14038 case IX86_BUILTIN_LOADSS:
14039 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14041 case IX86_BUILTIN_STORESS:
14042 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14044 case IX86_BUILTIN_LOADHPS:
14045 case IX86_BUILTIN_LOADLPS:
14046 case IX86_BUILTIN_LOADHPD:
14047 case IX86_BUILTIN_LOADLPD:
14048 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14049 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14050 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14051 : CODE_FOR_sse2_movsd);
14052 arg0 = TREE_VALUE (arglist);
14053 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14054 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14055 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14056 tmode = insn_data[icode].operand[0].mode;
14057 mode0 = insn_data[icode].operand[1].mode;
14058 mode1 = insn_data[icode].operand[2].mode;
14060 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14061 op0 = copy_to_mode_reg (mode0, op0);
14062 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14064 || GET_MODE (target) != tmode
14065 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14066 target = gen_reg_rtx (tmode);
14067 pat = GEN_FCN (icode) (target, op0, op1);
14073 case IX86_BUILTIN_STOREHPS:
14074 case IX86_BUILTIN_STORELPS:
14075 case IX86_BUILTIN_STOREHPD:
14076 case IX86_BUILTIN_STORELPD:
14077 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14078 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14079 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14080 : CODE_FOR_sse2_movsd);
14081 arg0 = TREE_VALUE (arglist);
14082 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14083 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14084 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14085 mode0 = insn_data[icode].operand[1].mode;
14086 mode1 = insn_data[icode].operand[2].mode;
14088 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14089 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14090 op1 = copy_to_mode_reg (mode1, op1);
14092 pat = GEN_FCN (icode) (op0, op0, op1);
14098 case IX86_BUILTIN_MOVNTPS:
14099 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14100 case IX86_BUILTIN_MOVNTQ:
14101 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14103 case IX86_BUILTIN_LDMXCSR:
14104 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14105 target = assign_386_stack_local (SImode, 0);
14106 emit_move_insn (target, op0);
14107 emit_insn (gen_ldmxcsr (target));
14110 case IX86_BUILTIN_STMXCSR:
14111 target = assign_386_stack_local (SImode, 0);
14112 emit_insn (gen_stmxcsr (target));
14113 return copy_to_mode_reg (SImode, target);
14115 case IX86_BUILTIN_SHUFPS:
14116 case IX86_BUILTIN_SHUFPD:
14117 icode = (fcode == IX86_BUILTIN_SHUFPS
14118 ? CODE_FOR_sse_shufps
14119 : CODE_FOR_sse2_shufpd);
14120 arg0 = TREE_VALUE (arglist);
14121 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14122 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14123 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14124 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14125 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14126 tmode = insn_data[icode].operand[0].mode;
14127 mode0 = insn_data[icode].operand[1].mode;
14128 mode1 = insn_data[icode].operand[2].mode;
14129 mode2 = insn_data[icode].operand[3].mode;
14131 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14132 op0 = copy_to_mode_reg (mode0, op0);
14133 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14134 op1 = copy_to_mode_reg (mode1, op1);
14135 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14137 /* @@@ better error message */
14138 error ("mask must be an immediate");
14139 return gen_reg_rtx (tmode);
14142 || GET_MODE (target) != tmode
14143 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14144 target = gen_reg_rtx (tmode);
14145 pat = GEN_FCN (icode) (target, op0, op1, op2);
14151 case IX86_BUILTIN_PSHUFW:
14152 case IX86_BUILTIN_PSHUFD:
14153 case IX86_BUILTIN_PSHUFHW:
14154 case IX86_BUILTIN_PSHUFLW:
14155 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14156 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14157 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14158 : CODE_FOR_mmx_pshufw);
14159 arg0 = TREE_VALUE (arglist);
14160 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14161 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14162 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14163 tmode = insn_data[icode].operand[0].mode;
14164 mode1 = insn_data[icode].operand[1].mode;
14165 mode2 = insn_data[icode].operand[2].mode;
14167 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14168 op0 = copy_to_mode_reg (mode1, op0);
14169 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14171 /* @@@ better error message */
14172 error ("mask must be an immediate");
14176 || GET_MODE (target) != tmode
14177 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14178 target = gen_reg_rtx (tmode);
14179 pat = GEN_FCN (icode) (target, op0, op1);
14185 case IX86_BUILTIN_PSLLDQI128:
14186 case IX86_BUILTIN_PSRLDQI128:
14187 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14188 : CODE_FOR_sse2_lshrti3);
14189 arg0 = TREE_VALUE (arglist);
14190 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14191 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14192 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14193 tmode = insn_data[icode].operand[0].mode;
14194 mode1 = insn_data[icode].operand[1].mode;
14195 mode2 = insn_data[icode].operand[2].mode;
14197 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14199 op0 = copy_to_reg (op0);
14200 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14202 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14204 error ("shift must be an immediate");
14207 target = gen_reg_rtx (V2DImode);
14208 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14214 case IX86_BUILTIN_FEMMS:
14215 emit_insn (gen_femms ());
14218 case IX86_BUILTIN_PAVGUSB:
14219 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14221 case IX86_BUILTIN_PF2ID:
14222 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14224 case IX86_BUILTIN_PFACC:
14225 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14227 case IX86_BUILTIN_PFADD:
14228 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14230 case IX86_BUILTIN_PFCMPEQ:
14231 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14233 case IX86_BUILTIN_PFCMPGE:
14234 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14236 case IX86_BUILTIN_PFCMPGT:
14237 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14239 case IX86_BUILTIN_PFMAX:
14240 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14242 case IX86_BUILTIN_PFMIN:
14243 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14245 case IX86_BUILTIN_PFMUL:
14246 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14248 case IX86_BUILTIN_PFRCP:
14249 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14251 case IX86_BUILTIN_PFRCPIT1:
14252 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14254 case IX86_BUILTIN_PFRCPIT2:
14255 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14257 case IX86_BUILTIN_PFRSQIT1:
14258 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14260 case IX86_BUILTIN_PFRSQRT:
14261 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14263 case IX86_BUILTIN_PFSUB:
14264 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14266 case IX86_BUILTIN_PFSUBR:
14267 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14269 case IX86_BUILTIN_PI2FD:
14270 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14272 case IX86_BUILTIN_PMULHRW:
14273 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14275 case IX86_BUILTIN_PF2IW:
14276 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14278 case IX86_BUILTIN_PFNACC:
14279 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14281 case IX86_BUILTIN_PFPNACC:
14282 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14284 case IX86_BUILTIN_PI2FW:
14285 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14287 case IX86_BUILTIN_PSWAPDSI:
14288 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14290 case IX86_BUILTIN_PSWAPDSF:
14291 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14293 case IX86_BUILTIN_SSE_ZERO:
14294 target = gen_reg_rtx (V4SFmode);
14295 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14298 case IX86_BUILTIN_MMX_ZERO:
14299 target = gen_reg_rtx (DImode);
14300 emit_insn (gen_mmx_clrdi (target));
14303 case IX86_BUILTIN_CLRTI:
14304 target = gen_reg_rtx (V2DImode);
14305 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14309 case IX86_BUILTIN_SQRTSD:
14310 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14311 case IX86_BUILTIN_LOADAPD:
14312 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14313 case IX86_BUILTIN_LOADUPD:
14314 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14316 case IX86_BUILTIN_STOREAPD:
14317 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14318 case IX86_BUILTIN_STOREUPD:
14319 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14321 case IX86_BUILTIN_LOADSD:
14322 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14324 case IX86_BUILTIN_STORESD:
14325 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14327 case IX86_BUILTIN_SETPD1:
14328 target = assign_386_stack_local (DFmode, 0);
14329 arg0 = TREE_VALUE (arglist);
14330 emit_move_insn (adjust_address (target, DFmode, 0),
14331 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14332 op0 = gen_reg_rtx (V2DFmode);
14333 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14334 emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
14337 case IX86_BUILTIN_SETPD:
14338 target = assign_386_stack_local (V2DFmode, 0);
14339 arg0 = TREE_VALUE (arglist);
14340 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14341 emit_move_insn (adjust_address (target, DFmode, 0),
14342 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14343 emit_move_insn (adjust_address (target, DFmode, 8),
14344 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14345 op0 = gen_reg_rtx (V2DFmode);
14346 emit_insn (gen_sse2_movapd (op0, target));
14349 case IX86_BUILTIN_LOADRPD:
14350 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14351 gen_reg_rtx (V2DFmode), 1);
14352 emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
14355 case IX86_BUILTIN_LOADPD1:
14356 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14357 gen_reg_rtx (V2DFmode), 1);
14358 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14361 case IX86_BUILTIN_STOREPD1:
14362 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14363 case IX86_BUILTIN_STORERPD:
14364 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14366 case IX86_BUILTIN_CLRPD:
14367 target = gen_reg_rtx (V2DFmode);
14368 emit_insn (gen_sse_clrv2df (target));
14371 case IX86_BUILTIN_MFENCE:
14372 emit_insn (gen_sse2_mfence ());
14374 case IX86_BUILTIN_LFENCE:
14375 emit_insn (gen_sse2_lfence ());
14378 case IX86_BUILTIN_CLFLUSH:
14379 arg0 = TREE_VALUE (arglist);
14380 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14381 icode = CODE_FOR_sse2_clflush;
14382 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14383 op0 = copy_to_mode_reg (Pmode, op0);
14385 emit_insn (gen_sse2_clflush (op0));
14388 case IX86_BUILTIN_MOVNTPD:
14389 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14390 case IX86_BUILTIN_MOVNTDQ:
14391 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14392 case IX86_BUILTIN_MOVNTI:
14393 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14395 case IX86_BUILTIN_LOADDQA:
14396 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14397 case IX86_BUILTIN_LOADDQU:
14398 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14399 case IX86_BUILTIN_LOADD:
14400 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14402 case IX86_BUILTIN_STOREDQA:
14403 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14404 case IX86_BUILTIN_STOREDQU:
14405 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14406 case IX86_BUILTIN_STORED:
14407 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14409 case IX86_BUILTIN_MONITOR:
14410 arg0 = TREE_VALUE (arglist);
14411 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14412 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14413 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14414 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14415 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14417 op0 = copy_to_mode_reg (SImode, op0);
14419 op1 = copy_to_mode_reg (SImode, op1);
14421 op2 = copy_to_mode_reg (SImode, op2);
14422 emit_insn (gen_monitor (op0, op1, op2));
14425 case IX86_BUILTIN_MWAIT:
14426 arg0 = TREE_VALUE (arglist);
14427 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14428 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14429 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14431 op0 = copy_to_mode_reg (SImode, op0);
14433 op1 = copy_to_mode_reg (SImode, op1);
14434 emit_insn (gen_mwait (op0, op1));
14437 case IX86_BUILTIN_LOADDDUP:
14438 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14440 case IX86_BUILTIN_LDDQU:
14441 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14448 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14449 if (d->code == fcode)
14451 /* Compares are treated specially. */
14452 if (d->icode == CODE_FOR_maskcmpv4sf3
14453 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14454 || d->icode == CODE_FOR_maskncmpv4sf3
14455 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14456 || d->icode == CODE_FOR_maskcmpv2df3
14457 || d->icode == CODE_FOR_vmmaskcmpv2df3
14458 || d->icode == CODE_FOR_maskncmpv2df3
14459 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14460 return ix86_expand_sse_compare (d, arglist, target);
14462 return ix86_expand_binop_builtin (d->icode, arglist, target);
14465 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14466 if (d->code == fcode)
14467 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14469 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14470 if (d->code == fcode)
14471 return ix86_expand_sse_comi (d, arglist, target);
14473 /* @@@ Should really do something sensible here. */
14477 /* Store OPERAND to the memory after reload is completed. This means
14478 that we can't easily use assign_stack_local. */
14480 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14483 if (!reload_completed)
14485 if (TARGET_RED_ZONE)
14487 result = gen_rtx_MEM (mode,
14488 gen_rtx_PLUS (Pmode,
14490 GEN_INT (-RED_ZONE_SIZE)));
14491 emit_move_insn (result, operand);
14493 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14499 operand = gen_lowpart (DImode, operand);
14503 gen_rtx_SET (VOIDmode,
14504 gen_rtx_MEM (DImode,
14505 gen_rtx_PRE_DEC (DImode,
14506 stack_pointer_rtx)),
14512 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14521 split_di (&operand, 1, operands, operands + 1);
14523 gen_rtx_SET (VOIDmode,
14524 gen_rtx_MEM (SImode,
14525 gen_rtx_PRE_DEC (Pmode,
14526 stack_pointer_rtx)),
14529 gen_rtx_SET (VOIDmode,
14530 gen_rtx_MEM (SImode,
14531 gen_rtx_PRE_DEC (Pmode,
14532 stack_pointer_rtx)),
14537 /* It is better to store HImodes as SImodes. */
14538 if (!TARGET_PARTIAL_REG_STALL)
14539 operand = gen_lowpart (SImode, operand);
14543 gen_rtx_SET (VOIDmode,
14544 gen_rtx_MEM (GET_MODE (operand),
14545 gen_rtx_PRE_DEC (SImode,
14546 stack_pointer_rtx)),
14552 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14557 /* Free operand from the memory. */
14559 ix86_free_from_memory (enum machine_mode mode)
14561 if (!TARGET_RED_ZONE)
14565 if (mode == DImode || TARGET_64BIT)
14567 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14571 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14572 to pop or add instruction if registers are available. */
14573 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14574 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14579 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14580 QImode must go into class Q_REGS.
14581 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14582 movdf to do mem-to-mem moves through integer regs. */
14584 ix86_preferred_reload_class (rtx x, enum reg_class class)
14586 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14588 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14590 /* SSE can't load any constant directly yet. */
14591 if (SSE_CLASS_P (class))
14593 /* Floats can load 0 and 1. */
14594 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14596 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14597 if (MAYBE_SSE_CLASS_P (class))
14598 return (reg_class_subset_p (class, GENERAL_REGS)
14599 ? GENERAL_REGS : FLOAT_REGS);
14603 /* General regs can load everything. */
14604 if (reg_class_subset_p (class, GENERAL_REGS))
14605 return GENERAL_REGS;
14606 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14607 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14610 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14612 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14617 /* If we are copying between general and FP registers, we need a memory
14618 location. The same is true for SSE and MMX registers.
14620 The macro can't work reliably when one of the CLASSES is class containing
14621 registers from multiple units (SSE, MMX, integer). We avoid this by never
14622 combining those units in single alternative in the machine description.
14623 Ensure that this constraint holds to avoid unexpected surprises.
14625 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14626 enforce these sanity checks. */
14628 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14629 enum machine_mode mode, int strict)
14631 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14632 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14633 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14634 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14635 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14636 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14643 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14644 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14645 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14646 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14647 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14649 /* Return the cost of moving data from a register in class CLASS1 to
14650 one in class CLASS2.
14652 It is not required that the cost always equal 2 when FROM is the same as TO;
14653 on some machines it is expensive to move between registers if they are not
14654 general registers. */
14656 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14657 enum reg_class class2)
14659 /* In case we require secondary memory, compute cost of the store followed
14660 by load. In order to avoid bad register allocation choices, we need
14661 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14663 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14667 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14668 MEMORY_MOVE_COST (mode, class1, 1));
14669 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14670 MEMORY_MOVE_COST (mode, class2, 1));
14672 /* In case of copying from general_purpose_register we may emit multiple
14673 stores followed by single load causing memory size mismatch stall.
14674 Count this as arbitrarily high cost of 20. */
14675 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14678 /* In the case of FP/MMX moves, the registers actually overlap, and we
14679 have to switch modes in order to treat them differently. */
14680 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14681 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14687 /* Moves between SSE/MMX and integer unit are expensive. */
14688 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14689 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14690 return ix86_cost->mmxsse_to_integer;
14691 if (MAYBE_FLOAT_CLASS_P (class1))
14692 return ix86_cost->fp_move;
14693 if (MAYBE_SSE_CLASS_P (class1))
14694 return ix86_cost->sse_move;
14695 if (MAYBE_MMX_CLASS_P (class1))
14696 return ix86_cost->mmx_move;
14700 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14702 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14704 /* Flags and only flags can only hold CCmode values. */
14705 if (CC_REGNO_P (regno))
14706 return GET_MODE_CLASS (mode) == MODE_CC;
14707 if (GET_MODE_CLASS (mode) == MODE_CC
14708 || GET_MODE_CLASS (mode) == MODE_RANDOM
14709 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14711 if (FP_REGNO_P (regno))
14712 return VALID_FP_MODE_P (mode);
14713 if (SSE_REGNO_P (regno))
14714 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14715 if (MMX_REGNO_P (regno))
14717 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14718 /* We handle both integer and floats in the general purpose registers.
14719 In future we should be able to handle vector modes as well. */
14720 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14722 /* Take care for QImode values - they can be in non-QI regs, but then
14723 they do cause partial register stalls. */
14724 if (regno < 4 || mode != QImode || TARGET_64BIT)
14726 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14729 /* Return the cost of moving data of mode M between a
14730 register and memory. A value of 2 is the default; this cost is
14731 relative to those in `REGISTER_MOVE_COST'.
14733 If moving between registers and memory is more expensive than
14734 between two registers, you should define this macro to express the
14737 Model also increased moving costs of QImode registers in non
14741 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14743 if (FLOAT_CLASS_P (class))
14760 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14762 if (SSE_CLASS_P (class))
14765 switch (GET_MODE_SIZE (mode))
14779 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14781 if (MMX_CLASS_P (class))
14784 switch (GET_MODE_SIZE (mode))
14795 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14797 switch (GET_MODE_SIZE (mode))
14801 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14802 : ix86_cost->movzbl_load);
14804 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14805 : ix86_cost->int_store[0] + 4);
14808 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14810 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14811 if (mode == TFmode)
14813 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14814 * (((int) GET_MODE_SIZE (mode)
14815 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14819 /* Compute a (partial) cost for rtx X. Return true if the complete
14820 cost has been computed, and false if subexpressions should be
14821 scanned. In either case, *TOTAL contains the cost result. */
14824 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14826 enum machine_mode mode = GET_MODE (x);
14834 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14836 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14838 else if (flag_pic && SYMBOLIC_CONST (x)
14840 || (!GET_CODE (x) != LABEL_REF
14841 && (GET_CODE (x) != SYMBOL_REF
14842 || !SYMBOL_REF_LOCAL_P (x)))))
14849 if (mode == VOIDmode)
14852 switch (standard_80387_constant_p (x))
14857 default: /* Other constants */
14862 /* Start with (MEM (SYMBOL_REF)), since that's where
14863 it'll probably end up. Add a penalty for size. */
14864 *total = (COSTS_N_INSNS (1)
14865 + (flag_pic != 0 && !TARGET_64BIT)
14866 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14872 /* The zero extensions is often completely free on x86_64, so make
14873 it as cheap as possible. */
14874 if (TARGET_64BIT && mode == DImode
14875 && GET_MODE (XEXP (x, 0)) == SImode)
14877 else if (TARGET_ZERO_EXTEND_WITH_AND)
14878 *total = COSTS_N_INSNS (ix86_cost->add);
14880 *total = COSTS_N_INSNS (ix86_cost->movzx);
14884 *total = COSTS_N_INSNS (ix86_cost->movsx);
14888 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14889 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14891 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14894 *total = COSTS_N_INSNS (ix86_cost->add);
14897 if ((value == 2 || value == 3)
14898 && !TARGET_DECOMPOSE_LEA
14899 && ix86_cost->lea <= ix86_cost->shift_const)
14901 *total = COSTS_N_INSNS (ix86_cost->lea);
14911 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14913 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14915 if (INTVAL (XEXP (x, 1)) > 32)
14916 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14918 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14922 if (GET_CODE (XEXP (x, 1)) == AND)
14923 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14925 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14930 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14931 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14933 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14938 if (FLOAT_MODE_P (mode))
14940 *total = COSTS_N_INSNS (ix86_cost->fmul);
14945 rtx op0 = XEXP (x, 0);
14946 rtx op1 = XEXP (x, 1);
14948 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14950 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14951 for (nbits = 0; value != 0; value &= value - 1)
14955 /* This is arbitrary. */
14958 /* Compute costs correctly for widening multiplication. */
14959 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
14960 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
14961 == GET_MODE_SIZE (mode))
14963 int is_mulwiden = 0;
14964 enum machine_mode inner_mode = GET_MODE (op0);
14966 if (GET_CODE (op0) == GET_CODE (op1))
14967 is_mulwiden = 1, op1 = XEXP (op1, 0);
14968 else if (GET_CODE (op1) == CONST_INT)
14970 if (GET_CODE (op0) == SIGN_EXTEND)
14971 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
14974 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
14978 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
14981 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14982 + nbits * ix86_cost->mult_bit)
14983 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
14992 if (FLOAT_MODE_P (mode))
14993 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14995 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
14999 if (FLOAT_MODE_P (mode))
15000 *total = COSTS_N_INSNS (ix86_cost->fadd);
15001 else if (!TARGET_DECOMPOSE_LEA
15002 && GET_MODE_CLASS (mode) == MODE_INT
15003 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15005 if (GET_CODE (XEXP (x, 0)) == PLUS
15006 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15007 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15008 && CONSTANT_P (XEXP (x, 1)))
15010 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15011 if (val == 2 || val == 4 || val == 8)
15013 *total = COSTS_N_INSNS (ix86_cost->lea);
15014 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15015 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15017 *total += rtx_cost (XEXP (x, 1), outer_code);
15021 else if (GET_CODE (XEXP (x, 0)) == MULT
15022 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15024 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15025 if (val == 2 || val == 4 || val == 8)
15027 *total = COSTS_N_INSNS (ix86_cost->lea);
15028 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15029 *total += rtx_cost (XEXP (x, 1), outer_code);
15033 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15035 *total = COSTS_N_INSNS (ix86_cost->lea);
15036 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15037 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15038 *total += rtx_cost (XEXP (x, 1), outer_code);
15045 if (FLOAT_MODE_P (mode))
15047 *total = COSTS_N_INSNS (ix86_cost->fadd);
15055 if (!TARGET_64BIT && mode == DImode)
15057 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15058 + (rtx_cost (XEXP (x, 0), outer_code)
15059 << (GET_MODE (XEXP (x, 0)) != DImode))
15060 + (rtx_cost (XEXP (x, 1), outer_code)
15061 << (GET_MODE (XEXP (x, 1)) != DImode)));
15067 if (FLOAT_MODE_P (mode))
15069 *total = COSTS_N_INSNS (ix86_cost->fchs);
15075 if (!TARGET_64BIT && mode == DImode)
15076 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15078 *total = COSTS_N_INSNS (ix86_cost->add);
15082 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15087 if (FLOAT_MODE_P (mode))
15088 *total = COSTS_N_INSNS (ix86_cost->fabs);
15092 if (FLOAT_MODE_P (mode))
15093 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15097 if (XINT (x, 1) == UNSPEC_TP)
15106 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15108 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
15111 fputs ("\tpushl $", asm_out_file);
15112 assemble_name (asm_out_file, XSTR (symbol, 0));
15113 fputc ('\n', asm_out_file);
15119 static int current_machopic_label_num;
15121 /* Given a symbol name and its associated stub, write out the
15122 definition of the stub. */
15125 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15127 unsigned int length;
15128 char *binder_name, *symbol_name, lazy_ptr_name[32];
15129 int label = ++current_machopic_label_num;
15131 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15132 symb = (*targetm.strip_name_encoding) (symb);
15134 length = strlen (stub);
15135 binder_name = alloca (length + 32);
15136 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15138 length = strlen (symb);
15139 symbol_name = alloca (length + 32);
15140 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15142 sprintf (lazy_ptr_name, "L%d$lz", label);
15145 machopic_picsymbol_stub_section ();
15147 machopic_symbol_stub_section ();
15149 fprintf (file, "%s:\n", stub);
15150 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15154 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15155 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15156 fprintf (file, "\tjmp %%edx\n");
15159 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15161 fprintf (file, "%s:\n", binder_name);
15165 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15166 fprintf (file, "\tpushl %%eax\n");
15169 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15171 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15173 machopic_lazy_symbol_ptr_section ();
15174 fprintf (file, "%s:\n", lazy_ptr_name);
15175 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15176 fprintf (file, "\t.long %s\n", binder_name);
15178 #endif /* TARGET_MACHO */
15180 /* Order the registers for register allocator. */
15183 x86_order_regs_for_local_alloc (void)
15188 /* First allocate the local general purpose registers. */
15189 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15190 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15191 reg_alloc_order [pos++] = i;
15193 /* Global general purpose registers. */
15194 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15195 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15196 reg_alloc_order [pos++] = i;
15198 /* x87 registers come first in case we are doing FP math
15200 if (!TARGET_SSE_MATH)
15201 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15202 reg_alloc_order [pos++] = i;
15204 /* SSE registers. */
15205 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15206 reg_alloc_order [pos++] = i;
15207 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15208 reg_alloc_order [pos++] = i;
15210 /* x87 registers. */
15211 if (TARGET_SSE_MATH)
15212 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15213 reg_alloc_order [pos++] = i;
15215 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15216 reg_alloc_order [pos++] = i;
15218 /* Initialize the rest of array as we do not allocate some registers
15220 while (pos < FIRST_PSEUDO_REGISTER)
15221 reg_alloc_order [pos++] = 0;
15224 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15225 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15228 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15229 struct attribute_spec.handler. */
15231 ix86_handle_struct_attribute (tree *node, tree name,
15232 tree args ATTRIBUTE_UNUSED,
15233 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15236 if (DECL_P (*node))
15238 if (TREE_CODE (*node) == TYPE_DECL)
15239 type = &TREE_TYPE (*node);
15244 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15245 || TREE_CODE (*type) == UNION_TYPE)))
15247 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15248 *no_add_attrs = true;
15251 else if ((is_attribute_p ("ms_struct", name)
15252 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15253 || ((is_attribute_p ("gcc_struct", name)
15254 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15256 warning ("`%s' incompatible attribute ignored",
15257 IDENTIFIER_POINTER (name));
15258 *no_add_attrs = true;
15265 ix86_ms_bitfield_layout_p (tree record_type)
15267 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15268 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15269 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15272 /* Returns an expression indicating where the this parameter is
15273 located on entry to the FUNCTION. */
15276 x86_this_parameter (tree function)
15278 tree type = TREE_TYPE (function);
15282 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15283 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15286 if (ix86_function_regparm (type, function) > 0)
15290 parm = TYPE_ARG_TYPES (type);
15291 /* Figure out whether or not the function has a variable number of
15293 for (; parm; parm = TREE_CHAIN (parm))
15294 if (TREE_VALUE (parm) == void_type_node)
15296 /* If not, the this parameter is in the first argument. */
15300 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15302 return gen_rtx_REG (SImode, regno);
15306 if (aggregate_value_p (TREE_TYPE (type), type))
15307 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15309 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15312 /* Determine whether x86_output_mi_thunk can succeed. */
15315 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15316 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15317 HOST_WIDE_INT vcall_offset, tree function)
15319 /* 64-bit can handle anything. */
15323 /* For 32-bit, everything's fine if we have one free register. */
15324 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15327 /* Need a free register for vcall_offset. */
15331 /* Need a free register for GOT references. */
15332 if (flag_pic && !(*targetm.binds_local_p) (function))
15335 /* Otherwise ok. */
15339 /* Output the assembler code for a thunk function. THUNK_DECL is the
15340 declaration for the thunk function itself, FUNCTION is the decl for
15341 the target function. DELTA is an immediate constant offset to be
15342 added to THIS. If VCALL_OFFSET is nonzero, the word at
15343 *(*this + vcall_offset) should be added to THIS. */
15346 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15347 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15348 HOST_WIDE_INT vcall_offset, tree function)
15351 rtx this = x86_this_parameter (function);
15354 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15355 pull it in now and let DELTA benefit. */
15358 else if (vcall_offset)
15360 /* Put the this parameter into %eax. */
15362 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15363 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15366 this_reg = NULL_RTX;
15368 /* Adjust the this parameter by a fixed constant. */
15371 xops[0] = GEN_INT (delta);
15372 xops[1] = this_reg ? this_reg : this;
15375 if (!x86_64_general_operand (xops[0], DImode))
15377 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15379 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15383 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15386 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15389 /* Adjust the this parameter by a value stored in the vtable. */
15393 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15396 int tmp_regno = 2 /* ECX */;
15397 if (lookup_attribute ("fastcall",
15398 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15399 tmp_regno = 0 /* EAX */;
15400 tmp = gen_rtx_REG (SImode, tmp_regno);
15403 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15406 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15408 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15410 /* Adjust the this parameter. */
15411 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15412 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15414 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15415 xops[0] = GEN_INT (vcall_offset);
15417 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15418 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15420 xops[1] = this_reg;
15422 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15424 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15427 /* If necessary, drop THIS back to its stack slot. */
15428 if (this_reg && this_reg != this)
15430 xops[0] = this_reg;
15432 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15435 xops[0] = XEXP (DECL_RTL (function), 0);
15438 if (!flag_pic || (*targetm.binds_local_p) (function))
15439 output_asm_insn ("jmp\t%P0", xops);
15442 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15443 tmp = gen_rtx_CONST (Pmode, tmp);
15444 tmp = gen_rtx_MEM (QImode, tmp);
15446 output_asm_insn ("jmp\t%A0", xops);
15451 if (!flag_pic || (*targetm.binds_local_p) (function))
15452 output_asm_insn ("jmp\t%P0", xops);
15457 const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15458 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15459 tmp = gen_rtx_MEM (QImode, tmp);
15461 output_asm_insn ("jmp\t%0", xops);
15464 #endif /* TARGET_MACHO */
15466 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15467 output_set_got (tmp);
15470 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15471 output_asm_insn ("jmp\t{*}%1", xops);
15477 x86_file_start (void)
15479 default_file_start ();
15480 if (X86_FILE_START_VERSION_DIRECTIVE)
15481 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15482 if (X86_FILE_START_FLTUSED)
15483 fputs ("\t.global\t__fltused\n", asm_out_file);
15484 if (ix86_asm_dialect == ASM_INTEL)
15485 fputs ("\t.intel_syntax\n", asm_out_file);
15489 x86_field_alignment (tree field, int computed)
15491 enum machine_mode mode;
15492 tree type = TREE_TYPE (field);
15494 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15496 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15497 ? get_inner_array_type (type) : type);
15498 if (mode == DFmode || mode == DCmode
15499 || GET_MODE_CLASS (mode) == MODE_INT
15500 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15501 return MIN (32, computed);
15505 /* Output assembler code to FILE to increment profiler label # LABELNO
15506 for profiling a function entry. */
15508 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15513 #ifndef NO_PROFILE_COUNTERS
15514 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15516 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15520 #ifndef NO_PROFILE_COUNTERS
15521 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15523 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15527 #ifndef NO_PROFILE_COUNTERS
15528 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15529 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15531 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15535 #ifndef NO_PROFILE_COUNTERS
15536 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15537 PROFILE_COUNT_REGISTER);
15539 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15543 /* We don't have exact information about the insn sizes, but we may assume
15544 quite safely that we are informed about all 1 byte insns and memory
15545 address sizes. This is enough to eliminate unnecessary padding in
15549 min_insn_size (rtx insn)
15553 if (!INSN_P (insn) || !active_insn_p (insn))
15556 /* Discard alignments we've emit and jump instructions. */
15557 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15558 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15560 if (GET_CODE (insn) == JUMP_INSN
15561 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15562 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15565 /* Important case - calls are always 5 bytes.
15566 It is common to have many calls in the row. */
15567 if (GET_CODE (insn) == CALL_INSN
15568 && symbolic_reference_mentioned_p (PATTERN (insn))
15569 && !SIBLING_CALL_P (insn))
15571 if (get_attr_length (insn) <= 1)
15574 /* For normal instructions we may rely on the sizes of addresses
15575 and the presence of symbol to require 4 bytes of encoding.
15576 This is not the case for jumps where references are PC relative. */
15577 if (GET_CODE (insn) != JUMP_INSN)
15579 l = get_attr_length_address (insn);
15580 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15589 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
15593 ix86_avoid_jump_misspredicts (void)
15595 rtx insn, start = get_insns ();
15596 int nbytes = 0, njumps = 0;
15599 /* Look for all minimal intervals of instructions containing 4 jumps.
15600 The intervals are bounded by START and INSN. NBYTES is the total
15601 size of instructions in the interval including INSN and not including
15602 START. When the NBYTES is smaller than 16 bytes, it is possible
15603 that the end of START and INSN ends up in the same 16byte page.
15605 The smallest offset in the page INSN can start is the case where START
15606 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15607 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15609 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15612 nbytes += min_insn_size (insn);
15614 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
15615 INSN_UID (insn), min_insn_size (insn));
15616 if ((GET_CODE (insn) == JUMP_INSN
15617 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15618 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15619 || GET_CODE (insn) == CALL_INSN)
15626 start = NEXT_INSN (start);
15627 if ((GET_CODE (start) == JUMP_INSN
15628 && GET_CODE (PATTERN (start)) != ADDR_VEC
15629 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15630 || GET_CODE (start) == CALL_INSN)
15631 njumps--, isjump = 1;
15634 nbytes -= min_insn_size (start);
15639 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
15640 INSN_UID (start), INSN_UID (insn), nbytes);
15642 if (njumps == 3 && isjump && nbytes < 16)
15644 int padsize = 15 - nbytes + min_insn_size (insn);
15647 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
15648 INSN_UID (insn), padsize);
15649 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15654 /* AMD Athlon works faster
15655 when RET is not destination of conditional jump or directly preceded
15656 by other jump instruction. We avoid the penalty by inserting NOP just
15657 before the RET instructions in such cases. */
15659 ix86_pad_returns (void)
15663 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15665 basic_block bb = e->src;
15666 rtx ret = BB_END (bb);
15668 bool replace = false;
15670 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15671 || !maybe_hot_bb_p (bb))
15673 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15674 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15676 if (prev && GET_CODE (prev) == CODE_LABEL)
15679 for (e = bb->pred; e; e = e->pred_next)
15680 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15681 && !(e->flags & EDGE_FALLTHRU))
15686 prev = prev_active_insn (ret);
15688 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15689 || GET_CODE (prev) == CALL_INSN))
15691 /* Empty functions get branch mispredict even when the jump destination
15692 is not visible to us. */
15693 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15698 emit_insn_before (gen_return_internal_long (), ret);
15704 /* Implement machine specific optimizations. We implement padding of returns
15705 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
15709 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
15710 ix86_pad_returns ();
15711 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
15712 ix86_avoid_jump_misspredicts ();
15715 /* Return nonzero when QImode register that must be represented via REX prefix
15718 x86_extended_QIreg_mentioned_p (rtx insn)
15721 extract_insn_cached (insn);
15722 for (i = 0; i < recog_data.n_operands; i++)
15723 if (REG_P (recog_data.operand[i])
15724 && REGNO (recog_data.operand[i]) >= 4)
15729 /* Return nonzero when P points to register encoded via REX prefix.
15730 Called via for_each_rtx. */
15732 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15734 unsigned int regno;
15737 regno = REGNO (*p);
15738 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15741 /* Return true when INSN mentions register that must be encoded using REX
15744 x86_extended_reg_mentioned_p (rtx insn)
15746 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15749 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15750 optabs would emit if we didn't have TFmode patterns. */
15753 x86_emit_floatuns (rtx operands[2])
15755 rtx neglab, donelab, i0, i1, f0, in, out;
15756 enum machine_mode mode, inmode;
15758 inmode = GET_MODE (operands[1]);
15759 if (inmode != SImode
15760 && inmode != DImode)
15764 in = force_reg (inmode, operands[1]);
15765 mode = GET_MODE (out);
15766 neglab = gen_label_rtx ();
15767 donelab = gen_label_rtx ();
15768 i1 = gen_reg_rtx (Pmode);
15769 f0 = gen_reg_rtx (mode);
15771 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15773 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15774 emit_jump_insn (gen_jump (donelab));
15777 emit_label (neglab);
15779 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15780 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15781 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15782 expand_float (f0, i0, 0);
15783 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15785 emit_label (donelab);
15788 /* Return if we do not know how to pass TYPE solely in registers. */
15790 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
15792 if (default_must_pass_in_stack (mode, type))
15794 return (!TARGET_64BIT && type && mode == TImode);
15797 /* Initialize vector TARGET via VALS. */
15799 ix86_expand_vector_init (rtx target, rtx vals)
15801 enum machine_mode mode = GET_MODE (target);
15802 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15803 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
15806 for (i = n_elts - 1; i >= 0; i--)
15807 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
15808 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
15811 /* Few special cases first...
15812 ... constants are best loaded from constant pool. */
15815 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15819 /* ... values where only first field is non-constant are best loaded
15820 from the pool and overwritten via move later. */
15823 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
15824 GET_MODE_INNER (mode), 0);
15826 op = force_reg (mode, op);
15827 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
15828 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15829 switch (GET_MODE (target))
15832 emit_insn (gen_sse2_movsd (target, target, op));
15835 emit_insn (gen_sse_movss (target, target, op));
15843 /* And the busy sequence doing rotations. */
15844 switch (GET_MODE (target))
15849 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
15851 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
15853 vecop0 = force_reg (V2DFmode, vecop0);
15854 vecop1 = force_reg (V2DFmode, vecop1);
15855 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
15861 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
15863 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
15865 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
15867 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
15868 rtx tmp1 = gen_reg_rtx (V4SFmode);
15869 rtx tmp2 = gen_reg_rtx (V4SFmode);
15871 vecop0 = force_reg (V4SFmode, vecop0);
15872 vecop1 = force_reg (V4SFmode, vecop1);
15873 vecop2 = force_reg (V4SFmode, vecop2);
15874 vecop3 = force_reg (V4SFmode, vecop3);
15875 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
15876 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
15877 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
15885 /* Worker function for TARGET_MD_ASM_CLOBBERS.
15887 We do this in the new i386 backend to maintain source compatibility
15888 with the old cc0-based compiler. */
15891 ix86_md_asm_clobbers (tree clobbers)
15893 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
15895 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
15897 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
15902 /* Worker function for REVERSE_CONDITION. */
15905 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
15907 return (mode != CCFPmode && mode != CCFPUmode
15908 ? reverse_condition (code)
15909 : reverse_condition_maybe_unordered (code));
15912 /* Output code to perform an x87 FP register move, from OPERANDS[1]
15916 output_387_reg_move (rtx insn, rtx *operands)
15918 if (REG_P (operands[1])
15919 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15921 if (REGNO (operands[0]) == FIRST_STACK_REG
15922 && TARGET_USE_FFREEP)
15923 return "ffreep\t%y0";
15924 return "fstp\t%y0";
15926 if (STACK_TOP_P (operands[0]))
15927 return "fld%z1\t%y1";
15931 #include "gt-i386.h"